git.wh0rd.org Git - ICEs.git/blob - hppa-ICE-insn-constraints/regex.i.9
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.9
/* Locale data record: an unsigned `nstrings` field followed by a flexible
   array of values, each of which can be read either as a `char *` string
   or as an unsigned word. */
1 struct locale_data {
2         unsigned int nstrings;
3         union locale_data_value {
4                 char *string;
5                 unsigned int word;
6         } values[];
7 };
/* Thread-local pointer to a pointer to locale data, declared with the
   "initial-exec" TLS model.  The functions in this file read
   (*_nl_current_LC_CTYPE)->values[0] through it. */
8 extern __thread struct locale_data **_nl_current_LC_CTYPE
9     __attribute__ ((tls_model("initial-exec")));
/* Syntax option word; code in this file tests individual bits of it
   with `&`. */
10 typedef unsigned long int reg_syntax_t;
/* Global syntax options; passed as the `syntax` argument by
   __re_compile_pattern. */
11 extern reg_syntax_t re_syntax_options;
/* Error code type; only one enumerator is present in this file. */
12 typedef enum {
13         REG_ENOSYS
14 } reg_errcode_t;
/* Pattern buffer handed to the functions below. */
15 typedef struct {
16         unsigned char *buffer;   /* cast to `re_dfa_t *` by the functions in this file */
17         reg_syntax_t syntax;     /* tested against 0x40000 and 0x4000 in re_compile_fastmap_iter */
18         char *fastmap;           /* table whose entries re_set_fastmap sets to 1 */
19         unsigned char *translate;        /* compared against 0 in re_compile_internal */
20         unsigned int re_nsub;    /* loop bound in analyze */
21 } regex_t;
/* Thread-local slot that __ctype_tolower_loc reinterprets as an `int *`
   table pointer. */
22 extern __thread void *__libc_tsd_CTYPE_TOLOWER;
/* Return the address of the thread-local `__libc_tsd_CTYPE_TOLOWER` slot,
   viewed as `int **`.  If the slot currently holds a null pointer it is
   first filled with `values[0].string` of the current LC_CTYPE data,
   cast to `int *` and advanced by 128 elements. */
23 inline int **__ctype_tolower_loc(void)
24 {
        /* The union reinterprets the `void **` as `int **` without a cast. */
25         union {
26                 void **ptr;
27                 int **tablep;
28         } u;
29         u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
30         if (*u.tablep == 0)
31                 *u.tablep =
32                     ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128);
33         return u.tablep;
34 }
/* Look up `__c` via the pointer returned by __ctype_tolower_loc().
   NOTE(review): `[]` binds tighter than unary `*`, so the expression
   parses as `*(__ctype_tolower_loc()[__c])`: the returned `int **` is
   indexed by `__c` and the selected `int *` is then dereferenced.  It
   does NOT index the table as `(*__ctype_tolower_loc())[__c]` would.
   Left untouched because this file appears to be a reduced
   compiler-crash reproducer -- confirm before changing. */
35 inline int tolower(int __c)
36 {
37         return *__ctype_tolower_loc()[__c];
38 }
/* Empty structure (no members); used as the type of the conversion state
   passed by address to __wcrtomb in re_compile_fastmap_iter. */
39 typedef struct {
40 } mbstate_t;
/* Both bitset typedefs are plain pointers to unsigned long words in this
   file. */
41 typedef unsigned long int *bitset_t;
42 typedef unsigned long int *re_bitset_ptr_t;
/* Set of node indices: `nelem` is used as the iteration bound over
   `elems` in re_compile_fastmap_iter. */
43 typedef struct {
44         int nelem;
45         int *elems;
46 } re_node_set;
/* Token/node kinds.  Values 0-7 are assigned directly, 8|0 .. 8|4 share
   bit 3, and CONCAT/SUBEXP/OP_DUP_PLUS take 16, 17 and 18.  Only
   CHARACTER and COMPLEX_BRACKET are referenced by the code in this file. */
47 typedef enum {
48         NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
49             3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
50             6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
51             8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
52             8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS = 18,
53 } re_token_type_t;
/* Multibyte character set: `nmbchars` is used as the bound when iterating
   over `mbchars` in re_compile_fastmap_iter. */
54 typedef struct {
55         long int *mbchars;
56         int nmbchars;
57 } re_charset_t;
/* One node/token: a union operand plus its kind stored in an 8-bit
   enum bit-field. */
58 typedef struct {
59         union {
60                 unsigned char c;         /* compared against 0x80 in re_compile_internal */
61                 re_bitset_ptr_t sbcset;  /* indexed word-by-word as a bitset */
62                 re_charset_t *mbcset;    /* read for COMPLEX_BRACKET nodes */
63         } opr;
64         re_token_type_t type:8;
65 } re_token_t;
/* Forward typedefs.  `struct bin_tree_t` is never defined in this file;
   it is only used through pointers. */
66 typedef struct re_dfa_t re_dfa_t;
67 typedef struct bin_tree_t bin_tree_t;
/* DFA state: in this file it carries only a node set. */
68 struct re_dfastate_t {
69         re_node_set nodes;
70 };
71 typedef struct re_dfastate_t re_dfastate_t;
/* Compiled-pattern state.  The functions in this file obtain it by
   casting `regex_t.buffer`. */
72 struct re_dfa_t {
73         re_token_t *nodes;               /* indexed by node number */
74         unsigned int nodes_len;          /* loop bound in re_compile_internal */
75         re_dfastate_t *init_state;       /* passed to re_compile_fastmap_iter */
76         bin_tree_t *str_tree;            /* passed to preorder() in analyze */
77         re_bitset_ptr_t sb_char;
78         int nbackref;
79         unsigned int has_mb_node:1;
80         unsigned int is_utf8:1;
81         unsigned int map_notascii:1;
82         int mb_cur_max;
83         int *subexp_map;                 /* analyze skips its body when this is 0 */
84 };
/* Forward declarations of file-local helpers.  `optimize_subexps` is
   declared here and passed as a callback in analyze, but no definition
   of it appears in this file. */
85 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
86                                          unsigned int length,
87                                          reg_syntax_t syntax);
88 static void re_compile_fastmap_iter(regex_t * bufp,
89                                     const re_dfastate_t * init_state,
90                                     char *fastmap);
91 static reg_errcode_t analyze(regex_t * preg);
92 static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/* Old-style (identifier-list) definition.  Only `pattern` has a
   declaration; `length` and `bufp` have none (pre-C99 compilers treat
   them as `int`).  The body forwards to re_compile_internal with the
   global `re_syntax_options` and stores the result in `ret`.
   NOTE(review): there is no return statement although the function is
   declared to return `const char *`, and `ret` is never read. */
93 const char *__re_compile_pattern(pattern, length, bufp)
94 const char *pattern;
95 {
96         reg_errcode_t ret;
97         ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
98 }
99
/* Run re_compile_fastmap_iter over the initial DFA state of `bufp`,
   writing into `bufp->fastmap`.
   NOTE(review): declared to return `int` but has no return statement. */
100 int __re_compile_fastmap(regex_t * bufp)
101 {
102         re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
103         char *fastmap = bufp->fastmap;
104         re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
105 }
/* When `icase` is non-zero, set `fastmap[__res]` to 1; when `icase` is
   zero, do nothing.
   NOTE(review): `__res` is assigned only on the path where `ch` is zero
   (the `else` of `if (ch)`), via tolower(ch).  For any non-zero `ch` the
   store below indexes `fastmap` with an uninitialised value. */
106 static inline void re_set_fastmap(char *fastmap, int icase, int ch)
107 {
108         if (icase) {
109                 int __res;
110                 if (sizeof(ch) > 1) {    /* `ch` is an int, so this tests sizeof(int) > 1 */
111                         if (ch) {
112                         } else
113                                 __res = tolower(ch);
114                 }
115                 fastmap[__res] = 1;
116         }
117 }
/* Walk every node index in `init_state->nodes` and, depending on the
   node's type in the DFA obtained from `bufp->buffer`, call
   re_set_fastmap on `fastmap`.
   `icase` is 1 only when `dfa->mb_cur_max == 1` and bit 0x40000 of
   `bufp->syntax` is set.
   NOTE(review): several locals (`i`, `j`, `ch`) are read without ever
   being initialised, and the inner loops have no increment, so this body
   is not meaningful as written; the file appears to be a reduced
   compiler-crash reproducer -- confirm before "fixing" anything.
   `alloca` and `__wcrtomb` are called without a declaration visible in
   this file. */
118 static void re_compile_fastmap_iter(regex_t * bufp,
119                                     const re_dfastate_t * init_state,
120                                     char *fastmap)
121 {
122         re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
123         int node_cnt;
124         int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
125         for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
126                 int node = init_state->nodes.elems[node_cnt];
127                 re_token_type_t type = dfa->nodes[node].type;
128                 if (type == CHARACTER) {
129                         if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
130                                 void *buf = alloca(dfa->mb_cur_max);     /* result is never used */
131                         }
132                         int i;   /* never initialised */
133                         int ch;  /* never initialised */
134                         for (; i < (256 / (sizeof(unsigned long int) * 8));) {   /* no increment of i */
135                                 int j;   /* never initialised */
136                                 unsigned long int w =
137                                     dfa->nodes[node].opr.sbcset[i];      /* reads the sbcset member although type is CHARACTER */
138                                 for (; j < 8;)   /* no increment of j */
139                                         if (w & 1)
140                                                 re_set_fastmap(fastmap, icase,
141                                                                ch);
142                         }
143                 } else if (type == COMPLEX_BRACKET) {
144                         int i;   /* never initialised */
145                         re_charset_t *cset = dfa->nodes[node].opr.mbcset;
146                         for (; i < cset->nmbchars;) {    /* no increment of i */
147                                 char buf[256];
148                                 mbstate_t state;
                                /* Convert mbchars[i] into buf; unless the call
                                   returns -1, mark its first byte. */
149                                 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
150                                     -1)
151                                         re_set_fastmap(fastmap, icase, *buf);
152                                 if ((bufp->syntax & 0x4000)
153                                     && dfa->mb_cur_max > 1) {    /* empty body */
154                                 }
155                         }
156                 }
157         }
158 }
/* File-scope bitset pointer.  Nothing in this file assigns it;
   re_compile_internal copies it into `dfa->sb_char` when `dfa->is_utf8`
   is set. */
159 static bitset_t utf8_sb_map;
/* Calls analyze(preg), then updates several flags of a DFA object from
   the current LC_CTYPE locale data and scans the DFA's nodes.
   The `pattern` and `length` parameters are not used by the body.
   NOTE(review): as written this body is not well-defined --
     - `dfa` is declared but never assigned before being dereferenced;
     - `err` is assigned but never read or returned;
     - `i`, `j`, `ch`, `node` and `has_period` are read uninitialised;
     - the only return is a bare `return;` in a function declared to
       return `reg_errcode_t`, and control can fall off the end;
     - `optimize_utf8`, `strlen`, `strcmp` and `__btowc` have no
       declaration visible in this file.
   The file appears to be a reduced compiler-crash reproducer, so the
   code is deliberately left untouched -- confirm before changing. */
160 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
161                                          unsigned int length,
162                                          reg_syntax_t syntax)
163 {
164         reg_errcode_t err;
165         re_dfa_t *dfa;   /* never assigned */
166         err = analyze(preg);
167         if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
168                 optimize_utf8(dfa);
169
/* The next three statements look like an expanded string-comparison macro:
   every arm of the conditional either calls strcmp() on
   values[0].string of the current LC_CTYPE data and "UTF-8", or
   subtracts 'U' from the first byte of that string. */
170 unsigned int __s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string));
171 unsigned int __s2_len = strlen("UTF-8");
172 int result = (((!((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) || __s1_len >= 4) && (!((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) || __s2_len >= 4)) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string)), __s1_len < 4) ? (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) && (__s2_len < 4) ? (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));
        /* Mark the DFA as UTF-8 when mb_cur_max is 6 and the comparison
           above yielded 0. */
173         if (dfa->mb_cur_max == 6 && result
174              == 0)
175                 dfa->is_utf8 = 1;
176         dfa->map_notascii = (*_nl_current_LC_CTYPE)->values[0].word != 0;
177         if (dfa->mb_cur_max > 1) {
178                 if (dfa->is_utf8)
179                         dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
180                 else {
181                         int i;   /* never initialised, never changed by the loop */
182                         int j;   /* never initialised */
183                         int ch;  /* never initialised */
184                         for (;
185                              i < (256 / (sizeof(unsigned long int) * 8));
186                              ++j, ++ch) {
187                                 unsigned int wch = __btowc(ch);
188                                 if (wch != (0xffffffffu))
189                                         dfa->sb_char[i] |= 1 << j;
190                         }
191                 }
192         }
193         int node;        /* never initialised, never incremented */
194         int i;           /* never initialised */
195         int mb_chars;    /* written below but never read */
196         int has_period;  /* never assigned; read at the end */
197         for (; node < dfa->nodes_len;)
198                 switch (dfa->nodes[node].type) {
199                 case CHARACTER:
200                         if (dfa->nodes[node].opr.c >= 0x80)
201                                 mb_chars = 1;
202                         for (; i < (256 / (sizeof(unsigned long int) * 8));)
203                                 if (dfa->nodes[node].opr.sbcset[i])
204                                         return;  /* bare return in a non-void function */
205                 }
206         dfa->has_mb_node = dfa->nbackref > 0 || has_period;
207 }
/* If the DFA taken from `preg->buffer` has a non-null `subexp_map`,
   write `subexp_map[i] = i`, call preorder() on the DFA's `str_tree` with
   optimize_subexps as callback and the DFA as extra argument, then scan
   for the first index whose map entry differs from the index.
   NOTE(review): `i` is never initialised, the first loop has no
   increment, `int i` is compared against the unsigned `re_nsub`,
   `preorder` has no declaration visible in this file, and the function
   has no return statement although it is declared to return
   `reg_errcode_t`. */
208 static reg_errcode_t analyze(regex_t * preg)
209 {
210         re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
211         if (dfa->subexp_map != 0) {
212                 int i;
213                 for (; i < preg->re_nsub;)
214                         dfa->subexp_map[i] = i;
215                 preorder(dfa->str_tree, optimize_subexps, dfa);
216                 for (; i < preg->re_nsub; i++)
217                         if (dfa->subexp_map[i] != i)
218                                 break;   /* the position found is not used afterwards */
219         }
220 }