git.wh0rd.org - ICEs.git/blame - hppa-ICE-insn-constraints/regex.i.4
add bfin ice
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.4
Commit Line Data
/* Locale data record: an unsigned count field (nstrings) followed by a
   flexible array member `values`, each element of which is a union
   holding either a string pointer or an unsigned word. */
45516216 1struct locale_data {
2 unsigned int nstrings;
3 union locale_data_value {
4 const char *string;
5 unsigned int word;
6 } values[];
7};
/* Thread-local pointer to a const pointer to struct locale_data, declared
   with the "initial-exec" TLS model.  Defined elsewhere. */
8extern __thread struct locale_data *const *_nl_current_LC_CTYPE
9 __attribute__ ((tls_model("initial-exec")));
/* Bit-mask type for regex syntax options; code in this file tests
   individual bits of values of this type with `&`. */
10typedef unsigned long int reg_syntax_t;
/* Global syntax option word, defined elsewhere; passed by
   __re_compile_pattern to re_compile_internal. */
11extern reg_syntax_t re_syntax_options;
/* Regex error codes.  REG_ENOSYS is -1, REG_NOERROR is 0, and the
   remaining enumerators count upwards from REG_NOMATCH (1). */
12typedef enum {
13 REG_ENOSYS = -1, REG_NOERROR =
14 0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE,
15 REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR,
16 REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN
17} reg_errcode_t;
/* Pattern buffer.  In this file `buffer` is always cast to `re_dfa_t *`
   before use; `syntax` holds option bits; `fastmap` is the table written
   by re_set_fastmap; `re_nsub` bounds the subexp_map loops in analyze().
   The three trailing members are 1-bit flags. */
18typedef struct {
19 unsigned char *buffer;
20 reg_syntax_t syntax;
21 char *fastmap;
22 unsigned char *translate;
23 unsigned int re_nsub;
24 unsigned fastmap_accurate:1;
25 unsigned not_bol:1;
26 unsigned not_eol:1;
27} regex_t;
/* Thread-local `void *` slot (initial-exec TLS model), defined elsewhere.
   __ctype_tolower_loc reinterprets its address as a `const int **`. */
28extern __thread void *__libc_tsd_CTYPE_TOLOWER
29 __attribute__ ((tls_model("initial-exec")));
/* Return the address of the thread-local tolower table pointer.
 *
 * The address of __libc_tsd_CTYPE_TOLOWER is stored through one union
 * member (`void **`) and read back through the other (`const int **`).
 * If the slot currently holds a null pointer, it is first set to
 * values[0].string of the current LC_CTYPE data, cast to `int *` and
 * advanced by 128 elements.
 */
30extern inline const int ** __attribute__ ((const))__ctype_tolower_loc(void)
31{
32 union {
33 void **ptr;
34 const int **tablep;
35 } u;
36 u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
/* Lazy initialisation; the null case is hinted as unlikely. */
37 if (__builtin_expect(*u.tablep == 0, 0))
38 *u.tablep =
39 ((int *)((*_nl_current_LC_CTYPE)->
40 values[0].
41 string) + 128);
42 return u.tablep;
43}
/* Look up __c in the table returned by __ctype_tolower_loc() when
   -128 <= __c < 256; any other value is returned unchanged. */
44extern
45__inline
46int
47tolower(int __c)
48{
49 return __c >= -128 && __c < 256 ? (*__ctype_tolower_loc())[__c] : __c;
50}
/* Multibyte conversion state, declared here as a struct with no members
   (an empty struct is a GNU C extension). */
51typedef struct {
52} mbstate_t;
/* Both names are aliases for `unsigned long int *`.  Note that bitset_t
   is a pointer type here, so sizeof(bitset_t) is the size of a pointer,
   not of a bit array. */
53typedef unsigned long int *bitset_t;
54typedef unsigned long int *re_bitset_ptr_t;
/* Set of node indices: `elems` is indexed from 0 up to (but excluding)
   `nelem` by re_compile_fastmap_iter. */
55typedef struct {
56 int nelem;
57 int *elems;
58} re_node_set;
/* Token / node kinds.  Values 0-7 are given explicitly; values 8-12 are
   written as `8 | n`; CONCAT is 16, SUBEXP 17, OP_DUP_PLUS 18, and every
   enumerator after OP_DUP_PLUS takes the next consecutive value. */
59typedef enum {
60 NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
61 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
62 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
63 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
64 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS =
65 18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET,
66 OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM,
67 OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM,
68 OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS,
69 OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE,
70 BACK_SLASH
71} re_token_type_t;
/* Character set used by COMPLEX_BRACKET nodes: `mbchars` is an array of
   `nmbchars` long values, each passed to __wcrtomb by
   re_compile_fastmap_iter. */
72typedef struct {
73 long int *mbchars;
74 int nmbchars;
75} re_charset_t;
/* One node/token: a union operand (`opr`) plus the node kind stored in an
   8-bit bit-field.  The union members overlap, so which one is meaningful
   presumably depends on `type` - verify against the code that fills it. */
76typedef struct {
77 union {
78 unsigned char c;
79 re_bitset_ptr_t sbcset;
80 re_charset_t *mbcset;
81 int idx;
82 } opr;
83 re_token_type_t type:8;
84} re_token_t;
/* Forward typedefs.  struct bin_tree_t is never defined in the visible
   source and is only used through pointers. */
85typedef struct re_dfa_t re_dfa_t;
86typedef struct bin_tree_t bin_tree_t;
/* A DFA state; here it carries only its set of node indices. */
87struct re_dfastate_t {
88 re_node_set nodes;
89};
90typedef struct re_dfastate_t re_dfastate_t;
/* Compiled-pattern state reached through regex_t.buffer.
 *
 * Uses visible in this file:
 *   nodes / nodes_len  - node array and its length (iterated in
 *                        re_compile_internal).
 *   nexts, org_indices, edests, eclosures
 *                      - only null-checked by analyze().
 *   init_state         - state handed to re_compile_fastmap_iter.
 *   str_tree           - tree passed to preorder() in analyze().
 *   sb_char            - bitset filled in re_compile_internal.
 *   subexp_map         - int array indexed up to regex_t.re_nsub.
 * has_mb_node, is_utf8 and map_notascii are 1-bit flags.
 */
91struct re_dfa_t {
92 re_token_t *nodes;
93 unsigned int nodes_len;
94 int *nexts;
95 int *org_indices;
96 re_node_set *edests;
97 re_node_set *eclosures;
98 re_dfastate_t *init_state;
99 bin_tree_t *str_tree;
100 re_bitset_ptr_t sb_char;
101 int nbackref;
102 unsigned int has_mb_node:1;
103 unsigned int is_utf8:1;
104 unsigned int map_notascii:1;
105 int mb_cur_max;
106 int *subexp_map;
107};
/* Forward declarations for the file-local helpers.
   re_compile_internal, re_compile_fastmap_iter and analyze are defined
   later in this file; no definition of optimize_subexps is visible here. */
108static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
109 unsigned int length,
110 reg_syntax_t syntax);
111static void re_compile_fastmap_iter(regex_t * bufp,
112 const re_dfastate_t * init_state,
113 char *fastmap);
114static reg_errcode_t analyze(regex_t * preg);
115static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/* Compile `pattern` by forwarding to re_compile_internal with the global
 * re_syntax_options.
 *
 * Old-style (K&R) definition: only `pattern` has a declaration, so
 * `length` and `bufp` have no declared type (implicit int in pre-C99
 * dialects) even though `bufp` is passed where a `regex_t *` is expected.
 *
 * NOTE(review): the result is stored in `ret` but never used, and the
 * function reaches its closing brace without returning a value despite
 * the `const char *` return type.
 */
116const char *__re_compile_pattern(pattern, length, bufp)
117const char *pattern;
118{
119 reg_errcode_t ret;
120 ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
121}
122
/* Fill bufp->fastmap by running re_compile_fastmap_iter over the DFA's
 * initial state.  bufp->buffer is reinterpreted as a `re_dfa_t *`.
 *
 * NOTE(review): declared to return int but contains no return statement.
 */
123int __re_compile_fastmap(bufp)
124regex_t *bufp;
125{
126 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
127 char *fastmap = bufp->fastmap;
128 re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
129}
/* Mark one entry of `fastmap`.
 *
 * As written, the function does nothing at all when `icase` is zero.
 * When `icase` is non-zero it stores 1 at fastmap[__res], where __res is
 * tolower(ch) if `ch` is not a compile-time constant.
 *
 * NOTE(review): if __builtin_constant_p(ch) is true the empty branch is
 * taken and __res is read without ever having been assigned.
 */
130static inline void
131 __attribute__ ((always_inline)) re_set_fastmap(char *fastmap, int icase,
132 int ch)
133{
134 if (icase) {
135 int __res;
/* `ch` is an int, so this size test is expected to hold on any target
   where sizeof(int) > 1. */
136 if (sizeof(ch) > 1) {
137 if (__builtin_constant_p(ch)) {
138 } else
139 __res = tolower(ch);
140 }
141 fastmap[__res] = 1;
142 }
143}
/* Walk every node index in init_state->nodes and update `fastmap`
 * according to the node's type.
 *
 *   CHARACTER        - scans 256 bits of the node's opr.sbcset word by
 *                      word and calls re_set_fastmap for each set bit.
 *   COMPLEX_BRACKET  - converts each entry of the node's mbchars with
 *                      __wcrtomb and passes the first output byte to
 *                      re_set_fastmap.
 * Nodes of any other type are skipped.
 *
 * __wcrtomb and towlower are not declared anywhere in the visible source.
 */
144static void re_compile_fastmap_iter(regex_t * bufp,
145 const re_dfastate_t * init_state,
146 char *fastmap)
147{
148 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
149 int node_cnt;
/* icase is non-zero only when mb_cur_max is 1 and syntax bit 0x40000
   is set. */
150 int icase = (dfa->mb_cur_max == 1
151 && (bufp->
152 syntax &
1530x40000));
154 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
155 int node = init_state->nodes.elems[node_cnt];
156 re_token_type_t type = dfa->nodes[node].type;
157 if (type == CHARACTER) {
/* The alloca'd buffer below is assigned to `buf` and never used
   afterwards within this block. */
158 if ((bufp->
159 syntax & 0x40000)
160&& dfa->mb_cur_max > 1) {
161 unsigned char *buf =
162 __builtin_alloca(dfa->mb_cur_max);
163 }
164 int i;
165 int ch;
/* Outer loop: one iteration per unsigned long word of a 256-bit set;
   inner loop: one iteration per bit, with `ch` counting bits overall. */
166 for (i = 0, ch = 0;
167 i < (256 / (sizeof(unsigned long int) * 8)); ++i) {
168 int j;
169 unsigned long int w =
170 dfa->nodes[node].opr.sbcset[i];
171 for (j = 0; j < (sizeof(unsigned long int) * 8);
172 ++j, ++ch)
173 if (w & ((unsigned long int)1 << j))
174 re_set_fastmap(fastmap, icase,
175 ch);
176 }
177 } else if (type == COMPLEX_BRACKET) {
178 int i;
179 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
180 for (i = 0; i < cset->nmbchars; ++i) {
181 char buf[256];
182 mbstate_t state;
/* A result equal to (unsigned int)-1 is treated as a conversion
   failure and the character is skipped. */
183 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
184 (unsigned int)-1)
185 re_set_fastmap(fastmap, icase,
186 *(unsigned char *)buf);
/* NOTE(review): this test uses mask 0x4000, whereas the tests above use
   0x40000 - confirm which bit is intended. */
187 if ((bufp->
188 syntax &
1890x4000)
190 && dfa->mb_cur_max > 1) {
191 if (__wcrtomb
192 (buf, towlower(cset->mbchars[i]),
193 &state) != (unsigned int)-1)
194 re_set_fastmap(fastmap, 0,
195 *(unsigned char
196 *)buf);
197 }
198 }
199 }
200 }
201}
/* File-local const pointer with no initializer (so it is a null pointer
   unless defined elsewhere in the translation unit); assigned to
   dfa->sb_char by re_compile_internal when dfa->is_utf8 is set. */
202static const bitset_t utf8_sb_map;
/* Internal compile step operating on `preg`.
 *
 * Visible sequence of work:
 *   1. clear preg's fastmap_accurate / not_bol / not_eol flags;
 *   2. call analyze(preg);
 *   3. optionally call optimize_utf8(dfa);
 *   4. set dfa->is_utf8 when mb_cur_max is 6 and the LC_CTYPE
 *      values[0].string compares equal to "UTF-8";
 *   5. derive dfa->map_notascii from values[0].word;
 *   6. set up dfa->sb_char for multibyte locales;
 *   7. scan dfa->nodes and set dfa->has_mb_node.
 *
 * `pattern` and `length` are not referenced in the body.
 * optimize_utf8, strlen, calloc and __btowc are not declared anywhere in
 * the visible source.
 *
 * NOTE(review): `dfa` is declared without an initializer and is never
 * assigned in this function, yet it is dereferenced throughout.
 * NOTE(review): the function has return type reg_errcode_t but its only
 * return statement is a bare `return;`, and the end of the function is
 * reached without returning a value; `err` is assigned but never read.
 */
203static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
204 unsigned int length,
205 reg_syntax_t syntax)
206{
207 reg_errcode_t err;
208 re_dfa_t *dfa;
209 preg->fastmap_accurate = 0;
210 preg->not_bol = preg->not_eol = 0;
211 err = analyze(preg);
/* Runs only when is_utf8 is set, syntax bit 0x400000 is clear and there
   is no translate table. */
212 if (dfa->is_utf8
213 && !(syntax &
2140x400000)
215&& preg->translate == 0)
216 optimize_utf8(dfa);
/* The statement expression below has the shape of an expanded inline
   strcmp macro: every arm compares values[0].string with "UTF-8", either
   through __builtin_strcmp or through a first-byte difference. */
217 if (dfa->mb_cur_max == 6 && __extension__( {
218 unsigned int __s1_len;
219 unsigned int __s2_len;
220 (__builtin_constant_p
221 (((*_nl_current_LC_CTYPE)->
222 values[0].string))
223 &&
224 __builtin_constant_p("UTF-8")
225 && (__s1_len =
226 strlen(((*_nl_current_LC_CTYPE)->values[
2270
228].string)), __s2_len = strlen("UTF-8"), (!((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[
2290].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[
2300
231].string)) == 1) || __s1_len >= 4) && (!((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) || __s2_len >= 4)) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[
2320
233].string), "UTF-8") : (__builtin_constant_p(((*_nl_current_LC_CTYPE)->values[
2340
235].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[
2360].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[
2370].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[
2380].string)), __s1_len < 4) ? (__builtin_constant_p("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[
2390
240].string), "UTF-8") : (__extension__( {
241 __const
242 unsigned
243 char
244 *__s2
245 =
246 (__const
247 unsigned
248 char
249 *)
250 (__const
251 char
252 *)
253 ("UTF-8");
254 register
255 int
256 __result
257 =
258 (((__const unsigned char *)(__const char *)(((*_nl_current_LC_CTYPE)->values[
2590
260].string)))[0] - __s2[0]); __result;}
261 ))): (__builtin_constant_p("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) && (__s2_len = strlen("UTF-8"), __s2_len < 4) ? (__builtin_constant_p(((*_nl_current_LC_CTYPE)->values[
2620].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[
2630].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[
2640].string)) == 1) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[
2650].string), "UTF-8") : (__extension__( {
266 __const
267 unsigned
268 char
269 *__s1
270 =
271 (__const
272 unsigned
273 char
274 *)
275 (__const
276 char
277 *)
278 (((*_nl_current_LC_CTYPE)->values[
2790
280].string)); register int __result = __s1[0] - ((__const unsigned char *)(__const char *)("UTF-8"))[0]; __result;}
281 ))): __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[
2820
283].string), "UTF-8"))));}
284 ) == 0)
285 dfa->is_utf8 = 1;
/* map_notascii becomes 1 when values[0].word is non-zero. */
286 dfa->map_notascii =
287 (((unsigned int)(*_nl_current_LC_CTYPE)->
288 values[
2890
290].word) != 0);
291 if (dfa->mb_cur_max > 1) {
292 if (dfa->is_utf8)
293 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
294 else {
295 int i;
296 int j;
297 int ch;
/* NOTE(review): bitset_t is a pointer typedef, so this allocates the size
   of one pointer, while the loops below write
   256 / (sizeof(unsigned long int) * 8) words - confirm the intended
   allocation size.  The calloc result is also used without a null check. */
298 dfa->sb_char =
299 (re_bitset_ptr_t) calloc(sizeof(bitset_t), 1);
/* Set bit `ch` of sb_char for every byte value 0..255 whose __btowc
   result is not 0xffffffff. */
300 for (i = 0, ch = 0;
301 i < (256 / (sizeof(unsigned long int) * 8)); ++i)
302 for (j = 0; j < (sizeof(unsigned long int) * 8);
303 ++j, ++ch) {
304 unsigned int wch = __btowc(ch);
305 if (wch != (0xffffffffu))
306 dfa->sb_char[i] |=
307 (unsigned long int)1 << j;
308 }
309 }
310 }
311 int node;
312 int i;
313 int mb_chars = 0;
314 int has_period = 0;
/* None of the case labels below ends in `break`, so CHARACTER falls
   through into ANCHOR and then OP_PERIOD, and ANCHOR falls through into
   OP_PERIOD.  `mb_chars` is written but never read afterwards. */
315 for (node = 0; node < dfa->nodes_len; ++node)
316 switch (dfa->nodes[node].type) {
317 case CHARACTER:
318 if (dfa->nodes[node].opr.c >= 0x80)
319 mb_chars = 1;
320 case ANCHOR:
321 switch (dfa->nodes[node].opr.idx) {
322 }
323 case OP_PERIOD:
324 has_period = 1;
/* Inspect the words of opr.sbcset covering bit positions 0x80..0xff and
   leave the function as soon as any of them is non-zero. */
325 for (i = 0x80 / (sizeof(unsigned long int) * 8);
326 i < (256 / (sizeof(unsigned long int) * 8)); ++i)
327 if (dfa->nodes[node].opr.sbcset[i])
328 return;
329 }
330 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
331}
/* Validate and post-process the DFA stored in preg->buffer.
 *
 * Returns REG_ESPACE when any of dfa->nexts, org_indices, edests or
 * eclosures is null.  Otherwise, if dfa->subexp_map is non-null, it is
 * initialised to the identity mapping over [0, preg->re_nsub), then
 * preorder() is called on dfa->str_tree with optimize_subexps and dfa,
 * and finally the map is scanned for the first non-identity entry.
 *
 * preorder is not declared anywhere in the visible source.
 *
 * NOTE(review): the result of the final scan is not used, and the
 * function reaches its closing brace without returning a value on every
 * path except the REG_ESPACE one.
 */
332static reg_errcode_t analyze(regex_t * preg)
333{
334 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
/* The failure case is hinted as unlikely. */
335 if (__builtin_expect
336 (dfa->nexts == 0 || dfa->org_indices == 0 || dfa->edests == 0
337 || dfa->eclosures == 0, 0))
338 return REG_ESPACE;
339 if (dfa->subexp_map != 0) {
340 int i;
341 for (i = 0; i < preg->re_nsub; i++)
342 dfa->subexp_map[i] = i;
343 preorder(dfa->str_tree, optimize_subexps, dfa);
344 for (i = 0; i < preg->re_nsub; i++)
345 if (dfa->subexp_map[i] != i)
346 break;
347 }
348}