]> git.wh0rd.org - ICEs.git/blame - hppa-ICE-insn-constraints/regex.i.3
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.3
CommitLineData
/* Locale item identifiers.  Where this file uses them (the _NL_CTYPE_*
   members), the value is masked with 0xffff and used as an index into
   locale_data.values[]. */
45516216 1enum {
2 _NL_COLLATE_NRULES, _NL_COLLATE_TABLEMB, _NL_CTYPE_TOLOWER,
3 _NL_CTYPE_CODESET_NAME, _NL_CTYPE_MAP_TO_NONASCII
4};
/* Locale record: a count followed by a flexible array of value slots. */
5struct locale_data {
/* presumably the number of entries in values[] - never read in this file */
6 unsigned int nstrings;
/* One slot; the code reads either .string or .word depending on the item. */
7 union locale_data_value {
8 const char *string;
9 unsigned int word;
10 } values[];
11};
/* Thread-local (initial-exec TLS model) pointer that is dereferenced once to
   reach the locale_data record consulted for the _NL_CTYPE_* items. */
12extern __thread struct locale_data *const *_nl_current_LC_CTYPE
13 __attribute__ ((tls_model("initial-exec")));
/* Bit mask of regex syntax option flags. */
14typedef unsigned long int reg_syntax_t;
/* Syntax flags that __re_compile_pattern forwards to re_compile_internal. */
15extern reg_syntax_t re_syntax_options;
/* Result codes of the compile/analyze routines.  REG_ENOSYS is -1,
   REG_NOERROR is 0, and the remaining codes count upwards from 1. */
16typedef enum {
17 REG_ENOSYS = -1, REG_NOERROR =
18 0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE,
19 REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR,
20 REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN
21} reg_errcode_t;
/* Compiled pattern buffer handed to every routine in this file. */
22typedef struct {
/* Cast to re_dfa_t * by the routines below to reach the DFA. */
23 unsigned char *buffer;
/* Syntax flags tested with bit masks while building the fastmap. */
24 reg_syntax_t syntax;
/* Byte-indexed map filled in through re_set_fastmap. */
25 char *fastmap;
/* Only compared against 0 in this file. */
26 unsigned char *translate;
/* Upper bound of the subexp_map initialisation loop in analyze. */
27 unsigned int re_nsub;
/* One-bit flags; all three are cleared by re_compile_internal. */
28 unsigned fastmap_accurate:1;
29 unsigned not_bol:1;
30 unsigned not_eol:1;
31} regex_t;
/* Thread-local slot (initial-exec TLS model) that __ctype_tolower_loc
   reinterprets as a `const int *` and fills in on first use. */
32extern __thread void *__libc_tsd_CTYPE_TOLOWER
33 __attribute__ ((tls_model("initial-exec")));
34extern inline const int ** __attribute__ ((const))__ctype_tolower_loc(void)
35{
36 union {
37 void **ptr;
38 const int **tablep;
39 } u;
40 u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
41 if (__builtin_expect(*u.tablep == 0, 0))
42 *u.tablep =
43 ((int *)((*_nl_current_LC_CTYPE)->
44 values[((int)(_NL_CTYPE_TOLOWER) & 0xffff)].
45 string) + 128);
46 return u.tablep;
47}
48extern __inline int __attribute__ ((__nothrow__)) tolower(int __c)
49{
50 return __c >= -128 && __c < 256 ? (*__ctype_tolower_loc())[__c] : __c;
51}
/* Conversion-state type passed to __wcrtomb; declared here as an empty
   struct. */
52typedef struct {
53} mbstate_t;
/* Both bit-set types are plain pointers to unsigned long words here, so
   sizeof(bitset_t) is the size of a pointer, not of a bit array. */
54typedef unsigned long int *bitset_t;
55typedef unsigned long int *re_bitset_ptr_t;
/* Set of DFA nodes: nelem entries in elems, each an index into
   re_dfa_t.nodes. */
56typedef struct {
57 int nelem;
58 int *elems;
59} re_node_set;
/* Token / node kinds.  Values 0-7 and the `8 | n` group (8-12) are given
   explicitly, CONCAT is 16, SUBEXP is 17, OP_DUP_PLUS is 18 and the
   remaining enumerators continue from 19.  re_token_t stores the kind in an
   8-bit bit-field. */
60typedef enum {
61 NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
62 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
63 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
64 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
65 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS =
66 18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET,
67 OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM,
68 OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM,
69 OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS,
70 OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE,
71 BACK_SLASH
72} re_token_type_t;
/* Character set of a COMPLEX_BRACKET node: nmbchars entries in mbchars,
   each handed to __wcrtomb when the fastmap is built. */
73typedef struct {
74 long int *mbchars;
75 int nmbchars;
76} re_charset_t;
/* One DFA node: a kind plus a payload whose active member depends on the
   kind. */
77typedef struct {
78 union {
/* Compared against 0x80 for CHARACTER nodes. */
79 unsigned char c;
/* Bit set read one unsigned long word at a time. */
80 re_bitset_ptr_t sbcset;
/* Read for COMPLEX_BRACKET nodes. */
81 re_charset_t *mbcset;
82 int idx;
83 } opr;
/* Node kind, stored in 8 bits. */
84 re_token_type_t type:8;
85} re_token_t;
/* Forward typedefs; bin_tree_t is only used through pointers in this file. */
86typedef struct re_dfa_t re_dfa_t;
87typedef struct bin_tree_t bin_tree_t;
/* DFA state: the set of node indices it contains. */
88struct re_dfastate_t {
89 re_node_set nodes;
90};
91typedef struct re_dfastate_t re_dfastate_t;
/* Compiled automaton reached through regex_t.buffer. */
92struct re_dfa_t {
/* Node table, nodes_len entries, indexed by node number. */
93 re_token_t *nodes;
94 unsigned int nodes_len;
/* analyze reports REG_ESPACE if any of the next four pointers is null. */
95 int *nexts;
96 int *org_indices;
97 re_node_set *edests;
98 re_node_set *eclosures;
/* State whose nodes seed the fastmap. */
99 re_dfastate_t *init_state;
/* Tree passed to preorder() by analyze. */
100 bin_tree_t *str_tree;
/* Bit set assigned by re_compile_internal when mb_cur_max > 1. */
101 re_bitset_ptr_t sb_char;
102 int nbackref;
103 unsigned int has_mb_node:1;
104 unsigned int is_utf8:1;
105 unsigned int map_notascii:1;
/* Compared against 1 and 6 in this file; presumably the maximum bytes per
   character of the locale - confirm. */
106 int mb_cur_max;
/* When non-null, analyze initialises entry i to i for i < re_nsub. */
107 int *subexp_map;
108};
/* Forward declarations of the file-local helpers defined or referenced
   below. */
109static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
110 unsigned int length,
111 reg_syntax_t syntax);
112static void re_compile_fastmap_iter(regex_t * bufp,
113 const re_dfastate_t * init_state,
114 char *fastmap);
115static reg_errcode_t analyze(regex_t * preg);
/* Passed as the callback argument to preorder(); no definition is visible
   in this chunk. */
116static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
117const char *__re_compile_pattern(pattern, length, bufp)
118const char *pattern;
119{
120 reg_errcode_t ret;
121 ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
122}
123
124int __re_compile_fastmap(bufp)
125regex_t *bufp;
126{
127 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
128 char *fastmap = bufp->fastmap;
129 re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
130}
/*
 * Mark the lower-case form of `ch` in `fastmap`.
 *
 * fastmap: byte-indexed map; the entry at tolower(ch) is set to 1.
 * icase:   when zero the function does nothing.
 * ch:      character code to fold and record.
 *
 * The index is always computed with tolower(), so it is never read from an
 * unset local, whether or not the compiler can see `ch` as a constant.
 */
static inline void
__attribute__ ((always_inline)) re_set_fastmap(char *fastmap, int icase,
					       int ch)
{
	if (!icase)
		return;
	fastmap[tolower(ch)] = 1;
}
/*
 * Walk the nodes of init_state and record, through re_set_fastmap, the bytes
 * found in CHARACTER and COMPLEX_BRACKET nodes.
 *
 * bufp:       supplies the DFA (bufp->buffer) and the syntax flags.
 * init_state: state whose node indices are visited.
 * fastmap:    map handed to re_set_fastmap.
 *
 * NOTE(review): the syntax masks below are spelled as `1` shifted left one
 * bit at a time; presumably they select the ignore-case flag - confirm the
 * bit position against the syntax-flag definitions.
 */
146static void re_compile_fastmap_iter(regex_t * bufp,
147 const re_dfastate_t * init_state,
148 char *fastmap)
149{
150 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
151 int node_cnt;
/* icase is non-zero only when mb_cur_max is 1 and the syntax bit is set. */
152 int icase = (dfa->mb_cur_max == 1
153 && (bufp->
154 syntax &
155 ((((((((((((((((((((((((unsigned long int)1) << 1) <<
156 1) << 1) << 1) << 1) << 1) << 1)
157 << 1) << 1) << 1) << 1) << 1) << 1) <<
158 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)
159 << 1)));
160 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
161 int node = init_state->nodes.elems[node_cnt];
162 re_token_type_t type = dfa->nodes[node].type;
163 if (type == CHARACTER) {
/* buf and p are allocated/declared here but not used afterwards. */
164 if ((bufp->
165 syntax &
166 ((((((((((((((((((((((((unsigned long int)1) << 1)
167 << 1) << 1) << 1) << 1) << 1)
168 << 1) << 1) << 1) << 1) << 1) << 1)
169 << 1) << 1) << 1) << 1) << 1) << 1) << 1)
170 << 1) << 1) << 1)) && dfa->mb_cur_max > 1) {
171 unsigned char *buf =
172 __builtin_alloca(dfa->mb_cur_max), *p;
173 }
/* Scan 256 bits of opr.sbcset word by word; ch tracks the bit number and
   every set bit is reported to re_set_fastmap.
   NOTE(review): opr is a union and this branch handles CHARACTER nodes -
   confirm that sbcset is the active member here. */
174 int i;
175 int ch;
176 for (i = 0, ch = 0;
177 i < (256 / (sizeof(unsigned long int) * 8)); ++i) {
178 int j;
179 unsigned long int w =
180 dfa->nodes[node].opr.sbcset[i];
181 for (j = 0; j < (sizeof(unsigned long int) * 8);
182 ++j, ++ch)
183 if (w & ((unsigned long int)1 << j))
184 re_set_fastmap(fastmap, icase,
185 ch);
186 }
187 } else if (type == COMPLEX_BRACKET) {
188 int i;
189 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
/* For each wide character of the set: convert it with __wcrtomb and, unless
   the call returns (unsigned int)-1, record the first converted byte. */
190 for (i = 0; i < cset->nmbchars; ++i) {
191 char buf[256];
/* NOTE(review): state is passed to __wcrtomb without being initialised. */
192 mbstate_t state;
193 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
194 (unsigned int)-1)
195 re_set_fastmap(fastmap, icase,
196 *(unsigned char *)buf);
/* With the syntax bit set and mb_cur_max > 1, repeat for the towlower()
   form of the character, passing icase as 0. */
197 if ((bufp->
198 syntax &
199 ((((((((((((((((((((((((unsigned long int)
200 1) << 1) << 1) << 1)
201 << 1) << 1) << 1) << 1)
202 << 1) << 1) << 1) << 1) <<
203 1) << 1) << 1) << 1) << 1) << 1)
204 << 1) << 1) << 1) << 1) << 1))
205 && dfa->mb_cur_max > 1) {
206 if (__wcrtomb
207 (buf, towlower(cset->mbchars[i]),
208 &state) != (unsigned int)-1)
209 re_set_fastmap(fastmap, 0,
210 *(unsigned char
211 *)buf);
212 }
213 }
214 }
215 }
216}
/* Bit set that re_compile_internal installs as dfa->sb_char for UTF-8.
   NOTE(review): bitset_t is a pointer type in this file and there is no
   initialiser, so as declared here this is a constant null pointer. */
217static const bitset_t utf8_sb_map;
218static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
219 unsigned int length,
220 reg_syntax_t syntax)
221{
222 reg_errcode_t err = REG_NOERROR;
223 re_dfa_t *dfa;
224 preg->fastmap_accurate = 0;
225 preg->not_bol = preg->not_eol = 0;
226 err = analyze(preg);
227 if (dfa->is_utf8
228 && !(syntax &
229 ((((((((((((((((((((((((unsigned long int)1) << 1) << 1) << 1)
230 << 1) << 1) << 1) << 1) << 1) << 1) << 1) <<
231 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)
232 << 1) << 1) << 1)) && preg->translate == 0)
233 optimize_utf8(dfa);
234 if (dfa->mb_cur_max == 6 && __extension__( {
235 unsigned int __s1_len,
236 __s2_len;
237 (__builtin_constant_p
238 (((*_nl_current_LC_CTYPE)->
239 values[((int)
240 (_NL_CTYPE_CODESET_NAME)
241 & 0xffff)].string))
242 &&
243 __builtin_constant_p("UTF-8")
244 && (__s1_len =
245 strlen(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)), __s2_len = strlen("UTF-8"), (!((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) || __s1_len >= 4) && (!((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) || __s2_len >= 4)) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__builtin_constant_p(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)), __s1_len < 4) ? (__builtin_constant_p("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__extension__( {
246 __const
247 unsigned
248 char
249 *__s2
250 =
251 (__const
252 unsigned
253 char
254 *)
255 (__const
256 char
257 *)
258 ("UTF-8");
259 register
260 int
261 __result
262 =
263 (((__const unsigned char *)(__const char *)(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)))[0] - __s2[0]); __result;}
264 ))): (__builtin_constant_p("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) && (__s2_len = strlen("UTF-8"), __s2_len < 4) ? (__builtin_constant_p(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__extension__( {
265 __const
266 unsigned
267 char
268 *__s1
269 =
270 (__const
271 unsigned
272 char
273 *)
274 (__const
275 char
276 *)
277 (((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string)); register int __result = __s1[0] - ((__const unsigned char *)(__const char *)("UTF-8"))[0]; if (__s2_len > 0 && __result == 0) {
278 }
279 __result;}
280 ))): __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[((int)(_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8"))));}
281 ) == 0)
282 dfa->is_utf8 = 1;
283 dfa->map_notascii =
284 (((unsigned int)(*_nl_current_LC_CTYPE)->
285 values[((int)(_NL_CTYPE_MAP_TO_NONASCII) & 0xffff)].word) != 0);
286 if (dfa->mb_cur_max > 1) {
287 if (dfa->is_utf8)
288 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
289 else {
290 int i, j, ch;
291 dfa->sb_char =
292 (re_bitset_ptr_t) calloc(sizeof(bitset_t), 1);
293 if (__builtin_expect(dfa->sb_char == 0, 0))
294 return REG_ESPACE;
295 for (i = 0, ch = 0;
296 i < (256 / (sizeof(unsigned long int) * 8)); ++i)
297 for (j = 0;
298 j < (sizeof(unsigned long int) * 8);
299 ++j, ++ch) {
300 unsigned int wch = __btowc(ch);
301 if (wch != (0xffffffffu))
302 dfa->sb_char[i] |=
303 (unsigned long int)1 << j;
304 }
305 }
306 }
307 int node, i, mb_chars = 0, has_period = 0;
308 for (node = 0; node < dfa->nodes_len; ++node)
309 switch (dfa->nodes[node].type) {
310 case CHARACTER:
311 if (dfa->nodes[node].opr.c >= 0x80)
312 mb_chars = 1;
313 case ANCHOR:
314 switch (dfa->nodes[node].opr.idx) {
315 }
316 case OP_PERIOD:
317 has_period = 1;
318 for (i = 0x80 / (sizeof(unsigned long int) * 8);
319 i < (256 / (sizeof(unsigned long int) * 8)); ++i)
320 if (dfa->nodes[node].opr.sbcset[i])
321 return;
322 }
323 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
324}
325static reg_errcode_t analyze(regex_t * preg)
326{
327 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
328 if (__builtin_expect
329 (dfa->nexts == 0 || dfa->org_indices == 0 || dfa->edests == 0
330 || dfa->eclosures == 0, 0))
331 return REG_ESPACE;
332 if (dfa->subexp_map != 0) {
333 int i;
334 for (i = 0; i < preg->re_nsub; i++)
335 dfa->subexp_map[i] = i;
336 preorder(dfa->str_tree, optimize_subexps, dfa);
337 for (i = 0; i < preg->re_nsub; i++)
338 if (dfa->subexp_map[i] != i)
339 break;
340 }
341}