git.wh0rd.org - ICEs.git/blame - hppa-ICE-insn-constraints/regex.i.5
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.5
CommitLineData
/* Locale data record: an `nstrings` field followed by a flexible array
   member of values.  Each value is a union read either as a string
   pointer (`string`) or as an unsigned word (`word`). */
45516216 1struct locale_data {
2 unsigned int nstrings;
3 union locale_data_value {
4 const char *string;
5 unsigned int word;
6 } values[];
7};
/* Thread-local pointer to a (const) pointer to struct locale_data, declared
   with the initial-exec TLS model.  Code in this file dereferences it as
   (*_nl_current_LC_CTYPE)->values[0]. */
8extern __thread struct locale_data *const *_nl_current_LC_CTYPE
9 __attribute__ ((tls_model("initial-exec")));
/* Integer type for regex syntax option bits (tested with bit masks in this
   file), and the externally defined value that __re_compile_pattern passes
   to re_compile_internal. */
10typedef unsigned long int reg_syntax_t;
11extern reg_syntax_t re_syntax_options;
/* Regex error codes.  REG_ENOSYS is -1 and REG_NOERROR is 0; the remaining
   enumerators take consecutive values starting at 1 (REG_NOMATCH). */
12typedef enum {
13 REG_ENOSYS = -1, REG_NOERROR =
14 0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE,
15 REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR,
16 REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN
17} reg_errcode_t;
/* Pattern buffer handed to the compile routines in this file.
   - buffer: cast to re_dfa_t * by every function here that uses it.
   - syntax: option bits tested with masks in re_compile_fastmap_iter.
   - fastmap: char array written through by re_set_fastmap.
   - translate: only compared against 0 in re_compile_internal.
   - re_nsub: loop bound for the subexp_map passes in analyze.
   - fastmap_accurate / not_bol / not_eol: 1-bit flags, all cleared at the
     start of re_compile_internal. */
18typedef struct {
19 unsigned char *buffer;
20 reg_syntax_t syntax;
21 char *fastmap;
22 unsigned char *translate;
23 unsigned int re_nsub;
24 unsigned fastmap_accurate:1;
25 unsigned not_bol:1;
26 unsigned not_eol:1;
27} regex_t;
/* Thread-local void * slot (initial-exec TLS model).  __ctype_tolower_loc
   takes its address and views it as a `const int *` table pointer. */
28extern __thread void *__libc_tsd_CTYPE_TOLOWER
29 __attribute__ ((tls_model("initial-exec")));
/* Returns the address of the thread-local tolower table pointer.

   The union lets &__libc_tsd_CTYPE_TOLOWER (a void **) be read back as a
   const int ** without a cast.  If the slot is still null, it is first set
   to values[0].string of the current LC_CTYPE data, reinterpreted as int *
   and advanced by 128 entries.
   NOTE(review): the +128 bias presumably makes negative indices down to
   -128 valid, matching the range check in tolower() -- confirm. */
30extern inline const int ** __attribute__ ((const))__ctype_tolower_loc(void)
31{
32 union {
33 void **ptr;
34 const int **tablep;
35 } u;
36 u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
 /* Lazy initialization of the per-thread table pointer. */
37 if (*u.tablep == 0)
38 *u.tablep =
39 ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128);
40 return u.tablep;
41}
/* Maps __c through the table returned by __ctype_tolower_loc() when
   -128 <= __c < 256; any other value is returned unchanged. */
42extern __inline int tolower(int __c)
43{
44 return
45__c >= -128
46&&
47__c < 256 ? (*__ctype_tolower_loc())[__c] : __c;
48}
/* Multibyte conversion state; an empty struct in this file.  Only its
   address is passed on (to __wcrtomb in re_compile_fastmap_iter). */
49typedef struct {
50} mbstate_t;
/* Both bitset typedefs are plain pointers to unsigned long words. */
51typedef unsigned long int *bitset_t;
52typedef unsigned long int *re_bitset_ptr_t;
/* Set of node indices: `nelem` bounds the iteration over `elems`
   in re_compile_fastmap_iter. */
53typedef struct {
54 int nelem;
55 int *elems;
56} re_node_set;
/* Node/token kinds.  Values 0..7 are listed explicitly, 8..12 are spelled
   as `8 | n`, CONCAT is 16, SUBEXP is 17, OP_DUP_PLUS is 18, and every
   enumerator after OP_DUP_PLUS takes the next consecutive value.
   re_token_t stores this type in an 8-bit bit-field. */
57typedef enum {
58 NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
59 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
60 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
61 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
62 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS =
63 18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET,
64 OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM,
65 OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM,
66 OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS,
67 OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE,
68 BACK_SLASH
69} re_token_type_t;
/* Character set for COMPLEX_BRACKET nodes: re_compile_fastmap_iter walks
   mbchars[0 .. nmbchars-1] and passes each entry to __wcrtomb. */
70typedef struct {
71 long int *mbchars;
72 int nmbchars;
73} re_charset_t;
/* One node of the compiled pattern: an operand union plus the node kind
   packed into an 8-bit bit-field.
   NOTE(review): which `opr` member is meaningful presumably depends on
   `type`; the code in this file reads `c`, `sbcset`, `mbcset` and `idx`
   in different branches -- confirm against the producer of these nodes. */
74typedef struct {
75 union {
76 unsigned char c;
77 re_bitset_ptr_t sbcset;
78 re_charset_t *mbcset;
79 int idx;
80 } opr;
81 re_token_type_t type:8;
82} re_token_t;
/* Forward typedefs.  struct bin_tree_t is never completed in this file;
   it is only used through pointers. */
83typedef struct re_dfa_t re_dfa_t;
84typedef struct bin_tree_t bin_tree_t;
/* DFA state: here reduced to just the set of node indices it contains. */
85struct re_dfastate_t {
86 re_node_set nodes;
87};
88typedef struct re_dfastate_t re_dfastate_t;
/* Compiled-pattern state reached through regex_t.buffer.
   - nodes / nodes_len: node array and the bound used when
     re_compile_internal scans it.
   - nexts, org_indices, edests, eclosures: analyze() returns REG_ESPACE
     if any of these is null; they are not otherwise touched here.
   - init_state: state whose node set drives fastmap construction.
   - str_tree: tree passed to preorder() in analyze().
   - sb_char: bitset assigned in re_compile_internal when mb_cur_max > 1.
   - nbackref: contributes to has_mb_node in re_compile_internal.
   - has_mb_node / is_utf8 / map_notascii: 1-bit flags set in
     re_compile_internal.
   - mb_cur_max: compared against 1 and 6 in this file.
   - subexp_map: if non-null, analyze() fills it with the identity map
     before calling preorder(). */
89struct re_dfa_t {
90 re_token_t *nodes;
91 unsigned int nodes_len;
92 int *nexts;
93 int *org_indices;
94 re_node_set *edests;
95 re_node_set *eclosures;
96 re_dfastate_t *init_state;
97 bin_tree_t *str_tree;
98 re_bitset_ptr_t sb_char;
99 int nbackref;
100 unsigned int has_mb_node:1;
101 unsigned int is_utf8:1;
102 unsigned int map_notascii:1;
103 int mb_cur_max;
104 int *subexp_map;
105};
/* Forward declarations of the file-local helpers.  re_compile_internal,
   re_compile_fastmap_iter and analyze are defined later in this file;
   optimize_subexps is only declared here and passed by name to preorder()
   in analyze(). */
106static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
107 unsigned int length,
108 reg_syntax_t syntax);
109static void re_compile_fastmap_iter(regex_t * bufp,
110 const re_dfastate_t * init_state,
111 char *fastmap);
112static reg_errcode_t analyze(regex_t * preg);
113static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/* Old-style (K&R) entry point: forwards its arguments, together with the
   global re_syntax_options, to re_compile_internal.

   NOTE(review): only `pattern` has a parameter declaration; `length` and
   `bufp` are left undeclared (implicit int under old-style rules), and
   `bufp` is then passed where a regex_t * is expected.
   NOTE(review): `ret` is assigned but never used and the function falls
   off the end without returning a const char *.  Both look like artifacts
   of test-case reduction -- confirm before "fixing", since this file
   appears to exist to reproduce a compiler problem. */
114const char *__re_compile_pattern(pattern, length, bufp)
115const char *pattern;
116{
117 reg_errcode_t ret;
118 ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
119}
120
/* Old-style (K&R) entry point: fetches the re_dfa_t behind bufp->buffer
   and the caller's fastmap array, then lets re_compile_fastmap_iter fill
   the fastmap from the DFA's initial state.

   NOTE(review): declared to return int but has no return statement;
   presumably a reduction artifact -- confirm. */
121int __re_compile_fastmap(bufp)
122regex_t *bufp;
123{
124 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
125 char *fastmap = bufp->fastmap;
126 re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
127}
/* Marks one entry of `fastmap`; always inlined.

   As written, nothing happens when `icase` is zero.  When `icase` is
   non-zero, fastmap[__res] is set to 1, where __res is tolower(ch) only
   in the `ch == 0` case.

   NOTE(review): for non-zero `ch` the `if (ch)` branch is empty, so __res
   is read uninitialized as the array index.  This looks like the remains
   of an expanded tolower macro after test-case reduction -- confirm
   before changing. */
128static inline void
129 __attribute__ ((always_inline)) re_set_fastmap(char *fastmap, int icase,
130 int ch)
131{
132 if (icase) {
133 int __res;
 /* sizeof(ch) is sizeof(int), so this test holds wherever int is wider
    than char. */
134 if (sizeof(ch) > 1) {
135 if (ch) {
136 } else
137 __res = tolower(ch);
138 }
139 fastmap[__res] = 1;
140 }
141}
/* Walks every node index in init_state->nodes and marks fastmap entries
   via re_set_fastmap, depending on the node's type:

   - CHARACTER: optionally alloca()s a scratch buffer (result unused),
     then scans the words of the node's opr.sbcset bitset and calls
     re_set_fastmap for each set bit.
   - COMPLEX_BRACKET: converts each entry of the node's mbcset->mbchars
     with __wcrtomb and marks the first byte of the result; when syntax
     bit 0x4000 is set and mb_cur_max > 1 it also marks the first byte of
     the towlower() form.

   `icase` is true only when mb_cur_max == 1 and syntax bit 0x40000 is set.

   NOTE(review): alloca, __wcrtomb and towlower have no declarations in
   this file.
   NOTE(review): in the CHARACTER branch `i`, `j` and `ch` are never
   initialized and neither loop advances its index; this looks like the
   result of test-case reduction -- confirm before changing.
   NOTE(review): the mask is 0x40000 in the CHARACTER branch and for
   `icase`, but 0x4000 in the COMPLEX_BRACKET branch -- confirm whether
   the difference is intended. */
142static void re_compile_fastmap_iter(regex_t * bufp,
143 const re_dfastate_t * init_state,
144 char *fastmap)
145{
146 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
147 int node_cnt;
148 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
149 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
150 int node = init_state->nodes.elems[node_cnt];
151 re_token_type_t type = dfa->nodes[node].type;
152 if (type == CHARACTER) {
153 if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
 /* Scratch buffer of mb_cur_max bytes; never used afterwards. */
154 unsigned char *buf
155 =
156 alloca(dfa->mb_cur_max);
157 }
158 int i;
159 int ch;
 /* Outer loop: one iteration per unsigned long word of a 256-bit set. */
160 for (;
161 i < (256 / (sizeof(unsigned long int) * 8)); ) {
162 int j;
163 unsigned long int w =
164 dfa->nodes[node].opr.sbcset[i];
 /* Inner loop: test bit j of the current word. */
165 for (; j < (sizeof(unsigned long int) * 8);)
166 if (w & ((unsigned long int)1 << j))
167 re_set_fastmap(fastmap, icase,
168 ch);
169 }
170 } else if (type == COMPLEX_BRACKET) {
171 int i;
172 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
173 for (i = 0; i < cset->nmbchars; ++i) {
174 char buf[256];
175 mbstate_t state;
 /* A result of (unsigned int)-1 is treated as conversion failure. */
176 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
177 (unsigned int)-1)
178 re_set_fastmap(fastmap, icase,
179 *(unsigned char *)buf);
180 if ((bufp->syntax & 0x4000)
181 && dfa->mb_cur_max > 1) {
182 if (__wcrtomb
183 (buf, towlower(cset->mbchars[i]),
184 &state) != (unsigned int)-1)
185 re_set_fastmap(fastmap, 0,
186 *(unsigned char
187 *)buf);
188 }
189 }
190 }
191 }
192}
/* File-scope constant pointer with no initializer, so it holds a null
   pointer.  re_compile_internal assigns it to dfa->sb_char when
   dfa->is_utf8 is set. */
193static const bitset_t utf8_sb_map;
/* Core compile step as it remains in this file.  In order, it:
   1. clears the fastmap_accurate / not_bol / not_eol flags of `preg`;
   2. calls analyze(preg);
   3. calls optimize_utf8(dfa) when is_utf8 is set, syntax bit 0x400000 is
      clear and preg->translate is null;
   4. sets dfa->is_utf8 when mb_cur_max == 6 and values[0].string of the
      current LC_CTYPE data compares equal to "UTF-8";
   5. sets dfa->map_notascii from values[0].word;
   6. for mb_cur_max > 1, points dfa->sb_char either at utf8_sb_map or at
      a freshly calloc'ed bitset filled from __btowc();
   7. scans all nodes and finally sets dfa->has_mb_node.

   `pattern` and `length` are not used.

   NOTE(review): `dfa` is declared but never assigned before it is
   dereferenced; `err` is assigned but never read; the function contains a
   value-less `return;` and no return at the end although it is declared
   to return reg_errcode_t.  optimize_utf8, strlen, strcmp, calloc and
   __btowc have no declarations in this file.  All of this looks like the
   result of test-case reduction -- confirm before changing, since this
   file appears to exist to reproduce a compiler problem. */
194static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
195 unsigned int length,
196 reg_syntax_t syntax)
197{
198 reg_errcode_t err;
199 re_dfa_t *dfa;
200 preg->fastmap_accurate = 0;
201 preg->not_bol = preg->not_eol = 0;
202 err = analyze(preg);
203 if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
204 optimize_utf8(dfa);
 /* The statement expression below has the shape of an expanded strcmp()
    macro: it picks between a plain strcmp() call and a first-byte
    comparison depending on the operand sizes and lengths.  Every arm
    compares values[0].string against "UTF-8". */
205 if (dfa->mb_cur_max == 6 && __extension__( {
206 unsigned int __s1_len;
207 unsigned int __s2_len;
208 (
209(__s1_len =
210 strlen(((*_nl_current_LC_CTYPE)->values[0].string)), __s2_len = strlen("UTF-8"), (!((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) || __s1_len >= 4) && (!((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) || __s2_len >= 4)) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (
211((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string)), __s1_len < 4) ? (
212((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) ?
213strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (__extension__( {
214 __const
215 unsigned
216 char
217 *__s2
218 =
219 (__const
220 unsigned
221 char
222 *)
223 (__const
224 char
225 *)
226 ("UTF-8");
227 register
228 int
229 __result
230 =
231 (((__const unsigned char *)(__const char *)(((*_nl_current_LC_CTYPE)->values[0].string)))[0] - __s2[0]); __result;}
232 ))): (
233 ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) && (__s2_len = strlen("UTF-8"), __s2_len < 4) ? (
234((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) ?
235strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (__extension__( {
236 __const
237 unsigned
238 char
239 *__s1
240 =
241 (__const
242 unsigned
243 char
244 *)
245 (__const
246 char
247 *)
248 (((*_nl_current_LC_CTYPE)->values[0].string)); register int __result = __s1[0] - ((__const unsigned char *)(__const char *)("UTF-8"))[0]; __result;}
249 ))): strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));}
250 ) == 0)
251 dfa->is_utf8 = 1;
252 dfa->map_notascii =
253 (((unsigned int)(*_nl_current_LC_CTYPE)->values[0].word) != 0);
254 if (dfa->mb_cur_max > 1) {
255 if (dfa->is_utf8)
256 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
257 else {
258 int i;
259 int j;
260 int ch;
 /* NOTE(review): sizeof(bitset_t) is the size of a pointer, yet the loop
    below indexes 256 / (bits per unsigned long) words of the result --
    confirm whether the allocation size is intended. */
261 dfa->sb_char =
262 (re_bitset_ptr_t) calloc(sizeof(bitset_t), 1);
 /* Set bit (i, j) for every byte value ch in 0..255 whose __btowc()
    result is not 0xffffffff. */
263 for (i = 0, ch = 0;
264 i < (256 / (sizeof(unsigned long int) * 8)); ++i)
265 for (j = 0; j < (sizeof(unsigned long int) * 8);
266 ++j, ++ch) {
267 unsigned int wch = __btowc(ch);
268 if (wch != (0xffffffffu))
269 dfa->sb_char[i] |=
270 (unsigned long int)1 << j;
271 }
272 }
273 }
274 int node;
275 int i;
276 int mb_chars = 0;
277 int has_period = 0;
 /* Node scan.  There are no `break` statements, so CHARACTER falls through
    into ANCHOR and then OP_PERIOD; the inner switch on opr.idx has no
    cases.  `mb_chars` is set here but never read afterwards. */
278 for (node = 0; node < dfa->nodes_len; ++node)
279 switch (dfa->nodes[node].type) {
280 case CHARACTER:
281 if (dfa->nodes[node].opr.c >= 0x80)
282 mb_chars = 1;
283 case ANCHOR:
284 switch (dfa->nodes[node].opr.idx) {
285 }
286 case OP_PERIOD:
287 has_period = 1;
 /* Checks the words of opr.sbcset that cover bit positions 0x80..0xff
    and leaves the function as soon as one of them is non-zero. */
288 for (i = 0x80 / (sizeof(unsigned long int) * 8);
289 i < (256 / (sizeof(unsigned long int) * 8)); ++i)
290 if (dfa->nodes[node].opr.sbcset[i])
291 return;
292 }
293 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
294}
/* Checks and prepares the DFA stored in preg->buffer.

   Returns REG_ESPACE if any of nexts, org_indices, edests or eclosures is
   null.  Otherwise, when subexp_map is non-null, it fills subexp_map with
   the identity mapping for the first re_nsub entries, runs preorder() over
   str_tree with optimize_subexps as the callback, and then looks for the
   first entry that no longer maps to itself.

   NOTE(review): nothing is done with the result of that final scan, and
   the function reaches its end without returning a value on this path;
   preorder() has no declaration in this file.  Presumably reduction
   artifacts -- confirm before changing. */
295static reg_errcode_t analyze(regex_t * preg)
296{
297 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
298 if (
299 (dfa->nexts == 0 || dfa->org_indices == 0 || dfa->edests == 0
300 || dfa->eclosures == 0))
301 return REG_ESPACE;
302 if (dfa->subexp_map != 0) {
303 int i;
304 for (i = 0; i < preg->re_nsub; i++)
305 dfa->subexp_map[i] = i;
306 preorder(dfa->str_tree, optimize_subexps, dfa);
 /* Stops at the first index whose mapping was changed by the callback. */
307 for (i = 0; i < preg->re_nsub; i++)
308 if (dfa->subexp_map[i] != i)
309 break;
310 }
311}