]> git.wh0rd.org Git - ICEs.git/blob - hppa-ICE-insn-constraints/regex.i.5
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.5
/*
 * Locale record: an unsigned count followed by a flexible array member
 * whose elements are a union of a string pointer and an unsigned word.
 * NOTE(review): nstrings presumably counts the entries of values[];
 * nothing in the visible part of this file reads it -- confirm.
 */
1 struct locale_data {
2         unsigned int nstrings;
3         union locale_data_value {
4                 const char *string;
5                 unsigned int word;
6         } values[];
7 };
/*
 * Thread-local object (initial-exec TLS model), defined elsewhere:
 * a pointer to a const pointer to struct locale_data.  It is
 * dereferenced in this file to reach values[0].string.
 */
8 extern __thread struct locale_data *const *_nl_current_LC_CTYPE
9     __attribute__ ((tls_model("initial-exec")));
/* Bit mask type for regex syntax flags; callers test individual bits. */
10 typedef unsigned long int reg_syntax_t;
/* Global syntax mask, defined elsewhere; passed by __re_compile_pattern. */
11 extern reg_syntax_t re_syntax_options;
/*
 * Regex error codes.  REG_ENOSYS is -1, REG_NOERROR is 0, and the
 * remaining enumerators take consecutive values starting at 1
 * (REG_NOMATCH) in declaration order.
 */
12 typedef enum {
13         REG_ENOSYS = -1, REG_NOERROR =
14             0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE,
15             REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR,
16             REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN
17 } reg_errcode_t;
/*
 * Compiled-pattern handle as seen by the functions in this file.
 *   buffer   - cast to re_dfa_t * by the fastmap functions below.
 *   syntax   - flag mask; individual bits are tested when building the
 *              fastmap.
 *   fastmap  - char table written through re_set_fastmap().
 *   translate- compared against 0 in re_compile_internal().
 *   re_nsub  - not accessed in the visible part of this file.
 * The three trailing members are 1-bit flags; re_compile_internal()
 * clears fastmap_accurate, not_bol and not_eol.
 */
18 typedef struct {
19         unsigned char *buffer;
20         reg_syntax_t syntax;
21         char *fastmap;
22         unsigned char *translate;
23         unsigned int re_nsub;
24         unsigned fastmap_accurate:1;
25         unsigned not_bol:1;
26         unsigned not_eol:1;
27 } regex_t;
/*
 * Thread-local void * slot (initial-exec TLS model), defined elsewhere.
 * __ctype_tolower_loc() takes its address and treats it as the
 * per-thread pointer to the tolower table.
 */
28 extern __thread void *__libc_tsd_CTYPE_TOLOWER
29     __attribute__ ((tls_model("initial-exec")));
/*
 * Return the address of the thread-local __libc_tsd_CTYPE_TOLOWER slot,
 * viewed as `const int **`.
 *
 * The slot is declared as void *, so its address is stored through the
 * `ptr` member of a union and read back through `tablep` to change the
 * pointer type without a cast.  If the slot is still null it is first
 * set to values[0].string of the current LC_CTYPE locale data, cast to
 * int * and advanced by 128 ints.
 *
 * Declared with __attribute__((const)).
 */
30 extern inline const int ** __attribute__ ((const))__ctype_tolower_loc(void)
31 {
32         union {
33                 void **ptr;
34                 const int **tablep;
35         } u;
36         u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
        /* Lazy initialisation: only done while the slot holds null. */
37         if (*u.tablep == 0)
38                 *u.tablep =
39                     ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128);
40         return u.tablep;
41 }
/*
 * Table-driven tolower.
 *
 * For __c in the range -128 .. 255 the result is entry __c of the table
 * whose address __ctype_tolower_loc() yields (negative indices are
 * used as-is); any other value is returned unchanged.
 */
42 extern __inline int tolower(int __c)
43 {
44         return 
45 __c >= -128 
46 && 
47 __c < 256 ? (*__ctype_tolower_loc())[__c] : __c;
48 }
/*
 * Multibyte conversion state, reduced here to a struct with no members
 * (accepted by GCC as an extension).  Only its address is passed on.
 */
49 typedef struct {
50 } mbstate_t;
/* Both bitset names are plain pointers to unsigned long words. */
51 typedef unsigned long int *bitset_t;
52 typedef unsigned long int *re_bitset_ptr_t;
/*
 * Set of node indices: `elems` points at the indices and `nelem` is the
 * upper bound used when re_compile_fastmap_iter() walks them.
 */
53 typedef struct {
54         int nelem;
55         int *elems;
56 } re_node_set;
/*
 * Token / node kinds.
 *
 * Values 0..7 are listed explicitly, 8..12 are written as `8 | n`,
 * CONCAT is 16, SUBEXP is 17 and OP_DUP_PLUS is 18; every enumerator
 * after OP_DUP_PLUS continues counting by one, ending with BACK_SLASH
 * at 36.  All values fit the 8-bit `type` bit-field of re_token_t.
 * Only CHARACTER and COMPLEX_BRACKET are tested in the visible code.
 */
57 typedef enum {
58         NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
59             3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
60             6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
61             8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
62             8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS =
63             18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET,
64             OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM,
65             OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM,
66             OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS,
67             OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE,
68             BACK_SLASH
69 } re_token_type_t;
/*
 * Character list attached to a COMPLEX_BRACKET node: `mbchars` holds
 * `nmbchars` long values, each of which re_compile_fastmap_iter() hands
 * to __wcrtomb() / towlower().
 */
70 typedef struct {
71         long int *mbchars;
72         int nmbchars;
73 } re_charset_t;
/*
 * One DFA node: a payload union plus an 8-bit kind tag.
 * NOTE(review): which `opr` member is valid presumably depends on
 * `type`; the visible code reads opr.sbcset for CHARACTER nodes and
 * opr.mbcset for COMPLEX_BRACKET nodes -- confirm against the origin.
 */
74 typedef struct {
75         union {
76                 unsigned char c;
77                 re_bitset_ptr_t sbcset;
78                 re_charset_t *mbcset;
79                 int idx;
80         } opr;
81         re_token_type_t type:8;
82 } re_token_t;
/* Forward typedefs; bin_tree_t is never completed in the visible code. */
83 typedef struct re_dfa_t re_dfa_t;
84 typedef struct bin_tree_t bin_tree_t;
/* DFA state, reduced here to the set of node indices it contains. */
85 struct re_dfastate_t {
86         re_node_set nodes;
87 };
88 typedef struct re_dfastate_t re_dfastate_t;
/*
 * Compiled automaton that regex_t.buffer is cast to.
 *
 * Members read by the visible code:
 *   nodes      - token array indexed by the node numbers found in a
 *                state's node set.
 *   init_state - state passed to re_compile_fastmap_iter().
 *   is_utf8    - tested in re_compile_internal().
 *   mb_cur_max - compared against 1 and 6, and used as an alloca() size.
 * The remaining members are not accessed in the visible part of this
 * file.
 */
89 struct re_dfa_t {
90         re_token_t *nodes;
91         unsigned int nodes_len;
92         int *nexts;
93         int *org_indices;
94         re_node_set *edests;
95         re_node_set *eclosures;
96         re_dfastate_t *init_state;
97         bin_tree_t *str_tree;
98         re_bitset_ptr_t sb_char;
99         int nbackref;
100         unsigned int has_mb_node:1;
101         unsigned int is_utf8:1;
102         unsigned int map_notascii:1;
103         int mb_cur_max;
104         int *subexp_map;
105 };
/*
 * Prototypes for file-local functions.  re_compile_internal() and
 * re_compile_fastmap_iter() are defined later in this file.  No
 * definition of analyze() or optimize_subexps() appears in the visible
 * part, and optimize_subexps() is not called there.
 */
106 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
107                                          unsigned int length,
108                                          reg_syntax_t syntax);
109 static void re_compile_fastmap_iter(regex_t * bufp,
110                                     const re_dfastate_t * init_state,
111                                     char *fastmap);
112 static reg_errcode_t analyze(regex_t * preg);
113 static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/*
 * Compile `pattern` into `bufp` using the global re_syntax_options by
 * forwarding to re_compile_internal().
 *
 * Old-style (K&R) definition: only `pattern` has a declaration, so
 * `length` and `bufp` default to int, and `bufp` is then passed where a
 * regex_t * is expected.
 *
 * NOTE(review): the result of re_compile_internal() is stored in `ret`
 * and never used, and there is no return statement although the
 * function is declared to return const char *; a caller that uses the
 * result gets undefined behaviour.  The file path suggests this is a
 * reduced compiler-ICE reproducer, so this is presumably intentional --
 * confirm before changing.
 */
114 const char *__re_compile_pattern(pattern, length, bufp)
115 const char *pattern;
116 {
117         reg_errcode_t ret;
118         ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
119 }
120
/*
 * Fill bufp->fastmap from the initial state of the automaton stored in
 * bufp->buffer, by calling re_compile_fastmap_iter().
 *
 * Old-style (K&R) definition.
 * NOTE(review): declared to return int but has no return statement;
 * callers must not use the result.
 */
121 int __re_compile_fastmap(bufp)
122 regex_t *bufp;
123 {
124         re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
125         char *fastmap = bufp->fastmap;
126         re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
127 }
/*
 * Mark one entry of `fastmap` when `icase` is non-zero; do nothing when
 * `icase` is zero.  Forced inline via always_inline.
 *
 *   fastmap - table written at index __res.
 *   icase   - non-zero enables the write.
 *   ch      - character code; only the ch == 0 path feeds tolower().
 *
 * NOTE(review): __res is assigned (tolower(ch)) only when ch is 0.  For
 * any non-zero ch the empty `if (ch)` branch leaves __res
 * uninitialised, yet fastmap[__res] is still written.  The shape looks
 * like a reduced expansion of a tolower macro; given the file path
 * (compiler-ICE reproducer) this is presumably intentional -- confirm.
 */
128 static inline void
129     __attribute__ ((always_inline)) re_set_fastmap(char *fastmap, int icase,
130                                                    int ch)
131 {
132         if (icase) {
133                 int __res;
                /* ch is an int, so this size test is a compile-time constant. */
134                 if (sizeof(ch) > 1) {
135                         if (ch) {
136                         } else
137                                 __res = tolower(ch);
138                 }
139                 fastmap[__res] = 1;
140         }
141 }
/*
 * Walk every node index in init_state->nodes and update `fastmap`
 * through re_set_fastmap() for CHARACTER and COMPLEX_BRACKET nodes;
 * other node types are ignored.
 *
 *   bufp       - pattern handle; bufp->buffer is the re_dfa_t and
 *                bufp->syntax supplies the flag bits tested below.
 *   init_state - state whose node set is iterated.
 *   fastmap    - table handed on to re_set_fastmap().
 *
 * `icase` is true only when dfa->mb_cur_max is 1 and bit 0x40000 of
 * the syntax mask is set.
 *
 * NOTE(review): this body shows clear signs of automatic reduction and,
 * given the file path (compiler-ICE reproducer), is presumably meant to
 * stay exactly as is -- confirm before "fixing" any of the following:
 *   - in the CHARACTER branch `i`, `j` and `ch` are never initialised
 *     and neither loop has an increment expression;
 *   - the alloca() result is stored in a block-local `buf` that is
 *     never used;
 *   - the CHARACTER branch reads opr.sbcset, a bitset pointer;
 *   - the COMPLEX_BRACKET branch tests syntax bit 0x4000, whereas the
 *     rest of the function tests 0x40000;
 *   - `state` is passed to __wcrtomb() without being initialised;
 *   - no declaration of __wcrtomb() or towlower() is visible in this
 *     part of the file.
 */
142 static void re_compile_fastmap_iter(regex_t * bufp,
143                                     const re_dfastate_t * init_state,
144                                     char *fastmap)
145 {
146         re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
147         int node_cnt;
148         int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
149         for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
150                 int node = init_state->nodes.elems[node_cnt];
151                 re_token_type_t type = dfa->nodes[node].type;
152                 if (type == CHARACTER) {
153                         if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
154                                 unsigned char *buf
155  =
156                                     alloca(dfa->mb_cur_max);
157                                 }
158                         int i;
159                         int ch;
                        /* Outer bound is the number of unsigned long words covering 256 bits. */
160                         for (;
161                              i < (256 / (sizeof(unsigned long int) * 8)); ) {
162                                 int j;
163                                 unsigned long int w =
164                                     dfa->nodes[node].opr.sbcset[i];
                                /* Inner bound is the bit width of one word; bit j of w gates the call. */
165                                 for (; j < (sizeof(unsigned long int) * 8);)
166                                         if (w & ((unsigned long int)1 << j))
167                                                 re_set_fastmap(fastmap, icase,
168                                                                ch);
169                         }
170                 } else if (type == COMPLEX_BRACKET) {
171                         int i;
172                         re_charset_t *cset = dfa->nodes[node].opr.mbcset;
173                         for (i = 0; i < cset->nmbchars; ++i) {
174                                 char buf[256];
175                                 mbstate_t state;
                                /* On a result other than (unsigned int)-1, register the first output byte. */
176                                 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
177                                     (unsigned int)-1)
178                                         re_set_fastmap(fastmap, icase,
179                                                        *(unsigned char *)buf);
                                /* Second pass on the towlower() form; icase is passed as 0 here. */
180                                 if ((bufp->syntax & 0x4000)
181                                     && dfa->mb_cur_max > 1) {
182                                         if (__wcrtomb
183                                             (buf, towlower(cset->mbchars[i]),
184                                              &state) != (unsigned int)-1)
185                                                 re_set_fastmap(fastmap, 0,
186                                                                *(unsigned char
187                                                                  *)buf);
188                                 }
189                         }
190                 }
191         }
192 }
/*
 * File-local const pointer to unsigned long words with no initialiser,
 * so it is a null pointer.  Not referenced in the visible part of this
 * file.
 */
193 static const bitset_t utf8_sb_map;
194 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
195                                          unsigned int length,
196                                          reg_syntax_t syntax)
197 {
198         reg_errcode_t err;
199         re_dfa_t *dfa;
200         preg->fastmap_accurate = 0;
201         preg->not_bol = preg->not_eol = 0;
202         err = analyze(preg);
203         if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
204                 optimize_utf8(dfa);
205         if (dfa->mb_cur_max == 6 && __extension__( {
206                                                   unsigned int __s1_len;
207                                                   unsigned int __s2_len;
208                                                   (
209 (__s1_len =
210                                                        strlen(((*_nl_current_LC_CTYPE)->values[0].string)), __s2_len = strlen("UTF-8"), (!((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) || __s1_len >= 4) && (!((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) || __s2_len >= 4)) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (
211 ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string)), __s1_len < 4) ? (
212 ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) ? 
213 strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (__extension__( {
214                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  __const
215                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  unsigned
216                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  char
217                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  *__s2
218                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  =
219                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  (__const
220                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   unsigned
221                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   char
222                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   *)
223                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  (__const
224                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   char
225                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   *)
226                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  ("UTF-8");
227                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  register
228                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  int
229                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  __result
230                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  =
231                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  (((__const unsigned char *)(__const char *)(((*_nl_current_LC_CTYPE)->values[0].string)))[0] - __s2[0]); __result;}
232               ))):                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 (
233  ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) && (__s2_len = strlen("UTF-8"), __s2_len < 4) ? (
234 ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) ? 
235 strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (__extension__( {
236                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    __const
237                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    unsigned
238                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    char
239                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    *__s1
240                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    =
241                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    (__const
242                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     unsigned
243                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     char
244                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     *)
245                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    (__const
246                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     char
247                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     *)
248                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    (((*_nl_current_LC_CTYPE)->values[0].string)); register int __result = __s1[0] - ((__const unsigned char *)(__const char *)("UTF-8"))[0]; __result;}
249       ))):                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));}
250             ) == 0)
251                 dfa->is_utf8 = 1;
252         dfa->map_notascii =
253             (((unsigned int)(*_nl_current_LC_CTYPE)->values[0].word) != 0);
254         if (dfa->mb_cur_max > 1) {
255                 if (dfa->is_utf8)
256                         dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
257                 else {
258                         int i;
259                         int j;
260                         int ch;
261                         dfa->sb_char =
262                             (re_bitset_ptr_t) calloc(sizeof(bitset_t), 1);
263                         for (i = 0, ch = 0;
264                              i < (256 / (sizeof(unsigned long int) * 8)); ++i)
265                                 for (j = 0; j < (sizeof(unsigned long int) * 8);
266                                      ++j, ++ch) {
267                                         unsigned int wch = __btowc(ch);
268                                         if (wch != (0xffffffffu))
269                                                 dfa->sb_char[i] |=
270                                                     (unsigned long int)1 << j;
271                                 }
272                 }
273         }
274         int node;
275         int i;
276         int mb_chars = 0;
277         int has_period = 0;
278         for (node = 0; node < dfa->nodes_len; ++node)
279                 switch (dfa->nodes[node].type) {
280                 case CHARACTER:
281                         if (dfa->nodes[node].opr.c >= 0x80)
282                                 mb_chars = 1;
283                 case ANCHOR:
284                         switch (dfa->nodes[node].opr.idx) {
285                         }
286                 case OP_PERIOD:
287                         has_period = 1;
288                         for (i = 0x80 / (sizeof(unsigned long int) * 8);
289                              i < (256 / (sizeof(unsigned long int) * 8)); ++i)
290                                 if (dfa->nodes[node].opr.sbcset[i])
291                                         return;
292                 }
293         dfa->has_mb_node = dfa->nbackref > 0 || has_period;
294 }
295 static reg_errcode_t analyze(regex_t * preg)
296 {
297         re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
298         if (
299             (dfa->nexts == 0 || dfa->org_indices == 0 || dfa->edests == 0
300              || dfa->eclosures == 0))
301                 return REG_ESPACE;
302         if (dfa->subexp_map != 0) {
303                 int i;
304                 for (i = 0; i < preg->re_nsub; i++)
305                         dfa->subexp_map[i] = i;
306                 preorder(dfa->str_tree, optimize_subexps, dfa);
307                 for (i = 0; i < preg->re_nsub; i++)
308                         if (dfa->subexp_map[i] != i)
309                                 break;
310         }
311 }