]> git.wh0rd.org Git - ICEs.git/blob - hppa-ICE-insn-constraints/regex.i.7
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.7
/* Locale data record: an unsigned count followed by a flexible array
   member whose elements are each either a string pointer or an unsigned
   word.  nstrings presumably gives the number of entries in values[] --
   nothing in the visible code reads it, so confirm against the producer. */
1 struct locale_data {
2         unsigned int nstrings;
3         union locale_data_value {
4                 char *string;
5                 unsigned int word;
6         } values[];
7 };
/* Thread-local pointer to a pointer to the LC_CTYPE locale data, declared
   with the "initial-exec" TLS model.  It is dereferenced twice by users in
   this file: (*_nl_current_LC_CTYPE)->values[0]. */
8 extern __thread struct locale_data **_nl_current_LC_CTYPE
9     __attribute__ ((tls_model("initial-exec")));
/* Integer type holding regex syntax option bits; tested with bit masks
   such as (syntax & 0x40000) further down. */
10 typedef unsigned long int reg_syntax_t;
/* Syntax options that __re_compile_pattern forwards to
   re_compile_internal. */
11 extern reg_syntax_t re_syntax_options;
/* Regex error codes.  REG_ENOSYS is -1 and REG_NOERROR is 0; the remaining
   enumerators take consecutive values starting with REG_NOMATCH == 1 and
   ending with REG_ERPAREN == 16. */
12 typedef enum {
13         REG_ENOSYS = -1, REG_NOERROR =
14             0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE,
15             REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR,
16             REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN
17 } reg_errcode_t;
/* Pattern buffer handed to the compile routines in this file.
     buffer            - cast to re_dfa_t * by the fastmap routines.
     syntax            - option bits, tested with masks (0x40000, 0x4000).
     fastmap           - table written by re_set_fastmap via fastmap[...] = 1.
     translate         - compared against 0 in re_compile_internal.
     re_nsub           - not referenced in the visible code.
     fastmap_accurate,
     not_bol, not_eol  - one-bit flags cleared by re_compile_internal. */
18 typedef struct {
19         unsigned char *buffer;
20         reg_syntax_t syntax;
21         char *fastmap;
22         unsigned char *translate;
23         unsigned int re_nsub;
24         unsigned fastmap_accurate:1;
25         unsigned not_bol:1;
26         unsigned not_eol:1;
27 } regex_t;
/* Thread-local slot that caches the pointer handed out by
   __ctype_tolower_loc. */
28 extern __thread void *__libc_tsd_CTYPE_TOLOWER;
/* Return the address of the thread-local slot __libc_tsd_CTYPE_TOLOWER,
   viewed as int **.  If the slot is still null it is first initialised to
   values[0].string of the current LC_CTYPE data, reinterpreted as int *
   and advanced by 128 elements.  The union is used to view the same
   address both as void ** and as int ** without a pointer cast. */
29 inline int **__ctype_tolower_loc(void)
30 {
31         union {
32                 void **ptr;
33                 int **tablep;
34         } u;
35         u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
        /* Lazy initialisation: only fill the slot the first time through. */
36         if (*u.tablep == 0)
37                 *u.tablep =
38                     ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128);
39         return u.tablep;
40 }
/* Look up __c through the pointer returned by __ctype_tolower_loc().
   NOTE(review): postfix [] binds tighter than unary *, so the expression
   below parses as *(__ctype_tolower_loc()[__c]) -- it indexes the returned
   int ** by __c and then dereferences the selected pointer, rather than
   indexing the table the returned pointer refers to, which would be
   (*__ctype_tolower_loc())[__c].  The page header places this file under
   an ICE reproducer directory, so this may be a deliberate reduction
   artifact; confirm before changing it. */
41 inline int tolower(int __c)
42 {
43         return *__ctype_tolower_loc()[__c];
44 }
/* Multibyte conversion state, declared here as a struct with no members
   (accepted by GCC as an extension). */
45 typedef struct {
46 } mbstate_t;
/* Both bitset typedefs are plain pointers to unsigned long int words. */
47 typedef unsigned long int *bitset_t;
48 typedef unsigned long int *re_bitset_ptr_t;
/* Set of node indices: nelem entries stored in elems[], as iterated by
   re_compile_fastmap_iter. */
49 typedef struct {
50         int nelem;
51         int *elems;
52 } re_node_set;
/* Token / node type codes.  Values 0-7 are listed explicitly; the next five
   are written as 8 | n (8..12); CONCAT and SUBEXP are 16 and 17; and every
   enumerator from OP_DUP_PLUS (18) onward takes the next consecutive value.
   Only CHARACTER and COMPLEX_BRACKET are tested in the visible code. */
53 typedef enum {
54         NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
55             3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
56             6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
57             8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
58             8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS =
59             18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET,
60             OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM,
61             OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM,
62             OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS,
63             OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE,
64             BACK_SLASH
65 } re_token_type_t;
/* Multibyte character set: nmbchars entries stored in mbchars[], as
   iterated by the COMPLEX_BRACKET branch of re_compile_fastmap_iter. */
66 typedef struct {
67         long int *mbchars;
68         int nmbchars;
69 } re_charset_t;
/* One node/token.  opr is a union holding a single byte, a bitset pointer,
   or a multibyte charset pointer; type is stored in an 8-bit bit-field of
   enum type re_token_type_t.  Which opr member is valid for which type is
   not established by the struct itself. */
70 typedef struct {
71         union {
72                 unsigned char c;
73                 re_bitset_ptr_t sbcset;
74                 re_charset_t *mbcset;
75         } opr;
76         re_token_type_t type:8;
77 } re_token_t;
/* Forward typedefs.  struct bin_tree_t is never completed in the visible
   code and is only used through pointers. */
78 typedef struct re_dfa_t re_dfa_t;
79 typedef struct bin_tree_t bin_tree_t;
/* DFA state; here it carries only the set of node indices it covers. */
80 struct re_dfastate_t {
81         re_node_set nodes;
82 };
83 typedef struct re_dfastate_t re_dfastate_t;
/* DFA description reached through regex_t.buffer.  Members actually used by
   the visible code:
     nodes       - node array indexed by the elements of a state's node set.
     init_state  - passed to re_compile_fastmap_iter.
     is_utf8     - tested in re_compile_internal.
     mb_cur_max  - compared against 1 and 6.
   The remaining members are declared here but not referenced in the
   visible code. */
84 struct re_dfa_t {
85         re_token_t *nodes;
86         unsigned int nodes_len;
87         int *nexts;
88         int *org_indices;
89         re_node_set *edests;
90         re_node_set *eclosures;
91         re_dfastate_t *init_state;
92         bin_tree_t *str_tree;
93         re_bitset_ptr_t sb_char;
94         int nbackref;
95         unsigned int has_mb_node:1;
96         unsigned int is_utf8:1;
97         unsigned int map_notascii:1;
98         int mb_cur_max;
99         int *subexp_map;
100 };
/* Forward declarations of file-local helpers.  In the visible code only
   re_compile_fastmap_iter is defined in full; analyze and optimize_subexps
   have no visible definition. */
101 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
102                                          unsigned int length,
103                                          reg_syntax_t syntax);
104 static void re_compile_fastmap_iter(regex_t * bufp,
105                                     const re_dfastate_t * init_state,
106                                     char *fastmap);
107 static reg_errcode_t analyze(regex_t * preg);
108 static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/* Compile pattern by calling re_compile_internal with the global
   re_syntax_options, storing the result code in a local.
   NOTE(review): this is an old-style (K&R) definition in which only
   `pattern` is given a type; `length` and `bufp` have no declaration, so
   where the compiler accepts that at all they are treated as int, and bufp
   is then passed where a regex_t * is expected.  The function is declared
   to return const char * but has no return statement, and ret is never
   read.  The page header places this file under an ICE reproducer
   directory, so these look like reduction artifacts; confirm before
   fixing. */
109 const char *__re_compile_pattern(pattern, length, bufp)
110 const char *pattern;
111 {
112         reg_errcode_t ret;
113         ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
114 }
115
/* Fill bufp->fastmap by running re_compile_fastmap_iter on the initial
   state of the DFA stored in bufp->buffer.
   NOTE(review): declared to return int but has no return statement;
   presumably a reduction artifact -- confirm before fixing. */
116 int __re_compile_fastmap(regex_t * bufp)
117 {
118         re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
119         char *fastmap = bufp->fastmap;
120         re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
121 }
/* When icase is non-zero, set fastmap[__res] to 1; when icase is zero,
   do nothing at all.
   NOTE(review): __res is assigned only on the path where ch == 0 (it
   becomes tolower(ch)); for any non-zero ch the empty `if (ch) { }` branch
   is taken and fastmap[__res] is indexed with an uninitialised value.
   sizeof(ch) > 1 holds wherever int is wider than one byte, so the outer
   test does not normally filter anything.  This shape looks like the
   remains of an expanded macro after test-case reduction; confirm before
   fixing. */
122 static inline void re_set_fastmap(char *fastmap, int icase, int ch)
123 {
124         if (icase) {
125                 int __res;
126                 if (sizeof(ch) > 1) {
127                         if (ch) {
128                         } else
129                                 __res = tolower(ch);
130                 }
131                 fastmap[__res] = 1;
132         }
133 }
/* Walk every node index in init_state->nodes and, depending on the node's
   type, call re_set_fastmap to mark entries in fastmap.
     bufp       - pattern buffer; bufp->buffer is used as the re_dfa_t and
                  bufp->syntax is tested against 0x40000 and 0x4000.
     init_state - state whose node set is iterated.
     fastmap    - table passed through to re_set_fastmap.
   alloca, __wcrtomb and towlower are called without any declaration in the
   visible code.
   NOTE(review): several things below would be defects in ordinary code
   (uninitialised i / j / ch, loops with no step expression, an unused
   alloca result, an uninitialised mbstate_t).  The page header places this
   file under an ICE reproducer directory, so they are most likely
   reduction artifacts that must stay as they are; confirm before fixing. */
134 static void re_compile_fastmap_iter(regex_t * bufp,
135                                     const re_dfastate_t * init_state,
136                                     char *fastmap)
137 {
138         re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
139         int node_cnt;
        /* icase is 1 only when mb_cur_max == 1 and syntax bit 0x40000 is set. */
140         int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
141         for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
142                 int node = init_state->nodes.elems[node_cnt];
143                 re_token_type_t type = dfa->nodes[node].type;
144                 if (type == CHARACTER) {
145                         if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
                                /* buf is allocated and then goes out of scope unused. */
146 void *buf = alloca(dfa->mb_cur_max);
147                         }
                        /* i and ch are never assigned before use, and neither
                           loop below has an increment. */
148                         int i;
149                         int ch;
150                         for (; i < (256 / (sizeof(unsigned long int) * 8));) {
151                                 int j;
                                /* This branch reads the sbcset member of opr
                                   even though the node type is CHARACTER. */
152                                 unsigned long int w =
153                                     dfa->nodes[node].opr.sbcset[i];
                                /* j is uninitialised; j and w are never modified
                                   inside this loop. */
154                                 for (; j < 8;)
155                                         if (w & 1)
156                                                 re_set_fastmap(fastmap, icase,
157                                                                ch);
158                         }
159                 } else if (type == COMPLEX_BRACKET) {
                        /* i is uninitialised here as well and never advanced. */
160                         int i;
161                         re_charset_t *cset = dfa->nodes[node].opr.mbcset;
162                         for (
163 ; i < cset->nmbchars;
164 ) {
165                                 char buf[256];
                                /* state is passed to __wcrtomb without being
                                   initialised. */
166                                 mbstate_t state;
                                /* Mark the first byte of the converted
                                   character when the conversion does not
                                   return -1. */
167                                 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
168                                     -1)
169                                         re_set_fastmap(fastmap, icase,
170                                                        *
171 buf);
                                /* NOTE(review): this test uses mask 0x4000,
                                   whereas the tests above use 0x40000 --
                                   confirm which is intended. */
172                                 if ((bufp->syntax & 0x4000)
173                                     && dfa->mb_cur_max > 1) {
174                                         if (__wcrtomb
175                                             (buf, towlower(cset->mbchars[i]),
176                                              &state) != -1)
177                                                 re_set_fastmap(fastmap, 0,
178 *buf);
179                                 }
180                         }
181                 }
182         }
183 }
/* File-scope bitset pointer, zero-initialised as a static object.  It is
   not referenced anywhere else in the visible lines; its intended use is
   presumably UTF-8 related, going by the name only -- confirm. */
184 static bitset_t utf8_sb_map;
185 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
186                                          unsigned int length,
187                                          reg_syntax_t syntax)
188 {
189         reg_errcode_t err;
190         re_dfa_t *dfa;
191         preg->fastmap_accurate = 0;
192         preg->not_bol = preg->not_eol = 0;
193         err = analyze(preg);
194         if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
195                 optimize_utf8(dfa);
196         if (dfa->mb_cur_max == 6 && __extension__( {
197                                                   unsigned int __s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string));
198                                                   unsigned int __s2_len= strlen("UTF-8");
199                                                   ((
200 (!((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) || __s1_len >= 4) && (!((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) || __s2_len >= 4)) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string)),
201  __s1_len < 4) ? (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (__extension__( {
202                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             __const
203                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             unsigned
204                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             char
205                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             *__s2
206                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             =
207                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             (__const
208                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              unsigned
209                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              char
210                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              *)
211                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             (__const
212                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              char
213                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              *)
214                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             ("UTF-8");
215                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             register
216                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             int
217                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             __result
218                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             =
219                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             (((__const unsigned char *)(__const char *)(((*_nl_current_LC_CTYPE)->values[0].string)))[0] - __s2[0]); __result;}
220               ))):                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) && (__s2_len = strlen("UTF-8"), __s2_len < 4) ? (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (__extension__( {
221                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   __const
222                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   unsigned
223                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   char
224                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   *__s1
225                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   =
226                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   (__const
227                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    unsigned
228                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    char
229                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    *)
230                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   (__const
231                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    char
232                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    *)
233                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   (((*_nl_current_LC_CTYPE)->values[0].string)); register int __result = __s1[0] - ((__const unsigned char *)(__const char *)("UTF-8"))[0]; __result;}
234       ))):                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));}
235             ) == 0)
236                 dfa->is_utf8 = 1;
237         dfa->map_notascii =
238             
239 (*_nl_current_LC_CTYPE)->values[0].word != 0;
240         if (dfa->mb_cur_max > 1) {
241                 if (dfa->is_utf8)
242                         dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
243                 else {
244                         int i;
245                         int j;
246                         int ch;
247                         dfa->sb_char =
248                             (re_bitset_ptr_t) calloc(sizeof(bitset_t), 1);
249                         for (
250 ;
251                              i < (256 / (sizeof(unsigned long int) * 8));)
252                                 for (
253 ; j < 8;
254                                      ++j, ++ch
255 ) {
256                                         unsigned int wch = __btowc(ch);
257                                         if (wch != (0xffffffffu))
258                                                 dfa->sb_char[i] |= 1 << j;
259                                 }
260                 }
261         }
262         int node;
263         int i;
264         int mb_chars;
265         int has_period;
266         for (; node < dfa->nodes_len;)
267                 switch (dfa->nodes[node].type) {
268                 case CHARACTER:
269                         if (dfa->nodes[node].opr.c >= 0x80)
270                                 mb_chars = 1;
271                         for (;
272                              i < (256 / (sizeof(unsigned long int) * 8));)
273                                 if (dfa->nodes[node].opr.sbcset[i])
274                                         return;
275                 }
276         dfa->has_mb_node = dfa->nbackref > 0 || has_period;
277 }
/*
 * analyze - check the DFA attached to a compiled pattern and walk its
 * sub-expression map.
 *
 * preg: compiled pattern; preg->buffer is reinterpreted as a re_dfa_t.
 *
 * Returns REG_ESPACE when any of dfa->nexts, dfa->org_indices, dfa->edests
 * or dfa->eclosures is 0.  Every other path reaches the closing brace
 * without a return statement, so no value is produced there even though the
 * declared return type is reg_errcode_t.
 *
 * NOTE(review): the enclosing path (ICEs.git / hppa-ICE-insn-constraints)
 * suggests this file is a reduced compiler-ICE reproducer, so the oddities
 * flagged below (uninitialised index, loop without increment, missing
 * return) are presumably intentional -- confirm before "fixing" any of them.
 */
278 static reg_errcode_t analyze(regex_t * preg)
279 {
280         re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
281         if (dfa->nexts == 0 || dfa->org_indices == 0 || dfa->edests == 0
282              || dfa->eclosures == 0)
283                 return REG_ESPACE;	/* one of the four tables is missing */
284         if (dfa->subexp_map != 0) {
285                 int i;	/* declared without an initialiser; first read below is of an indeterminate value */
286                 for (; i < preg->re_nsub;)	/* no init and no increment clause: i never changes in this loop; re_nsub is unsigned, so i is compared as unsigned */
287                         dfa->subexp_map[i] = i;	/* store the index itself in slot i */
288                 preorder(dfa->str_tree, optimize_subexps, dfa);	/* run optimize_subexps over str_tree, passing dfa as extra argument */
289                 for (
290 ; i < preg->re_nsub; i++)	/* continues from whatever value i holds; not reset to 0 */
291                         if (dfa->subexp_map[i] != i)
292                                 break;	/* stop at the first slot that no longer holds its own index; i is not used afterwards */
293         }
294 }	/* falls off the end without returning a reg_errcode_t */