git.wh0rd.org Git - ICEs.git/blob - hppa-ICE-insn-constraints/regex.i.11
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.11
/*
 * Locale data record: an unsigned count field followed by a flexible
 * array of values.  Each value is a union that is read either as a
 * string pointer or as an unsigned word.  The code visible in this file
 * only ever accesses values[0].
 * NOTE(review): nstrings is presumably the number of entries in values[];
 * nothing visible here reads it -- confirm against the producer.
 */
1 struct locale_data {
2         unsigned int nstrings;
3         union locale_data_value {
4                 char *string;
5                 unsigned int word;
6         } values[];
7 };
/*
 * Thread-local pointer to a pointer to the LC_CTYPE locale record.  It is
 * always used as (*_nl_current_LC_CTYPE)->values[0] in this file.
 */
8 extern __thread struct locale_data **_nl_current_LC_CTYPE;
/* Bit set of regex syntax flags; tested with literal masks in this file. */
9 typedef unsigned long int reg_syntax_t;
/* Syntax flags that __re_compile_pattern passes to re_compile_internal. */
10 extern reg_syntax_t re_syntax_options;
/* Error code type; REG_ENOSYS is the only enumerator (value 0). */
11 typedef enum {
12         REG_ENOSYS
13 } reg_errcode_t;
/*
 * Compiled-pattern handle.
 *   buffer    - declared as unsigned char *, but assigned to a re_dfa_t *
 *               (without a cast) by analyze and re_compile_fastmap_iter.
 *   syntax    - syntax flag bits tested by re_compile_fastmap_iter.
 *   translate - compared against 0 in re_compile_internal.
 *   re_nsub   - loop bound for the subexp_map loops in analyze.
 */
14 typedef struct {
15         unsigned char *buffer;
16         reg_syntax_t syntax;
17         unsigned char *translate;
18         unsigned int re_nsub;
19 } regex_t;
/*
 * Thread-local slot whose address __ctype_tolower_loc reinterprets as an
 * int ** (via a union) and lazily fills in.
 */
20 extern __thread void *__libc_tsd_CTYPE_TOLOWER;
/*
 * Return the address of the thread-local __libc_tsd_CTYPE_TOLOWER slot,
 * viewed as an int **.  If the slot currently holds a null pointer it is
 * first set from (*_nl_current_LC_CTYPE)->values[0].string.
 *
 * The union is used to reinterpret the void ** as an int ** without a
 * pointer cast.
 * NOTE(review): the lazy initialisation stores a char * into an int *
 * with no cast, which compilers diagnose as an incompatible pointer
 * assignment.  The file path suggests a reduced compiler-ICE reproducer,
 * so this may be deliberate -- confirm before changing.
 */
21 inline int **__ctype_tolower_loc(void)
22 {
23         union {
24                 void **ptr;
25                 int **tablep;
26         } u;
27         u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
           /* Fill the slot on first use only. */
28         if (*u.tablep == 0)
29                 *u.tablep = (*_nl_current_LC_CTYPE)->values[0].string;
30         return u.tablep;
31 }
/*
 * Look up __c through the pointer returned by __ctype_tolower_loc() and
 * return the int found there.
 *
 * NOTE(review): because [] binds tighter than unary *, the expression
 * parses as *(__ctype_tolower_loc()[__c]): it indexes the int ** result
 * by __c and then dereferences that element.  A lookup in the table the
 * slot points to would be written (*__ctype_tolower_loc())[__c].  The
 * file path suggests a reduced compiler-ICE reproducer, so the current
 * form may be an artefact of reduction -- confirm before reuse.
 */
32 inline int tolower(int __c)
33 {
34         return *__ctype_tolower_loc()[__c];
35 }
/*
 * Conversion-state type handed to __wcrtomb by re_compile_fastmap_iter.
 * It has no members here; an empty struct is a GNU C extension, not
 * ISO C.
 */
36 typedef struct {
37 } mbstate_t;
/*
 * Set of node indices: elems[0 .. nelem-1] are iterated by
 * re_compile_fastmap_iter and used as indices into re_dfa_t.nodes.
 */
38 typedef struct {
39         int nelem;
40         int *elems;
41 } re_node_set;
/*
 * Token/node kinds.  Only these two enumerators are declared here;
 * re_compile_fastmap_iter branches on both, re_compile_internal on
 * CHARACTER.
 */
42 typedef enum {
43         CHARACTER = 1, COMPLEX_BRACKET = 6
44 } re_token_type_t;
/*
 * Multibyte character set: mbchars[0 .. nmbchars-1] are the values that
 * re_compile_fastmap_iter passes one at a time to __wcrtomb.
 */
45 typedef struct {
46         long int *mbchars;
47         int nmbchars;
48 } re_charset_t;
/*
 * One node of the compiled pattern: an operand union plus a kind tag.
 * In the visible code, opr.c and opr.sbcset are read when type is
 * CHARACTER, and opr.mbcset is read when type is COMPLEX_BRACKET.
 */
49 typedef struct {
50         union {
51                 unsigned char c;
52                 unsigned long int * sbcset;
53                 re_charset_t *mbcset;
54         } opr;
55         re_token_type_t type;
56 } re_token_t;
/* Forward typedef; struct re_dfa_t is defined further down in this file. */
57 typedef struct re_dfa_t re_dfa_t;
/*
 * Forward typedef; struct bin_tree_t is never completed in the visible
 * source and is only used through pointers.
 */
58 typedef struct bin_tree_t bin_tree_t;
/* DFA state; the visible code only uses its set of node indices. */
59 typedef struct {
60         re_node_set nodes;
61 } re_dfastate_t;
/*
 * Compiled-pattern state reached through regex_t.buffer.  The field
 * notes below describe only how the visible code uses each member.
 */
62 struct re_dfa_t {
           /* Node array, indexed by node number. */
63         re_token_t *nodes;
           /* Upper bound of the node loop in re_compile_internal. */
64         unsigned int nodes_len;
           /* State whose node set __re_compile_fastmap hands to the iterator. */
65         re_dfastate_t *init_state;
           /* Tree passed to preorder() by analyze. */
66         bin_tree_t *str_tree;
           /* Bitmap words: set to utf8_sb_map or OR-ed bit by bit. */
67         unsigned long int * sb_char;
           /* Tested for > 0 when computing has_mb_node. */
68         int nbackref;
           /* One-bit flags written by re_compile_internal. */
69         unsigned int has_mb_node:1;
70         unsigned int is_utf8:1;
71         unsigned int map_notascii:1;
           /* Compared against 1 and 6 to select single/multibyte paths. */
72         int mb_cur_max;
           /* When non-null, analyze writes subexp_map[i] = i into it. */
73         int *subexp_map;
74 };
/* Forward declaration; analyze is defined later in this file. */
75 static reg_errcode_t analyze(regex_t * preg);
/*
 * Declared but not defined in the visible source; analyze passes its
 * address to preorder().
 */
76 static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/*
 * Mark one entry of fastmap when icase is non-zero; do nothing otherwise.
 *
 *   fastmap - byte array indexed by __res; the chosen entry is set to 1.
 *   icase   - gate for the whole body.
 *   ch      - input value; only ch == 0 leads to a tolower() call.
 *
 * NOTE(review): __res is assigned only on the ch == 0 path
 * (__res = tolower(ch)).  For any non-zero ch the branch body is empty,
 * so fastmap[__res] indexes with an uninitialized value.  The file path
 * suggests a reduced compiler-ICE reproducer, so this shape may be an
 * artefact of reduction -- confirm before changing.
 */
77 static inline void re_set_fastmap(char *fastmap, int icase, int ch)
78 {
79         if (icase) {
80                 int __res;
                   /* sizeof(ch) is sizeof(int), a compile-time constant. */
81                 if (sizeof(ch) > 1) {
82                         if (ch) {
83                         } else
84                                 __res = tolower(ch);
85                 }
86                 fastmap[__res] = 1;
87         }
88 }
/*
 * Walk every node index in init_state->nodes and, depending on the node
 * kind, call re_set_fastmap() for it.
 *
 *   bufp       - pattern handle; bufp->buffer is used as the re_dfa_t and
 *                bufp->syntax supplies the flag bits tested below.
 *   init_state - state whose nodes.elems[0 .. nelem-1] are visited.
 *   fastmap    - forwarded unchanged to re_set_fastmap().
 *
 * icase is non-zero only when dfa->mb_cur_max == 1 and syntax bit 0x40000
 * is set.
 *
 * NOTE(review): the file path suggests a reduced compiler-ICE
 * reproducer.  Several constructs below look like artefacts of reduction
 * (uninitialized loop variables, loops with no increment, an unused
 * alloca result, calls to undeclared alloca/__wcrtomb) -- confirm before
 * treating any of this as working logic.
 */
89 static void re_compile_fastmap_iter(regex_t * bufp, re_dfastate_t * init_state,
90                                     char *fastmap)
91 {
92         re_dfa_t *dfa;
           /* unsigned char * assigned to re_dfa_t * without a cast. */
93         dfa = bufp->buffer;
94         int node_cnt;
95         int icase;
96         icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
97         for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
98                 int node;
99                 node = init_state->nodes.elems[node_cnt];
100                 re_token_type_t type;
101 type = dfa->nodes[node].type;
102                 if (type == CHARACTER) {
103                         if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
                                    /* buf is never used after this point. */
104                                 void *buf = alloca(dfa->mb_cur_max);
105                         }
                            /* NOTE(review): i and ch are never assigned. */
106                         int i;
107                         int ch;
                            /* No increment: i does not change inside this loop. */
108                         for (; i < (256 / (sizeof(unsigned long int) * 8));) {
                                    /* NOTE(review): j is never assigned or advanced. */
109                                 int j;
110 int w;
111 w = 
112                                     dfa->nodes[node].opr.sbcset[i];
113                                 for (; j < 8;)
114                                         if (w & 1)
115                                                 re_set_fastmap(fastmap, icase,
116                                                                ch);
117                         }
118                 } else if (type == COMPLEX_BRACKET) {
                            /* NOTE(review): i is never assigned or advanced. */
119                         int i;
120                         re_charset_t *cset;
121 cset = dfa->nodes[node].opr.mbcset;
122                         for (; i < cset->nmbchars;) {
123                                 char buf[256];
124                                 mbstate_t state;
                                    /* Only the first byte written to buf is forwarded. */
125                                 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
126                                     -1)
127                                         re_set_fastmap(fastmap, icase, *buf);
                                    /* Empty body: this test has no effect. */
128                                 if ((bufp->syntax & 0x4000)
129                                     && dfa->mb_cur_max > 1) {
130                                 }
131                         }
132                 }
133         }
134 }
/*
 * File-scope bitmap pointer that re_compile_internal stores into
 * dfa->sb_char when dfa->is_utf8 is set.  Nothing in the visible source
 * assigns it, so it keeps its static zero (null) initial value here.
 */
135 static unsigned long int * utf8_sb_map;
/*
 * Run analyze() on preg, then set several flags on a re_dfa_t (is_utf8,
 * map_notascii, sb_char, has_mb_node) from the LC_CTYPE locale record
 * and the node array.
 *
 *   preg    - pattern handle passed to analyze(); preg->translate is
 *             tested once.
 *   pattern - unused in the visible body.
 *   length  - unused in the visible body.
 *   syntax  - only bit 0x400000 is tested.
 *
 * NOTE(review): the file path suggests a reduced compiler-ICE
 * reproducer, and the body is not self-consistent as working code:
 *   - dfa is dereferenced but never assigned;
 *   - err is assigned and never read;
 *   - optimize_utf8, strlen, strcmp and __btowc are called without a
 *     visible declaration;
 *   - i, j, ch, node, mb_chars and has_period are read without being
 *     initialized, and several loops never advance their condition
 *     variable;
 *   - one path executes a bare "return;" and the end of the function has
 *     no return, although the return type is reg_errcode_t.
 * Confirm intent before changing any of it.
 */
136 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
137                                          unsigned int length,
138                                          reg_syntax_t syntax)
139 {
140         reg_errcode_t err;
141         re_dfa_t *dfa;
142         err = analyze(preg);
143         if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
144                 optimize_utf8(dfa);
145         unsigned int __s1_len =
146             strlen((*_nl_current_LC_CTYPE)->values[0].string);
            /*
             * Compares (*_nl_current_LC_CTYPE)->values[0].string against
             * "UTF-8".  Every arm of the nested conditional either calls
             * strcmp(string, "UTF-8") or yields string[0] - 'U'; the arm
             * conditions are integer differences of casted pointers and
             * a test of __s1_len < 4.
             * NOTE(review): this looks like a preprocessed strcmp macro
             * expansion -- confirm.
             */
147         int result =
148             (((!((int)((((*_nl_current_LC_CTYPE)->values[0].string))) -
149                  (int)(((*_nl_current_LC_CTYPE)->values[0].string))))
150               && (!((int)(("UTF-8") + 1) - (int)("UTF-8")))) ?
151              strcmp(((*_nl_current_LC_CTYPE)->values[0].string),
152                     "UTF-8")
153              : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) -
154                  (int)(((*_nl_current_LC_CTYPE)->values[0].string)))
155                 && __s1_len <
156                 4 ? (((int)(("UTF-8")) - (int)("UTF-8")) ?
157                      strcmp(((*_nl_current_LC_CTYPE)->values[0].string),
158                             "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].
159                                          string[0] -
160                                          'U'))) : (((int)(("UTF-8")) -
161                                                     (int)("UTF-8"))
162                                                    ? (((int)
163                                                        ((((*_nl_current_LC_CTYPE)->values[0].string))) - (int)(((*_nl_current_LC_CTYPE)->values[0].string))) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));
            /* UTF-8 mode needs both a 6-byte max and a zero comparison result. */
164         if (dfa->mb_cur_max == 6 && result == 0)
165                 dfa->is_utf8 = 1;
            /* Reads the same values[0] entry through its word member. */
166         dfa->map_notascii = (*_nl_current_LC_CTYPE)->values[0].word != 0;
167         if (dfa->mb_cur_max > 1) {
168                 if (dfa->is_utf8)
169                         dfa->sb_char =
170 utf8_sb_map;
171                 else {
                            /* NOTE(review): i, j and ch are never assigned. */
172                         int i;
173                         int j;
174                         int ch;
                            /* The condition tests i, but only j and ch advance. */
175                         for (;
176                              i < (256 / (sizeof(unsigned long int) * 8));
177                              ++j, ++ch) {
178                                 unsigned int wch;
179 wch = __btowc(ch);
                                    /* Set bit j of word i unless __btowc gave 0xffffffff. */
180                                 if (wch != (0xffffffffu))
181                                         dfa->sb_char[i] |= 1 << j;
182                         }
183                 }
184         }
            /* NOTE(review): none of these four locals is initialized. */
185         int node;
186         int i;
187         int mb_chars;
188         int has_period;
            /* No increment: node does not change inside this loop. */
189         for (; node < dfa->nodes_len;)
190                 switch (dfa->nodes[node].type) {
191                 case CHARACTER:
192                         if (dfa->nodes[node].opr.c >= 0x80)
193                                 mb_chars = 1;
194                         for (; i < (256 / (sizeof(unsigned long int) * 8));)
195                                 if (dfa->nodes[node].opr.sbcset[i])
                                            /* Bare return in a non-void function. */
196                                         return;
197                 }
198         dfa->has_mb_node = dfa->nbackref > 0 || has_period;
199 }
/*
 * When the DFA reached through preg->buffer has a non-null subexp_map,
 * write subexp_map[i] = i, run preorder() over dfa->str_tree with
 * optimize_subexps as the callback, then scan for the first entry where
 * subexp_map[i] != i.  Does nothing when subexp_map is null.
 *
 *   preg - pattern handle; preg->buffer is used as the re_dfa_t and
 *          preg->re_nsub bounds both loops.
 *
 * NOTE(review): the file path suggests a reduced compiler-ICE
 * reproducer.  i is never initialized, the first loop never advances i,
 * preorder has no visible declaration, the result of the final scan is
 * not used, and no value is returned although the return type is
 * reg_errcode_t -- confirm before changing.
 */
200 static reg_errcode_t analyze(regex_t * preg)
201 {
202         re_dfa_t *dfa;
    /* unsigned char * assigned to re_dfa_t * without a cast. */
203 dfa = preg->buffer;
204         if (dfa->subexp_map != 0) {
205                 int i;
206                 for (; i < preg->re_nsub;)
207                         dfa->subexp_map[i] = i;
208                 preorder(dfa->str_tree, optimize_subexps, dfa);
                    /* Continues from whatever value i has after the first loop. */
209                 for (; i < preg->re_nsub; i++)
210                         if (dfa->subexp_map[i] != i)
211                                 break;
212         }
213 }
/*
 * Externally visible entry point: forwards bufp, pattern and length to
 * re_compile_internal() together with the global re_syntax_options.
 *
 * NOTE(review): ret is assigned and never read, and the function returns
 * no value although it is declared to return const char *.  The file
 * path suggests a reduced compiler-ICE reproducer -- confirm before
 * changing.
 */
214 const char *__re_compile_pattern(const char *pattern, unsigned int length,
215                                  regex_t * bufp)
216 {
217         reg_errcode_t ret =
218             re_compile_internal(bufp, pattern, length, re_syntax_options);
219 }
220
/*
 * Externally visible entry point: calls re_compile_fastmap_iter() for
 * bufp with dfa->init_state and fastmap.
 *
 * NOTE(review): dfa and fastmap are locals that are never assigned, so
 * both arguments are derived from uninitialized pointers.  The file path
 * suggests a reduced compiler-ICE reproducer -- confirm before changing.
 */
221 void __re_compile_fastmap(regex_t * bufp)
222 {
223         re_dfa_t *dfa;
224         char *fastmap;
225         re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
226 }