git.wh0rd.org Git - ICEs.git/blob - hppa-ICE-insn-constraints/regex.i.10
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.10
/*
 * Locale data record: an unsigned count field followed by a flexible
 * array of values.  Each value is a union, so its `string` and `word`
 * members share the same storage; code in this file reads values[0]
 * through both members.
 */
1 struct locale_data {
          /* Not referenced anywhere else in this file; presumably the
           * number of entries in values[] -- TODO confirm. */
2         unsigned int nstrings;
3         union locale_data_value {
4                 char *string;
5                 unsigned int word;
6         } values[];
7 };
/* Thread-local pointer to a pointer to a locale_data record; only declared
 * here, the definition lives outside this file. */
8 extern __thread struct locale_data **_nl_current_LC_CTYPE;
/* Integer type holding regex syntax flag bits (tested with `&` against hex
 * masks by the functions below). */
9 typedef unsigned long int reg_syntax_t;
/* Global syntax flags; __re_compile_pattern forwards this value to
 * re_compile_internal. */
10 extern reg_syntax_t re_syntax_options;
/* Error-code type used as the return type of the static compile helpers.
 * Only a single enumerator is present in this file. */
11 typedef enum {
12         REG_ENOSYS
13 } reg_errcode_t;
/*
 * Pattern buffer handed to the compile routines in this file.
 */
14 typedef struct {
           /* Cast to `re_dfa_t *` by every function here that uses it. */
15         unsigned char *buffer;
           /* Flag bits; re_compile_fastmap_iter tests 0x40000 and 0x4000. */
16         reg_syntax_t syntax;
           /* Table passed down to re_set_fastmap, which stores 1 into it. */
17         char *fastmap;
           /* Only compared against 0 (in re_compile_internal). */
18         unsigned char *translate;
           /* Upper bound of the subexp_map loops in analyze(). */
19         unsigned int re_nsub;
20 } regex_t;
/* Thread-local slot; declared here as `void *`, defined elsewhere. */
21 extern __thread void *__libc_tsd_CTYPE_TOLOWER;
/*
 * Returns the address of the thread-local __libc_tsd_CTYPE_TOLOWER slot,
 * viewed as `int **`.
 *
 * If the slot currently holds a null pointer it is first filled in with
 * values[0].string of the record reached through _nl_current_LC_CTYPE,
 * reinterpreted as `int *` and advanced by 128 ints (the cast is applied
 * before the addition).
 */
22 inline int **__ctype_tolower_loc(void)
23 {
           /* The union converts the `void **` slot address to `int **`
            * without a pointer cast. */
24         union {
25                 void **ptr;
26                 int **tablep;
27         } u;
28         u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
29         if (*u.tablep == 0)
30                 *u.tablep =
31                     ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128);
32         return u.tablep;
33 }
/*
 * Looks up __c via the pointer returned by __ctype_tolower_loc().
 *
 * NOTE(review): `[]` binds tighter than unary `*`, so this evaluates
 * *(__ctype_tolower_loc()[__c]) -- it indexes the returned `int **` by
 * __c and then dereferences that element -- rather than
 * (*__ctype_tolower_loc())[__c].  This may be a deliberate artifact of
 * test-case reduction; confirm before changing.
 */
34 inline int tolower(int __c)
35 {
36         return *__ctype_tolower_loc()[__c];
37 }
/* Conversion-state type with no members (an empty struct is a GNU C
 * extension, not ISO C).  Passed by address to __wcrtomb below. */
38 typedef struct {
39 } mbstate_t;
/* Both names alias `unsigned long int *`.  utf8_sb_map is declared with the
 * first, the sb_char/sbcset members with the second. */
40 typedef unsigned long int *bitset_t;
41 typedef unsigned long int *re_bitset_ptr_t;
/*
 * Set of node indices.  re_compile_fastmap_iter walks elems[0..nelem) and
 * uses each element as an index into dfa->nodes.
 */
42 typedef struct {
43         int nelem;
44         int *elems;
45 } re_node_set;
/* Token kinds.  Only the two values that this file branches on are
 * present; the numeric gaps are as in the original. */
46 typedef enum {
47 CHARACTER = 1, 
48 COMPLEX_BRACKET = 6
49 } re_token_type_t;
/*
 * Character-set payload reached through re_token_t.opr.mbcset.
 * re_compile_fastmap_iter passes mbchars[i] to __wcrtomb and uses nmbchars
 * as the loop bound.
 */
50 typedef struct {
51         long int *mbchars;
52         int nmbchars;
53 } re_charset_t;
/*
 * One node of the compiled pattern: a `type` tag plus a payload union.
 * The three payload members overlap in storage; code in this file reads
 * `c` and `sbcset` for CHARACTER nodes and `mbcset` for COMPLEX_BRACKET
 * nodes.
 */
54 typedef struct {
55         union {
56                 unsigned char c;
57                 re_bitset_ptr_t sbcset;
58                 re_charset_t *mbcset;
59         } opr;
60         re_token_type_t type;
61 } re_token_t;
/* Forward typedefs.  struct bin_tree_t is never completed in this file and
 * is only used through pointers. */
62 typedef struct re_dfa_t re_dfa_t;
63 typedef struct bin_tree_t bin_tree_t;
/* A DFA state; in this file it carries only its set of node indices. */
64 struct re_dfastate_t {
65         re_node_set nodes;
66 };
67 typedef struct re_dfastate_t re_dfastate_t;
/*
 * Compiled-pattern state.  Functions in this file obtain it by casting
 * regex_t.buffer to `re_dfa_t *`.
 */
68 struct re_dfa_t {
           /* Node array, indexed by node number; nodes_len is its loop
            * bound in re_compile_internal. */
69         re_token_t *nodes;
70         unsigned int nodes_len;
           /* State handed to re_compile_fastmap_iter by
            * __re_compile_fastmap. */
71         re_dfastate_t *init_state;
           /* Tree passed to preorder() in analyze(). */
72         bin_tree_t *str_tree;
           /* Set to utf8_sb_map, or OR-ed bit by bit, in
            * re_compile_internal. */
73         re_bitset_ptr_t sb_char;
74         int nbackref;
           /* One-bit flags written/read by re_compile_internal. */
75         unsigned int has_mb_node:1;
76         unsigned int is_utf8:1;
77         unsigned int map_notascii:1;
           /* Compared against 1 and 6 in this file; presumably the locale's
            * maximum bytes per character -- TODO confirm. */
78         int mb_cur_max;
           /* When non-null, analyze() fills/inspects entries
            * [0, re_nsub). */
79         int *subexp_map;
80 };
/* Forward declarations.  analyze() is defined further down;
 * optimize_subexps() has no definition in this file and is only passed by
 * name to preorder(). */
81 static reg_errcode_t analyze(regex_t * preg);
82 static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
83
/*
 * When icase is nonzero, stores 1 into fastmap[__res]; when icase is zero,
 * does nothing.
 *
 * fastmap: table to mark.
 * icase:   nonzero enables the store.
 * ch:      character value; only consulted to choose how __res is set.
 *
 * NOTE(review): __res is assigned only on the path where ch == 0 (it gets
 * tolower(ch)).  For any nonzero ch the inner branch is empty and __res is
 * read uninitialized as the fastmap index.  This looks like residue of a
 * reduced macro expansion and may be intentional for the test case;
 * confirm before changing.
 */
84 static inline void re_set_fastmap(char *fastmap, int icase, int ch)
85 {
86         if (icase) {
87                 int __res;
                   /* ch is an int, so this tests sizeof(int) > 1. */
88                 if (sizeof(ch) > 1) {
89                         if (ch) {
90                         } else
91                                 __res = tolower(ch);
92                 }
93                 fastmap[__res] = 1;
94         }
95 }
/*
 * Walks the node set of init_state and calls re_set_fastmap() for
 * CHARACTER and COMPLEX_BRACKET nodes.
 *
 * bufp:       pattern buffer; bufp->buffer is reinterpreted as re_dfa_t *
 *             and bufp->syntax supplies the flag bits tested below.
 * init_state: state whose nodes.elems[0..nelem) are visited.
 * fastmap:    table forwarded to re_set_fastmap().
 *
 * NOTE(review): several locals (i, j, ch, state) are read without ever
 * being initialized, and the inner loops have no increment, so they never
 * advance.  alloca() and __wcrtomb() are called without a visible
 * declaration.  These look like artifacts of test-case reduction; confirm
 * before "fixing" any of them.
 */
96 static void re_compile_fastmap_iter(regex_t * bufp,
97 re_dfastate_t * init_state,
98                                     char *fastmap)
99 {
100         re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
101         int node_cnt;
            /* Case folding is requested only when mb_cur_max is exactly 1
             * and syntax bit 0x40000 is set. */
102         int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
103         for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
104                 int node = init_state->nodes.elems[node_cnt];
105                 re_token_type_t type = dfa->nodes[node].type;
106                 if (type == CHARACTER) {
                            /* The allocation result is stored in a
                             * block-local that is never used. */
107                         if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
108                                 void *buf = alloca(dfa->mb_cur_max);
109                         }
110                         int i;
111                         int ch;
                            /* i is uninitialized and never incremented. */
112                         for (; i < (256 / (sizeof(unsigned long int) * 8));) {
113                                 int j;
                                    /* Reads the sbcset member of the payload
                                     * union although this branch is selected
                                     * by type == CHARACTER. */
114                                 unsigned long int w =
115                                     dfa->nodes[node].opr.sbcset[i];
                                    /* j is uninitialized and never
                                     * incremented; w is never shifted. */
116                                 for (; j < 8;)
117                                         if (w & 1)
118                                                 re_set_fastmap(fastmap, icase,
119                                                                ch);
120                         }
121                 } else if (type == COMPLEX_BRACKET) {
122                         int i;
123                         re_charset_t *cset = dfa->nodes[node].opr.mbcset;
                            /* i is uninitialized and never incremented. */
124                         for (; i < cset->nmbchars;) {
125                                 char buf[256];
126                                 mbstate_t state;
                                    /* On any result other than -1, the first
                                     * byte written to buf is passed on as the
                                     * character. */
127                                 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
128                                     -1)
129                                         re_set_fastmap(fastmap, icase, *buf);
                                    /* Empty body; note the mask here is
                                     * 0x4000, not 0x40000 as above. */
130                                 if ((bufp->syntax & 0x4000)
131                                     && dfa->mb_cur_max > 1) {
132                                 }
133                         }
134                 }
135         }
136 }
/* File-local bitset pointer.  It is never assigned in this file, so as a
 * static object it starts out as a null pointer. */
137 static bitset_t utf8_sb_map;
/*
 * Runs analyze() on preg and then updates several fields of a re_dfa_t
 * (is_utf8, map_notascii, sb_char, has_mb_node).
 *
 * preg:    pattern buffer; passed to analyze(), and preg->translate is
 *          compared against 0.
 * pattern: unused in this body.
 * length:  unused in this body.
 * syntax:  flag bits; only bit 0x400000 is tested.
 *
 * NOTE(review): the local `dfa` is declared but never assigned before it
 * is dereferenced.  `err` is written but never read.  The only `return`
 * statement carries no value, and control can also fall off the end, even
 * though the function is declared to return reg_errcode_t.  Several
 * locals (i, j, ch, node, mb_chars, has_period) are read uninitialized,
 * and optimize_utf8(), strlen(), strcmp() and __btowc() are called
 * without visible declarations.  All of this looks like residue of
 * test-case reduction; confirm before changing any of it.
 */
138 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
139                                          unsigned int length,
140                                          reg_syntax_t syntax)
141 {
142         reg_errcode_t err;
143         re_dfa_t *dfa;
144         err = analyze(preg);
145         if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
146                 optimize_utf8(dfa);
            /* Length of values[0].string of the current LC_CTYPE record;
             * used once, inside the conditional expression below. */
147         unsigned int __s1_len =
148             strlen((*_nl_current_LC_CTYPE)->values[0].string);
            /*
             * Nested conditional expression whose selectors are built from
             * pointer-to-int differences and `__s1_len < 4`.  Every arm
             * evaluates either strcmp(<values[0].string>, "UTF-8") or
             * <values[0].string>[0] - 'U'.  Presumably what is left of a
             * macro-expanded strcmp() -- TODO confirm.
             */
149         int result =
150             (((!((int)((((*_nl_current_LC_CTYPE)->values[0].string)) ) -
151                  (int)(((*_nl_current_LC_CTYPE)->values[0].string)) )
152
153 )
154               && (!((int)(("UTF-8") + 1) - (int)("UTF-8"))
155 )) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string),
156                                "UTF-8")
157              : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) -
158                  (int)(((*_nl_current_LC_CTYPE)->values[0].string)) )
159                 && 
160                     __s1_len < 4
161 ? (((int)(("UTF-8") ) - (int)("UTF-8")
162                                       ) ? strcmp(((*_nl_current_LC_CTYPE)->
163                                                    values[0].string),
164                                                   "UTF-8")
165                                      : (((*_nl_current_LC_CTYPE)->values[0].
166                                          string[0] -
167                                          'U'))) : (((int)(("UTF-8") ) -
168                                                     (int)("UTF-8") )
169                                                    ? (((int)
170                                                        ((((*_nl_current_LC_CTYPE)->values[0].string)) ) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) 
171 ) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));
            /* Mark the DFA as UTF-8 when mb_cur_max is 6 and the expression
             * above produced 0. */
172         if (dfa->mb_cur_max == 6 && result == 0)
173                 dfa->is_utf8 = 1;
            /* Reads values[0] through the `word` member of the same union
             * whose `string` member was used above. */
174         dfa->map_notascii = (*_nl_current_LC_CTYPE)->values[0].word != 0;
175         if (dfa->mb_cur_max > 1) {
176                 if (dfa->is_utf8)
177                         dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
178                 else {
179                         int i;
180                         int j;
181                         int ch;
                            /* The condition tests i, but only j and ch are
                             * incremented; all three start uninitialized. */
182                         for (;
183                              i < (256 / (sizeof(unsigned long int) * 8));
184                              ++j, ++ch) {
185                                 unsigned int wch = __btowc(ch);
186                                 if (wch != (0xffffffffu))
187                                         dfa->sb_char[i] |= 1 << j;
188                         }
189                 }
190         }
191         int node;
192         int i;
193         int mb_chars;
194         int has_period;
            /* node is uninitialized and never incremented; the switch has a
             * CHARACTER case only. */
195         for (; node < dfa->nodes_len;)
196                 switch (dfa->nodes[node].type) {
197                 case CHARACTER:
                            /* mb_chars is written here but never read. */
198                         if (dfa->nodes[node].opr.c >= 0x80)
199                                 mb_chars = 1;
                            /* Returns (without a value) if the word at
                             * sbcset[i] is nonzero; i never changes. */
200                         for (; i < (256 / (sizeof(unsigned long int) * 8));)
201                                 if (dfa->nodes[node].opr.sbcset[i])
202                                         return;
203                 }
            /* has_period is never assigned anywhere in this function. */
204         dfa->has_mb_node = dfa->nbackref > 0 || has_period;
205 }
/*
 * If the DFA behind preg->buffer has a non-null subexp_map, writes
 * subexp_map[i] = i in a first loop, calls preorder() on str_tree with
 * optimize_subexps and the DFA, then scans for the first entry where
 * subexp_map[i] != i.
 *
 * preg: pattern buffer; preg->buffer is reinterpreted as re_dfa_t * and
 *       preg->re_nsub bounds both loops.
 *
 * NOTE(review): there is no return statement although the declared return
 * type is reg_errcode_t (re_compile_internal stores the result in `err`).
 * `i` is never initialized, and the first loop has no increment.  Because
 * re_nsub is unsigned, `i < preg->re_nsub` compares after converting i to
 * unsigned.  preorder() has no visible declaration.  These are likely
 * reduction artifacts; confirm before changing.
 */
206 static reg_errcode_t analyze(regex_t * preg)
207 {
208         re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
209         if (dfa->subexp_map != 0) {
210                 int i;
211                 for (; i < preg->re_nsub;)
212                         dfa->subexp_map[i] = i;
213                 preorder(dfa->str_tree, optimize_subexps, dfa);
                    /* Stops at the first non-identity entry; nothing is done
                     * with the position afterwards. */
214                 for (; i < preg->re_nsub; i++)
215                         if (dfa->subexp_map[i] != i)
216                                 break;
217         }
218 }
/*
 * Entry point: compiles `pattern` (of `length` bytes) into bufp by calling
 * re_compile_internal() with the global re_syntax_options.
 *
 * NOTE(review): the result is stored in `ret` and never used, and there is
 * no return statement although the declared return type is `const char *`.
 * Likely a reduction artifact; confirm before changing.
 */
219 const char *__re_compile_pattern(const char *pattern, unsigned int length, regex_t *bufp)
220 {
221         reg_errcode_t ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
222 }
/*
 * Entry point: runs re_compile_fastmap_iter() on bufp, using the init_state
 * of the DFA stored in bufp->buffer and bufp->fastmap as the output table.
 */
223 void __re_compile_fastmap(regex_t *bufp)
224 {
225         re_dfa_t *dfa;
            /* bufp->buffer is reinterpreted as the DFA structure. */
226         dfa = (re_dfa_t *) bufp->buffer;
227         char *fastmap;
228         fastmap = bufp->fastmap;
229         re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
230 }