git.wh0rd.org - ICEs.git/blob - hppa-ICE-insn-constraints/regex.i.8
initial import
[ICEs.git] / hppa-ICE-insn-constraints / regex.i.8
/* Locale data record: a count field followed by a flexible array of
   values.  Each value is a union that can be read either as a string
   pointer or as an unsigned word; the code in this file reads
   values[0] both ways. */
1 struct locale_data {
/* Count field; not read by any code visible in this file. */
2 unsigned int nstrings;
3 union locale_data_value {
4 char *string;
5 unsigned int word;
/* Flexible array member: storage is supplied by whoever allocates the
   enclosing struct. */
6 } values[];
7 };
/* Thread-local pointer to a pointer to struct locale_data, declared with
   the initial-exec TLS model.  Code below reaches the data as
   (*_nl_current_LC_CTYPE)->values[0]. */
8 extern __thread struct locale_data **_nl_current_LC_CTYPE
9 __attribute__ ((tls_model("initial-exec")));
/* Syntax option bits are carried in an unsigned long. */
10 typedef unsigned long int reg_syntax_t;
/* Global syntax options; __re_compile_pattern passes this value on to
   re_compile_internal. */
11 extern reg_syntax_t re_syntax_options;
/* Error code type returned by the static compile helpers.  Only one
   enumerator, REG_ENOSYS (value 0), is present in this file. */
12 typedef enum {
13 REG_ENOSYS
14 } reg_errcode_t;
/* Compiled-pattern handle passed around by the functions below. */
15 typedef struct {
/* Cast to re_dfa_t * by __re_compile_fastmap, re_compile_fastmap_iter
   and analyze. */
16 unsigned char *buffer;
/* Option bits; tested against the masks 0x40000 and 0x4000 in
   re_compile_fastmap_iter. */
17 reg_syntax_t syntax;
/* Byte table written by re_set_fastmap (fastmap[x] = 1). */
18 char *fastmap;
/* Only compared against 0 in re_compile_internal. */
19 unsigned char *translate;
/* Upper bound of the loops over dfa->subexp_map in analyze. */
20 unsigned int re_nsub;
21 } regex_t;
/* Thread-local slot that __ctype_tolower_loc treats as holding an int *
   table pointer. */
22 extern __thread void *__libc_tsd_CTYPE_TOLOWER;
/* Returns the address of the thread-local slot above, viewed as int **.
   If the slot is currently null it is first filled with a pointer derived
   from the current locale data (see below). */
23 inline int **__ctype_tolower_loc(void)
24 {
/* Union used to view the same address as void ** and as int **. */
25 union {
26 void **ptr;
27 int **tablep;
28 } u;
29 u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
/* Lazy initialisation: only performed while the slot is null. */
30 if (*u.tablep == 0)
31 *u.tablep =
/* The cast to int * is applied before the addition, so the offset is
   128 int elements past the start of values[0].string. */
32 ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128);
33 return u.tablep;
34 }
/* Looks up __c through the pointer returned by __ctype_tolower_loc() and
   returns the int found there.  No range check is made on __c. */
35 inline int tolower(int __c)
36 {
/* NOTE(review): postfix [] binds tighter than unary *, so this indexes the
   returned int ** by __c and then dereferences that element; it is not
   the same as (*__ctype_tolower_loc())[__c].  Possibly an artifact of
   test-case reduction -- confirm before relying on it. */
37 return *__ctype_tolower_loc()[__c];
38 }
/* Empty conversion-state struct (an empty struct is a GNU C extension).
   An object of this type is passed by address to __wcrtomb in
   re_compile_fastmap_iter. */
39 typedef struct {
40 } mbstate_t;
/* Both names below alias "pointer to unsigned long".  utf8_sb_map is
   declared as bitset_t and cast to re_bitset_ptr_t when assigned to
   dfa->sb_char. */
41 typedef unsigned long int *bitset_t;
42 typedef unsigned long int *re_bitset_ptr_t;
/* Set of node indices: nelem is used as the loop bound and elems[] is
   indexed up to it in re_compile_fastmap_iter. */
43 typedef struct {
44 int nelem;
45 int *elems;
46 } re_node_set;
/* Token/node type codes.  Values 0..7 are listed directly; the five
   entries written as 8 | n evaluate to 8..12; CONCAT, SUBEXP and
   OP_DUP_PLUS are 16, 17 and 18.  Only CHARACTER and COMPLEX_BRACKET are
   referenced by the code visible in this file. */
47 typedef enum {
48 NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
49 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
50 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
51 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
52 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS =
53 18,
54 } re_token_type_t;
/* Character-set payload of a COMPLEX_BRACKET node: re_compile_fastmap_iter
   walks mbchars[] using nmbchars as the bound and hands each element to
   __wcrtomb. */
55 typedef struct {
56 long int *mbchars;
57 int nmbchars;
58 } re_charset_t;
/* One DFA node/token: a payload union plus a type tag. */
59 typedef struct {
/* Payload.  Presumably the valid member is selected by `type`; the visible
   code reads opr.c and opr.sbcset for CHARACTER nodes and opr.mbcset for
   COMPLEX_BRACKET nodes -- TODO confirm against the unreduced source. */
60 union {
61 unsigned char c;
62 re_bitset_ptr_t sbcset;
63 re_charset_t *mbcset;
64 } opr;
/* Enum-typed bit-field, 8 bits wide. */
65 re_token_type_t type:8;
66 } re_token_t;
/* Forward typedefs.  struct bin_tree_t is never defined in the visible
   code; it is only used through pointers. */
67 typedef struct re_dfa_t re_dfa_t;
68 typedef struct bin_tree_t bin_tree_t;
/* DFA state: here just the set of node indices that belong to it. */
69 struct re_dfastate_t {
70 re_node_set nodes;
71 };
72 typedef struct re_dfastate_t re_dfastate_t;
/* DFA description reached by casting regex_t.buffer. */
73 struct re_dfa_t {
/* Node array and its length (loop bound in re_compile_internal). */
74 re_token_t *nodes;
75 unsigned int nodes_len;
/* State whose node set re_compile_fastmap_iter iterates over. */
76 re_dfastate_t *init_state;
/* Passed to preorder() in analyze. */
77 bin_tree_t *str_tree;
/* Bitset written in re_compile_internal: either pointed at utf8_sb_map or
   updated word by word. */
78 re_bitset_ptr_t sb_char;
/* has_mb_node is set when nbackref > 0 (or has_period is nonzero). */
79 int nbackref;
/* One-bit flags. */
80 unsigned int has_mb_node:1;
81 unsigned int is_utf8:1;
82 unsigned int map_notascii:1;
/* Compared against 1 and 6 by the code below. */
83 int mb_cur_max;
/* When non-null, analyze fills it with subexp_map[i] = i. */
84 int *subexp_map;
85 };
/* Forward declarations of the file-local helpers.  re_compile_internal,
   re_compile_fastmap_iter and analyze are defined further down;
   optimize_subexps has no definition in the visible code and is only
   passed as a callback argument in analyze. */
86 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
87 unsigned int length,
88 reg_syntax_t syntax);
89 static void re_compile_fastmap_iter(regex_t * bufp,
90 const re_dfastate_t * init_state,
91 char *fastmap);
92 static reg_errcode_t analyze(regex_t * preg);
93 static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/* Entry point written as an old-style (K&R) definition.  Forwards its
   three arguments plus the global re_syntax_options to
   re_compile_internal and stores the result in a local.
   NOTE(review): only `pattern` has a declaration; `length` and `bufp` have
   none, so under pre-C99 rules they default to int, and bufp is then
   passed where a regex_t * is expected.  Looks like a reduction artifact
   -- confirm before changing. */
94 const char *__re_compile_pattern(pattern, length, bufp)
95 const char *pattern;
96 {
97 reg_errcode_t ret;
98 ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
/* NOTE(review): no return statement although the function is declared to
   return const char *; `ret` is never used. */
99 }
100
/* Runs re_compile_fastmap_iter over the initial state of the DFA stored
   in bufp->buffer, writing into bufp->fastmap.
   NOTE(review): declared to return int but has no return statement. */
101 int __re_compile_fastmap(regex_t * bufp)
102 {
103 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
104 char *fastmap = bufp->fastmap;
105 re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
106 }
/* Marks one entry of fastmap with 1, but only when icase is nonzero; with
   icase == 0 the function does nothing.  The index used is the local
   __res, which is assigned tolower(ch) only on the ch == 0 path. */
107 static inline void re_set_fastmap(char *fastmap, int icase, int ch)
108 {
109 if (icase) {
110 int __res;
/* ch is an int, so this compares sizeof(int) with 1. */
111 if (sizeof(ch) > 1) {
/* Empty branch for ch != 0: __res is left unassigned here. */
112 if (ch) {
113 } else
114 __res = tolower(ch);
115 }
/* NOTE(review): when ch != 0, __res is read without ever being written.
   Presumably the remains of a reduced macro expansion -- confirm. */
116 fastmap[__res] = 1;
117 }
118 }
/* Walks the node indices in init_state->nodes and, for CHARACTER and
   COMPLEX_BRACKET nodes, calls re_set_fastmap on `fastmap`.  The DFA is
   taken from bufp->buffer.
   NOTE(review): several loop indices below are read without being
   initialised and several loops have no increment; this looks like the
   output of automated test-case reduction, so the code is documented
   as-is rather than repaired. */
119 static void re_compile_fastmap_iter(regex_t * bufp,
120 const re_dfastate_t * init_state,
121 char *fastmap)
122 {
123 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
124 int node_cnt;
/* icase is 1 only when mb_cur_max == 1 and bit 0x40000 of the syntax word
   is set. */
125 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
126 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
127 int node = init_state->nodes.elems[node_cnt];
128 re_token_type_t type = dfa->nodes[node].type;
129 if (type == CHARACTER) {
130 if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
/* The allocated buffer is never used; alloca has no declaration in the
   visible code. */
131 void *buf = alloca(dfa->mb_cur_max);
132 }
/* i and ch are used below without initialisation. */
133 int i;
134 int ch;
/* Bound is the number of unsigned-long words needed for 256 bits; the loop
   has neither an initialiser nor an increment. */
135 for (; i < (256 / (sizeof(unsigned long int) * 8));) {
136 int j;
137 unsigned long int w =
138 dfa->nodes[node].opr.sbcset[i];
/* j is uninitialised and never incremented; w is never shifted, so the
   same low bit is tested on every pass. */
139 for (; j < 8;)
140 if (w & 1)
141 re_set_fastmap(fastmap, icase,
142 ch);
143 }
144 } else if (type == COMPLEX_BRACKET) {
/* i is used below without initialisation and is never incremented. */
145 int i;
146 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
147 for (; i < cset->nmbchars;) {
148 char buf[256];
/* state is passed to __wcrtomb without being initialised. */
149 mbstate_t state;
/* Any result other than -1 causes the first byte of buf to be recorded
   via re_set_fastmap. */
150 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
151 -1)
152 re_set_fastmap(fastmap, icase, *buf);
/* Empty body.  Note the mask here is 0x4000, not the 0x40000 used above. */
153 if ((bufp->syntax & 0x4000)
154 && dfa->mb_cur_max > 1) {
155 }
156 }
157 }
158 }
159 }
/* File-local bitset pointer; assigned to dfa->sb_char when dfa->is_utf8 is
   set.  Never given a value in the visible code, so it is a null pointer
   by static initialisation. */
160 static bitset_t utf8_sb_map;
/* Calls analyze(preg), then updates several fields of a re_dfa_t
   (is_utf8, map_notascii, sb_char, has_mb_node) based on the current
   locale data and on the DFA nodes.  `pattern` and `length` are not used
   in the body.
   NOTE(review): `dfa` is declared but never assigned before it is
   dereferenced, many locals are read uninitialised, and no path returns a
   value.  This looks like machine-reduced code kept to reproduce a
   compiler problem -- do not "fix" without confirming. */
161 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
162 unsigned int length,
163 reg_syntax_t syntax)
164 {
165 reg_errcode_t err;
166 re_dfa_t *dfa;
/* The result is stored but never inspected. */
167 err = analyze(preg);
/* optimize_utf8 has no declaration in the visible code. */
168 if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
169 optimize_utf8(dfa);
/* GNU statement expression whose value is compared with 0 below.  It
   appears to be an expanded inline strcmp macro comparing the locale's
   values[0].string with "UTF-8" -- every non-strcmp arm falls back to the
   difference of the first characters. */
170 if (dfa->mb_cur_max == 6 && __extension__( {
171 unsigned int __s1_len =
172 strlen(((*_nl_current_LC_CTYPE)->values[0].string));
173 unsigned int __s2_len = strlen("UTF-8");
174 (((!((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) || __s1_len >= 4) && (!((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) || __s2_len >= 4)) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string)), __s1_len < 4) ? (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (
175 ((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U')
176
177 )):
178 (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) && (
179 __s2_len < 4) ? (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (
180 ((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U')
181 )): strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));}
182 ) == 0)
183 dfa->is_utf8 = 1;
/* Same union slot as above, this time read through its `word` member. */
184 dfa->map_notascii = (*_nl_current_LC_CTYPE)->values[0].word != 0;
185 if (dfa->mb_cur_max > 1) {
186 if (dfa->is_utf8)
187 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
188 else {
/* i, j and ch are all used below without initialisation. */
189 int i;
190 int j;
191 int ch;
/* The condition tests i, but only j and ch are incremented. */
192 for (;
193 i < (256 / (sizeof(unsigned long int) * 8));
194 ++j, ++ch) {
195 unsigned int wch = __btowc(ch);
/* Sets bit j of word i unless __btowc returned 0xffffffff.  The shifted
   constant 1 has type int. */
196 if (wch != (0xffffffffu))
197 dfa->sb_char[i] |= 1 << j;
198 }
199 }
200 }
/* node, i, mb_chars and has_period are never initialised. */
201 int node;
202 int i;
203 int mb_chars;
204 int has_period;
/* Loop over nodes with no increment; the switch has a single case. */
205 for (; node < dfa->nodes_len;)
206 switch (dfa->nodes[node].type) {
207 case CHARACTER:
208 if (dfa->nodes[node].opr.c >= 0x80)
209 mb_chars = 1;
210 for (; i < (256 / (sizeof(unsigned long int) * 8));)
211 if (dfa->nodes[node].opr.sbcset[i])
/* NOTE(review): bare `return;` in a function declared to return
   reg_errcode_t. */
212 return;
213 }
214 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
/* NOTE(review): falls off the end without returning a value. */
215 }
/* When the DFA in preg->buffer has a non-null subexp_map, fills it with
   subexp_map[i] = i, calls preorder() on dfa->str_tree with
   optimize_subexps as the callback and dfa as the extra argument, then
   scans for the first index whose entry differs from the index itself.
   NOTE(review): `i` is never initialised, the first loop has no increment,
   the result of the scan is unused, preorder has no declaration in the
   visible code, and the function returns no value despite its
   reg_errcode_t return type.  Presumably reduction artifacts -- confirm
   before changing. */
216 static reg_errcode_t analyze(regex_t * preg)
217 {
218 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
219 if (dfa->subexp_map != 0) {
220 int i;
/* i (int) is compared against re_nsub (unsigned int). */
221 for (; i < preg->re_nsub;)
222 dfa->subexp_map[i] = i;
223 preorder(dfa->str_tree, optimize_subexps, dfa);
/* Stops at the first entry that no longer maps to itself. */
224 for (; i < preg->re_nsub; i++)
225 if (dfa->subexp_map[i] != i)
226 break;
227 }
228 }
228 }