]>
Commit | Line | Data |
---|---|---|
45516216 | 1 | /* Item indices; each one is masked with 0xffff and used to subscript locale_data.values[] further down in this file. */ enum { |
2 | _NL_COLLATE_NRULES, | |
3 | _NL_COLLATE_TABLEMB, | |
4 | _NL_CTYPE_TOLOWER, | |
5 | _NL_CTYPE_CODESET_NAME, | |
6 | _NL_CTYPE_MAP_TO_NONASCII, | |
7 | }; | |
8 | /* Locale data record: nstrings followed by a flexible array of values, each read either as a string pointer or as an unsigned word. */ struct locale_data { | |
9 | unsigned int nstrings; | |
10 | union locale_data_value { | |
11 | const char *string; /* readers below also cast this pointer to an int table */ | |
12 | unsigned int word; | |
13 | } | |
14 | values []; | |
15 | }; | |
16 | /* Thread-local pointer (initial-exec TLS model); read below as (*_nl_current_LC_COLLATE)->values[...]. */ extern __thread struct locale_data *const *_nl_current_LC_COLLATE __attribute__ ((tls_model ("initial-exec"))); | |
17 | /* Same shape as above; read below as (*_nl_current_LC_CTYPE)->values[...]. */ extern __thread struct locale_data *const *_nl_current_LC_CTYPE __attribute__ ((tls_model ("initial-exec"))); | |
18 | typedef unsigned long int reg_syntax_t; /* bit mask; individual bits are tested with & by the functions below */ | |
19 | extern reg_syntax_t re_syntax_options; /* passed as the syntax argument by __re_compile_pattern */ | |
20 | /* Status codes returned by the compile routines in this file; REG_NOERROR is 0 and REG_ENOSYS is -1, the rest count up from REG_NOMATCH = 1. */ typedef enum { | |
21 | REG_ENOSYS = -1, REG_NOERROR = 0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN } | |
22 | reg_errcode_t; | |
23 | /* Pattern handle that the compile and fastmap routines in this file read and update. */ struct re_pattern_buffer { | |
24 | unsigned char *buffer; /* cast to re_dfa_t * by the functions below */ | |
25 | reg_syntax_t syntax; | |
26 | char *fastmap; /* indexed by byte value; re_compile_fastmap_iter may set all 256 entries */ | |
27 | unsigned char * translate; /* re_compile_internal only runs optimize_utf8 when this is 0 */ | |
28 | unsigned int re_nsub; /* loop bound for subexp_map in analyze */ | |
29 | unsigned can_be_null : 1; /* set by re_compile_fastmap_iter on an END_OF_RE node */ | |
30 | unsigned regs_allocated : 2; | |
31 | unsigned fastmap_accurate : 1; | |
32 | unsigned not_bol : 1; | |
33 | unsigned not_eol : 1; | |
34 | }; | |
35 | typedef struct re_pattern_buffer regex_t; | |
36 | /* Thread-local slot that caches the lower-case table pointer used by __ctype_tolower_loc. */ extern __thread void *__libc_tsd_CTYPE_TOLOWER __attribute__ ((tls_model ("initial-exec"))); | |
37 | /* Return the address of the thread-local lower-case table pointer, filling it in on first use from the _NL_CTYPE_TOLOWER locale entry. */ extern inline const int ** __attribute__ ((const)) __ctype_tolower_loc (void) { | |
38 | union { | |
39 | void **ptr; | |
40 | const int **tablep; | |
41 | } | |
42 | u; /* views the void * slot as a const int * slot without a pointer cast */ | |
43 | u.ptr = (&__libc_tsd_CTYPE_TOLOWER); | |
44 | if (__builtin_expect (*u.tablep == 0, 0)) *u.tablep = ((int *) ((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_TOLOWER) & 0xffff)].string) + 128); /* slot still empty: point it 128 ints into the locale table, so negative indices down to -128 stay inside it */ | |
45 | return u.tablep; | |
46 | } | |
47 | /* Map __c through the table from __ctype_tolower_loc when -128 <= __c < 256; any other value is returned unchanged. */ extern __inline int __attribute__ ((__nothrow__)) tolower (int __c) { | |
48 | return __c >= -128 && __c < 256 ? (*__ctype_tolower_loc ())[__c] : __c; | |
49 | } | |
50 | /* Conversion-state type; it has no members in this file. */ typedef struct { | |
51 | } | |
52 | __mbstate_t; | |
53 | typedef __mbstate_t mbstate_t; | |
54 | typedef unsigned long int bitset_word_t; /* one word of a bit set */ | |
55 | typedef bitset_word_t bitset_t[(256 / (sizeof (bitset_word_t) * 8))]; /* enough words to hold 256 bits, one per byte value */ | |
56 | typedef bitset_word_t *re_bitset_ptr_t; | |
57 | /* Constants that optimize_utf8 compares against the opr.idx of ANCHOR nodes. */ typedef enum { | |
58 | LINE_FIRST = 0x0010, | |
59 | LINE_LAST = 0x0020, | |
60 | BUF_FIRST = 0x0040, | |
61 | BUF_LAST = 0x0080, | |
62 | } | |
63 | re_context_type; | |
64 | /* Set of node indices: elems[0 .. nelem-1] are iterated by re_compile_fastmap_iter. */ typedef struct { | |
65 | int nelem; | |
66 | int *elems; | |
67 | } | |
68 | re_node_set; | |
69 | /* Node and token kinds; stored in the 8-bit type bit-field of re_token_t. Values 8 to 12 are written as 8 OR-ed with 0 to 4. */ typedef enum { | |
70 | NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET = 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET = 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP = 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR = 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS = 18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET, OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM, OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM, OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS, OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE, BACK_SLASH } | |
71 | re_token_type_t; | |
72 | /* Character-set description reached through re_token_t.opr.mbcset for COMPLEX_BRACKET nodes. */ typedef struct { | |
73 | long int *mbchars; /* nmbchars entries; each is passed to __wcrtomb by re_compile_fastmap_iter */ | |
74 | unsigned int non_match : 1; | |
75 | int nmbchars; | |
76 | int ncoll_syms; | |
77 | int nequiv_classes; | |
78 | int nranges; | |
79 | int nchar_classes; | |
80 | } | |
81 | re_charset_t; | |
82 | /* One node of the compiled pattern: a payload union plus the node kind. */ typedef struct { | |
83 | union { | |
84 | unsigned char c; /* read by optimize_utf8 for CHARACTER nodes */ | |
85 | re_bitset_ptr_t sbcset; /* bit set indexed word by word in the fastmap and UTF-8 checks */ | |
86 | re_charset_t *mbcset; /* read for COMPLEX_BRACKET nodes */ | |
87 | int idx; /* compared against re_context_type values for ANCHOR nodes */ | |
88 | } | |
89 | opr; | |
90 | re_token_type_t type : 8; | |
91 | } | |
92 | re_token_t; | |
93 | /* Declared with no members in this file. */ struct re_string_t { | |
94 | }; | |
95 | typedef struct re_string_t re_string_t; | |
96 | typedef struct re_dfa_t re_dfa_t; | |
97 | /* Declared with no members in this file; its size is still used by init_dfa. */ struct bin_tree_t { | |
98 | }; | |
99 | typedef struct bin_tree_t bin_tree_t; | |
100 | /* DFA state; only its node set is declared here. */ struct re_dfastate_t { | |
101 | re_node_set nodes; | |
102 | }; | |
103 | typedef struct re_dfastate_t re_dfastate_t; | |
104 | /* Compiled automaton; the routines below obtain it by casting re_pattern_buffer.buffer. */ struct re_dfa_t { | |
105 | re_token_t *nodes; | |
106 | unsigned int nodes_alloc; /* element count for the four arrays allocated in analyze */ | |
107 | unsigned int nodes_len; /* loop bound in optimize_utf8 */ | |
108 | int *nexts; | |
109 | int *org_indices; | |
110 | re_node_set *edests; | |
111 | re_node_set *eclosures; | |
112 | struct re_state_table_entry *state_table; | |
113 | re_dfastate_t *init_state; /* state whose nodes seed the fastmap */ | |
114 | bin_tree_t *str_tree; /* assigned from parse in re_compile_internal */ | |
115 | re_bitset_ptr_t sb_char; /* set in init_dfa when mb_cur_max > 1 */ | |
116 | int str_tree_storage_idx; | |
117 | int nbackref; | |
118 | unsigned int has_mb_node : 1; | |
119 | unsigned int is_utf8 : 1; | |
120 | unsigned int map_notascii : 1; | |
121 | int mb_cur_max; | |
122 | int *subexp_map; /* indexed 0 .. re_nsub-1 in analyze */ | |
123 | }; | |
124 | /* Forward declarations of the file-local helpers used before their definitions. */ static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, unsigned int length, reg_syntax_t syntax); | |
125 | static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap); | |
126 | static reg_errcode_t init_dfa (re_dfa_t *dfa, unsigned int pat_len); | |
127 | static void optimize_utf8 (re_dfa_t *dfa); | |
128 | static reg_errcode_t analyze (regex_t *preg); | |
129 | static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); /* passed as a callback to preorder in analyze; no definition appears in this file */ | |
130 | /* Running byte offsets: entry k is the sum of sizeof over the first k message literals, each sizeof counting the terminating NUL. */ const unsigned int __re_error_msgid_idx[] = { | |
131 | 0, (0 + sizeof "Success"), ((0 + sizeof "Success") + sizeof "No match"), (((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"), ((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character"), (((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name"), ((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash"), (((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference"), ((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"), (((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("), ((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{"), (((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") 
+ sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}"), ((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end"), (((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted"), ((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"), (((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression") + sizeof "Premature end of regular expression"), 
((((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression") + sizeof "Premature end of regular expression") + sizeof "Regular expression too big") }; | |
132 | /* Old-style (K&R) definition: compile pattern into bufp using the global re_syntax_options. Only pattern has a declared type; length and bufp have none here. */ const char * __re_compile_pattern (pattern, length, bufp) const char *pattern; | |
133 | { | |
134 | reg_errcode_t ret; | |
135 | ret = re_compile_internal (bufp, pattern, length, re_syntax_options); | |
136 | } /* NOTE(review): ret is never used and no return statement is present, although the function is declared to return const char * */ | |
137 | /* Old-style (K&R) definition: fill bufp->fastmap from the initial state of the DFA stored in bufp->buffer. */ int __re_compile_fastmap (bufp) struct re_pattern_buffer *bufp; | |
138 | { | |
139 | re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; | |
140 | char *fastmap = bufp->fastmap; | |
141 | re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); | |
142 | } /* NOTE(review): no return statement is present, although the function is declared to return int */ | |
143 | /* When icase is non-zero, set the fastmap entry for the lower-case form of ch to 1. Nothing is written when icase is zero. */ static inline void __attribute__ ((always_inline)) re_set_fastmap (char *fastmap, int icase, int ch) { | |
144 | if (icase) fastmap[(__extension__ ({ | |
145 | int __res; | |
146 | if (sizeof (ch) > 1) { | |
147 | if (__builtin_constant_p (ch)) { | |
148 | } /* NOTE(review): this branch is empty, so __res stays unassigned when ch is a compile-time constant */ | |
149 | else __res = tolower (ch); | |
150 | } | |
151 | __res; | |
152 | } | |
153 | ))] = 1; | |
154 | } | |
155 | /* Walk the nodes of init_state and mark in fastmap the byte values found for each node kind; on a period or END_OF_RE node, mark all 256 entries and return. */ static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap) { | |
156 | re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; | |
157 | int node_cnt; | |
158 | int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1))); /* true only when mb_cur_max is 1 and the tested syntax bit is set; presumably the ignore-case flag - confirm */ | |
159 | for (node_cnt = 0; | |
160 | node_cnt < init_state->nodes.nelem; | |
161 | ++node_cnt) { | |
162 | int node = init_state->nodes.elems[node_cnt]; | |
163 | re_token_type_t type = dfa->nodes[node].type; | |
164 | if (type == CHARACTER) { | |
165 | if ((bufp->syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)) && dfa->mb_cur_max > 1) { | |
166 | unsigned char *buf = __builtin_alloca (dfa->mb_cur_max), *p; /* neither buf nor p is used afterwards in this file */ | |
167 | } | |
168 | int i; int ch; | |
169 | for (i = 0, ch = 0; | |
170 | i < (256 / (sizeof (bitset_word_t) * 8)); | |
171 | ++i) { | |
172 | int j; | |
173 | bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; /* NOTE(review): reads opr.sbcset although this branch is taken for type == CHARACTER */ | |
174 | for (j = 0; | |
175 | j < (sizeof (bitset_word_t) * 8); | |
176 | ++j, ++ch) if (w & ((bitset_word_t) 1 << j)) re_set_fastmap (fastmap, icase, ch); /* ch counts bits across all words, so it is the byte value of bit j in word i */ | |
177 | } | |
178 | } | |
179 | else if (type == COMPLEX_BRACKET) { | |
180 | int i; | |
181 | re_charset_t *cset = dfa->nodes[node].opr.mbcset; | |
182 | if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes || cset->nranges || cset->nchar_classes) { | |
183 | if (((unsigned int) (*_nl_current_LC_COLLATE)->values[((int) (_NL_COLLATE_NRULES) & 0xffff)].word) != 0) { | |
184 | const int *table = (const int *) ((*_nl_current_LC_COLLATE)->values[((int) (_NL_COLLATE_TABLEMB) & 0xffff)].string); | |
185 | for (i = 0; | |
186 | i < 256; | |
187 | ++i) if (table[i] < 0) re_set_fastmap (fastmap, icase, i); /* bytes whose table entry is negative are marked */ | |
188 | } | |
189 | } | |
190 | for (i = 0; | |
191 | i < cset->nmbchars; | |
192 | ++i) { | |
193 | char buf[256]; | |
194 | mbstate_t state; | |
195 | if (__wcrtomb (buf, cset->mbchars[i], &state) != (unsigned int) -1) re_set_fastmap (fastmap, icase, *(unsigned char *) buf); /* on successful conversion, mark the first converted byte */ | |
196 | if ((bufp->syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)) && dfa->mb_cur_max > 1) { | |
197 | if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) != (unsigned int) -1) re_set_fastmap (fastmap, 0, *(unsigned char *) buf); /* icase is passed as 0 here, so re_set_fastmap writes nothing */ | |
198 | } | |
199 | } | |
200 | } | |
201 | else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == END_OF_RE) { | |
202 | memset (fastmap, '\1', sizeof (char) * 256); /* mark every byte value */ | |
203 | if (type == END_OF_RE) bufp->can_be_null = 1; | |
204 | return; /* the map is already full, remaining nodes are skipped */ | |
205 | } | |
206 | } | |
207 | } | |
208 | static const bitset_t utf8_sb_map; /* no initializer, so all bits are zero; init_dfa assigns it to sb_char when is_utf8 is set */ | |
209 | /* Compile pattern (length bytes) under syntax into preg: clear preg's status bit-fields, then call init_dfa, re_string_construct, parse and analyze in that order, and finally optimize_utf8 when the DFA is UTF-8, the tested syntax bit is clear and preg has no translate table. */ static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, unsigned int length, reg_syntax_t syntax) { | |
210 | reg_errcode_t err = REG_NOERROR; | |
211 | re_dfa_t *dfa; /* NOTE(review): never assigned before use in this file; the sibling functions take it from preg->buffer. Left unchanged because this looks like a reduced test case. */ | |
212 | re_string_t regexp; | |
213 | preg->fastmap_accurate = 0; | |
214 | preg->not_bol = preg->not_eol = 0; | |
215 | preg->can_be_null = 0; | |
216 | preg->regs_allocated = 0; | |
217 | err = init_dfa (dfa, length); /* NOTE(review): each err value is overwritten by the next call without being checked */ | |
218 | err = re_string_construct (&regexp, pattern, length, preg->translate, syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1), dfa); /* pass the address of the local regexp object (restored from a mis-decoded character) */ | |
219 | dfa->str_tree = parse (&regexp, preg, syntax, &err); | |
220 | err = analyze (preg); | |
221 | if (dfa->is_utf8 && !(syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)) && preg->translate == 0) optimize_utf8 (dfa); | |
222 | } /* NOTE(review): no return statement is present, although the function is declared to return reg_errcode_t */ | |
223 | /* Zero *dfa and set up its locale-dependent fields. Returns REG_ESPACE when pat_len is 4294967295, when the sb_char allocation fails, or when nodes or state_table is null at the end; REG_NOERROR otherwise. */ static reg_errcode_t init_dfa (re_dfa_t *dfa, unsigned int pat_len) { | |
224 | memset (dfa, '\0', sizeof (re_dfa_t)); | |
225 | dfa->str_tree_storage_idx = ((1024 - sizeof (void *)) / sizeof (bin_tree_t)); | |
226 | if (pat_len == (4294967295U)) return REG_ESPACE; | |
227 | if (dfa->mb_cur_max == 6 && __extension__ ({ /* the statement expression below is an expanded strcmp of the locale codeset name against the literal UTF-8; NOTE(review): mb_cur_max is not assigned after the memset in this file */ | |
228 | unsigned int __s1_len, __s2_len; | |
229 | (__builtin_constant_p (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && __builtin_constant_p ("UTF-8") && (__s1_len = strlen (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)), __s2_len = strlen ("UTF-8"), (!((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) || __s1_len >= 4) && (!((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) || __s2_len >= 4)) ? __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__builtin_constant_p (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) && (__s1_len = strlen (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)), __s1_len < 4) ? (__builtin_constant_p ("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) ? __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__extension__ ({ | |
230 | __const unsigned char *__s2 = (__const unsigned char *) (__const char *) ("UTF-8"); | |
231 | register int __result = (((__const unsigned char *) (__const char *) (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)))[0] - __s2[0]); | |
232 | __result; | |
233 | } | |
234 | ))) : (__builtin_constant_p ("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) && (__s2_len = strlen ("UTF-8"), __s2_len < 4) ? (__builtin_constant_p (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) ? __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__extension__ ({ | |
235 | __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)); | |
236 | register int __result = __s1[0] - ((__const unsigned char *) (__const char *) ("UTF-8"))[0]; | |
237 | if (__s2_len > 0 && __result == 0) { | |
238 | if (__s2_len > 1 && __result == 0) { | |
239 | } | |
240 | } | |
241 | __result; | |
242 | } | |
243 | ))) : __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8")))); | |
244 | } | |
245 | ) == 0) dfa->is_utf8 = 1; | |
246 | dfa->map_notascii = (((unsigned int) (*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_MAP_TO_NONASCII) & 0xffff)].word) != 0); | |
247 | if (dfa->mb_cur_max > 1) { | |
248 | if (dfa->is_utf8) dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; /* shared static map; the cast drops its const qualifier */ | |
249 | else { | |
250 | int i, j, ch; | |
251 | dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); | |
252 | if (__builtin_expect (dfa->sb_char == 0, 0)) return REG_ESPACE; | |
253 | for (i = 0, ch = 0; | |
254 | i < (256 / (sizeof (bitset_word_t) * 8)); | |
255 | ++i) for (j = 0; | |
256 | j < (sizeof (bitset_word_t) * 8); | |
257 | ++j, ++ch) { | |
258 | unsigned int wch = __btowc (ch); | |
259 | if (wch != (0xffffffffu)) dfa->sb_char[i] |= (bitset_word_t) 1 << j; /* set the bit for every byte value that __btowc converts */ | |
260 | } | |
261 | } | |
262 | } | |
263 | if (__builtin_expect (dfa->nodes == 0 || dfa->state_table == 0, 0)) return REG_ESPACE; /* NOTE(review): neither field is assigned after the memset in this file */ | |
264 | return REG_NOERROR; | |
265 | } | |
266 | /* Scan all nodes of dfa; return early on an ANCHOR-style idx outside the four context constants or on a bit set at or above 0x80 in a node's sbcset. Otherwise set has_mb_node from nbackref and has_period. */ static void optimize_utf8 (re_dfa_t *dfa) { | |
267 | int node, i, mb_chars = 0, has_period = 0; | |
268 | for (node = 0; | |
269 | node < dfa->nodes_len; | |
270 | ++node) | |
271 | switch (dfa->nodes[node].type) { /* NOTE(review): no case below ends in break, so each case falls through into the ones after it */ | |
272 | case CHARACTER: | |
273 | if (dfa->nodes[node].opr.c >= 0x80) mb_chars = 1; /* mb_chars is written here but not read anywhere in this function */ | |
274 | case ANCHOR: | |
275 | switch (dfa->nodes[node].opr.idx) { | |
276 | case LINE_FIRST: case LINE_LAST: case BUF_FIRST: case BUF_LAST: break; /* leaves only the inner switch */ | |
277 | default: return; | |
278 | } | |
279 | case OP_PERIOD: has_period = 1; | |
280 | case SIMPLE_BRACKET: ((0x80 % (sizeof (bitset_word_t) * 8) == 0) ? (void) (0) : (__assert_fail ("0x80 % (sizeof (bitset_word_t) * 8) == 0", "regcomp.c", 1059, __PRETTY_FUNCTION__), (void) (0))); /* expanded assert: 0x80 must fall on a word boundary of the bit set */ | |
281 | for (i = 0x80 / (sizeof (bitset_word_t) * 8); | |
282 | i < (256 / (sizeof (bitset_word_t) * 8)); | |
283 | ++i) if (dfa->nodes[node].opr.sbcset[i]) return; /* any member with value 0x80 or above ends the scan */ | |
284 | } | |
285 | dfa->has_mb_node = dfa->nbackref > 0 || has_period; | |
286 | } | |
287 | /* Allocate the four per-node arrays of the DFA stored in preg->buffer, returning REG_ESPACE if any allocation fails; then, when subexp_map is non-null, fill it with the identity mapping and run preorder over str_tree with optimize_subexps. */ static reg_errcode_t analyze (regex_t *preg) { | |
288 | re_dfa_t *dfa = (re_dfa_t *) preg->buffer; | |
289 | dfa->nexts = ((int *) malloc ((dfa->nodes_alloc) * sizeof (int))); | |
290 | dfa->org_indices = ((int *) malloc ((dfa->nodes_alloc) * sizeof (int))); | |
291 | dfa->edests = ((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set))); | |
292 | dfa->eclosures = ((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set))); | |
293 | if (__builtin_expect (dfa->nexts == 0 || dfa->org_indices == 0 || dfa->edests == 0 || dfa->eclosures == 0, 0)) return REG_ESPACE; /* arrays that were allocated successfully are not freed on this path */ | |
294 | if (dfa->subexp_map != 0) { | |
295 | int i; | |
296 | for (i = 0; | |
297 | i < preg->re_nsub; | |
298 | i++) dfa->subexp_map[i] = i; | |
299 | preorder (dfa->str_tree, optimize_subexps, dfa); | |
300 | for (i = 0; | |
301 | i < preg->re_nsub; | |
302 | i++) if (dfa->subexp_map[i] != i) break; /* stops at the first entry that is no longer the identity; i is not used afterwards in this file */ | |
303 | } | |
304 | } /* NOTE(review): no return statement on the success path, although the function is declared to return reg_errcode_t */ | |