2 _NL_COLLATE_NRULES = (((
6 _NL_CTYPE_CODESET_NAME,
7 _NL_CTYPE_MAP_TO_NONASCII,
10 unsigned int nstrings;
11 union locale_data_value {
/* Thread-local pointers to the current LC_COLLATE and LC_CTYPE locale data.
   Both are declared with the "initial-exec" TLS model.  Code in this file
   reads entries through (*_nl_current_LC_xxx)->values[...]. */
17 extern __thread struct locale_data *const *_nl_current_LC_COLLATE __attribute__ ((tls_model ("initial-exec")));
18 extern __thread struct locale_data *const *_nl_current_LC_CTYPE __attribute__ ((tls_model ("initial-exec")));
/* Integer type holding regex syntax option bits (tested with `&` against
   single-bit masks elsewhere in this file). */
19 typedef unsigned long int reg_syntax_t;
/* Global syntax setting; __re_compile_pattern passes it to
   re_compile_internal as the `syntax` argument. */
20 extern reg_syntax_t re_syntax_options;
22 REG_ENOSYS = -1, REG_NOERROR = 0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN }
/* Compiled-pattern buffer, also exposed under the name regex_t.
   NOTE(review): only some members appear in this excerpt; the closing brace
   and the remaining fields are on listing lines that are not shown. */
24 struct re_pattern_buffer {
/* The compile routines in this file cast this pointer to re_dfa_t *. */
25 unsigned char *buffer;
/* Handed to re_string_construct; optimize_utf8 is only run when it is null. */
28 unsigned char * translate;
/* Set to 1 by re_compile_fastmap_iter when it sees an END_OF_RE node. */
30 unsigned can_be_null : 1;
/* Cleared to 0 by re_compile_internal. */
31 unsigned regs_allocated : 2;
/* Cleared to 0 by re_compile_internal. */
32 unsigned fastmap_accurate : 1;
36 typedef struct re_pattern_buffer regex_t;
/* Per-thread slot that holds the pointer to the tolower table. */
37 extern __thread void *__libc_tsd_CTYPE_TOLOWER __attribute__ ((tls_model ("initial-exec")));
/* Accessor for the per-thread tolower table pointer.
   NOTE(review): the declaration of `u` and the return statement are on
   listing lines that are not shown; presumably the function returns
   u.tablep -- confirm against the full source. */
38 extern inline const int ** __attribute__ ((const)) __ctype_tolower_loc (void) {
44 u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
/* Lazy initialisation: if the slot is still null, point it at the current
   LC_CTYPE locale's _NL_CTYPE_TOLOWER table, advanced by 128 entries. */
45 if (__builtin_expect (*u.tablep == ((void *)0), 0)) *u.tablep = ((int *) ((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_TOLOWER) & 0xffff)].string) + 128);
/* Inline tolower: for -128 <= __c < 256 the result is looked up in the
   per-thread table returned by __ctype_tolower_loc(); any other value is
   returned unchanged.  (The closing brace is on a listing line not shown.) */
48 extern __inline int __attribute__ ((__nothrow__)) tolower (int __c) {
49 return __c >= -128 && __c < 256 ? (*__ctype_tolower_loc ())[__c] : __c;
54 typedef __mbstate_t mbstate_t;
/* Word type used as the storage unit of a bitset. */
55 typedef unsigned long int bitset_word_t;
/* A 256-bit set: 256 / (bits per word) words, i.e. one bit per byte value. */
56 typedef bitset_word_t bitset_t[(256 / (sizeof (bitset_word_t) * 8))];
/* Pointer to the first word of a bitset. */
57 typedef bitset_word_t *re_bitset_ptr_t;
59 INSIDE_WORD = 0x0001 | 0x0004, WORD_FIRST = 0x0002 | 0x0004, WORD_LAST = 0x0001 | 0x0008, INSIDE_NOTWORD = 0x0002 | 0x0008, LINE_FIRST = 0x0010, LINE_LAST = 0x0020, BUF_FIRST = 0x0040, BUF_LAST = 0x0080, WORD_DELIM = 0x0100, NOT_WORD_DELIM = 0x0200 }
67 NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET = 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET = 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP = 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR = 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS = 18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET, OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM, OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM, OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS, OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE, BACK_SLASH }
71 unsigned int non_match : 1;
82 re_bitset_ptr_t sbcset;
87 re_token_type_t type : 8;
/* Short typedef names for the regex engine's struct types. */
92 typedef struct re_string_t re_string_t;
93 typedef struct re_dfa_t re_dfa_t;
96 typedef struct bin_tree_t bin_tree_t;
/* NOTE(review): the members and closing brace of struct re_dfastate_t are on
   listing lines that are not shown in this excerpt. */
97 struct re_dfastate_t {
100 typedef struct re_dfastate_t re_dfastate_t;
103 unsigned int nodes_alloc;
104 unsigned int nodes_len;
108 re_node_set *eclosures;
109 struct re_state_table_entry *state_table;
110 re_dfastate_t *init_state;
111 bin_tree_t *str_tree;
112 re_bitset_ptr_t sb_char;
113 int str_tree_storage_idx;
115 unsigned int has_mb_node : 1;
116 unsigned int is_utf8 : 1;
117 unsigned int map_notascii : 1;
/* Forward declarations of file-local helpers.  Definitions of
   re_compile_internal, re_compile_fastmap_iter, init_dfa, optimize_utf8 and
   analyze begin further down; optimize_subexps is passed to preorder() as a
   callback by analyze. */
121 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, unsigned int length, reg_syntax_t syntax);
122 static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap);
123 static reg_errcode_t init_dfa (re_dfa_t *dfa, unsigned int pat_len);
124 static void optimize_utf8 (re_dfa_t *dfa);
125 static reg_errcode_t analyze (regex_t *preg);
126 static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
/* Table of cumulative byte offsets of the regex error messages.
   Entry 0 is 0; entry k is the sum of `sizeof` of the first k message
   literals listed below ("Success", "No match", ...).  Because sizeof of a
   string literal counts its terminating NUL, each entry is where message k
   would start if the literals were stored back to back, NUL-separated.
   There are 17 entries.
   NOTE(review): presumably indexed by reg_errcode_t value into a single
   concatenated message string defined elsewhere -- confirm. */
127 const unsigned int __re_error_msgid_idx[] = {
128 0, (0 + sizeof "Success"), ((0 + sizeof "Success") + sizeof "No match"), (((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression"), ((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character"), (((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name"), ((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash"), (((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference"), ((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^"), (((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\("), ((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{"), (((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + 
sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}"), ((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end"), (((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted"), ((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression"), (((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression") + sizeof "Premature end of regular expression"), 
((((((((((((((((0 + sizeof "Success") + sizeof "No match") + sizeof "Invalid regular expression") + sizeof "Invalid collation character") + sizeof "Invalid character class name") + sizeof "Trailing backslash") + sizeof "Invalid back reference") + sizeof "Unmatched [ or [^") + sizeof "Unmatched ( or \\(") + sizeof "Unmatched \\{") + sizeof "Invalid content of \\{\\}") + sizeof "Invalid range end") + sizeof "Memory exhausted") + sizeof "Invalid preceding regular expression") + sizeof "Premature end of regular expression") + sizeof "Regular expression too big") };
/* Old-style (K&R) definition: compiles `pattern` of `length` bytes into
   `bufp`.
   NOTE(review): the remaining parameter declarations, the local `ret` and
   the return statement are on listing lines that are not shown. */
129 const char * __re_compile_pattern (pattern, length, bufp) const char *pattern;
/* The syntax used is the global re_syntax_options. */
132 ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
/* Old-style (K&R) definition: fills bufp->fastmap by running
   re_compile_fastmap_iter on the DFA's init_state.
   NOTE(review): the opening brace, any further statements and the return
   value are on listing lines that are not shown. */
134 int __re_compile_fastmap (bufp) struct re_pattern_buffer *bufp;
/* bufp->buffer holds the compiled DFA. */
136 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
137 char *fastmap = bufp->fastmap;
138 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
/* Helper used by re_compile_fastmap_iter to record byte `ch` in `fastmap`.
   NOTE(review): most of the body is on listing lines that are not shown;
   the visible part is the start of an expanded tolower() macro. */
140 static inline void __attribute__ ((always_inline)) re_set_fastmap (char *fastmap, int icase, int ch) {
/* When icase is nonzero the fastmap index is a lowercased form of ch. */
141 if (icase) fastmap[(__extension__ ({
143 if (sizeof (ch) > 1) {
144 if (__builtin_constant_p (ch)) {
/* Non-constant argument: call tolower() at run time. */
146 else __res = tolower (ch);
/* Fill `fastmap` from the nodes contained in `init_state`.
   Each node index in init_state->nodes is examined and, depending on the
   node's type, one or more entries of `fastmap` are set.
   NOTE(review): loop headers, several branch headers and closing braces are
   on listing lines that are not shown; comments below describe only the
   visible statements. */
152 static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap) {
153 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
/* icase is nonzero only when mb_cur_max == 1 and syntax bit (1 << 22) is set. */
155 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)));
/* Loop condition: one iteration per node in the initial state. */
157 node_cnt < init_state->nodes.nelem;
159 int node = init_state->nodes.elems[node_cnt];
160 re_token_type_t type = dfa->nodes[node].type;
161 if (type == CHARACTER) {
/* Same syntax bit, multibyte locale: a scratch buffer of mb_cur_max bytes
   is taken from the stack. */
162 if ((bufp->syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)) && dfa->mb_cur_max > 1) {
163 unsigned char *buf = __builtin_alloca (dfa->mb_cur_max), *p;
/* Walk every word of the node's sbcset bitset ... */
167 i < (256 / (sizeof (bitset_word_t) * 8));
170 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
/* ... and every bit of that word; each set bit marks byte `ch`. */
172 j < (sizeof (bitset_word_t) * 8);
173 ++j, ++ch) if (w & ((bitset_word_t) 1 << j)) re_set_fastmap (fastmap, icase, ch);
176 else if (type == COMPLEX_BRACKET) {
178 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
/* Sets that are negated or contain collating symbols, equivalence classes,
   ranges or character classes take the collation-table path. */
179 if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes || cset->nranges || cset->nchar_classes) {
180 if (((unsigned int) (*_nl_current_LC_COLLATE)->values[((int) (_NL_COLLATE_NRULES) & 0xffff)].word) != 0) {
181 const int *table = (const int *) ((*_nl_current_LC_COLLATE)->values[((int) (_NL_COLLATE_TABLEMB) & 0xffff)].string);
/* Every index whose table entry is negative is marked. */
184 ++i) if (table[i] < 0) re_set_fastmap (fastmap, icase, i);
/* For each explicit wide character, convert it to multibyte form and mark
   the first byte of the result. */
192 if (__wcrtomb (buf, cset->mbchars[i], &state) != (unsigned int) -1) re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
/* With the syntax bit set in a multibyte locale, also mark the first byte
   of the lowercased character (icase argument passed as 0 here). */
193 if ((bufp->syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)) && dfa->mb_cur_max > 1) {
194 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) != (unsigned int) -1) re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
/* These node types set all 256 fastmap entries to '\1'. */
198 else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == END_OF_RE) {
199 memset (fastmap, '\1', sizeof (char) * 256);
/* END_OF_RE in the initial state additionally sets can_be_null. */
200 if (type == END_OF_RE) bufp->can_be_null = 1;
/* Constant bitset used by init_dfa as dfa->sb_char when the locale is UTF-8.
   A GNU range designator initialises the words that cover bit positions
   0 .. 0x7f to 2147483647L * 2UL + 1UL (0xFFFFFFFF); the remaining words
   are implicitly zero. */
205 static const bitset_t utf8_sb_map = {
206 [0 ... 0x80 / (sizeof (bitset_word_t) * 8) - 1] = (2147483647L * 2UL + 1UL) };
/* Compile `pattern` (`length` bytes) into `preg` under `syntax`.
   Visible steps: reset the buffer's state flags, initialise the DFA, build
   the string reader, parse into dfa->str_tree, analyze, and optionally run
   optimize_utf8.
   NOTE(review): the declarations of `dfa` and `regexp`, the error checks
   between steps and the end of the function are on listing lines that are
   not shown in this excerpt. */
207 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, unsigned int length, reg_syntax_t syntax) {
208 reg_errcode_t err = REG_NOERROR;
/* Reset per-compilation flags on the pattern buffer. */
211 preg->fastmap_accurate = 0;
212 preg->not_bol = preg->not_eol = 0;
213 preg->can_be_null = 0;
214 preg->regs_allocated = 0;
215 err = init_dfa (dfa, length);
/* The flag argument is syntax bit (1 << 22), the same mask that
   re_compile_fastmap_iter uses to compute its `icase` flag.
   The first argument is the address of the local `regexp`; the listing had
   it mis-decoded as a registered-trademark sign followed by "exp". */
216 err = re_string_construct (&regexp, pattern, length, preg->translate, syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1), dfa);
217 dfa->str_tree = parse (&regexp, preg, syntax, &err);
218 err = analyze (preg);
/* optimize_utf8 runs only for a UTF-8 DFA, with that syntax bit clear and
   no translate table. */
219 if (dfa->is_utf8 && !(syntax & ((((((((((((((((((((((((unsigned long int) 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1) << 1)) && preg->translate == ((void *)0)) optimize_utf8 (dfa);
/* Initialise `dfa` for a pattern of `pat_len` bytes.
   Returns REG_ESPACE when pat_len is 4294967295U or when an allocation
   checked below yields a null pointer.
   NOTE(review): the assignments to mb_cur_max, nodes and state_table, the
   loop headers and the final return are on listing lines that are not
   shown in this excerpt. */
221 static reg_errcode_t init_dfa (re_dfa_t *dfa, unsigned int pat_len) {
/* Start from an all-zero structure. */
222 memset (dfa, '\0', sizeof (re_dfa_t));
223 dfa->str_tree_storage_idx = ((1024 - sizeof (void *)) / sizeof (bin_tree_t));
224 if (pat_len == (4294967295U)) return REG_ESPACE;
/* The statement expression below is an expanded strcmp() macro comparing
   the LC_CTYPE codeset name with "UTF-8"; is_utf8 is set when mb_cur_max
   is 6 and the comparison yields 0. */
225 if (dfa->mb_cur_max == 6 && __extension__ ({
226 unsigned int __s1_len, __s2_len;
227 (__builtin_constant_p (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && __builtin_constant_p ("UTF-8") && (__s1_len = strlen (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)), __s2_len = strlen ("UTF-8"), (!((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) || __s1_len >= 4) && (!((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) || __s2_len >= 4)) ? __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__builtin_constant_p (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) && (__s1_len = strlen (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)), __s1_len < 4) ? (__builtin_constant_p ("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) ? __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__extension__ ({
228 __const unsigned char *__s2 = (__const unsigned char *) (__const char *) ("UTF-8");
229 register int __result = (((__const unsigned char *) (__const char *) (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)))[0] - __s2[0]);
232 ))) : (__builtin_constant_p ("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) && (__s2_len = strlen ("UTF-8"), __s2_len < 4) ? (__builtin_constant_p (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string)) == 1) ? __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8") : (__extension__ ({
233 __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string));
234 register int __result = __s1[0] - ((__const unsigned char *) (__const char *) ("UTF-8"))[0];
235 if (__s2_len > 0 && __result == 0) {
236 if (__s2_len > 1 && __result == 0) {
241 ))) : __builtin_strcmp (((*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_CODESET_NAME) & 0xffff)].string), "UTF-8"))));
243 ) == 0) dfa->is_utf8 = 1;
/* map_notascii mirrors whether the locale's _NL_CTYPE_MAP_TO_NONASCII word
   is nonzero. */
244 dfa->map_notascii = (((unsigned int) (*_nl_current_LC_CTYPE)->values[((int) (_NL_CTYPE_MAP_TO_NONASCII) & 0xffff)].word) != 0);
245 if (dfa->mb_cur_max > 1) {
/* UTF-8: share the constant map (const is cast away for the assignment). */
246 if (dfa->is_utf8) dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
/* Otherwise (the `else` is on a listing line not shown): allocate a zeroed
   bitset and set the bit for every `ch` for which __btowc does not return
   0xffffffff. */
249 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
250 if (__builtin_expect (dfa->sb_char == ((void *)0), 0)) return REG_ESPACE;
252 i < (256 / (sizeof (bitset_word_t) * 8));
254 j < (sizeof (bitset_word_t) * 8);
256 unsigned int wch = __btowc (ch);
257 if (wch != (0xffffffffu)) dfa->sb_char[i] |= (bitset_word_t) 1 << j;
/* A null nodes or state_table pointer is reported as out of memory. */
261 if (__builtin_expect (dfa->nodes == ((void *)0) || dfa->state_table == ((void *)0), 0)) return REG_ESPACE;
/* Scan every node of `dfa` and, unless the scan returns early, recompute
   dfa->has_mb_node from the back-reference count and whether a period node
   was seen.
   NOTE(review): the `for` header, `break` statements, remaining cases and
   the code between the loop and the final assignment are on listing lines
   that are not shown, so fall-through between the cases below cannot be
   judged from this excerpt. */
264 static void optimize_utf8 (re_dfa_t *dfa) {
265 int node, i, mb_chars = 0, has_period = 0;
267 node < dfa->nodes_len;
268 ++node) switch (dfa->nodes[node].type) {
/* A literal byte >= 0x80 is recorded in mb_chars. */
269 case CHARACTER: if (dfa->nodes[node].opr.c >= 0x80) mb_chars = 1;
270 case ANCHOR: switch (dfa->nodes[node].opr.idx) {
271 case LINE_FIRST: case LINE_LAST: case BUF_FIRST: case BUF_LAST: break;
274 case OP_PERIOD: has_period = 1;
/* Expanded assert(): 0x80 must be a whole number of bitset words. */
275 case SIMPLE_BRACKET: ((0x80 % (sizeof (bitset_word_t) * 8) == 0) ? (void) (0) : (__assert_fail ("0x80 % (sizeof (bitset_word_t) * 8) == 0", "regcomp.c", 1059, __PRETTY_FUNCTION__), (void) (0)));
/* Return early if the bracket's bitset has any bit set at position 0x80
   or above. */
276 for (i = 0x80 / (sizeof (bitset_word_t) * 8);
277 i < (256 / (sizeof (bitset_word_t) * 8));
278 ++i) if (dfa->nodes[node].opr.sbcset[i]) return;
280 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
282 static reg_errcode_t analyze (regex_t *preg) {
283 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
284 dfa->nexts = ((int *) malloc ((dfa->nodes_alloc) * sizeof (int)));
285 dfa->org_indices = ((int *) malloc ((dfa->nodes_alloc) * sizeof (int)));
286 dfa->edests = ((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set)));
287 dfa->eclosures = ((re_node_set *) malloc ((dfa->nodes_alloc) * sizeof (re_node_set)));
288 if (__builtin_expect (dfa->nexts == ((void *)0) || dfa->org_indices == ((void *)0) || dfa->edests == ((void *)0) || dfa->eclosures == ((void *)0), 0)) return REG_ESPACE;
289 if (dfa->subexp_map != ((void *)0)) {
293 i++) dfa->subexp_map[i] = i;
294 preorder (dfa->str_tree, optimize_subexps, dfa);
297 i++) if (dfa->subexp_map[i] != i) break;