/* NOTE(review): this listing is elided.  The number that starts each line
   appears to be the line's position in the full source; gaps in that
   numbering mean lines (including braces) are not shown here. */
/* Opening of union locale_data_value; its members are not shown. */
3 union locale_data_value {
/* Thread-local pointer to a pointer to struct locale_data, declared with the
   initial-exec TLS model.  Read elsewhere in this file as
   (*_nl_current_LC_CTYPE)->values[0].string and ->values[0].word. */
8 extern __thread struct locale_data **_nl_current_LC_CTYPE
9 __attribute__ ((tls_model("initial-exec")));
/* Syntax option word: an unsigned long. */
10 typedef unsigned long int reg_syntax_t;
/* External syntax word; __re_compile_pattern passes it as the last argument
   of re_compile_internal. */
11 extern reg_syntax_t re_syntax_options;
/* Two members of a struct whose header is not shown.  Elsewhere in this file
   `buffer` is read through a regex_t pointer and cast to re_dfa_t *, and
   `translate` is compared against 0 -- presumably both belong to the struct
   behind regex_t; confirm against the full source. */
16 unsigned char *buffer;
19 unsigned char *translate;
/* Thread-local void * slot; __ctype_tolower_loc takes its address. */
22 extern __thread void *__libc_tsd_CTYPE_TOLOWER;
/* Takes no arguments and returns int ** (per the signature).  Only two lines
   of the body are visible in this listing; the declaration of `u`, the
   control flow and the return statement are not shown. */
23 inline int **__ctype_tolower_loc(void)
/* Store the address of the thread-local __libc_tsd_CTYPE_TOLOWER slot in
   u.ptr. */
29 u.ptr = (&__libc_tsd_CTYPE_TOLOWER);
/* values[0].string of the current locale data, viewed as int *, advanced by
   128 elements.  NOTE(review): the start of this statement (what the value
   is assigned to) is not shown. */
32 ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128);
/* Takes an int __c and returns an int obtained through the pointer returned
   by __ctype_tolower_loc().  No range check on __c is visible. */
35 inline int tolower(int __c)
/* NOTE(review): postfix [] binds tighter than unary *, so this expression
   parses as *(__ctype_tolower_loc()[__c]): the returned int ** is indexed by
   __c first and the selected int * is then dereferenced.  Indexing the
   pointed-to table would be written (*__ctype_tolower_loc())[__c] --
   confirm which was intended against the full source. */
37 return *__ctype_tolower_loc()[__c];
/* Both bitset types are plain pointers to unsigned long words. */
41 typedef unsigned long int *bitset_t;
42 typedef unsigned long int *re_bitset_ptr_t;
/* Enumerators of an enum whose header is not shown; presumably
   re_token_type_t, since CHARACTER and COMPLEX_BRACKET are compared against
   a re_token_type_t value in re_compile_fastmap_iter -- confirm.
   Values as written: 0..7 for NON_TYPE..OP_UTF8_PERIOD; 8|0..8|4 (i.e. 8..12)
   for OP_OPEN_SUBEXP..ANCHOR; 16..18 for CONCAT, SUBEXP, OP_DUP_PLUS.
   The list continues past the last line shown. */
48 NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET =
49 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET =
50 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP =
51 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR =
52 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS = 18,
/* Members of a declaration whose header is not shown.  `sbcset` is read as
   dfa->nodes[node].opr.sbcset[i]; `type` is an 8-bit bit-field read as
   dfa->nodes[node].type. */
61 re_bitset_ptr_t sbcset;
64 re_token_type_t type:8;
/* Typedef names for struct tags; the struct bodies are not shown here. */
66 typedef struct re_dfa_t re_dfa_t;
67 typedef struct bin_tree_t bin_tree_t;
/* Opening of struct re_dfastate_t; its members are not shown.  Code in this
   file reads init_state->nodes.nelem and init_state->nodes.elems[]. */
68 struct re_dfastate_t {
71 typedef struct re_dfastate_t re_dfastate_t;
/* Members of a struct whose header is not shown; all of them are accessed
   through re_dfa_t pointers elsewhere in this file. */
/* Upper bound of the node scan `for (; node < dfa->nodes_len;)`. */
74 unsigned int nodes_len;
/* Passed to re_compile_fastmap_iter by __re_compile_fastmap. */
75 re_dfastate_t *init_state;
/* Bitset words; assigned from utf8_sb_map and OR-ed bit by bit in the
   re_compile_internal region. */
77 re_bitset_ptr_t sb_char;
/* One-bit flags. */
79 unsigned int has_mb_node:1;
80 unsigned int is_utf8:1;
81 unsigned int map_notascii:1;
/* Forward declarations of file-local functions. */
/* The rest of this parameter list is not shown; the call in
   __re_compile_pattern passes four arguments (bufp, pattern, length,
   re_syntax_options). */
85 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
/* The last parameter line is not shown; the call in __re_compile_fastmap
   passes a char *fastmap as third argument. */
88 static void re_compile_fastmap_iter(regex_t * bufp,
89 const re_dfastate_t * init_state,
91 static reg_errcode_t analyze(regex_t * preg);
/* Passed by name to preorder() in analyze, together with the dfa pointer. */
92 static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node);
/* Old-style (identifier-list) definition: the parameter types of pattern,
   length and bufp are declared on lines not shown.  Returns const char *;
   how `ret` is turned into that return value is not shown. */
93 const char *__re_compile_pattern(pattern, length, bufp)
/* Compile using the external re_syntax_options word as the syntax. */
97 ret = re_compile_internal(bufp, pattern, length, re_syntax_options);
/* Hands bufp's fastmap buffer and the DFA's initial state to
   re_compile_fastmap_iter.  Returns int; the return statement (and anything
   else between the lines shown) is not visible in this listing. */
100 int __re_compile_fastmap(regex_t * bufp)
/* bufp->buffer is declared unsigned char *; it is reinterpreted here as the
   re_dfa_t object. */
102 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
103 char *fastmap = bufp->fastmap;
104 re_compile_fastmap_iter(bufp, dfa->init_state, fastmap);
/* Helper called from re_compile_fastmap_iter with (fastmap, icase, value).
   Its effect on fastmap is on lines not shown. */
106 static inline void re_set_fastmap(char *fastmap, int icase, int ch)
/* `ch` is an int, so sizeof(ch) is sizeof(int): this condition is a
   compile-time constant (true wherever int is wider than one byte).  The
   guarded body is not shown. */
110 if (sizeof(ch) > 1) {
/* Walks the nodes of init_state and, depending on each node's token type,
   feeds values to re_set_fastmap.  Many lines of the body (declarations of
   node_cnt, i, buf, state; several branch headers; closing braces) are not
   shown in this listing. */
118 static void re_compile_fastmap_iter(regex_t * bufp,
119 const re_dfastate_t * init_state,
122 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
/* icase is non-zero only when mb_cur_max is exactly 1 and bit 0x40000 of the
   syntax word is set. */
124 int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000));
/* One iteration per element of init_state->nodes. */
125 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) {
126 int node = init_state->nodes.elems[node_cnt];
127 re_token_type_t type = dfa->nodes[node].type;
128 if (type == CHARACTER) {
/* Same syntax bit as icase, but for the mb_cur_max > 1 case. */
129 if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) {
/* Stack buffer of mb_cur_max bytes; its use is not shown. */
130 void *buf = alloca(dfa->mb_cur_max);
/* Scan the 256-bit set one unsigned long word at a time.  The branch that
   guards this loop is not shown.  NOTE(review): the loop header has no
   increment and none is visible in the lines shown -- confirm i advances. */
134 for (; i < (256 / (sizeof(unsigned long int) * 8));) {
136 unsigned long int w =
137 dfa->nodes[node].opr.sbcset[i];
140 re_set_fastmap(fastmap, icase,
143 } else if (type == COMPLEX_BRACKET) {
145 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
/* One pass per entry of cset->mbchars.  NOTE(review): again no increment in
   the header or in the visible lines. */
146 for (; i < cset->nmbchars;) {
/* Convert the wide character into buf; the value the result is compared
   against is on a line not shown. */
149 if (__wcrtomb(buf, cset->mbchars[i], &state) !=
/* Only the first converted byte is passed on. */
151 re_set_fastmap(fastmap, icase, *buf);
/* NOTE(review): this tests mask 0x4000, whereas the two tests above use
   0x40000 -- confirm whether a different syntax bit is intended. */
152 if ((bufp->syntax & 0x4000)
153 && dfa->mb_cur_max > 1) {
/* File-local bitset pointer (unsigned long words); later assigned to
   dfa->sb_char.  Where it is allocated or filled is not shown. */
159 static bitset_t utf8_sb_map;
/* Definition of re_compile_internal; the rest of the parameter list is not
   shown.  NOTE(review): function boundaries between this line and analyze()
   are not visible in this listing, so the lines below may belong to more
   than one function -- confirm against the full source. */
160 static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern,
/* True when the is_utf8 flag is set, bit 0x400000 of syntax is clear, and no
   translate table is installed.  The guarded statement is not shown. */
167 if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0)
/* Lengths of values[0].string of the current locale data and of the literal
   "UTF-8", narrowed to unsigned int; both are used by the comparison
   expression on the following line. */
170 unsigned int __s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string));
171 unsigned int __s2_len = strlen("UTF-8");
172 int result = (((!((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) || __s1_len >= 4) && (!((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) || __s2_len >= 4)) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[0].string)), __s1_len < 4) ? (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : (((int)(("UTF-8") + 1) - (int)("UTF-8") == 1) && (__s2_len < 4) ? (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) == 1) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8"))));
/* `result` is the int computed on the preceding line from comparing
   values[0].string with "UTF-8".  The rest of this condition and the guarded
   statement are not shown. */
173 if (dfa->mb_cur_max == 6 && result
/* Flag is set when values[0].word of the current locale data is non-zero. */
176 dfa->map_notascii = (*_nl_current_LC_CTYPE)->values[0].word != 0;
177 if (dfa->mb_cur_max > 1) {
/* Share the file-level utf8_sb_map words; the condition on the line before
   this one is not shown. */
179 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
/* Loop condition of a scan over the 256-bit set, one unsigned long word per
   i; the rest of the loop header is not shown. */
185 i < (256 / (sizeof(unsigned long int) * 8));
/* Result of __btowc stored as unsigned int; 0xffffffffu is the value that is
   skipped. */
187 unsigned int wch = __btowc(ch);
188 if (wch != (0xffffffffu))
/* NOTE(review): `1 << j` is computed in int while sb_char[i] is an unsigned
   long word.  The range of j is not shown; if j can reach the width of int
   the shift is undefined and higher bits can never be set -- consider
   1UL << j after confirming. */
189 dfa->sb_char[i] |= 1 << j;
/* Scan every node and dispatch on its token type.  NOTE(review): no
   increment in the header or in the visible lines. */
197 for (; node < dfa->nodes_len;)
198 switch (dfa->nodes[node].type) {
/* Character value at or above 0x80; the case label and the action taken are
   not shown. */
200 if (dfa->nodes[node].opr.c >= 0x80)
/* Look for any non-zero word of the node's bitset; the start index and the
   action are not shown.  NOTE(review): no increment visible here either. */
202 for (; i < (256 / (sizeof(unsigned long int) * 8));)
203 if (dfa->nodes[node].opr.sbcset[i])
/* has_mb_node is set when there is at least one back-reference or when
   has_period is non-zero (has_period is assigned on lines not shown). */
206 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
/* Works on the re_dfa_t stored in preg->buffer.  Only the subexp_map
   handling is visible; the declaration of i, the rest of the body and the
   returned reg_errcode_t are not shown. */
208 static reg_errcode_t analyze(regex_t * preg)
210 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
/* Everything below runs only when a subexp_map is present. */
211 if (dfa->subexp_map != 0) {
/* Initialise the map to the identity.  NOTE(review): as shown, neither the
   loop header nor its single-statement body advances i, so the loop cannot
   terminate once entered -- confirm against the full source. */
213 for (; i < preg->re_nsub;)
214 dfa->subexp_map[i] = i;
/* Traverse dfa->str_tree with optimize_subexps as the callback and dfa as
   its extra argument. */
215 preorder(dfa->str_tree, optimize_subexps, dfa);
/* Look for the first entry that no longer maps to itself; the action taken
   is not shown.  NOTE(review): no reset of i before this loop is visible. */
216 for (; i < preg->re_nsub; i++)
217 if (dfa->subexp_map[i] != i)