/*
 * Single translation unit: locale/regex/DFA type declarations followed by a
 * handful of regex-compilation helpers.  The text contains no #include lines,
 * and strlen, strcmp, alloca, __wcrtomb, __btowc, optimize_utf8 and preorder
 * are called without any declaration visible here.
 * NOTE(review): the shape of the code (three physical lines, dead branches,
 * loops without increments, unused locals) looks like a machine-reduced
 * compiler test case -- confirm before "fixing" anything, since the exact
 * token sequence may be the point of the file.
 *
 * Definitions, in order of appearance:
 *
 * __ctype_tolower_loc(void)
 *     Views the address of __libc_tsd_CTYPE_TOLOWER as an int ** through a
 *     union.  If the slot it points to is null, the slot is filled with
 *     values[0].string of the current LC_CTYPE locale data.  Returns the
 *     address of the slot.
 *
 * tolower(__c)
 *     Returns *__ctype_tolower_loc()[__c].  Because [] binds tighter than
 *     unary *, this indexes the returned int ** by __c and then dereferences
 *     that element.
 *     NOTE(review): indexing the slot address rather than the table it
 *     points to looks unintended -- confirm.
 *
 * re_set_fastmap(fastmap, icase, ch)
 *     Does nothing when icase is zero.  Otherwise stores 1 into
 *     fastmap[__res].  __res is assigned (tolower(ch)) only when ch == 0;
 *     for any other ch it is read while still uninitialized.  The
 *     sizeof(ch) > 1 test is a compile-time constant because ch is an int.
 *
 * re_compile_fastmap_iter(bufp, init_state, fastmap)
 *     Takes the DFA from bufp->buffer (an unsigned char * assigned to a
 *     re_dfa_t * without a cast).  icase is true when dfa->mb_cur_max == 1
 *     and syntax bit 0x40000 is set.  Walks init_state->nodes.elems:
 *       - CHARACTER: may call alloca (result unused), then scans
 *         opr.sbcset[i] and calls re_set_fastmap when bit 0 of the word is
 *         set.  i, j and ch are never initialized, and neither loop changes
 *         its own condition variable.
 *       - COMPLEX_BRACKET: converts cset->mbchars[i] with __wcrtomb into a
 *         256-byte buffer and passes the first byte to re_set_fastmap unless
 *         the call returned -1.  i is uninitialized and never incremented;
 *         the mbstate_t passed in is uninitialized.
 *
 * utf8_sb_map
 *     File-scope static pointer; nothing in the visible text assigns it.
 *
 * re_compile_internal(preg, pattern, length, syntax)
 *     Calls analyze(preg); the result is stored in err and not used again.
 *     The local dfa is dereferenced without ever being assigned.  pattern
 *     and length are unused.  `result` is a nested conditional built from
 *     pointer values cast to int; every arm either calls
 *     strcmp(<LC_CTYPE values[0].string>, "UTF-8") or yields
 *     string[0] - 'U'.
 *     NOTE(review): presumably an expanded strcmp macro -- confirm.
 *     Sets dfa->is_utf8 when mb_cur_max == 6 and result == 0, and sets
 *     map_notascii from values[0].word != 0.  When mb_cur_max > 1, sb_char
 *     is either pointed at utf8_sb_map or populated bit-by-bit from
 *     __btowc(ch); in that loop i, j and ch start uninitialized and i is
 *     never advanced.  The final node scan uses an uninitialized,
 *     never-incremented `node`, contains a bare `return;` although the
 *     function returns reg_errcode_t, and reads has_period uninitialized.
 *     mb_chars is written but never read.  Control can reach the end of the
 *     function without returning a value.
 *
 * analyze(preg)
 *     When dfa->subexp_map is non-null: writes subexp_map[i] = i in a loop
 *     whose index is uninitialized and never incremented, calls
 *     preorder(dfa->str_tree, optimize_subexps, dfa), then scans for the
 *     first i with subexp_map[i] != i.  No value is returned despite the
 *     reg_errcode_t return type.
 *
 * __re_compile_pattern(pattern, length, bufp)
 *     Forwards to re_compile_internal with the global re_syntax_options.
 *     The result is stored in ret and nothing is returned, although the
 *     function is declared to return const char *.
 *
 * __re_compile_fastmap(bufp)
 *     Calls re_compile_fastmap_iter with dfa->init_state and fastmap, both
 *     taken from locals that are never initialized.
 */
struct locale_data { unsigned int nstrings; union locale_data_value { char *string; unsigned int word; } values[]; }; extern __thread struct locale_data **_nl_current_LC_CTYPE; typedef unsigned long int reg_syntax_t; extern reg_syntax_t re_syntax_options; typedef enum { REG_ENOSYS } reg_errcode_t; typedef struct { unsigned char *buffer; reg_syntax_t syntax; unsigned char *translate; unsigned int re_nsub; } regex_t; extern __thread void *__libc_tsd_CTYPE_TOLOWER; inline int **__ctype_tolower_loc(void) { union { void **ptr; int **tablep; } u; u.ptr = (&__libc_tsd_CTYPE_TOLOWER); if (*u.tablep == 0) *u.tablep = (*_nl_current_LC_CTYPE)->values[0].string; return u.tablep; } inline int tolower(int __c) { return *__ctype_tolower_loc()[__c]; } typedef struct { } mbstate_t; typedef struct { int nelem; int *elems; } re_node_set; typedef enum { CHARACTER = 1, COMPLEX_BRACKET = 6 } re_token_type_t; typedef struct { long int *mbchars; int nmbchars; } re_charset_t; typedef struct { union { unsigned char c; unsigned long int * sbcset; re_charset_t *mbcset; } opr; re_token_type_t type; } re_token_t; typedef struct re_dfa_t re_dfa_t; typedef struct bin_tree_t bin_tree_t; typedef struct { re_node_set nodes; } re_dfastate_t; struct re_dfa_t { re_token_t *nodes; unsigned int nodes_len; re_dfastate_t *init_state; bin_tree_t *str_tree; unsigned long int * sb_char; int nbackref; unsigned int has_mb_node:1; unsigned int is_utf8:1; unsigned int map_notascii:1; int mb_cur_max; int *subexp_map; }; static reg_errcode_t analyze(regex_t * preg); static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node); static inline void re_set_fastmap(char *fastmap, int icase, int ch) { if (icase) { int __res; if (sizeof(ch) > 1) { if (ch) { } else __res = tolower(ch); } fastmap[__res] = 1; } } static void re_compile_fastmap_iter(regex_t * bufp, re_dfastate_t * init_state, char *fastmap) { re_dfa_t *dfa; dfa = bufp->buffer; int node_cnt; int icase; icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 
0x40000)); for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) { int node; node = init_state->nodes.elems[node_cnt]; re_token_type_t type; type = dfa->nodes[node].type; if (type == CHARACTER) { if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) { void *buf = alloca(dfa->mb_cur_max); } int i; int ch; for (; i < (256 / (sizeof(unsigned long int) * 8));) { int j; int w; w = dfa->nodes[node].opr.sbcset[i]; for (; j < 8;) if (w & 1) re_set_fastmap(fastmap, icase, ch); } } else if (type == COMPLEX_BRACKET) { int i; re_charset_t *cset; cset = dfa->nodes[node].opr.mbcset; for (; i < cset->nmbchars;) { char buf[256]; mbstate_t state; if (__wcrtomb(buf, cset->mbchars[i], &state) != -1) re_set_fastmap(fastmap, icase, *buf); if ((bufp->syntax & 0x4000) && dfa->mb_cur_max > 1) { } } } } } static unsigned long int * utf8_sb_map; static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern, unsigned int length, reg_syntax_t syntax) { reg_errcode_t err; re_dfa_t *dfa; err = analyze(preg); if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0) optimize_utf8(dfa); unsigned int __s1_len = strlen((*_nl_current_LC_CTYPE)->values[0].string); int result = (((!((int)((((*_nl_current_LC_CTYPE)->values[0].string))) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)))) && (!((int)(("UTF-8") + 1) - (int)("UTF-8")))) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - (int)(((*_nl_current_LC_CTYPE)->values[0].string))) && __s1_len < 4 ? (((int)(("UTF-8")) - (int)("UTF-8")) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0]. string[0] - 'U'))) : (((int)(("UTF-8")) - (int)("UTF-8")) ? (((int) ((((*_nl_current_LC_CTYPE)->values[0].string))) - (int)(((*_nl_current_LC_CTYPE)->values[0].string))) ? 
strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8")))); if (dfa->mb_cur_max == 6 && result == 0) dfa->is_utf8 = 1; dfa->map_notascii = (*_nl_current_LC_CTYPE)->values[0].word != 0; if (dfa->mb_cur_max > 1) { if (dfa->is_utf8) dfa->sb_char = utf8_sb_map; else { int i; int j; int ch; for (; i < (256 / (sizeof(unsigned long int) * 8)); ++j, ++ch) { unsigned int wch; wch = __btowc(ch); if (wch != (0xffffffffu)) dfa->sb_char[i] |= 1 << j; } } } int node; int i; int mb_chars; int has_period; for (; node < dfa->nodes_len;) switch (dfa->nodes[node].type) { case CHARACTER: if (dfa->nodes[node].opr.c >= 0x80) mb_chars = 1; for (; i < (256 / (sizeof(unsigned long int) * 8));) if (dfa->nodes[node].opr.sbcset[i]) return; } dfa->has_mb_node = dfa->nbackref > 0 || has_period; } static reg_errcode_t analyze(regex_t * preg) { re_dfa_t *dfa; dfa = preg->buffer; if (dfa->subexp_map != 0) { int i; for (; i < preg->re_nsub;) dfa->subexp_map[i] = i; preorder(dfa->str_tree, optimize_subexps, dfa); for (; i < preg->re_nsub; i++) if (dfa->subexp_map[i] != i) break; } } const char *__re_compile_pattern(const char *pattern, unsigned int length, regex_t * bufp) { reg_errcode_t ret = re_compile_internal(bufp, pattern, length, re_syntax_options); } void __re_compile_fastmap(regex_t * bufp) { re_dfa_t *dfa; char *fastmap; re_compile_fastmap_iter(bufp, dfa->init_state, fastmap); }