/*
 * NOTE(review): this translation unit looks like preprocessed, machine-reduced
 * code (identifiers resemble a regex compiler plus locale/ctype internals).
 * Presumably it is a compiler test input rather than maintained source --
 * TODO confirm before "fixing" any oddity listed below; they may be intentional.
 * It relies on GNU C extensions (__thread, __attribute__, statement
 * expressions, __builtin_* functions, K&R-style definitions).
 *
 * Contents, in order of appearance:
 *
 * Declarations
 *   struct locale_data, reg_syntax_t, reg_errcode_t, regex_t, and the
 *   thread-local externs _nl_current_LC_CTYPE and __libc_tsd_CTYPE_TOLOWER.
 *
 * __ctype_tolower_loc(void)
 *   Returns the address of the thread-local __libc_tsd_CTYPE_TOLOWER slot,
 *   viewed through a union as `const int **`.  If the slot is null it is
 *   first set to values[0].string of the current LC_CTYPE data, cast to
 *   int * and advanced by 128 elements.
 *
 * tolower(__c)
 *   For -128 <= __c < 256 returns the entry at index __c of the table
 *   obtained from __ctype_tolower_loc(); otherwise returns __c unchanged.
 *
 * More declarations
 *   mbstate_t (empty struct), bitset_t / re_bitset_ptr_t (both pointers to
 *   unsigned long), re_node_set, re_token_type_t, re_charset_t, re_token_t,
 *   re_dfastate_t, re_dfa_t, and prototypes for the static functions below.
 *
 * __re_compile_pattern(pattern, length, bufp)
 *   K&R-style definition; only `pattern` has a declared type.  Calls
 *   re_compile_internal() with the global re_syntax_options and stores the
 *   result in `ret`.  There is no return statement although the declared
 *   return type is const char *.
 *
 * __re_compile_fastmap(bufp)
 *   Treats bufp->buffer as a re_dfa_t and calls re_compile_fastmap_iter()
 *   with its init_state and bufp->fastmap.  No return statement.
 *
 * re_set_fastmap(fastmap, icase, ch)
 *   Does nothing when icase is zero.  Otherwise, when ch is not a
 *   compile-time constant, sets fastmap[tolower(ch)] = 1.  In the
 *   __builtin_constant_p(ch) branch __res is never assigned before being
 *   used as the index.
 *
 * re_compile_fastmap_iter(bufp, init_state, fastmap)
 *   Walks the node indices in init_state->nodes.
 *   - CHARACTER nodes: optionally allocas a buffer (not used afterwards),
 *     then scans 256 bits of opr.sbcset and calls re_set_fastmap() for each
 *     set bit.
 *   - COMPLEX_BRACKET nodes: for every entry of mbcset->mbchars, converts it
 *     with __wcrtomb() and marks the first output byte; when
 *     (syntax & 0x4000) and mb_cur_max > 1 it does the same for the
 *     towlower() form with icase forced to 0.  `state` is passed to
 *     __wcrtomb() without being initialized.
 *   NOTE(review): icase is derived from mask 0x40000 but the
 *   COMPLEX_BRACKET branch tests 0x4000 -- confirm whether that is intended.
 *
 * utf8_sb_map
 *   File-scope `static const bitset_t` with no initializer.
 *
 * re_compile_internal(preg, pattern, length, syntax)
 *   Clears fastmap_accurate / not_bol / not_eol and calls analyze() (result
 *   kept in `err`, not examined).  The local `dfa` is declared but never
 *   assigned in the visible code before it is dereferenced.
 *   - Calls optimize_utf8(dfa) when is_utf8 is set, (syntax & 0x400000) is
 *     clear and preg->translate is null.
 *   - Sets is_utf8 when mb_cur_max == 6 and an expanded string comparison of
 *     values[0].string against "UTF-8" yields 0.
 *   - Sets map_notascii from values[0].word.
 *   - For mb_cur_max > 1: points sb_char at utf8_sb_map in the UTF-8 case,
 *     otherwise allocates it with calloc(sizeof(bitset_t), 1) and sets one
 *     bit per byte value for which __btowc() does not return 0xffffffff.
 *     NOTE(review): bitset_t is a pointer type here while the loop indexes
 *     256 / (bits per unsigned long) words -- confirm the allocation size.
 *   - Scans all nodes with a switch whose cases fall through
 *     (CHARACTER -> ANCHOR -> OP_PERIOD): sets has_period and executes a
 *     bare `return;` if any upper-half word of opr.sbcset is non-zero.
 *   - Finally sets has_mb_node = nbackref > 0 || has_period.
 *   No value is returned on any path.
 *
 * analyze(preg)
 *   Returns REG_ESPACE if any of nexts / org_indices / edests / eclosures is
 *   null.  If subexp_map is non-null: fills it with the identity mapping for
 *   re_nsub entries, calls preorder(str_tree, optimize_subexps, dfa), then
 *   scans for the first entry that is no longer the identity (the scan
 *   result is not used).  No return statement on the normal path.
 *
 * strlen, calloc, __wcrtomb, towlower, __btowc, optimize_utf8 and preorder
 * are called without a visible declaration.
 */
struct locale_data { unsigned int nstrings; union locale_data_value { const char *string; unsigned int word; } values[]; }; extern __thread struct locale_data *const *_nl_current_LC_CTYPE __attribute__ ((tls_model("initial-exec"))); typedef unsigned long int reg_syntax_t; extern reg_syntax_t re_syntax_options; typedef enum { REG_ENOSYS = -1, REG_NOERROR = 0, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND, REG_ESIZE, REG_ERPAREN } reg_errcode_t; typedef struct { unsigned char *buffer; reg_syntax_t syntax; char *fastmap; unsigned char *translate; unsigned int re_nsub; unsigned fastmap_accurate:1; unsigned not_bol:1; unsigned not_eol:1; } regex_t; extern __thread void *__libc_tsd_CTYPE_TOLOWER __attribute__ ((tls_model("initial-exec"))); extern inline const int ** __attribute__ ((const))__ctype_tolower_loc(void) { union { void **ptr; const int **tablep; } u; u.ptr = (&__libc_tsd_CTYPE_TOLOWER); if (__builtin_expect(*u.tablep == 0, 0)) *u.tablep = ((int *)((*_nl_current_LC_CTYPE)-> values[0]. string) + 128); return u.tablep; } extern __inline int tolower(int __c) { return __c >= -128 && __c < 256 ? 
(*__ctype_tolower_loc())[__c] : __c; } typedef struct { } mbstate_t; typedef unsigned long int *bitset_t; typedef unsigned long int *re_bitset_ptr_t; typedef struct { int nelem; int *elems; } re_node_set; typedef enum { NON_TYPE = 0, CHARACTER = 1, END_OF_RE = 2, SIMPLE_BRACKET = 3, OP_BACK_REF = 4, OP_PERIOD = 5, COMPLEX_BRACKET = 6, OP_UTF8_PERIOD = 7, OP_OPEN_SUBEXP = 8 | 0, OP_CLOSE_SUBEXP = 8 | 1, OP_ALT = 8 | 2, OP_DUP_ASTERISK = 8 | 3, ANCHOR = 8 | 4, CONCAT = 16, SUBEXP = 17, OP_DUP_PLUS = 18, OP_DUP_QUESTION, OP_OPEN_BRACKET, OP_CLOSE_BRACKET, OP_CHARSET_RANGE, OP_OPEN_DUP_NUM, OP_CLOSE_DUP_NUM, OP_NON_MATCH_LIST, OP_OPEN_COLL_ELEM, OP_CLOSE_COLL_ELEM, OP_OPEN_EQUIV_CLASS, OP_CLOSE_EQUIV_CLASS, OP_OPEN_CHAR_CLASS, OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, OP_SPACE, OP_NOTSPACE, BACK_SLASH } re_token_type_t; typedef struct { long int *mbchars; int nmbchars; } re_charset_t; typedef struct { union { unsigned char c; re_bitset_ptr_t sbcset; re_charset_t *mbcset; int idx; } opr; re_token_type_t type:8; } re_token_t; typedef struct re_dfa_t re_dfa_t; typedef struct bin_tree_t bin_tree_t; struct re_dfastate_t { re_node_set nodes; }; typedef struct re_dfastate_t re_dfastate_t; struct re_dfa_t { re_token_t *nodes; unsigned int nodes_len; int *nexts; int *org_indices; re_node_set *edests; re_node_set *eclosures; re_dfastate_t *init_state; bin_tree_t *str_tree; re_bitset_ptr_t sb_char; int nbackref; unsigned int has_mb_node:1; unsigned int is_utf8:1; unsigned int map_notascii:1; int mb_cur_max; int *subexp_map; }; static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern, unsigned int length, reg_syntax_t syntax); static void re_compile_fastmap_iter(regex_t * bufp, const re_dfastate_t * init_state, char *fastmap); static reg_errcode_t analyze(regex_t * preg); static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node); const char *__re_compile_pattern(pattern, length, bufp) const char *pattern; { reg_errcode_t ret; ret = 
re_compile_internal(bufp, pattern, length, re_syntax_options); } int __re_compile_fastmap(bufp) regex_t *bufp; { re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; char *fastmap = bufp->fastmap; re_compile_fastmap_iter(bufp, dfa->init_state, fastmap); } static inline void __attribute__ ((always_inline)) re_set_fastmap(char *fastmap, int icase, int ch) { if (icase) { int __res; if (sizeof(ch) > 1) { if (__builtin_constant_p(ch)) { } else __res = tolower(ch); } fastmap[__res] = 1; } } static void re_compile_fastmap_iter(regex_t * bufp, const re_dfastate_t * init_state, char *fastmap) { re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; int node_cnt; int icase = (dfa->mb_cur_max == 1 && (bufp-> syntax & 0x40000)); for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) { int node = init_state->nodes.elems[node_cnt]; re_token_type_t type = dfa->nodes[node].type; if (type == CHARACTER) { if ((bufp-> syntax & 0x40000) && dfa->mb_cur_max > 1) { unsigned char *buf = __builtin_alloca(dfa->mb_cur_max); } int i; int ch; for (i = 0, ch = 0; i < (256 / (sizeof(unsigned long int) * 8)); ++i) { int j; unsigned long int w = dfa->nodes[node].opr.sbcset[i]; for (j = 0; j < (sizeof(unsigned long int) * 8); ++j, ++ch) if (w & ((unsigned long int)1 << j)) re_set_fastmap(fastmap, icase, ch); } } else if (type == COMPLEX_BRACKET) { int i; re_charset_t *cset = dfa->nodes[node].opr.mbcset; for (i = 0; i < cset->nmbchars; ++i) { char buf[256]; mbstate_t state; if (__wcrtomb(buf, cset->mbchars[i], &state) != (unsigned int)-1) re_set_fastmap(fastmap, icase, *(unsigned char *)buf); if ((bufp-> syntax & 0x4000) && dfa->mb_cur_max > 1) { if (__wcrtomb (buf, towlower(cset->mbchars[i]), &state) != (unsigned int)-1) re_set_fastmap(fastmap, 0, *(unsigned char *)buf); } } } } } static const bitset_t utf8_sb_map; static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern, unsigned int length, reg_syntax_t syntax) { reg_errcode_t err; re_dfa_t *dfa; preg->fastmap_accurate = 0; 
preg->not_bol = preg->not_eol = 0; err = analyze(preg); if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0) optimize_utf8(dfa); if (dfa->mb_cur_max == 6 && __extension__( { unsigned int __s1_len; unsigned int __s2_len; (__builtin_constant_p (((*_nl_current_LC_CTYPE)-> values[0].string)) && __builtin_constant_p("UTF-8") && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[ 0 ].string)), __s2_len = strlen("UTF-8"), (!((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[ 0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[ 0 ].string)) == 1) || __s1_len >= 4) && (!((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) || __s2_len >= 4)) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[ 0 ].string), "UTF-8") : (__builtin_constant_p(((*_nl_current_LC_CTYPE)->values[ 0 ].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[ 0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[ 0].string)) == 1) && (__s1_len = strlen(((*_nl_current_LC_CTYPE)->values[ 0].string)), __s1_len < 4) ? (__builtin_constant_p("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) ? __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[ 0 ].string), "UTF-8") : (__extension__( { __const unsigned char *__s2 = (__const unsigned char *) (__const char *) ("UTF-8"); register int __result = (((__const unsigned char *)(__const char *)(((*_nl_current_LC_CTYPE)->values[ 0 ].string)))[0] - __s2[0]); __result;} ))): (__builtin_constant_p("UTF-8") && ((unsigned int)(const void *)(("UTF-8") + 1) - (unsigned int)(const void *)("UTF-8") == 1) && (__s2_len = strlen("UTF-8"), __s2_len < 4) ? (__builtin_constant_p(((*_nl_current_LC_CTYPE)->values[ 0].string)) && ((unsigned int)(const void *)((((*_nl_current_LC_CTYPE)->values[ 0].string)) + 1) - (unsigned int)(const void *)(((*_nl_current_LC_CTYPE)->values[ 0].string)) == 1) ? 
__builtin_strcmp(((*_nl_current_LC_CTYPE)->values[ 0].string), "UTF-8") : (__extension__( { __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (((*_nl_current_LC_CTYPE)->values[ 0 ].string)); register int __result = __s1[0] - ((__const unsigned char *)(__const char *)("UTF-8"))[0]; __result;} ))): __builtin_strcmp(((*_nl_current_LC_CTYPE)->values[ 0 ].string), "UTF-8"))));} ) == 0) dfa->is_utf8 = 1; dfa->map_notascii = (((unsigned int)(*_nl_current_LC_CTYPE)-> values[ 0 ].word) != 0); if (dfa->mb_cur_max > 1) { if (dfa->is_utf8) dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; else { int i; int j; int ch; dfa->sb_char = (re_bitset_ptr_t) calloc(sizeof(bitset_t), 1); for (i = 0, ch = 0; i < (256 / (sizeof(unsigned long int) * 8)); ++i) for (j = 0; j < (sizeof(unsigned long int) * 8); ++j, ++ch) { unsigned int wch = __btowc(ch); if (wch != (0xffffffffu)) dfa->sb_char[i] |= (unsigned long int)1 << j; } } } int node; int i; int mb_chars = 0; int has_period = 0; for (node = 0; node < dfa->nodes_len; ++node) switch (dfa->nodes[node].type) { case CHARACTER: if (dfa->nodes[node].opr.c >= 0x80) mb_chars = 1; case ANCHOR: switch (dfa->nodes[node].opr.idx) { } case OP_PERIOD: has_period = 1; for (i = 0x80 / (sizeof(unsigned long int) * 8); i < (256 / (sizeof(unsigned long int) * 8)); ++i) if (dfa->nodes[node].opr.sbcset[i]) return; } dfa->has_mb_node = dfa->nbackref > 0 || has_period; } static reg_errcode_t analyze(regex_t * preg) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; if (__builtin_expect (dfa->nexts == 0 || dfa->org_indices == 0 || dfa->edests == 0 || dfa->eclosures == 0, 0)) return REG_ESPACE; if (dfa->subexp_map != 0) { int i; for (i = 0; i < preg->re_nsub; i++) dfa->subexp_map[i] = i; preorder(dfa->str_tree, optimize_subexps, dfa); for (i = 0; i < preg->re_nsub; i++) if (dfa->subexp_map[i] != i) break; } }