]>
Commit | Line | Data |
---|---|---|
/*
 * One value slot of a locale record: read either as a string pointer or as
 * an unsigned word.
 */
union locale_data_value {
    char *string;
    unsigned int word;
};

/*
 * Locale record: the nstrings field followed by a flexible array of value
 * slots.
 */
struct locale_data {
    unsigned int nstrings;
    union locale_data_value values[];
};

/*
 * Thread-local pointer to a pointer to locale data. Only declared here; this
 * file reads values[0].string through it.
 */
extern __thread struct locale_data **_nl_current_LC_CTYPE;
/* Syntax option flags; this file tests individual bits of such values. */
typedef unsigned long reg_syntax_t;

/* Syntax flags handed to re_compile_internal() by __re_compile_pattern(). */
extern reg_syntax_t re_syntax_options;

/* Result code of the regex compiler; REG_ENOSYS is the only enumerator here. */
typedef enum {
    REG_ENOSYS
} reg_errcode_t;

/* Pattern buffer, limited to the members this file accesses. */
typedef struct {
    reg_syntax_t syntax;       /* syntax flags; bit 0x40000 is tested in this file */
    unsigned char *translate;  /* compared against 0 in re_compile_internal() */
    unsigned int re_nsub;      /* upper bound of the subexp_map scan */
} regex_t;
21 | extern __thread void *__libc_tsd_CTYPE_TOLOWER; | |
22 | inline int **__ctype_tolower_loc(void) { | |
23 | union { | |
24 | void **ptr; | |
25 | int **tablep; | |
26 | } | |
27 | u; | |
28 | u.ptr = (&__libc_tsd_CTYPE_TOLOWER); | |
29 | if (*u.tablep == 0) *u.tablep = (*_nl_current_LC_CTYPE)->values[0].string; | |
30 | return u.tablep; | |
31 | } | |
32 | inline int tolower(int __c) { | |
33 | return *__ctype_tolower_loc()[__c]; | |
34 | } | |
/* Multibyte conversion state; it has no members in this file. */
typedef struct {
} mbstate_t;

/* Set of nodes; only the element count is present here. */
typedef struct {
    int nelem;
} re_node_set;

/* Token kinds distinguished in this file. */
typedef enum {
    CHARACTER = 1,
    COMPLEX_BRACKET = 6
} re_token_type_t;

/* Character set: an array of multibyte character values and its length. */
typedef struct {
    long *mbchars;
    int nmbchars;
} re_charset_t;

/* One token: an operand (a character or a pointer to a bit set) and its kind. */
typedef struct {
    union {
        unsigned char c;
        unsigned long *sbcset;
    } opr;
    re_token_type_t type;
} re_token_t;

/* The full definition of struct re_dfa_t follows later in the file. */
typedef struct re_dfa_t re_dfa_t;

/* DFA state; only its node set is present here. */
typedef struct {
    re_node_set nodes;
} re_dfastate_t;
64 | struct re_dfa_t { | |
65 | re_token_t *nodes; | |
66 | unsigned int nodes_len; | |
67 | re_dfastate_t *init_state; | |
68 | unsigned long int *sb_char; | |
69 | int nbackref; | |
70 | unsigned int has_mb_node:1; | |
71 | unsigned int is_utf8:1; | |
72 | unsigned int map_notascii:1; | |
73 | int mb_cur_max; | |
74 | int *subexp_map; | |
75 | }; | |
/*
 * Mark the lowercase form of CH in FASTMAP when case folding is requested.
 *
 * fastmap: table indexed by character value; the entry for tolower(ch) is
 *          set to 1. NOTE(review): the table size is not visible here;
 *          callers presumably supply one entry per possible character value.
 * icase:   non-zero to mark the entry; when zero the function does nothing.
 * ch:      character to fold with tolower().
 */
static inline void re_set_fastmap(char *fastmap, int icase, int ch) {
    if (!icase) {
        return;
    }

    /*
     * Fold unconditionally. Previously the index was assigned only when
     * ch == 0 and was read uninitialized for every other character.
     */
    int lower = tolower(ch);
    fastmap[lower] = 1;
}
/*
 * Walk the nodes of init_state and mark entries in fastmap through
 * re_set_fastmap().
 *
 * bufp:       pattern buffer; only bufp->syntax (bit 0x40000) is read here.
 * init_state: state whose nodes.nelem bounds the outer loop.
 * fastmap:    table handed through to re_set_fastmap().
 *
 * NOTE(review): in the code as shown, dfa, icase, type, i, j, w, ch, cset and
 * state are read without ever being assigned, and the inner for loops have no
 * increment expression. The statements that initialise and advance them are
 * not present in this listing, so the control flow below is incomplete.
 * NOTE(review): alloca() and __wcrtomb() are used without a visible
 * declaration.
 */
87 | static void re_compile_fastmap_iter(regex_t * bufp, re_dfastate_t * init_state, char *fastmap) { | |
88 | re_dfa_t *dfa; | |
89 | int node_cnt; | |
90 | int icase; | |
91 | for (node_cnt = 0; | |
92 | node_cnt < init_state->nodes.nelem; | |
93 | ++node_cnt) { | |
94 | re_token_type_t type; | |
95 | if (type == CHARACTER) { | |
/* Scratch buffer sized by dfa->mb_cur_max; the pointer is not used afterwards. */
96 | if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) { | |
97 | void *buf = alloca(dfa->mb_cur_max); | |
98 | } | |
99 | int i; | |
100 | int ch; | |
/* Outer bound is the number of unsigned long words needed for 256 bits. */
101 | for (; | |
102 | i < (256 / (sizeof(unsigned long int) * 8)); | |
103 | ) { | |
104 | int j; | |
105 | int w; | |
/* Calls re_set_fastmap() for ch whenever the low bit of w is set. */
106 | for (; | |
107 | j < 8; | |
108 | ) if (w & 1) re_set_fastmap(fastmap, icase, ch); | |
109 | } | |
110 | } | |
111 | else if (type == COMPLEX_BRACKET) { | |
112 | int i; | |
113 | re_charset_t *cset; | |
/* Convert each cset->mbchars[i] with __wcrtomb(); unless it returns -1, pass the first byte of buf on. */
114 | for (; | |
115 | i < cset->nmbchars; | |
116 | ) { | |
117 | char buf[256]; | |
118 | mbstate_t state; | |
119 | if (__wcrtomb(buf, cset->mbchars[i], &state) != -1) re_set_fastmap(fastmap, icase, *buf); | |
120 | } | |
121 | } | |
122 | } | |
123 | } | |
/* File-local table pointer; assigned to dfa->sb_char when dfa->is_utf8 is set. */
124 | static unsigned long int *utf8_sb_map; | |
/*
 * Compile step operating on a DFA descriptor: optionally calls
 * optimize_utf8(), derives is_utf8 from the locale's values[0].string,
 * selects sb_char, scans the node array, sets has_mb_node and scans
 * subexp_map.
 *
 * preg:    pattern buffer; translate and re_nsub are read.
 * pattern: not referenced in the body as shown.
 * length:  not referenced in the body as shown.
 * syntax:  syntax flags; bit 0x400000 is tested.
 *
 * NOTE(review): dfa is dereferenced without ever being assigned, and node, i,
 * mb_chars and has_period are read or compared before any assignment.
 * NOTE(review): the function is declared to return reg_errcode_t, but the
 * only return statement carries no value and control can reach the closing
 * brace.
 * NOTE(review): optimize_utf8(), strlen() and strcmp() have no visible
 * declaration in this listing.
 */
125 | static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern, unsigned int length, reg_syntax_t syntax) { | |
126 | re_dfa_t *dfa; | |
127 | if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0) optimize_utf8(dfa); | |
128 | int result; | |
/* result becomes 0 when the string equals "UTF-8"; strings shorter than 4 are compared on their first character against 'U'. */
129 | if (strlen((*_nl_current_LC_CTYPE)->values[0].string) < 4) result = (*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'; | |
130 | else result = strcmp((*_nl_current_LC_CTYPE)->values[0].string, "UTF-8"); | |
131 | if (dfa->mb_cur_max == 6 && result == 0) dfa->is_utf8 = 1; | |
132 | if (dfa->mb_cur_max > 1) { | |
133 | if (dfa->is_utf8) dfa->sb_char = utf8_sb_map; | |
134 | else { | |
135 | int i; | |
/* Empty loop body and no increment: as shown, this loop has no effect other than evaluating its condition. */
136 | for (; | |
137 | i < (256 / (sizeof(unsigned long int) * 8)); | |
138 | ) { | |
139 | } | |
140 | } | |
141 | } | |
142 | int node; | |
143 | int i; | |
144 | int mb_chars; | |
145 | int has_period; | |
/* Node scan: only the CHARACTER case is present; there is no default case and no increment of node. */
146 | for (; | |
147 | node < dfa->nodes_len; | |
148 | ) switch (dfa->nodes[node].type) { | |
149 | case CHARACTER: if (dfa->nodes[node].opr.c >= 0x80) mb_chars = 1; | |
150 | for (; | |
151 | i < (256 / (sizeof(unsigned long int) * 8)); | |
152 | ) if (dfa->nodes[node].opr.sbcset[i]) return; | |
153 | } | |
154 | dfa->has_mb_node = dfa->nbackref > 0 || has_period; | |
155 | if (dfa->subexp_map != 0) { | |
/* Stops at the first index whose subexp_map entry differs from the index itself; i is compared as int against the unsigned re_nsub. */
156 | for (; | |
157 | i < preg->re_nsub; | |
158 | i++) if (dfa->subexp_map[i] != i) break; | |
159 | } | |
160 | } | |
/*
 * Compile pattern into bufp by calling re_compile_internal() with the global
 * re_syntax_options.
 *
 * pattern: pattern text, forwarded unchanged.
 * length:  forwarded unchanged.
 * bufp:    pattern buffer, forwarded as the first argument.
 *
 * NOTE(review): the function is declared to return const char *, but the body
 * has no return statement and ret is never used. How ret should map to the
 * returned string is not visible in this listing.
 */
161 | const char *__re_compile_pattern(const char *pattern, unsigned int length, regex_t * bufp) { | |
162 | reg_errcode_t ret = re_compile_internal(bufp, pattern, length, re_syntax_options); | |
163 | } | |
/*
 * Entry point that runs re_compile_fastmap_iter() for bufp, passing
 * dfa->init_state and fastmap.
 *
 * bufp: pattern buffer, forwarded unchanged.
 *
 * NOTE(review): dfa and fastmap are used without being assigned. regex_t as
 * declared in this listing has no member they could be taken from, so the
 * initialising statements are presumably missing here.
 */
164 | void __re_compile_fastmap(regex_t * bufp) { | |
165 | re_dfa_t *dfa; | |
166 | char *fastmap; | |
167 | re_compile_fastmap_iter(bufp, dfa->init_state, fastmap); | |
168 | }