]>
Commit | Line | Data |
---|---|---|
45516216 | 1 | struct locale_data { /* Locale data record: an unsigned count followed by a flexible array of values. */ |
2 | unsigned int nstrings; /* NOTE(review): presumably the number of entries in values[]; never read in this file - confirm. */ | |
3 | union locale_data_value { /* Each entry can be viewed as a string pointer or as an unsigned word. */ | |
4 | char *string; | |
5 | unsigned int word; | |
6 | } values[]; /* Flexible array member; this file only ever reads values[0]. */ | |
7 | }; | |
8 | extern __thread struct locale_data **_nl_current_LC_CTYPE; /* Thread-local; always used in this file as (*_nl_current_LC_CTYPE)->values[0]. */ | |
9 | typedef unsigned long int reg_syntax_t; /* Syntax option word; this file tests it with bit masks such as 0x40000. */ | |
10 | extern reg_syntax_t re_syntax_options; /* Passed as the syntax argument by __re_compile_pattern. */ | |
11 | typedef enum { /* Result type declared for re_compile_internal and analyze. */ | |
12 | REG_ENOSYS /* Only enumerator visible here; its value is 0 because it is first and has no initialiser. */ | |
13 | } reg_errcode_t; | |
14 | typedef struct { /* Pattern handle passed to the compile and fastmap functions in this file. */ | |
15 | unsigned char *buffer; /* Cast to re_dfa_t * by the functions in this file. */ | |
16 | reg_syntax_t syntax; /* Option bits; re_compile_fastmap_iter tests 0x40000 and 0x4000. */ | |
17 | char *fastmap; /* Table handed to re_compile_fastmap_iter by __re_compile_fastmap. */ | |
18 | unsigned char *translate; /* Compared against 0 in re_compile_internal before optimize_utf8 is called. */ | |
19 | unsigned int re_nsub; /* Loop bound for the subexp_map loops in analyze. */ | |
20 | } regex_t; | |
21 | extern __thread void *__libc_tsd_CTYPE_TOLOWER; /* Thread-local slot; __ctype_tolower_loc views its address as int ** and fills it when it is null. */ | |
22 | inline int **__ctype_tolower_loc(void) /* Returns the address of the thread-local __libc_tsd_CTYPE_TOLOWER slot viewed as int **, filling the slot first if it is null. */ | |
23 | { | |
24 | union { /* Lets the same address be stored as void ** and read back as int ** without a cast. */ | |
25 | void **ptr; | |
26 | int **tablep; | |
27 | } u; | |
28 | u.ptr = (&__libc_tsd_CTYPE_TOLOWER); | |
29 | if (*u.tablep == 0) /* Slot is still null: install a table pointer. */ | |
30 | *u.tablep = | |
31 | ((int *)((*_nl_current_LC_CTYPE)->values[0].string) + 128); /* values[0].string reinterpreted as int *, then advanced by 128 ints. */ | |
32 | return u.tablep; /* Address of the slot, not the table itself. */ | |
33 | } | |
34 | inline int tolower(int __c) /* Reads an int through the pointer returned by __ctype_tolower_loc(), using __c as the index. */ | |
35 | { | |
36 | return *__ctype_tolower_loc()[__c]; /* NOTE(review): postfix [] binds tighter than unary *, so this is *(__ctype_tolower_loc()[__c]): it indexes the returned int ** by __c and then dereferences. That equals (*__ctype_tolower_loc())[__c] only when __c is 0 - confirm which was intended. */ | |
37 | } | |
38 | typedef struct { /* Member-less placeholder (a struct with no members is a GNU C extension); used only as the state argument to __wcrtomb in this file. */ | |
39 | } mbstate_t; | |
40 | typedef unsigned long int *bitset_t; /* Pointer to unsigned long words; type of the file-scope utf8_sb_map. */ | |
41 | typedef unsigned long int *re_bitset_ptr_t; /* Same underlying type as bitset_t; used for sbcset and sb_char. */ | |
42 | typedef struct { /* Set of node indices stored as a count plus an int array. */ | |
43 | int nelem; /* Loop bound used when re_compile_fastmap_iter walks elems. */ | |
44 | int *elems; /* Each element is used as an index into dfa->nodes. */ | |
45 | } re_node_set; | |
46 | typedef enum { /* Node kinds this file distinguishes; both values are given explicitly. */ | |
47 | CHARACTER = 1, | |
48 | COMPLEX_BRACKET = 6 | |
49 | } re_token_type_t; | |
50 | typedef struct { /* Character set payload referenced by COMPLEX_BRACKET nodes through opr.mbcset. */ | |
51 | long int *mbchars; /* Elements are passed one at a time to __wcrtomb in re_compile_fastmap_iter. */ | |
52 | int nmbchars; /* Loop bound for walking mbchars. */ | |
53 | } re_charset_t; | |
54 | typedef struct { /* One node: an operand union plus a type tag. */ | |
55 | union { /* NOTE(review): presumably type selects the valid member, but this file reads sbcset on CHARACTER nodes - confirm. */ | |
56 | unsigned char c; /* Compared against 0x80 in re_compile_internal. */ | |
57 | re_bitset_ptr_t sbcset; /* Indexed as an array of unsigned long words. */ | |
58 | re_charset_t *mbcset; /* Read for COMPLEX_BRACKET nodes in re_compile_fastmap_iter. */ | |
59 | } opr; | |
60 | re_token_type_t type; | |
61 | } re_token_t; | |
62 | typedef struct re_dfa_t re_dfa_t; /* Forward typedef; struct re_dfa_t is defined later in this file. */ | |
63 | typedef struct bin_tree_t bin_tree_t; /* Incomplete type in this file; only ever used through pointers. */ | |
64 | struct re_dfastate_t { /* State object; the only member visible here is its node set. */ | |
65 | re_node_set nodes; /* Walked by re_compile_fastmap_iter. */ | |
66 | }; | |
67 | typedef struct re_dfastate_t re_dfastate_t; | |
68 | struct re_dfa_t { /* Compiled-pattern state reached by casting regex_t.buffer. */ | |
69 | re_token_t *nodes; /* Node array indexed by node number. */ | |
70 | unsigned int nodes_len; /* Loop bound for the node scan in re_compile_internal. */ | |
71 | re_dfastate_t *init_state; /* Passed to re_compile_fastmap_iter by __re_compile_fastmap. */ | |
72 | bin_tree_t *str_tree; /* Passed to preorder in analyze. */ | |
73 | re_bitset_ptr_t sb_char; /* Set to utf8_sb_map, or has bits OR-ed in, by re_compile_internal. */ | |
74 | int nbackref; /* Read when has_mb_node is computed. */ | |
75 | unsigned int has_mb_node:1; /* Written at the end of re_compile_internal. */ | |
76 | unsigned int is_utf8:1; /* Set when mb_cur_max is 6 and the string comparison result is 0. */ | |
77 | unsigned int map_notascii:1; /* Set from values[0].word != 0. */ | |
78 | int mb_cur_max; /* Compared with 1 and 6 in this file. */ | |
79 | int *subexp_map; /* Written by analyze when non-null. */ | |
80 | }; | |
81 | static reg_errcode_t analyze(regex_t * preg); /* Defined later in this file. */ | |
82 | static reg_errcode_t optimize_subexps(void *extra, bin_tree_t * node); /* Declared only; passed by name to preorder in analyze and never defined in the visible source. */ | |
83 | ||
84 | static inline void re_set_fastmap(char *fastmap, int icase, int ch) /* When icase is nonzero, stores 1 at fastmap[__res]; does nothing at all when icase is zero. */ | |
85 | { | |
86 | if (icase) { | |
87 | int __res; | |
88 | if (sizeof(ch) > 1) { /* ch is an int, so this holds wherever int is wider than one byte. */ | |
89 | if (ch) { /* NOTE(review): empty branch - __res is left unassigned for every nonzero ch. */ | |
90 | } else | |
91 | __res = tolower(ch); /* Reached only when ch is 0. */ | |
92 | } | |
93 | fastmap[__res] = 1; /* NOTE(review): reads __res uninitialised when ch != 0. */ | |
94 | } | |
95 | } | |
96 | static void re_compile_fastmap_iter(regex_t * bufp, /* For each node index in init_state->nodes, calls re_set_fastmap for CHARACTER and COMPLEX_BRACKET nodes; other node types are skipped. */ | |
97 | re_dfastate_t * init_state, | |
98 | char *fastmap) | |
99 | { | |
100 | re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; | |
101 | int node_cnt; | |
102 | int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 0x40000)); /* icase is nonzero only when mb_cur_max is 1 and syntax bit 0x40000 is set. */ | |
103 | for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) { | |
104 | int node = init_state->nodes.elems[node_cnt]; | |
105 | re_token_type_t type = dfa->nodes[node].type; | |
106 | if (type == CHARACTER) { | |
107 | if ((bufp->syntax & 0x40000) && dfa->mb_cur_max > 1) { | |
108 | void *buf = alloca(dfa->mb_cur_max); /* Result is never used; alloca is not declared in the visible source. */ | |
109 | } | |
110 | int i; /* NOTE(review): never initialised or advanced. */ | |
111 | int ch; /* NOTE(review): never assigned before being passed to re_set_fastmap. */ | |
112 | for (; i < (256 / (sizeof(unsigned long int) * 8));) { /* Bound is the number of unsigned long words needed for 256 bits. */ | |
113 | int j; /* NOTE(review): never initialised or advanced. */ | |
114 | unsigned long int w = | |
115 | dfa->nodes[node].opr.sbcset[i]; /* NOTE(review): reads the sbcset union member on a node whose type is CHARACTER. */ | |
116 | for (; j < 8;) /* NOTE(review): no step, and w is never shifted, so the same bit is tested each time. */ | |
117 | if (w & 1) | |
118 | re_set_fastmap(fastmap, icase, | |
119 | ch); | |
120 | } | |
121 | } else if (type == COMPLEX_BRACKET) { | |
122 | int i; /* NOTE(review): never initialised or advanced. */ | |
123 | re_charset_t *cset = dfa->nodes[node].opr.mbcset; | |
124 | for (; i < cset->nmbchars;) { | |
125 | char buf[256]; | |
126 | mbstate_t state; /* Passed to __wcrtomb without initialisation; the type has no members in this file. */ | |
127 | if (__wcrtomb(buf, cset->mbchars[i], &state) != /* __wcrtomb is not declared in the visible source. */ | |
128 | -1) | |
129 | re_set_fastmap(fastmap, icase, *buf); /* Only the first byte written to buf is forwarded. */ | |
130 | if ((bufp->syntax & 0x4000) /* Note the mask here is 0x4000, not the 0x40000 used above; the body is empty. */ | |
131 | && dfa->mb_cur_max > 1) { | |
132 | } | |
133 | } | |
134 | } | |
135 | } | |
136 | } | |
137 | static bitset_t utf8_sb_map; /* File-scope bitset pointer installed as dfa->sb_char when dfa->is_utf8 is set; never assigned in the visible source, so it stays null. */ | |
138 | static reg_errcode_t re_compile_internal(regex_t * preg, const char *pattern, /* Calls analyze, derives the is_utf8, map_notascii and sb_char settings on the dfa, scans the nodes, then sets has_mb_node. pattern and length are unused in the visible body. */ | |
139 | unsigned int length, | |
140 | reg_syntax_t syntax) | |
141 | { | |
142 | reg_errcode_t err; | |
143 | re_dfa_t *dfa; /* NOTE(review): never assigned, yet dereferenced throughout the rest of the function. */ | |
144 | err = analyze(preg); /* err is not examined afterwards. */ | |
145 | if (dfa->is_utf8 && !(syntax & 0x400000) && preg->translate == 0) | |
146 | optimize_utf8(dfa); /* optimize_utf8 is not declared in the visible source. */ | |
147 | unsigned int __s1_len = | |
148 | strlen((*_nl_current_LC_CTYPE)->values[0].string); | |
149 | int result = /* Expanded string comparison of values[0].string against "UTF-8": every arm of the conditional yields either strcmp(...) or string[0] - 'U'. */ | |
150 | (((!((int)((((*_nl_current_LC_CTYPE)->values[0].string)) ) - | |
151 | (int)(((*_nl_current_LC_CTYPE)->values[0].string)) ) | |
152 | ||
153 | ) | |
154 | && (!((int)(("UTF-8") + 1) - (int)("UTF-8")) | |
155 | )) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), | |
156 | "UTF-8") | |
157 | : (((int)((((*_nl_current_LC_CTYPE)->values[0].string)) + 1) - | |
158 | (int)(((*_nl_current_LC_CTYPE)->values[0].string)) ) | |
159 | && | |
160 | __s1_len < 4 | |
161 | ? (((int)(("UTF-8") ) - (int)("UTF-8") | |
162 | ) ? strcmp(((*_nl_current_LC_CTYPE)-> | |
163 | values[0].string), | |
164 | "UTF-8") | |
165 | : (((*_nl_current_LC_CTYPE)->values[0]. | |
166 | string[0] - | |
167 | 'U'))) : (((int)(("UTF-8") ) - | |
168 | (int)("UTF-8") ) | |
169 | ? (((int) | |
170 | ((((*_nl_current_LC_CTYPE)->values[0].string)) ) - (int)(((*_nl_current_LC_CTYPE)->values[0].string)) | |
171 | ) ? strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8") : (((*_nl_current_LC_CTYPE)->values[0].string[0] - 'U'))) : strcmp(((*_nl_current_LC_CTYPE)->values[0].string), "UTF-8")))); | |
172 | if (dfa->mb_cur_max == 6 && result == 0) | |
173 | dfa->is_utf8 = 1; | |
174 | dfa->map_notascii = (*_nl_current_LC_CTYPE)->values[0].word != 0; /* Reads the same values[0] union as a word that was read as a string above. */ | |
175 | if (dfa->mb_cur_max > 1) { | |
176 | if (dfa->is_utf8) | |
177 | dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; | |
178 | else { | |
179 | int i; /* NOTE(review): never initialised or changed, so the loop condition below never changes either. */ | |
180 | int j; /* NOTE(review): incremented in the loop but never initialised. */ | |
181 | int ch; /* NOTE(review): incremented in the loop but never initialised. */ | |
182 | for (; | |
183 | i < (256 / (sizeof(unsigned long int) * 8)); | |
184 | ++j, ++ch) { | |
185 | unsigned int wch = __btowc(ch); /* __btowc is not declared in the visible source. */ | |
186 | if (wch != (0xffffffffu)) | |
187 | dfa->sb_char[i] |= 1 << j; /* Sets bit j of word i; the shifted constant 1 has type int. */ | |
188 | } | |
189 | } | |
190 | } | |
191 | int node; /* NOTE(review): never initialised or advanced. */ | |
192 | int i; /* NOTE(review): never initialised or advanced. */ | |
193 | int mb_chars; /* Assigned below but never read. */ | |
194 | int has_period; /* NOTE(review): never assigned, yet read when has_mb_node is computed. */ | |
195 | for (; node < dfa->nodes_len;) | |
196 | switch (dfa->nodes[node].type) { /* Only CHARACTER has a case label; there is no default. */ | |
197 | case CHARACTER: | |
198 | if (dfa->nodes[node].opr.c >= 0x80) | |
199 | mb_chars = 1; | |
200 | for (; i < (256 / (sizeof(unsigned long int) * 8));) | |
201 | if (dfa->nodes[node].opr.sbcset[i]) | |
202 | return; /* NOTE(review): return without a value in a function declared to return reg_errcode_t. */ | |
203 | } | |
204 | dfa->has_mb_node = dfa->nbackref > 0 || has_period; | |
205 | } /* NOTE(review): control reaches the end without returning a reg_errcode_t. */ | |
206 | static reg_errcode_t analyze(regex_t * preg) /* When dfa->subexp_map is non-null: writes subexp_map[i] = i, calls preorder on str_tree with optimize_subexps, then scans for the first entry that differs from its index. */ | |
207 | { | |
208 | re_dfa_t *dfa = (re_dfa_t *) preg->buffer; | |
209 | if (dfa->subexp_map != 0) { | |
210 | int i; /* NOTE(review): never initialised before the first loop reads it. */ | |
211 | for (; i < preg->re_nsub;) /* NOTE(review): no step expression, so i does not advance inside this loop. */ | |
212 | dfa->subexp_map[i] = i; | |
213 | preorder(dfa->str_tree, optimize_subexps, dfa); /* preorder is not declared in the visible source. */ | |
214 | for (; i < preg->re_nsub; i++) /* Continues from whatever value i already holds; it is not reset. */ | |
215 | if (dfa->subexp_map[i] != i) | |
216 | break; /* The index found here is not used afterwards. */ | |
217 | } | |
218 | } /* NOTE(review): no return statement although the declared return type is reg_errcode_t. */ | |
219 | const char *__re_compile_pattern(const char *pattern, unsigned int length, regex_t *bufp) /* Forwards to re_compile_internal using the global re_syntax_options as the syntax. */ | |
220 | { | |
221 | reg_errcode_t ret = re_compile_internal(bufp, pattern, length, re_syntax_options); /* ret is not used afterwards. */ | |
222 | } /* NOTE(review): no return statement although the declared return type is const char *. */ | |
223 | void __re_compile_fastmap(regex_t *bufp) /* Runs re_compile_fastmap_iter on the dfa's init_state, writing into bufp->fastmap. */ | |
224 | { | |
225 | re_dfa_t *dfa; | |
226 | dfa = (re_dfa_t *) bufp->buffer; /* bufp->buffer is treated as holding a re_dfa_t. */ | |
227 | char *fastmap; | |
228 | fastmap = bufp->fastmap; /* No null check is made on fastmap or on dfa->init_state. */ | |
229 | re_compile_fastmap_iter(bufp, dfa->init_state, fastmap); | |
230 | } | |