]>
Commit | Line | Data |
---|---|---|
192296d8 KP |
1 | /* |
2 | * $Id$ | |
3 | * | |
4 | * Copyright © 2004 Keith Packard | |
5 | * | |
6 | * Permission to use, copy, modify, distribute, and sell this software and its | |
7 | * documentation for any purpose is hereby granted without fee, provided that | |
8 | * the above copyright notice appear in all copies and that both that | |
9 | * copyright notice and this permission notice appear in supporting | |
10 | * documentation, and that the name of Keith Packard not be used in | |
11 | * advertising or publicity pertaining to distribution of the software without | |
12 | * specific, written prior permission. Keith Packard makes no | |
13 | * representations about the suitability of this software for any purpose. It | |
14 | * is provided "as is" without express or implied warranty. | |
15 | * | |
16 | * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
17 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
18 | * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
19 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, | |
20 | * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
21 | * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
22 | * PERFORMANCE OF THIS SOFTWARE. | |
23 | */ | |
24 | ||
25 | #include "fcint.h" | |
26 | #include <ctype.h> | |
27 | ||
28 | #define MAX_OUT 32 | |
29 | #define MAX_LINE 8192 | |
30 | ||
c003f5ae PL |
/* stub definitions for declarations from fcint.h.. */
int *_fcBankId = 0;
int *_fcBankIdx = 0;

/*
 * Stub: this tool never looks at cache banks, so every bank number
 * is reported as index 0.
 */
int
FcCacheBankToIndexMTF (int bank)
{
    (void) bank;    /* deliberately unused */
    return 0;
}
/* end stub definitions */
40 | ||
192296d8 KP |
/* Folding classes recognised in the class column of the input file. */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;

/* Associates the textual class tag found in the input with its enum value. */
typedef struct _caseFoldClassMap {
    const char      *name;
    CaseFoldClass   class;
} CaseFoldClassMap;

/*
 * Lookup table used by the parser; the entry with a null name
 * terminates the list.
 */
static const CaseFoldClassMap caseFoldClassMap[] = {
    { "C", CaseFoldCommon },
    { "F", CaseFoldFull },
    { "S", CaseFoldSimple },
    { "T", CaseFoldTurkic },
    { 0,   CaseFoldCommon }
};
55 | ||
56 | typedef struct _caseFoldRaw { | |
57 | FcChar32 upper; | |
58 | CaseFoldClass class; | |
59 | int nout; | |
60 | FcChar32 lower[MAX_OUT]; | |
61 | } CaseFoldRaw; | |
62 | ||
/*
 * Report a fatal condition on stderr and terminate the program with
 * exit status 1.  Does not return.
 */
static void
panic (const char *reason)
{
    fprintf (stderr, "fc-case: panic %s\n", reason);
    exit (1);
}
69 | ||
70 | int maxExpand; | |
71 | static FcCaseFold *folds; | |
72 | int nfolds; | |
73 | ||
74 | static FcCaseFold * | |
75 | addFold (void) | |
76 | { | |
77 | if (folds) | |
78 | folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold)); | |
79 | else | |
80 | folds = malloc (sizeof (FcCaseFold)); | |
81 | if (!folds) | |
82 | panic ("out of memory"); | |
83 | return &folds[nfolds++]; | |
84 | } | |
85 | ||
86 | static int | |
87 | ucs4_to_utf8 (FcChar32 ucs4, | |
88 | FcChar8 dest[FC_UTF8_MAX_LEN]) | |
89 | { | |
90 | int bits; | |
91 | FcChar8 *d = dest; | |
92 | ||
93 | if (ucs4 < 0x80) { *d++= ucs4; bits= -6; } | |
94 | else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; } | |
95 | else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; } | |
96 | else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; } | |
97 | else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; } | |
98 | else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; } | |
99 | else return 0; | |
100 | ||
101 | for ( ; bits >= 0; bits-= 6) { | |
102 | *d++= ((ucs4 >> bits) & 0x3F) | 0x80; | |
103 | } | |
104 | return d - dest; | |
105 | } | |
106 | ||
107 | static int | |
108 | utf8_size (FcChar32 ucs4) | |
109 | { | |
110 | FcChar8 utf8[FC_UTF8_MAX_LEN]; | |
111 | return ucs4_to_utf8 (ucs4, utf8 ); | |
112 | } | |
113 | ||
114 | static FcChar8 *foldChars; | |
115 | int nfoldChars; | |
116 | int maxFoldChars; | |
117 | FcChar32 minFoldChar; | |
118 | FcChar32 maxFoldChar; | |
119 | ||
120 | static void | |
121 | addChar (FcChar32 c) | |
122 | { | |
123 | FcChar8 utf8[FC_UTF8_MAX_LEN]; | |
124 | int len; | |
125 | int i; | |
126 | ||
127 | len = ucs4_to_utf8 (c, utf8); | |
128 | if (foldChars) | |
129 | foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8)); | |
130 | else | |
131 | foldChars = malloc (sizeof (FcChar8) * len); | |
132 | if (!foldChars) | |
133 | panic ("out of memory"); | |
134 | for (i = 0; i < len; i++) | |
135 | foldChars[nfoldChars + i] = utf8[i]; | |
136 | nfoldChars += len; | |
137 | } | |
138 | ||
139 | static int | |
140 | foldExtends (FcCaseFold *fold, CaseFoldRaw *raw) | |
141 | { | |
142 | switch (fold->method) { | |
143 | case FC_CASE_FOLD_RANGE: | |
144 | if ((short) (raw->lower[0] - raw->upper) != fold->offset) | |
145 | return 0; | |
146 | if (raw->upper != fold->upper + fold->count) | |
147 | return 0; | |
148 | return 1; | |
149 | case FC_CASE_FOLD_EVEN_ODD: | |
150 | if ((short) (raw->lower[0] - raw->upper) != 1) | |
151 | return 0; | |
152 | if (raw->upper != fold->upper + fold->count + 1) | |
153 | return 0; | |
154 | return 1; | |
155 | case FC_CASE_FOLD_FULL: | |
156 | break; | |
157 | } | |
158 | return 0; | |
159 | } | |
160 | ||
67accef4 | 161 | static const char * |
192296d8 KP |
162 | case_fold_method_name (FcChar16 method) |
163 | { | |
164 | switch (method) { | |
165 | case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,"; | |
166 | case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,"; | |
167 | case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,"; | |
168 | default: return "unknown"; | |
169 | } | |
170 | } | |
171 | ||
172 | static void | |
173 | dump (void) | |
174 | { | |
175 | int i; | |
176 | ||
177 | printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds); | |
178 | printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars); | |
179 | printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars); | |
180 | printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand); | |
181 | printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar); | |
182 | printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar); | |
183 | printf ( "\n"); | |
184 | ||
185 | /* | |
186 | * Dump out ranges | |
187 | */ | |
188 | printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n"); | |
189 | for (i = 0; i < nfolds; i++) | |
190 | { | |
191 | printf (" { 0x%08x, %-22s 0x%04x, %6d },\n", | |
192 | folds[i].upper, case_fold_method_name (folds[i].method), | |
193 | folds[i].count, folds[i].offset); | |
194 | } | |
195 | printf ("};\n\n"); | |
196 | ||
197 | /* | |
198 | * Dump out "other" values | |
199 | */ | |
200 | ||
201 | printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n"); | |
202 | for (i = 0; i < nfoldChars; i++) | |
203 | { | |
204 | printf ("0x%02x", foldChars[i]); | |
205 | if (i != nfoldChars - 1) | |
206 | { | |
207 | if ((i & 0xf) == 0xf) | |
208 | printf (",\n"); | |
209 | else | |
210 | printf (","); | |
211 | } | |
212 | } | |
213 | printf ("\n};\n"); | |
214 | } | |
215 | ||
216 | /* | |
217 | * Read the standard Unicode CaseFolding.txt file | |
218 | */ | |
219 | #define SEP "; \t\n" | |
220 | ||
221 | static int | |
222 | parseRaw (char *line, CaseFoldRaw *raw) | |
223 | { | |
224 | char *tok, *end; | |
225 | int i; | |
226 | ||
227 | if (!isxdigit (line[0])) | |
228 | return 0; | |
229 | /* | |
230 | * Get upper case value | |
231 | */ | |
232 | tok = strtok (line, SEP); | |
233 | if (!tok || tok[0] == '#') | |
234 | return 0; | |
235 | raw->upper = strtol (tok, &end, 16); | |
236 | if (end == tok) | |
237 | return 0; | |
238 | /* | |
239 | * Get class | |
240 | */ | |
241 | tok = strtok (NULL, SEP); | |
242 | if (!tok || tok[0] == '#') | |
243 | return 0; | |
244 | for (i = 0; caseFoldClassMap[i].name; i++) | |
245 | if (!strcmp (tok, caseFoldClassMap[i].name)) | |
246 | { | |
247 | raw->class = caseFoldClassMap[i].class; | |
248 | break; | |
249 | } | |
250 | if (!caseFoldClassMap[i].name) | |
251 | return 0; | |
252 | ||
253 | /* | |
254 | * Get list of result characters | |
255 | */ | |
256 | for (i = 0; i < MAX_OUT; i++) | |
257 | { | |
258 | tok = strtok (NULL, SEP); | |
259 | if (!tok || tok[0] == '#') | |
260 | break; | |
261 | raw->lower[i] = strtol (tok, &end, 16); | |
262 | if (end == tok) | |
263 | break; | |
264 | } | |
265 | if (i == 0) | |
266 | return 0; | |
267 | raw->nout = i; | |
268 | return 1; | |
269 | } | |
270 | ||
271 | static int | |
272 | caseFoldReadRaw (FILE *in, CaseFoldRaw *raw) | |
273 | { | |
274 | char line[MAX_LINE]; | |
275 | ||
276 | for (;;) | |
277 | { | |
278 | if (!fgets (line, sizeof (line) - 1, in)) | |
279 | return 0; | |
280 | if (parseRaw (line, raw)) | |
281 | return 1; | |
282 | } | |
283 | } | |
284 | ||
285 | int | |
286 | main (int argc, char **argv) | |
287 | { | |
288 | FcCaseFold *fold = 0; | |
289 | CaseFoldRaw raw; | |
290 | int i; | |
291 | FILE *caseFile; | |
292 | char line[MAX_LINE]; | |
293 | int expand; | |
294 | ||
295 | if (argc != 2) | |
296 | panic ("usage: fc-case CaseFolding.txt"); | |
297 | caseFile = fopen (argv[1], "r"); | |
298 | if (!caseFile) | |
299 | panic ("can't open case folding file"); | |
300 | ||
301 | while (caseFoldReadRaw (caseFile, &raw)) | |
302 | { | |
303 | if (!minFoldChar) | |
304 | minFoldChar = raw.upper; | |
305 | maxFoldChar = raw.upper; | |
306 | switch (raw.class) { | |
307 | case CaseFoldCommon: | |
308 | case CaseFoldFull: | |
309 | if (raw.nout == 1) | |
310 | { | |
311 | if (fold && foldExtends (fold, &raw)) | |
312 | fold->count = raw.upper - fold->upper + 1; | |
313 | else | |
314 | { | |
315 | fold = addFold (); | |
316 | fold->upper = raw.upper; | |
317 | fold->offset = raw.lower[0] - raw.upper; | |
318 | if (fold->offset == 1) | |
319 | fold->method = FC_CASE_FOLD_EVEN_ODD; | |
320 | else | |
321 | fold->method = FC_CASE_FOLD_RANGE; | |
322 | fold->count = 1; | |
323 | } | |
324 | expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper); | |
325 | } | |
326 | else | |
327 | { | |
328 | fold = addFold (); | |
329 | fold->upper = raw.upper; | |
330 | fold->method = FC_CASE_FOLD_FULL; | |
331 | fold->offset = nfoldChars; | |
332 | for (i = 0; i < raw.nout; i++) | |
333 | addChar (raw.lower[i]); | |
334 | fold->count = nfoldChars - fold->offset; | |
335 | if (fold->count > maxFoldChars) | |
336 | maxFoldChars = fold->count; | |
337 | expand = fold->count - utf8_size (raw.upper); | |
338 | } | |
339 | if (expand > maxExpand) | |
340 | maxExpand = expand; | |
341 | break; | |
342 | case CaseFoldSimple: | |
343 | break; | |
344 | case CaseFoldTurkic: | |
345 | break; | |
346 | } | |
347 | } | |
348 | /* | |
349 | * Scan the input until the marker is found | |
350 | */ | |
351 | ||
352 | while (fgets (line, sizeof (line), stdin)) | |
353 | { | |
354 | if (!strncmp (line, "@@@", 3)) | |
355 | break; | |
356 | fputs (line, stdout); | |
357 | } | |
358 | ||
359 | /* | |
360 | * Dump these tables | |
361 | */ | |
362 | dump (); | |
363 | ||
364 | /* | |
365 | * And flush out the rest of the input file | |
366 | */ | |
367 | ||
368 | while (fgets (line, sizeof (line), stdin)) | |
369 | fputs (line, stdout); | |
370 | ||
371 | fflush (stdout); | |
372 | exit (ferror (stdout)); | |
373 | } |