]>
Commit | Line | Data |
---|---|---|
192296d8 KP |
1 | /* |
2 | * $Id$ | |
3 | * | |
4 | * Copyright © 2004 Keith Packard | |
5 | * | |
6 | * Permission to use, copy, modify, distribute, and sell this software and its | |
7 | * documentation for any purpose is hereby granted without fee, provided that | |
8 | * the above copyright notice appear in all copies and that both that | |
9 | * copyright notice and this permission notice appear in supporting | |
10 | * documentation, and that the name of Keith Packard not be used in | |
11 | * advertising or publicity pertaining to distribution of the software without | |
12 | * specific, written prior permission. Keith Packard makes no | |
13 | * representations about the suitability of this software for any purpose. It | |
14 | * is provided "as is" without express or implied warranty. | |
15 | * | |
16 | * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
17 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
18 | * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
19 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, | |
20 | * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
21 | * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
22 | * PERFORMANCE OF THIS SOFTWARE. | |
23 | */ | |
24 | ||
25 | #include "fcint.h" | |
26 | #include <ctype.h> | |
27 | ||
28 | #define MAX_OUT 32 | |
29 | #define MAX_LINE 8192 | |
30 | ||
c003f5ae PL |
31 | /* stub definitions for declarations from fcint.h.. */ |
32 | int * _fcBankId = 0, * _fcBankIdx = 0; | |
67ed0b72 PL |
33 | FcValueList ** _fcValueLists = 0; |
34 | FcPatternElt ** _fcPatternElts = 0; | |
35 | int FcDebugVal = 0; | |
c003f5ae PL |
36 | |
/*
 * Stub implementation: the bank argument is ignored and 0 is always
 * returned.
 */
int
FcCacheBankToIndexMTF (int bank)
{
    (void) bank;	/* intentionally unused */
    return 0;
}
42 | /* end stub definitions */ | |
43 | ||
192296d8 KP |
/*
 * Class of a case folding record.  Each constant is paired with a
 * one-letter name in caseFoldClassMap.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;
45 | ||
46 | typedef struct _caseFoldClassMap { | |
67accef4 | 47 | const char *name; |
192296d8 KP |
48 | CaseFoldClass class; |
49 | } CaseFoldClassMap; | |
50 | ||
67accef4 | 51 | static const CaseFoldClassMap caseFoldClassMap[] = { |
192296d8 KP |
52 | { "C", CaseFoldCommon }, |
53 | { "F", CaseFoldFull }, | |
54 | { "S", CaseFoldSimple }, | |
55 | { "T", CaseFoldTurkic }, | |
56 | { 0, 0 } | |
57 | }; | |
58 | ||
59 | typedef struct _caseFoldRaw { | |
60 | FcChar32 upper; | |
61 | CaseFoldClass class; | |
62 | int nout; | |
63 | FcChar32 lower[MAX_OUT]; | |
64 | } CaseFoldRaw; | |
65 | ||
/*
 * Report a fatal error on stderr and terminate the program with
 * exit status 1.  Does not return.
 */
static void
panic (const char *reason)
{
    fprintf (stderr, "fc-case: panic %s\n", reason);
    exit (1);
}
72 | ||
73 | int maxExpand; | |
74 | static FcCaseFold *folds; | |
75 | int nfolds; | |
76 | ||
77 | static FcCaseFold * | |
78 | addFold (void) | |
79 | { | |
80 | if (folds) | |
81 | folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold)); | |
82 | else | |
83 | folds = malloc (sizeof (FcCaseFold)); | |
84 | if (!folds) | |
85 | panic ("out of memory"); | |
86 | return &folds[nfolds++]; | |
87 | } | |
88 | ||
89 | static int | |
90 | ucs4_to_utf8 (FcChar32 ucs4, | |
91 | FcChar8 dest[FC_UTF8_MAX_LEN]) | |
92 | { | |
93 | int bits; | |
94 | FcChar8 *d = dest; | |
95 | ||
96 | if (ucs4 < 0x80) { *d++= ucs4; bits= -6; } | |
97 | else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; } | |
98 | else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; } | |
99 | else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; } | |
100 | else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; } | |
101 | else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; } | |
102 | else return 0; | |
103 | ||
104 | for ( ; bits >= 0; bits-= 6) { | |
105 | *d++= ((ucs4 >> bits) & 0x3F) | 0x80; | |
106 | } | |
107 | return d - dest; | |
108 | } | |
109 | ||
110 | static int | |
111 | utf8_size (FcChar32 ucs4) | |
112 | { | |
113 | FcChar8 utf8[FC_UTF8_MAX_LEN]; | |
114 | return ucs4_to_utf8 (ucs4, utf8 ); | |
115 | } | |
116 | ||
117 | static FcChar8 *foldChars; | |
0d745819 PL |
118 | static int nfoldChars; |
119 | static int maxFoldChars; | |
120 | static FcChar32 minFoldChar; | |
121 | static FcChar32 maxFoldChar; | |
192296d8 KP |
122 | |
123 | static void | |
124 | addChar (FcChar32 c) | |
125 | { | |
126 | FcChar8 utf8[FC_UTF8_MAX_LEN]; | |
127 | int len; | |
128 | int i; | |
129 | ||
130 | len = ucs4_to_utf8 (c, utf8); | |
131 | if (foldChars) | |
132 | foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8)); | |
133 | else | |
134 | foldChars = malloc (sizeof (FcChar8) * len); | |
135 | if (!foldChars) | |
136 | panic ("out of memory"); | |
137 | for (i = 0; i < len; i++) | |
138 | foldChars[nfoldChars + i] = utf8[i]; | |
139 | nfoldChars += len; | |
140 | } | |
141 | ||
142 | static int | |
143 | foldExtends (FcCaseFold *fold, CaseFoldRaw *raw) | |
144 | { | |
145 | switch (fold->method) { | |
146 | case FC_CASE_FOLD_RANGE: | |
147 | if ((short) (raw->lower[0] - raw->upper) != fold->offset) | |
148 | return 0; | |
149 | if (raw->upper != fold->upper + fold->count) | |
150 | return 0; | |
151 | return 1; | |
152 | case FC_CASE_FOLD_EVEN_ODD: | |
153 | if ((short) (raw->lower[0] - raw->upper) != 1) | |
154 | return 0; | |
155 | if (raw->upper != fold->upper + fold->count + 1) | |
156 | return 0; | |
157 | return 1; | |
158 | case FC_CASE_FOLD_FULL: | |
159 | break; | |
160 | } | |
161 | return 0; | |
162 | } | |
163 | ||
67accef4 | 164 | static const char * |
192296d8 KP |
165 | case_fold_method_name (FcChar16 method) |
166 | { | |
167 | switch (method) { | |
168 | case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,"; | |
169 | case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,"; | |
170 | case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,"; | |
171 | default: return "unknown"; | |
172 | } | |
173 | } | |
174 | ||
175 | static void | |
176 | dump (void) | |
177 | { | |
178 | int i; | |
179 | ||
180 | printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds); | |
181 | printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars); | |
182 | printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars); | |
183 | printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand); | |
184 | printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar); | |
185 | printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar); | |
186 | printf ( "\n"); | |
187 | ||
188 | /* | |
189 | * Dump out ranges | |
190 | */ | |
191 | printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n"); | |
192 | for (i = 0; i < nfolds; i++) | |
193 | { | |
194 | printf (" { 0x%08x, %-22s 0x%04x, %6d },\n", | |
195 | folds[i].upper, case_fold_method_name (folds[i].method), | |
196 | folds[i].count, folds[i].offset); | |
197 | } | |
198 | printf ("};\n\n"); | |
199 | ||
200 | /* | |
201 | * Dump out "other" values | |
202 | */ | |
203 | ||
204 | printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n"); | |
205 | for (i = 0; i < nfoldChars; i++) | |
206 | { | |
207 | printf ("0x%02x", foldChars[i]); | |
208 | if (i != nfoldChars - 1) | |
209 | { | |
210 | if ((i & 0xf) == 0xf) | |
211 | printf (",\n"); | |
212 | else | |
213 | printf (","); | |
214 | } | |
215 | } | |
216 | printf ("\n};\n"); | |
217 | } | |
218 | ||
219 | /* | |
220 | * Read the standard Unicode CaseFolding.txt file | |
221 | */ | |
222 | #define SEP "; \t\n" | |
223 | ||
224 | static int | |
225 | parseRaw (char *line, CaseFoldRaw *raw) | |
226 | { | |
227 | char *tok, *end; | |
228 | int i; | |
229 | ||
230 | if (!isxdigit (line[0])) | |
231 | return 0; | |
232 | /* | |
233 | * Get upper case value | |
234 | */ | |
235 | tok = strtok (line, SEP); | |
236 | if (!tok || tok[0] == '#') | |
237 | return 0; | |
238 | raw->upper = strtol (tok, &end, 16); | |
239 | if (end == tok) | |
240 | return 0; | |
241 | /* | |
242 | * Get class | |
243 | */ | |
244 | tok = strtok (NULL, SEP); | |
245 | if (!tok || tok[0] == '#') | |
246 | return 0; | |
247 | for (i = 0; caseFoldClassMap[i].name; i++) | |
248 | if (!strcmp (tok, caseFoldClassMap[i].name)) | |
249 | { | |
250 | raw->class = caseFoldClassMap[i].class; | |
251 | break; | |
252 | } | |
253 | if (!caseFoldClassMap[i].name) | |
254 | return 0; | |
255 | ||
256 | /* | |
257 | * Get list of result characters | |
258 | */ | |
259 | for (i = 0; i < MAX_OUT; i++) | |
260 | { | |
261 | tok = strtok (NULL, SEP); | |
262 | if (!tok || tok[0] == '#') | |
263 | break; | |
264 | raw->lower[i] = strtol (tok, &end, 16); | |
265 | if (end == tok) | |
266 | break; | |
267 | } | |
268 | if (i == 0) | |
269 | return 0; | |
270 | raw->nout = i; | |
271 | return 1; | |
272 | } | |
273 | ||
274 | static int | |
275 | caseFoldReadRaw (FILE *in, CaseFoldRaw *raw) | |
276 | { | |
277 | char line[MAX_LINE]; | |
278 | ||
279 | for (;;) | |
280 | { | |
281 | if (!fgets (line, sizeof (line) - 1, in)) | |
282 | return 0; | |
283 | if (parseRaw (line, raw)) | |
284 | return 1; | |
285 | } | |
286 | } | |
287 | ||
288 | int | |
289 | main (int argc, char **argv) | |
290 | { | |
291 | FcCaseFold *fold = 0; | |
292 | CaseFoldRaw raw; | |
293 | int i; | |
294 | FILE *caseFile; | |
295 | char line[MAX_LINE]; | |
296 | int expand; | |
297 | ||
298 | if (argc != 2) | |
299 | panic ("usage: fc-case CaseFolding.txt"); | |
300 | caseFile = fopen (argv[1], "r"); | |
301 | if (!caseFile) | |
302 | panic ("can't open case folding file"); | |
303 | ||
304 | while (caseFoldReadRaw (caseFile, &raw)) | |
305 | { | |
306 | if (!minFoldChar) | |
307 | minFoldChar = raw.upper; | |
308 | maxFoldChar = raw.upper; | |
309 | switch (raw.class) { | |
310 | case CaseFoldCommon: | |
311 | case CaseFoldFull: | |
312 | if (raw.nout == 1) | |
313 | { | |
314 | if (fold && foldExtends (fold, &raw)) | |
315 | fold->count = raw.upper - fold->upper + 1; | |
316 | else | |
317 | { | |
318 | fold = addFold (); | |
319 | fold->upper = raw.upper; | |
320 | fold->offset = raw.lower[0] - raw.upper; | |
321 | if (fold->offset == 1) | |
322 | fold->method = FC_CASE_FOLD_EVEN_ODD; | |
323 | else | |
324 | fold->method = FC_CASE_FOLD_RANGE; | |
325 | fold->count = 1; | |
326 | } | |
327 | expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper); | |
328 | } | |
329 | else | |
330 | { | |
331 | fold = addFold (); | |
332 | fold->upper = raw.upper; | |
333 | fold->method = FC_CASE_FOLD_FULL; | |
334 | fold->offset = nfoldChars; | |
335 | for (i = 0; i < raw.nout; i++) | |
336 | addChar (raw.lower[i]); | |
337 | fold->count = nfoldChars - fold->offset; | |
338 | if (fold->count > maxFoldChars) | |
339 | maxFoldChars = fold->count; | |
340 | expand = fold->count - utf8_size (raw.upper); | |
341 | } | |
342 | if (expand > maxExpand) | |
343 | maxExpand = expand; | |
344 | break; | |
345 | case CaseFoldSimple: | |
346 | break; | |
347 | case CaseFoldTurkic: | |
348 | break; | |
349 | } | |
350 | } | |
351 | /* | |
352 | * Scan the input until the marker is found | |
353 | */ | |
354 | ||
355 | while (fgets (line, sizeof (line), stdin)) | |
356 | { | |
357 | if (!strncmp (line, "@@@", 3)) | |
358 | break; | |
359 | fputs (line, stdout); | |
360 | } | |
361 | ||
362 | /* | |
363 | * Dump these tables | |
364 | */ | |
365 | dump (); | |
366 | ||
367 | /* | |
368 | * And flush out the rest of the input file | |
369 | */ | |
370 | ||
371 | while (fgets (line, sizeof (line), stdin)) | |
372 | fputs (line, stdout); | |
373 | ||
374 | fflush (stdout); | |
375 | exit (ferror (stdout)); | |
376 | } |