]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * fontconfig/fc-case/fc-case.c | |
3 | * | |
4 | * Copyright © 2004 Keith Packard | |
5 | * | |
6 | * Permission to use, copy, modify, distribute, and sell this software and its | |
7 | * documentation for any purpose is hereby granted without fee, provided that | |
8 | * the above copyright notice appear in all copies and that both that | |
9 | * copyright notice and this permission notice appear in supporting | |
10 | * documentation, and that the name of Keith Packard not be used in | |
11 | * advertising or publicity pertaining to distribution of the software without | |
12 | * specific, written prior permission. Keith Packard makes no | |
13 | * representations about the suitability of this software for any purpose. It | |
14 | * is provided "as is" without express or implied warranty. | |
15 | * | |
16 | * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
17 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
18 | * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
19 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, | |
20 | * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
21 | * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
22 | * PERFORMANCE OF THIS SOFTWARE. | |
23 | */ | |
24 | ||
25 | #include "fcint.h" | |
26 | #include <ctype.h> | |
27 | ||
/* Capacity of CaseFoldRaw.lower[]: most result code points kept per input line. */
#define MAX_OUT 32
/* Size, in bytes, of the buffers used to read lines of input. */
#define MAX_LINE 8192
30 | ||
/*
 * Classification of one case folding entry.  The single-letter tags that
 * select each value when parsing are listed in caseFoldClassMap.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;
32 | ||
33 | typedef struct _caseFoldClassMap { | |
34 | const char *name; | |
35 | CaseFoldClass class; | |
36 | } CaseFoldClassMap; | |
37 | ||
38 | static const CaseFoldClassMap caseFoldClassMap[] = { | |
39 | { "C", CaseFoldCommon }, | |
40 | { "F", CaseFoldFull }, | |
41 | { "S", CaseFoldSimple }, | |
42 | { "T", CaseFoldTurkic }, | |
43 | { 0, 0 } | |
44 | }; | |
45 | ||
46 | typedef struct _caseFoldRaw { | |
47 | FcChar32 upper; | |
48 | CaseFoldClass class; | |
49 | int nout; | |
50 | FcChar32 lower[MAX_OUT]; | |
51 | } CaseFoldRaw; | |
52 | ||
/*
 * Print a fatal diagnostic to stderr and terminate the program with
 * exit status 1.  Does not return.
 */
static void
panic (const char *reason)
{
    fprintf (stderr, "fc-case: panic %s\n", reason);
    exit (1);
}
59 | ||
60 | int maxExpand; | |
61 | static FcCaseFold *folds; | |
62 | int nfolds; | |
63 | ||
64 | static FcCaseFold * | |
65 | addFold (void) | |
66 | { | |
67 | if (folds) | |
68 | folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold)); | |
69 | else | |
70 | folds = malloc (sizeof (FcCaseFold)); | |
71 | if (!folds) | |
72 | panic ("out of memory"); | |
73 | return &folds[nfolds++]; | |
74 | } | |
75 | ||
76 | static int | |
77 | ucs4_to_utf8 (FcChar32 ucs4, | |
78 | FcChar8 dest[FC_UTF8_MAX_LEN]) | |
79 | { | |
80 | int bits; | |
81 | FcChar8 *d = dest; | |
82 | ||
83 | if (ucs4 < 0x80) { *d++= ucs4; bits= -6; } | |
84 | else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; } | |
85 | else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; } | |
86 | else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; } | |
87 | else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; } | |
88 | else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; } | |
89 | else return 0; | |
90 | ||
91 | for ( ; bits >= 0; bits-= 6) { | |
92 | *d++= ((ucs4 >> bits) & 0x3F) | 0x80; | |
93 | } | |
94 | return d - dest; | |
95 | } | |
96 | ||
97 | static int | |
98 | utf8_size (FcChar32 ucs4) | |
99 | { | |
100 | FcChar8 utf8[FC_UTF8_MAX_LEN]; | |
101 | return ucs4_to_utf8 (ucs4, utf8 ); | |
102 | } | |
103 | ||
104 | static FcChar8 *foldChars; | |
105 | static int nfoldChars; | |
106 | static int maxFoldChars; | |
107 | static FcChar32 minFoldChar; | |
108 | static FcChar32 maxFoldChar; | |
109 | ||
110 | static void | |
111 | addChar (FcChar32 c) | |
112 | { | |
113 | FcChar8 utf8[FC_UTF8_MAX_LEN]; | |
114 | int len; | |
115 | int i; | |
116 | ||
117 | len = ucs4_to_utf8 (c, utf8); | |
118 | if (foldChars) | |
119 | foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8)); | |
120 | else | |
121 | foldChars = malloc (sizeof (FcChar8) * len); | |
122 | if (!foldChars) | |
123 | panic ("out of memory"); | |
124 | for (i = 0; i < len; i++) | |
125 | foldChars[nfoldChars + i] = utf8[i]; | |
126 | nfoldChars += len; | |
127 | } | |
128 | ||
129 | static int | |
130 | foldExtends (FcCaseFold *fold, CaseFoldRaw *raw) | |
131 | { | |
132 | switch (fold->method) { | |
133 | case FC_CASE_FOLD_RANGE: | |
134 | if ((short) (raw->lower[0] - raw->upper) != fold->offset) | |
135 | return 0; | |
136 | if (raw->upper != fold->upper + fold->count) | |
137 | return 0; | |
138 | return 1; | |
139 | case FC_CASE_FOLD_EVEN_ODD: | |
140 | if ((short) (raw->lower[0] - raw->upper) != 1) | |
141 | return 0; | |
142 | if (raw->upper != fold->upper + fold->count + 1) | |
143 | return 0; | |
144 | return 1; | |
145 | case FC_CASE_FOLD_FULL: | |
146 | break; | |
147 | } | |
148 | return 0; | |
149 | } | |
150 | ||
151 | static const char * | |
152 | case_fold_method_name (FcChar16 method) | |
153 | { | |
154 | switch (method) { | |
155 | case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,"; | |
156 | case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,"; | |
157 | case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,"; | |
158 | default: return "unknown"; | |
159 | } | |
160 | } | |
161 | ||
162 | static void | |
163 | dump (void) | |
164 | { | |
165 | int i; | |
166 | ||
167 | printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds); | |
168 | printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars); | |
169 | printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars); | |
170 | printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand); | |
171 | printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar); | |
172 | printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar); | |
173 | printf ( "\n"); | |
174 | ||
175 | /* | |
176 | * Dump out ranges | |
177 | */ | |
178 | printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n"); | |
179 | for (i = 0; i < nfolds; i++) | |
180 | { | |
181 | printf (" { 0x%08x, %-22s 0x%04x, %6d },\n", | |
182 | folds[i].upper, case_fold_method_name (folds[i].method), | |
183 | folds[i].count, folds[i].offset); | |
184 | } | |
185 | printf ("};\n\n"); | |
186 | ||
187 | /* | |
188 | * Dump out "other" values | |
189 | */ | |
190 | ||
191 | printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n"); | |
192 | for (i = 0; i < nfoldChars; i++) | |
193 | { | |
194 | printf ("0x%02x", foldChars[i]); | |
195 | if (i != nfoldChars - 1) | |
196 | { | |
197 | if ((i & 0xf) == 0xf) | |
198 | printf (",\n"); | |
199 | else | |
200 | printf (","); | |
201 | } | |
202 | } | |
203 | printf ("\n};\n"); | |
204 | } | |
205 | ||
/*
 * Read the standard Unicode CaseFolding.txt file
 */
/* Delimiter set handed to strtok(): semicolon, space, tab and newline. */
#define SEP "; \t\n"
210 | ||
211 | static int | |
212 | parseRaw (char *line, CaseFoldRaw *raw) | |
213 | { | |
214 | char *tok, *end; | |
215 | int i; | |
216 | ||
217 | if (!isxdigit (line[0])) | |
218 | return 0; | |
219 | /* | |
220 | * Get upper case value | |
221 | */ | |
222 | tok = strtok (line, SEP); | |
223 | if (!tok || tok[0] == '#') | |
224 | return 0; | |
225 | raw->upper = strtol (tok, &end, 16); | |
226 | if (end == tok) | |
227 | return 0; | |
228 | /* | |
229 | * Get class | |
230 | */ | |
231 | tok = strtok (NULL, SEP); | |
232 | if (!tok || tok[0] == '#') | |
233 | return 0; | |
234 | for (i = 0; caseFoldClassMap[i].name; i++) | |
235 | if (!strcmp (tok, caseFoldClassMap[i].name)) | |
236 | { | |
237 | raw->class = caseFoldClassMap[i].class; | |
238 | break; | |
239 | } | |
240 | if (!caseFoldClassMap[i].name) | |
241 | return 0; | |
242 | ||
243 | /* | |
244 | * Get list of result characters | |
245 | */ | |
246 | for (i = 0; i < MAX_OUT; i++) | |
247 | { | |
248 | tok = strtok (NULL, SEP); | |
249 | if (!tok || tok[0] == '#') | |
250 | break; | |
251 | raw->lower[i] = strtol (tok, &end, 16); | |
252 | if (end == tok) | |
253 | break; | |
254 | } | |
255 | if (i == 0) | |
256 | return 0; | |
257 | raw->nout = i; | |
258 | return 1; | |
259 | } | |
260 | ||
261 | static int | |
262 | caseFoldReadRaw (FILE *in, CaseFoldRaw *raw) | |
263 | { | |
264 | char line[MAX_LINE]; | |
265 | ||
266 | for (;;) | |
267 | { | |
268 | if (!fgets (line, sizeof (line) - 1, in)) | |
269 | return 0; | |
270 | if (parseRaw (line, raw)) | |
271 | return 1; | |
272 | } | |
273 | } | |
274 | ||
275 | int | |
276 | main (int argc, char **argv) | |
277 | { | |
278 | FcCaseFold *fold = 0; | |
279 | CaseFoldRaw raw; | |
280 | int i; | |
281 | FILE *caseFile; | |
282 | char line[MAX_LINE]; | |
283 | int expand; | |
284 | ||
285 | if (argc != 2) | |
286 | panic ("usage: fc-case CaseFolding.txt"); | |
287 | caseFile = fopen (argv[1], "r"); | |
288 | if (!caseFile) | |
289 | panic ("can't open case folding file"); | |
290 | ||
291 | while (caseFoldReadRaw (caseFile, &raw)) | |
292 | { | |
293 | if (!minFoldChar) | |
294 | minFoldChar = raw.upper; | |
295 | maxFoldChar = raw.upper; | |
296 | switch (raw.class) { | |
297 | case CaseFoldCommon: | |
298 | case CaseFoldFull: | |
299 | if (raw.nout == 1) | |
300 | { | |
301 | if (fold && foldExtends (fold, &raw)) | |
302 | fold->count = raw.upper - fold->upper + 1; | |
303 | else | |
304 | { | |
305 | fold = addFold (); | |
306 | fold->upper = raw.upper; | |
307 | fold->offset = raw.lower[0] - raw.upper; | |
308 | if (fold->offset == 1) | |
309 | fold->method = FC_CASE_FOLD_EVEN_ODD; | |
310 | else | |
311 | fold->method = FC_CASE_FOLD_RANGE; | |
312 | fold->count = 1; | |
313 | } | |
314 | expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper); | |
315 | } | |
316 | else | |
317 | { | |
318 | fold = addFold (); | |
319 | fold->upper = raw.upper; | |
320 | fold->method = FC_CASE_FOLD_FULL; | |
321 | fold->offset = nfoldChars; | |
322 | for (i = 0; i < raw.nout; i++) | |
323 | addChar (raw.lower[i]); | |
324 | fold->count = nfoldChars - fold->offset; | |
325 | if (fold->count > maxFoldChars) | |
326 | maxFoldChars = fold->count; | |
327 | expand = fold->count - utf8_size (raw.upper); | |
328 | } | |
329 | if (expand > maxExpand) | |
330 | maxExpand = expand; | |
331 | break; | |
332 | case CaseFoldSimple: | |
333 | break; | |
334 | case CaseFoldTurkic: | |
335 | break; | |
336 | } | |
337 | } | |
338 | /* | |
339 | * Scan the input until the marker is found | |
340 | */ | |
341 | ||
342 | while (fgets (line, sizeof (line), stdin)) | |
343 | { | |
344 | if (!strncmp (line, "@@@", 3)) | |
345 | break; | |
346 | fputs (line, stdout); | |
347 | } | |
348 | ||
349 | /* | |
350 | * Dump these tables | |
351 | */ | |
352 | dump (); | |
353 | ||
354 | /* | |
355 | * And flush out the rest of the input file | |
356 | */ | |
357 | ||
358 | while (fgets (line, sizeof (line), stdin)) | |
359 | fputs (line, stdout); | |
360 | ||
361 | fflush (stdout); | |
362 | exit (ferror (stdout)); | |
363 | } |