]> git.wh0rd.org - fontconfig.git/blob - fc-case/fc-case.c
e86b34783188b22f698be8a264c5bf88c451caaa
[fontconfig.git] / fc-case / fc-case.c
/*
 * fontconfig/fc-case/fc-case.c
 *
 * Copyright © 2004 Keith Packard
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Keith Packard not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission. Keith Packard makes no
 * representations about the suitability of this software for any purpose. It
 * is provided "as is" without express or implied warranty.
 *
 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */
24
#include "fcint.h"
#include <ctype.h>
#include <stdlib.h>
27
/* Capacity of CaseFoldRaw.lower: most result code points kept per entry */
#define MAX_OUT 32
/* Size of the line buffers used when reading the data file and stdin */
#define MAX_LINE 8192
30
/*
 * Kind of mapping described by one data line, as named by the
 * one-letter code in the line's second field.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;

/*
 * Pairs a class code as spelled in the data file with its enum value.
 */
typedef struct _caseFoldClassMap {
    const char    *name;
    CaseFoldClass class;
} CaseFoldClassMap;

/*
 * Table searched by parseRaw; the entry whose name is null ends it.
 */
static const CaseFoldClassMap caseFoldClassMap[] = {
    { "C", CaseFoldCommon },
    { "F", CaseFoldFull },
    { "S", CaseFoldSimple },
    { "T", CaseFoldTurkic },
    { 0, 0 }
};
45
46 typedef struct _caseFoldRaw {
47 FcChar32 upper;
48 CaseFoldClass class;
49 int nout;
50 FcChar32 lower[MAX_OUT];
51 } CaseFoldRaw;
52
/*
 * Report a fatal condition on stderr and terminate with exit status 1.
 * Does not return.
 */
static void
panic (const char *reason)
{
    fprintf (stderr, "fc-case: panic %s\n", reason);
    exit (1);
}
59
60 int maxExpand;
61 static FcCaseFold *folds;
62 int nfolds;
63
64 static FcCaseFold *
65 addFold (void)
66 {
67 if (folds)
68 folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
69 else
70 folds = malloc (sizeof (FcCaseFold));
71 if (!folds)
72 panic ("out of memory");
73 return &folds[nfolds++];
74 }
75
76 static int
77 ucs4_to_utf8 (FcChar32 ucs4,
78 FcChar8 dest[FC_UTF8_MAX_LEN])
79 {
80 int bits;
81 FcChar8 *d = dest;
82
83 if (ucs4 < 0x80) { *d++= ucs4; bits= -6; }
84 else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; }
85 else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; }
86 else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; }
87 else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; }
88 else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; }
89 else return 0;
90
91 for ( ; bits >= 0; bits-= 6) {
92 *d++= ((ucs4 >> bits) & 0x3F) | 0x80;
93 }
94 return d - dest;
95 }
96
97 static int
98 utf8_size (FcChar32 ucs4)
99 {
100 FcChar8 utf8[FC_UTF8_MAX_LEN];
101 return ucs4_to_utf8 (ucs4, utf8 );
102 }
103
104 static FcChar8 *foldChars;
105 static int nfoldChars;
106 static int maxFoldChars;
107 static FcChar32 minFoldChar;
108 static FcChar32 maxFoldChar;
109
110 static void
111 addChar (FcChar32 c)
112 {
113 FcChar8 utf8[FC_UTF8_MAX_LEN];
114 int len;
115 int i;
116
117 len = ucs4_to_utf8 (c, utf8);
118 if (foldChars)
119 foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8));
120 else
121 foldChars = malloc (sizeof (FcChar8) * len);
122 if (!foldChars)
123 panic ("out of memory");
124 for (i = 0; i < len; i++)
125 foldChars[nfoldChars + i] = utf8[i];
126 nfoldChars += len;
127 }
128
129 static int
130 foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
131 {
132 switch (fold->method) {
133 case FC_CASE_FOLD_RANGE:
134 if ((short) (raw->lower[0] - raw->upper) != fold->offset)
135 return 0;
136 if (raw->upper != fold->upper + fold->count)
137 return 0;
138 return 1;
139 case FC_CASE_FOLD_EVEN_ODD:
140 if ((short) (raw->lower[0] - raw->upper) != 1)
141 return 0;
142 if (raw->upper != fold->upper + fold->count + 1)
143 return 0;
144 return 1;
145 case FC_CASE_FOLD_FULL:
146 break;
147 }
148 return 0;
149 }
150
151 static const char *
152 case_fold_method_name (FcChar16 method)
153 {
154 switch (method) {
155 case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,";
156 case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,";
157 case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,";
158 default: return "unknown";
159 }
160 }
161
162 static void
163 dump (void)
164 {
165 int i;
166
167 printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds);
168 printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars);
169 printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars);
170 printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand);
171 printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar);
172 printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar);
173 printf ( "\n");
174
175 /*
176 * Dump out ranges
177 */
178 printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
179 for (i = 0; i < nfolds; i++)
180 {
181 printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
182 folds[i].upper, case_fold_method_name (folds[i].method),
183 folds[i].count, folds[i].offset);
184 }
185 printf ("};\n\n");
186
187 /*
188 * Dump out "other" values
189 */
190
191 printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
192 for (i = 0; i < nfoldChars; i++)
193 {
194 printf ("0x%02x", foldChars[i]);
195 if (i != nfoldChars - 1)
196 {
197 if ((i & 0xf) == 0xf)
198 printf (",\n");
199 else
200 printf (",");
201 }
202 }
203 printf ("\n};\n");
204 }
205
206 /*
207 * Read the standard Unicode CaseFolding.txt file
208 */
209 #define SEP "; \t\n"
210
211 static int
212 parseRaw (char *line, CaseFoldRaw *raw)
213 {
214 char *tok, *end;
215 int i;
216
217 if (!isxdigit (line[0]))
218 return 0;
219 /*
220 * Get upper case value
221 */
222 tok = strtok (line, SEP);
223 if (!tok || tok[0] == '#')
224 return 0;
225 raw->upper = strtol (tok, &end, 16);
226 if (end == tok)
227 return 0;
228 /*
229 * Get class
230 */
231 tok = strtok (NULL, SEP);
232 if (!tok || tok[0] == '#')
233 return 0;
234 for (i = 0; caseFoldClassMap[i].name; i++)
235 if (!strcmp (tok, caseFoldClassMap[i].name))
236 {
237 raw->class = caseFoldClassMap[i].class;
238 break;
239 }
240 if (!caseFoldClassMap[i].name)
241 return 0;
242
243 /*
244 * Get list of result characters
245 */
246 for (i = 0; i < MAX_OUT; i++)
247 {
248 tok = strtok (NULL, SEP);
249 if (!tok || tok[0] == '#')
250 break;
251 raw->lower[i] = strtol (tok, &end, 16);
252 if (end == tok)
253 break;
254 }
255 if (i == 0)
256 return 0;
257 raw->nout = i;
258 return 1;
259 }
260
261 static int
262 caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
263 {
264 char line[MAX_LINE];
265
266 for (;;)
267 {
268 if (!fgets (line, sizeof (line) - 1, in))
269 return 0;
270 if (parseRaw (line, raw))
271 return 1;
272 }
273 }
274
275 int
276 main (int argc, char **argv)
277 {
278 FcCaseFold *fold = 0;
279 CaseFoldRaw raw;
280 int i;
281 FILE *caseFile;
282 char line[MAX_LINE];
283 int expand;
284
285 if (argc != 2)
286 panic ("usage: fc-case CaseFolding.txt");
287 caseFile = fopen (argv[1], "r");
288 if (!caseFile)
289 panic ("can't open case folding file");
290
291 while (caseFoldReadRaw (caseFile, &raw))
292 {
293 if (!minFoldChar)
294 minFoldChar = raw.upper;
295 maxFoldChar = raw.upper;
296 switch (raw.class) {
297 case CaseFoldCommon:
298 case CaseFoldFull:
299 if (raw.nout == 1)
300 {
301 if (fold && foldExtends (fold, &raw))
302 fold->count = raw.upper - fold->upper + 1;
303 else
304 {
305 fold = addFold ();
306 fold->upper = raw.upper;
307 fold->offset = raw.lower[0] - raw.upper;
308 if (fold->offset == 1)
309 fold->method = FC_CASE_FOLD_EVEN_ODD;
310 else
311 fold->method = FC_CASE_FOLD_RANGE;
312 fold->count = 1;
313 }
314 expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
315 }
316 else
317 {
318 fold = addFold ();
319 fold->upper = raw.upper;
320 fold->method = FC_CASE_FOLD_FULL;
321 fold->offset = nfoldChars;
322 for (i = 0; i < raw.nout; i++)
323 addChar (raw.lower[i]);
324 fold->count = nfoldChars - fold->offset;
325 if (fold->count > maxFoldChars)
326 maxFoldChars = fold->count;
327 expand = fold->count - utf8_size (raw.upper);
328 }
329 if (expand > maxExpand)
330 maxExpand = expand;
331 break;
332 case CaseFoldSimple:
333 break;
334 case CaseFoldTurkic:
335 break;
336 }
337 }
338 /*
339 * Scan the input until the marker is found
340 */
341
342 while (fgets (line, sizeof (line), stdin))
343 {
344 if (!strncmp (line, "@@@", 3))
345 break;
346 fputs (line, stdout);
347 }
348
349 /*
350 * Dump these tables
351 */
352 dump ();
353
354 /*
355 * And flush out the rest of the input file
356 */
357
358 while (fgets (line, sizeof (line), stdin))
359 fputs (line, stdout);
360
361 fflush (stdout);
362 exit (ferror (stdout));
363 }