]> git.wh0rd.org - fontconfig.git/blob - fc-case/fc-case.c
bd28517f49233610f6e73e879300d5bcd603fb45
[fontconfig.git] / fc-case / fc-case.c
1 /*
2 * $Id$
3 *
4 * Copyright © 2004 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
#include "fcint.h"
#include <ctype.h>
#include <stdlib.h>
27
/* Capacity of CaseFoldRaw.lower[]: most code points one entry can fold to. */
#define MAX_OUT     32
/* Size of the buffers used to hold one line of input. */
#define MAX_LINE    8192
30
31 /* stub definitions for declarations from fcint.h.. */
32 int * _fcBankId = 0, * _fcBankIdx = 0;
33 FcValueList ** _fcValueLists = 0;
34 FcPatternElt ** _fcPatternElts = 0;
35 int FcDebugVal = 0;
36
/*
 * Stub: the bank argument is ignored and index 0 is always returned.
 */
int
FcCacheBankToIndexMTF (int bank)
{
    (void) bank;
    return 0;
}
/* end stub definitions */
43
/* Folding classes recognised in the input, one per class token. */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;

/* Pairs a class token from the input with its CaseFoldClass value. */
typedef struct _caseFoldClassMap {
    const char      *name;
    CaseFoldClass   class;
} CaseFoldClassMap;

/* Token lookup table; the entry whose name is NULL terminates the list. */
static const CaseFoldClassMap caseFoldClassMap[] = {
    { "C",  CaseFoldCommon },
    { "F",  CaseFoldFull },
    { "S",  CaseFoldSimple },
    { "T",  CaseFoldTurkic },
    { NULL, CaseFoldCommon }
};
58
59 typedef struct _caseFoldRaw {
60 FcChar32 upper;
61 CaseFoldClass class;
62 int nout;
63 FcChar32 lower[MAX_OUT];
64 } CaseFoldRaw;
65
/*
 * Report a fatal problem on stderr, prefixed with "fc-case: panic", and
 * terminate the program with exit status 1.  Does not return.
 */
static void
panic (const char *reason)
{
    (void) fprintf (stderr, "fc-case: panic %s\n", reason);
    exit (1);
}
72
73 int maxExpand;
74 static FcCaseFold *folds;
75 int nfolds;
76
77 static FcCaseFold *
78 addFold (void)
79 {
80 if (folds)
81 folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
82 else
83 folds = malloc (sizeof (FcCaseFold));
84 if (!folds)
85 panic ("out of memory");
86 return &folds[nfolds++];
87 }
88
89 static int
90 ucs4_to_utf8 (FcChar32 ucs4,
91 FcChar8 dest[FC_UTF8_MAX_LEN])
92 {
93 int bits;
94 FcChar8 *d = dest;
95
96 if (ucs4 < 0x80) { *d++= ucs4; bits= -6; }
97 else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; }
98 else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; }
99 else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; }
100 else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; }
101 else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; }
102 else return 0;
103
104 for ( ; bits >= 0; bits-= 6) {
105 *d++= ((ucs4 >> bits) & 0x3F) | 0x80;
106 }
107 return d - dest;
108 }
109
110 static int
111 utf8_size (FcChar32 ucs4)
112 {
113 FcChar8 utf8[FC_UTF8_MAX_LEN];
114 return ucs4_to_utf8 (ucs4, utf8 );
115 }
116
117 static FcChar8 *foldChars;
118 static int nfoldChars;
119 static int maxFoldChars;
120 static FcChar32 minFoldChar;
121 static FcChar32 maxFoldChar;
122
123 static void
124 addChar (FcChar32 c)
125 {
126 FcChar8 utf8[FC_UTF8_MAX_LEN];
127 int len;
128 int i;
129
130 len = ucs4_to_utf8 (c, utf8);
131 if (foldChars)
132 foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8));
133 else
134 foldChars = malloc (sizeof (FcChar8) * len);
135 if (!foldChars)
136 panic ("out of memory");
137 for (i = 0; i < len; i++)
138 foldChars[nfoldChars + i] = utf8[i];
139 nfoldChars += len;
140 }
141
142 static int
143 foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
144 {
145 switch (fold->method) {
146 case FC_CASE_FOLD_RANGE:
147 if ((short) (raw->lower[0] - raw->upper) != fold->offset)
148 return 0;
149 if (raw->upper != fold->upper + fold->count)
150 return 0;
151 return 1;
152 case FC_CASE_FOLD_EVEN_ODD:
153 if ((short) (raw->lower[0] - raw->upper) != 1)
154 return 0;
155 if (raw->upper != fold->upper + fold->count + 1)
156 return 0;
157 return 1;
158 case FC_CASE_FOLD_FULL:
159 break;
160 }
161 return 0;
162 }
163
164 static const char *
165 case_fold_method_name (FcChar16 method)
166 {
167 switch (method) {
168 case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,";
169 case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,";
170 case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,";
171 default: return "unknown";
172 }
173 }
174
175 static void
176 dump (void)
177 {
178 int i;
179
180 printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds);
181 printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars);
182 printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars);
183 printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand);
184 printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar);
185 printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar);
186 printf ( "\n");
187
188 /*
189 * Dump out ranges
190 */
191 printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
192 for (i = 0; i < nfolds; i++)
193 {
194 printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
195 folds[i].upper, case_fold_method_name (folds[i].method),
196 folds[i].count, folds[i].offset);
197 }
198 printf ("};\n\n");
199
200 /*
201 * Dump out "other" values
202 */
203
204 printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
205 for (i = 0; i < nfoldChars; i++)
206 {
207 printf ("0x%02x", foldChars[i]);
208 if (i != nfoldChars - 1)
209 {
210 if ((i & 0xf) == 0xf)
211 printf (",\n");
212 else
213 printf (",");
214 }
215 }
216 printf ("\n};\n");
217 }
218
219 /*
220 * Read the standard Unicode CaseFolding.txt file
221 */
222 #define SEP "; \t\n"
223
224 static int
225 parseRaw (char *line, CaseFoldRaw *raw)
226 {
227 char *tok, *end;
228 int i;
229
230 if (!isxdigit (line[0]))
231 return 0;
232 /*
233 * Get upper case value
234 */
235 tok = strtok (line, SEP);
236 if (!tok || tok[0] == '#')
237 return 0;
238 raw->upper = strtol (tok, &end, 16);
239 if (end == tok)
240 return 0;
241 /*
242 * Get class
243 */
244 tok = strtok (NULL, SEP);
245 if (!tok || tok[0] == '#')
246 return 0;
247 for (i = 0; caseFoldClassMap[i].name; i++)
248 if (!strcmp (tok, caseFoldClassMap[i].name))
249 {
250 raw->class = caseFoldClassMap[i].class;
251 break;
252 }
253 if (!caseFoldClassMap[i].name)
254 return 0;
255
256 /*
257 * Get list of result characters
258 */
259 for (i = 0; i < MAX_OUT; i++)
260 {
261 tok = strtok (NULL, SEP);
262 if (!tok || tok[0] == '#')
263 break;
264 raw->lower[i] = strtol (tok, &end, 16);
265 if (end == tok)
266 break;
267 }
268 if (i == 0)
269 return 0;
270 raw->nout = i;
271 return 1;
272 }
273
274 static int
275 caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
276 {
277 char line[MAX_LINE];
278
279 for (;;)
280 {
281 if (!fgets (line, sizeof (line) - 1, in))
282 return 0;
283 if (parseRaw (line, raw))
284 return 1;
285 }
286 }
287
288 int
289 main (int argc, char **argv)
290 {
291 FcCaseFold *fold = 0;
292 CaseFoldRaw raw;
293 int i;
294 FILE *caseFile;
295 char line[MAX_LINE];
296 int expand;
297
298 if (argc != 2)
299 panic ("usage: fc-case CaseFolding.txt");
300 caseFile = fopen (argv[1], "r");
301 if (!caseFile)
302 panic ("can't open case folding file");
303
304 while (caseFoldReadRaw (caseFile, &raw))
305 {
306 if (!minFoldChar)
307 minFoldChar = raw.upper;
308 maxFoldChar = raw.upper;
309 switch (raw.class) {
310 case CaseFoldCommon:
311 case CaseFoldFull:
312 if (raw.nout == 1)
313 {
314 if (fold && foldExtends (fold, &raw))
315 fold->count = raw.upper - fold->upper + 1;
316 else
317 {
318 fold = addFold ();
319 fold->upper = raw.upper;
320 fold->offset = raw.lower[0] - raw.upper;
321 if (fold->offset == 1)
322 fold->method = FC_CASE_FOLD_EVEN_ODD;
323 else
324 fold->method = FC_CASE_FOLD_RANGE;
325 fold->count = 1;
326 }
327 expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
328 }
329 else
330 {
331 fold = addFold ();
332 fold->upper = raw.upper;
333 fold->method = FC_CASE_FOLD_FULL;
334 fold->offset = nfoldChars;
335 for (i = 0; i < raw.nout; i++)
336 addChar (raw.lower[i]);
337 fold->count = nfoldChars - fold->offset;
338 if (fold->count > maxFoldChars)
339 maxFoldChars = fold->count;
340 expand = fold->count - utf8_size (raw.upper);
341 }
342 if (expand > maxExpand)
343 maxExpand = expand;
344 break;
345 case CaseFoldSimple:
346 break;
347 case CaseFoldTurkic:
348 break;
349 }
350 }
351 /*
352 * Scan the input until the marker is found
353 */
354
355 while (fgets (line, sizeof (line), stdin))
356 {
357 if (!strncmp (line, "@@@", 3))
358 break;
359 fputs (line, stdout);
360 }
361
362 /*
363 * Dump these tables
364 */
365 dump ();
366
367 /*
368 * And flush out the rest of the input file
369 */
370
371 while (fgets (line, sizeof (line), stdin))
372 fputs (line, stdout);
373
374 fflush (stdout);
375 exit (ferror (stdout));
376 }