]> git.wh0rd.org - fontconfig.git/blob - fc-case/fc-case.c
e0548f02d2189a2f21860ffaba1e0829d2c1d76b
[fontconfig.git] / fc-case / fc-case.c
1 /*
2 * $Id$
3 *
4 * Copyright © 2004 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26 #include <ctype.h>
27
28 #define MAX_OUT 32
29 #define MAX_LINE 8192
30
/*
 * Stub definitions for symbols that fcint.h declares.
 * The pointers start out null and the function always answers 0.
 */
int *_fcBankId = NULL;
int *_fcBankIdx = NULL;

int
FcCacheBankToIndexMTF (int bank)
{
    (void) bank;    /* the stub does not look at its argument */
    return 0;
}
/* end of stub definitions */
40
/*
 * Folding class attached to each parsed mapping.  The one-letter codes
 * that select each value are listed in caseFoldClassMap.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,     /* code "C" */
    CaseFoldFull,       /* code "F" */
    CaseFoldSimple,     /* code "S" */
    CaseFoldTurkic      /* code "T" */
} CaseFoldClass;
42
43 typedef struct _caseFoldClassMap {
44 const char *name;
45 CaseFoldClass class;
46 } CaseFoldClassMap;
47
48 static const CaseFoldClassMap caseFoldClassMap[] = {
49 { "C", CaseFoldCommon },
50 { "F", CaseFoldFull },
51 { "S", CaseFoldSimple },
52 { "T", CaseFoldTurkic },
53 { 0, 0 }
54 };
55
56 typedef struct _caseFoldRaw {
57 FcChar32 upper;
58 CaseFoldClass class;
59 int nout;
60 FcChar32 lower[MAX_OUT];
61 } CaseFoldRaw;
62
/*
 * Report a fatal condition on stderr and terminate the program with
 * exit status 1.  Never returns.
 */
static void
panic (const char *reason)
{
    fputs ("fc-case: panic ", stderr);
    fputs (reason, stderr);
    fputs ("\n", stderr);
    exit (1);
}
69
70 int maxExpand;
71 static FcCaseFold *folds;
72 int nfolds;
73
74 static FcCaseFold *
75 addFold (void)
76 {
77 if (folds)
78 folds = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
79 else
80 folds = malloc (sizeof (FcCaseFold));
81 if (!folds)
82 panic ("out of memory");
83 return &folds[nfolds++];
84 }
85
86 static int
87 ucs4_to_utf8 (FcChar32 ucs4,
88 FcChar8 dest[FC_UTF8_MAX_LEN])
89 {
90 int bits;
91 FcChar8 *d = dest;
92
93 if (ucs4 < 0x80) { *d++= ucs4; bits= -6; }
94 else if (ucs4 < 0x800) { *d++= ((ucs4 >> 6) & 0x1F) | 0xC0; bits= 0; }
95 else if (ucs4 < 0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits= 6; }
96 else if (ucs4 < 0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; }
97 else if (ucs4 < 0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; }
98 else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; }
99 else return 0;
100
101 for ( ; bits >= 0; bits-= 6) {
102 *d++= ((ucs4 >> bits) & 0x3F) | 0x80;
103 }
104 return d - dest;
105 }
106
107 static int
108 utf8_size (FcChar32 ucs4)
109 {
110 FcChar8 utf8[FC_UTF8_MAX_LEN];
111 return ucs4_to_utf8 (ucs4, utf8 );
112 }
113
114 static FcChar8 *foldChars;
115 int nfoldChars;
116 int maxFoldChars;
117 FcChar32 minFoldChar;
118 FcChar32 maxFoldChar;
119
120 static void
121 addChar (FcChar32 c)
122 {
123 FcChar8 utf8[FC_UTF8_MAX_LEN];
124 int len;
125 int i;
126
127 len = ucs4_to_utf8 (c, utf8);
128 if (foldChars)
129 foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8));
130 else
131 foldChars = malloc (sizeof (FcChar8) * len);
132 if (!foldChars)
133 panic ("out of memory");
134 for (i = 0; i < len; i++)
135 foldChars[nfoldChars + i] = utf8[i];
136 nfoldChars += len;
137 }
138
139 static int
140 foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
141 {
142 switch (fold->method) {
143 case FC_CASE_FOLD_RANGE:
144 if ((short) (raw->lower[0] - raw->upper) != fold->offset)
145 return 0;
146 if (raw->upper != fold->upper + fold->count)
147 return 0;
148 return 1;
149 case FC_CASE_FOLD_EVEN_ODD:
150 if ((short) (raw->lower[0] - raw->upper) != 1)
151 return 0;
152 if (raw->upper != fold->upper + fold->count + 1)
153 return 0;
154 return 1;
155 case FC_CASE_FOLD_FULL:
156 break;
157 }
158 return 0;
159 }
160
161 static const char *
162 case_fold_method_name (FcChar16 method)
163 {
164 switch (method) {
165 case FC_CASE_FOLD_RANGE: return "FC_CASE_FOLD_RANGE,";
166 case FC_CASE_FOLD_EVEN_ODD: return "FC_CASE_FOLD_EVEN_ODD,";
167 case FC_CASE_FOLD_FULL: return "FC_CASE_FOLD_FULL,";
168 default: return "unknown";
169 }
170 }
171
172 static void
173 dump (void)
174 {
175 int i;
176
177 printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds);
178 printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars);
179 printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars);
180 printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand);
181 printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar);
182 printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar);
183 printf ( "\n");
184
185 /*
186 * Dump out ranges
187 */
188 printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
189 for (i = 0; i < nfolds; i++)
190 {
191 printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
192 folds[i].upper, case_fold_method_name (folds[i].method),
193 folds[i].count, folds[i].offset);
194 }
195 printf ("};\n\n");
196
197 /*
198 * Dump out "other" values
199 */
200
201 printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
202 for (i = 0; i < nfoldChars; i++)
203 {
204 printf ("0x%02x", foldChars[i]);
205 if (i != nfoldChars - 1)
206 {
207 if ((i & 0xf) == 0xf)
208 printf (",\n");
209 else
210 printf (",");
211 }
212 }
213 printf ("\n};\n");
214 }
215
216 /*
217 * Read the standard Unicode CaseFolding.txt file
218 */
219 #define SEP "; \t\n"
220
221 static int
222 parseRaw (char *line, CaseFoldRaw *raw)
223 {
224 char *tok, *end;
225 int i;
226
227 if (!isxdigit (line[0]))
228 return 0;
229 /*
230 * Get upper case value
231 */
232 tok = strtok (line, SEP);
233 if (!tok || tok[0] == '#')
234 return 0;
235 raw->upper = strtol (tok, &end, 16);
236 if (end == tok)
237 return 0;
238 /*
239 * Get class
240 */
241 tok = strtok (NULL, SEP);
242 if (!tok || tok[0] == '#')
243 return 0;
244 for (i = 0; caseFoldClassMap[i].name; i++)
245 if (!strcmp (tok, caseFoldClassMap[i].name))
246 {
247 raw->class = caseFoldClassMap[i].class;
248 break;
249 }
250 if (!caseFoldClassMap[i].name)
251 return 0;
252
253 /*
254 * Get list of result characters
255 */
256 for (i = 0; i < MAX_OUT; i++)
257 {
258 tok = strtok (NULL, SEP);
259 if (!tok || tok[0] == '#')
260 break;
261 raw->lower[i] = strtol (tok, &end, 16);
262 if (end == tok)
263 break;
264 }
265 if (i == 0)
266 return 0;
267 raw->nout = i;
268 return 1;
269 }
270
271 static int
272 caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
273 {
274 char line[MAX_LINE];
275
276 for (;;)
277 {
278 if (!fgets (line, sizeof (line) - 1, in))
279 return 0;
280 if (parseRaw (line, raw))
281 return 1;
282 }
283 }
284
285 int
286 main (int argc, char **argv)
287 {
288 FcCaseFold *fold = 0;
289 CaseFoldRaw raw;
290 int i;
291 FILE *caseFile;
292 char line[MAX_LINE];
293 int expand;
294
295 if (argc != 2)
296 panic ("usage: fc-case CaseFolding.txt");
297 caseFile = fopen (argv[1], "r");
298 if (!caseFile)
299 panic ("can't open case folding file");
300
301 while (caseFoldReadRaw (caseFile, &raw))
302 {
303 if (!minFoldChar)
304 minFoldChar = raw.upper;
305 maxFoldChar = raw.upper;
306 switch (raw.class) {
307 case CaseFoldCommon:
308 case CaseFoldFull:
309 if (raw.nout == 1)
310 {
311 if (fold && foldExtends (fold, &raw))
312 fold->count = raw.upper - fold->upper + 1;
313 else
314 {
315 fold = addFold ();
316 fold->upper = raw.upper;
317 fold->offset = raw.lower[0] - raw.upper;
318 if (fold->offset == 1)
319 fold->method = FC_CASE_FOLD_EVEN_ODD;
320 else
321 fold->method = FC_CASE_FOLD_RANGE;
322 fold->count = 1;
323 }
324 expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
325 }
326 else
327 {
328 fold = addFold ();
329 fold->upper = raw.upper;
330 fold->method = FC_CASE_FOLD_FULL;
331 fold->offset = nfoldChars;
332 for (i = 0; i < raw.nout; i++)
333 addChar (raw.lower[i]);
334 fold->count = nfoldChars - fold->offset;
335 if (fold->count > maxFoldChars)
336 maxFoldChars = fold->count;
337 expand = fold->count - utf8_size (raw.upper);
338 }
339 if (expand > maxExpand)
340 maxExpand = expand;
341 break;
342 case CaseFoldSimple:
343 break;
344 case CaseFoldTurkic:
345 break;
346 }
347 }
348 /*
349 * Scan the input until the marker is found
350 */
351
352 while (fgets (line, sizeof (line), stdin))
353 {
354 if (!strncmp (line, "@@@", 3))
355 break;
356 fputs (line, stdout);
357 }
358
359 /*
360 * Dump these tables
361 */
362 dump ();
363
364 /*
365 * And flush out the rest of the input file
366 */
367
368 while (fgets (line, sizeof (line), stdin))
369 fputs (line, stdout);
370
371 fflush (stdout);
372 exit (ferror (stdout));
373 }