]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * fontconfig/fc-lang/fc-lang.c | |
3 | * | |
4 | * Copyright © 2002 Keith Packard | |
5 | * | |
6 | * Permission to use, copy, modify, distribute, and sell this software and its | |
7 | * documentation for any purpose is hereby granted without fee, provided that | |
8 | * the above copyright notice appear in all copies and that both that | |
9 | * copyright notice and this permission notice appear in supporting | |
10 | * documentation, and that the name of Keith Packard not be used in | |
11 | * advertising or publicity pertaining to distribution of the software without | |
12 | * specific, written prior permission. Keith Packard makes no | |
13 | * representations about the suitability of this software for any purpose. It | |
14 | * is provided "as is" without express or implied warranty. | |
15 | * | |
16 | * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
17 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
18 | * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
19 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, | |
20 | * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
21 | * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
22 | * PERFORMANCE OF THIS SOFTWARE. | |
23 | */ | |
24 | ||
25 | #include "fccharset.c" | |
26 | #include "fcstr.c" | |
27 | #include "fcserialize.c" | |
28 | ||
29 | /* | |
30 | * fc-lang | |
31 | * | |
32 | * Read a set of language orthographies and build C declarations for | |
33 | * charsets which can then be used to identify which languages are | |
34 | * supported by a given font. Note that this uses some utilities | |
35 | * from the fontconfig library, so the necessary files are simply | |
36 | * included in this compilation. A couple of extra utility | |
37 | * functions are also needed in slightly modified form | |
38 | */ | |
39 | ||
/*
 * Stub: accepts the (kind, size) pair and does nothing with it.
 * NOTE(review): presumably exists so the fontconfig sources included
 * above link in this standalone tool -- confirm.
 */
void
FcMemAlloc (int kind, int size)
{
    (void) kind;
    (void) size;
}
44 | ||
/*
 * Stub: accepts the (kind, size) pair and does nothing with it.
 * NOTE(review): presumably exists so the fontconfig sources included
 * above link in this standalone tool -- confirm.
 */
void
FcMemFree (int kind, int size)
{
    (void) kind;
    (void) size;
}
49 | ||
50 | FcPrivate void | |
51 | FcCacheObjectReference (void *object) | |
52 | { | |
53 | } | |
54 | ||
55 | FcPrivate void | |
56 | FcCacheObjectDereference (void *object) | |
57 | { | |
58 | } | |
59 | ||
/*
 * File-scope definition with no initializer, so it starts out as zero;
 * nothing in this file assigns to it.
 * NOTE(review): presumably read by the fontconfig sources included
 * above -- confirm.
 */
int FcDebugVal;
61 | ||
62 | FcChar8 * | |
63 | FcConfigHome (void) | |
64 | { | |
65 | return (FcChar8 *) getenv ("HOME"); | |
66 | } | |
67 | ||
/*
 * Report an error on stderr and terminate the program with status 1.
 *
 * file    name printed as the message prefix
 * lineno  line number to include after the name; 0 omits it
 * msg     text of the message
 *
 * Does not return.
 */
static void
fatal (const char *file, int lineno, const char *msg)
{
    if (lineno == 0)
        fprintf (stderr, "%s: %s\n", file, msg);
    else
        fprintf (stderr, "%s:%d: %s\n", file, lineno, msg);
    exit (1);
}
77 | ||
/*
 * Fetch the next line of f that still has content after clean-up.
 *
 * Each line read has everything from the first '#' onwards removed and
 * leading and trailing whitespace trimmed; lines that end up empty are
 * skipped.  *lineno is incremented for every line read, skipped ones
 * included.
 *
 * buf is the caller's line buffer and must hold at least 1024 bytes.
 *
 * Returns a pointer into buf at the first non-blank character, or 0
 * when fgets() reports end of file or an error.
 */
static char *
get_line (FILE *f, char *buf, int *lineno)
{
    for (;;)
    {
        char *line = buf;
        char *hash;
        size_t end;

        if (!fgets (line, 1024, f))
            return 0;
        ++(*lineno);

        /* cut off the comment, if any */
        hash = strchr (line, '#');
        if (hash)
            *hash = '\0';

        /*
         * The <ctype.h> functions require a value representable as
         * unsigned char; plain char may be negative for bytes >= 0x80.
         */
        while (line[0] && isspace ((unsigned char) line[0]))
            line++;
        end = strlen (line);
        while (end > 0 && isspace ((unsigned char) line[end - 1]))
            line[--end] = '\0';

        /* after trimming, an empty string is the only "blank" form left */
        if (line[0] != '\0')
            return line;
    }
}
105 | ||
/* Fallback directory tried by scanopen(); 0 means there is none. */
static char *dir = 0;

/*
 * Open file for reading.
 *
 * The name is first tried as given.  If that fails and dir is set, the
 * path "dir/file" is tried next.  A combined path that does not fit the
 * local buffer is treated as not found instead of being written past
 * the end of the buffer.
 *
 * Returns the open stream, or a null pointer when neither attempt
 * succeeds.
 */
static FILE *
scanopen (char *file)
{
    FILE *f;

    f = fopen (file, "r");
    if (!f && dir)
    {
        char path[1024];
        int len;

        len = snprintf (path, sizeof (path), "%s/%s", dir, file);
        /* a result >= the buffer size means the path was truncated */
        if (len >= 0 && (size_t) len < sizeof (path))
            f = fopen (path, "r");
    }
    return f;
}
125 | ||
126 | /* | |
127 | * build a single charset from a source file | |
128 | * | |
129 | * The file format is quite simple, either | |
130 | * a single hex value or a pair separated with a dash | |
131 | * | |
132 | * Comments begin with '#' | |
133 | */ | |
134 | ||
135 | static FcCharSet * | |
136 | scan (FILE *f, char *file, FcCharSetFreezer *freezer) | |
137 | { | |
138 | FcCharSet *c = 0; | |
139 | FcCharSet *n; | |
140 | FcBool del; | |
141 | int start, end, ucs4; | |
142 | char buf[1024]; | |
143 | char *line; | |
144 | int lineno = 0; | |
145 | ||
146 | while ((line = get_line (f, buf, &lineno))) | |
147 | { | |
148 | if (!strncmp (line, "include", 7)) | |
149 | { | |
150 | FILE *included_f; | |
151 | char *included_file; | |
152 | included_file = strchr (line, ' '); | |
153 | if (!included_file) | |
154 | fatal (file, lineno, | |
155 | "invalid syntax, expected: include filename"); | |
156 | while (isspace(*included_file)) | |
157 | included_file++; | |
158 | included_f = scanopen (included_file); | |
159 | if (!included_f) | |
160 | fatal (included_file, 0, "can't open"); | |
161 | n = scan (included_f, included_file, freezer); | |
162 | fclose (included_f); | |
163 | if (!c) | |
164 | c = FcCharSetCreate (); | |
165 | if (!FcCharSetMerge (c, n, NULL)) | |
166 | fatal (file, lineno, "out of memory"); | |
167 | FcCharSetDestroy (n); | |
168 | continue; | |
169 | } | |
170 | del = FcFalse; | |
171 | if (line[0] == '-') | |
172 | { | |
173 | del = FcTrue; | |
174 | line++; | |
175 | } | |
176 | if (strchr (line, '-')) | |
177 | { | |
178 | if (sscanf (line, "%x-%x", &start, &end) != 2) | |
179 | fatal (file, lineno, "parse error"); | |
180 | } | |
181 | else | |
182 | { | |
183 | if (sscanf (line, "%x", &start) != 1) | |
184 | fatal (file, lineno, "parse error"); | |
185 | end = start; | |
186 | } | |
187 | if (!c) | |
188 | c = FcCharSetCreate (); | |
189 | for (ucs4 = start; ucs4 <= end; ucs4++) | |
190 | { | |
191 | if (!((del ? FcCharSetDelChar : FcCharSetAddChar) (c, ucs4))) | |
192 | fatal (file, lineno, "out of memory"); | |
193 | } | |
194 | } | |
195 | n = FcCharSetFreeze (freezer, c); | |
196 | FcCharSetDestroy (c); | |
197 | return n; | |
198 | } | |
199 | ||
/*
 * Convert a file name into a name suitable for C declarations.
 *
 * The result is a newly allocated copy of file up to, but not
 * including, the first '.'; when there is no '.', the whole string is
 * copied.  The caller owns the returned buffer.
 *
 * If the allocation fails, a message in the same "name: text" form that
 * fatal() prints is written to stderr and the program exits with
 * status 1.
 */
static char *
get_name (char *file)
{
    char *dot;
    char *name;
    size_t len;

    dot = strchr (file, '.');
    len = dot ? (size_t) (dot - file) : strlen (file);

    name = malloc (len + 1);
    if (!name)
    {
        fprintf (stderr, "%s: %s\n", file, "out of memory");
        exit (1);
    }
    memcpy (name, file, len);
    name[len] = '\0';
    return name;
}
217 | ||
/*
 * Convert a C name into a language name.
 *
 * The result is a newly allocated copy of name in which upper-case
 * letters are lowered, every '_' becomes '-', and spaces are dropped.
 * The caller owns the returned buffer.
 *
 * If the allocation fails, a message in the same "name: text" form that
 * fatal() prints is written to stderr and the program exits with
 * status 1.
 */
static char *
get_lang (char *name)
{
    /* the output is never longer than the input */
    char *lang = malloc (strlen (name) + 1);
    char *l = lang;
    char c;

    if (!lang)
    {
        fprintf (stderr, "%s: %s\n", name, "out of memory");
        exit (1);
    }
    while ((c = *name++))
    {
        if (isupper ((int) (unsigned char) c))
            c = tolower ((int) (unsigned char) c);
        if (c == '_')
            c = '-';
        if (c == ' ')
            continue;
        *l++ = c;
    }
    *l = '\0';
    return lang;
}
241 | ||
/* One language file named on the command line. */
typedef struct _Entry {
    int id;     /* position among the file arguments, assigned before main() sorts the array */
    char *file; /* the argv string itself (not copied); main() stores 0 here to end the array */
} Entry;
246 | ||
247 | static int compare (const void *a, const void *b) | |
248 | { | |
249 | const Entry *as = a, *bs = b; | |
250 | return FcStrCmpIgnoreCase ((const FcChar8 *) as->file, (const FcChar8 *) bs->file); | |
251 | } | |
252 | ||
/* Largest number of language files main() accepts */
#define MAX_LANG 1024
/* Number of 32-bit words in a bitmap holding one bit per language */
#define MAX_LANG_SET_MAP ((MAX_LANG + 31) / 32)

/*
 * Bitmap helpers.  The bit position is entries[i].id, not i itself, so
 * both macros need an array called `entries' in scope where they are
 * used.  BitSet turns the bit on; BitGet evaluates to 0 or 1.
 */
#define BitSet(map, i) ((map)[(entries[i].id)>>5] |= ((FcChar32) 1 << ((entries[i].id) & 0x1f)))
#define BitGet(map, i) (((map)[(entries[i].id)>>5] >> ((entries[i].id) & 0x1f)) & 1)
258 | ||
259 | int | |
260 | main (int argc, char **argv) | |
261 | { | |
262 | static Entry entries[MAX_LANG]; | |
263 | static FcCharSet *sets[MAX_LANG]; | |
264 | static int duplicate[MAX_LANG]; | |
265 | static int country[MAX_LANG]; | |
266 | static char *names[MAX_LANG]; | |
267 | static char *langs[MAX_LANG]; | |
268 | static int off[MAX_LANG]; | |
269 | FILE *f; | |
270 | int ncountry = 0; | |
271 | int i = 0; | |
272 | int nsets = 0; | |
273 | int argi; | |
274 | FcCharLeaf **leaves; | |
275 | int total_leaves = 0; | |
276 | int l, sl, tl, tn; | |
277 | static char line[1024]; | |
278 | static FcChar32 map[MAX_LANG_SET_MAP]; | |
279 | int num_lang_set_map; | |
280 | int setRangeStart[26]; | |
281 | int setRangeEnd[26]; | |
282 | FcChar8 setRangeChar; | |
283 | FcCharSetFreezer *freezer; | |
284 | ||
285 | freezer = FcCharSetFreezerCreate (); | |
286 | if (!freezer) | |
287 | fatal (argv[0], 0, "out of memory"); | |
288 | argi = 1; | |
289 | while (argv[argi]) | |
290 | { | |
291 | if (!strcmp (argv[argi], "-d")) | |
292 | { | |
293 | argi++; | |
294 | dir = argv[argi++]; | |
295 | continue; | |
296 | } | |
297 | if (i == MAX_LANG) | |
298 | fatal (argv[0], 0, "Too many languages"); | |
299 | entries[i].id = i; | |
300 | entries[i].file = argv[argi++]; | |
301 | i++; | |
302 | } | |
303 | entries[i].file = 0; | |
304 | qsort (entries, i, sizeof (Entry), compare); | |
305 | i = 0; | |
306 | while (entries[i].file) | |
307 | { | |
308 | f = scanopen (entries[i].file); | |
309 | if (!f) | |
310 | fatal (entries[i].file, 0, strerror (errno)); | |
311 | sets[i] = scan (f, entries[i].file, freezer); | |
312 | names[i] = get_name (entries[i].file); | |
313 | langs[i] = get_lang(names[i]); | |
314 | if (strchr (langs[i], '-')) | |
315 | country[ncountry++] = i; | |
316 | ||
317 | total_leaves += sets[i]->num; | |
318 | i++; | |
319 | fclose (f); | |
320 | } | |
321 | nsets = i; | |
322 | sets[i] = 0; | |
323 | leaves = malloc (total_leaves * sizeof (FcCharLeaf *)); | |
324 | tl = 0; | |
325 | /* | |
326 | * Find unique leaves | |
327 | */ | |
328 | for (i = 0; sets[i]; i++) | |
329 | { | |
330 | for (sl = 0; sl < sets[i]->num; sl++) | |
331 | { | |
332 | for (l = 0; l < tl; l++) | |
333 | if (leaves[l] == FcCharSetLeaf(sets[i], sl)) | |
334 | break; | |
335 | if (l == tl) | |
336 | leaves[tl++] = FcCharSetLeaf(sets[i], sl); | |
337 | } | |
338 | } | |
339 | ||
340 | /* | |
341 | * Scan the input until the marker is found | |
342 | */ | |
343 | ||
344 | while (fgets (line, sizeof (line), stdin)) | |
345 | { | |
346 | if (!strncmp (line, "@@@", 3)) | |
347 | break; | |
348 | fputs (line, stdout); | |
349 | } | |
350 | ||
351 | printf ("/* total size: %d unique leaves: %d */\n\n", | |
352 | total_leaves, tl); | |
353 | ||
354 | /* | |
355 | * Find duplicate charsets | |
356 | */ | |
357 | duplicate[0] = -1; | |
358 | for (i = 1; sets[i]; i++) | |
359 | { | |
360 | int j; | |
361 | ||
362 | duplicate[i] = -1; | |
363 | for (j = 0; j < i; j++) | |
364 | if (sets[j] == sets[i]) | |
365 | { | |
366 | duplicate[i] = j; | |
367 | break; | |
368 | } | |
369 | } | |
370 | ||
371 | tn = 0; | |
372 | for (i = 0; sets[i]; i++) { | |
373 | if (duplicate[i] >= 0) | |
374 | continue; | |
375 | off[i] = tn; | |
376 | tn += sets[i]->num; | |
377 | } | |
378 | ||
379 | printf ("#define LEAF0 (%d * sizeof (FcLangCharSet))\n", nsets); | |
380 | printf ("#define OFF0 (LEAF0 + %d * sizeof (FcCharLeaf))\n", tl); | |
381 | printf ("#define NUM0 (OFF0 + %d * sizeof (uintptr_t))\n", tn); | |
382 | printf ("#define SET(n) (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))\n"); | |
383 | printf ("#define OFF(s,o) (OFF0 + o * sizeof (uintptr_t) - SET(s))\n"); | |
384 | printf ("#define NUM(s,n) (NUM0 + n * sizeof (FcChar16) - SET(s))\n"); | |
385 | printf ("#define LEAF(o,l) (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))\n"); | |
386 | printf ("#define fcLangCharSets (fcLangData.langCharSets)\n"); | |
387 | printf ("#define fcLangCharSetIndices (fcLangData.langIndices)\n"); | |
388 | printf ("#define fcLangCharSetIndicesInv (fcLangData.langIndicesInv)\n"); | |
389 | printf ("\n"); | |
390 | ||
391 | printf ("static const struct {\n" | |
392 | " FcLangCharSet langCharSets[%d];\n" | |
393 | " FcCharLeaf leaves[%d];\n" | |
394 | " uintptr_t leaf_offsets[%d];\n" | |
395 | " FcChar16 numbers[%d];\n" | |
396 | " FcChar%s langIndices[%d];\n" | |
397 | " FcChar%s langIndicesInv[%d];\n" | |
398 | "} fcLangData = {\n", | |
399 | nsets, tl, tn, tn, | |
400 | nsets < 256 ? "8 " : "16", nsets, nsets < 256 ? "8 " : "16", nsets); | |
401 | ||
402 | /* | |
403 | * Dump sets | |
404 | */ | |
405 | ||
406 | printf ("{\n"); | |
407 | for (i = 0; sets[i]; i++) | |
408 | { | |
409 | int j = duplicate[i]; | |
410 | ||
411 | if (j < 0) | |
412 | j = i; | |
413 | ||
414 | printf (" { \"%s\", " | |
415 | " { FC_REF_CONSTANT, %d, OFF(%d,%d), NUM(%d,%d) } }, /* %d */\n", | |
416 | langs[i], | |
417 | sets[j]->num, i, off[j], i, off[j], i); | |
418 | } | |
419 | printf ("},\n"); | |
420 | ||
421 | /* | |
422 | * Dump leaves | |
423 | */ | |
424 | printf ("{\n"); | |
425 | for (l = 0; l < tl; l++) | |
426 | { | |
427 | printf (" { { /* %d */", l); | |
428 | for (i = 0; i < 256/32; i++) | |
429 | { | |
430 | if (i % 4 == 0) | |
431 | printf ("\n "); | |
432 | printf (" 0x%08x,", leaves[l]->map[i]); | |
433 | } | |
434 | printf ("\n } },\n"); | |
435 | } | |
436 | printf ("},\n"); | |
437 | ||
438 | /* | |
439 | * Dump leaves | |
440 | */ | |
441 | printf ("{\n"); | |
442 | for (i = 0; sets[i]; i++) | |
443 | { | |
444 | int n; | |
445 | ||
446 | if (duplicate[i] >= 0) | |
447 | continue; | |
448 | printf (" /* %s */\n", names[i]); | |
449 | for (n = 0; n < sets[i]->num; n++) | |
450 | { | |
451 | if (n % 4 == 0) | |
452 | printf (" "); | |
453 | for (l = 0; l < tl; l++) | |
454 | if (leaves[l] == FcCharSetLeaf(sets[i], n)) | |
455 | break; | |
456 | if (l == tl) | |
457 | fatal (names[i], 0, "can't find leaf"); | |
458 | printf (" LEAF(%3d,%3d),", off[i], l); | |
459 | if (n % 4 == 3) | |
460 | printf ("\n"); | |
461 | } | |
462 | if (n % 4 != 0) | |
463 | printf ("\n"); | |
464 | } | |
465 | printf ("},\n"); | |
466 | ||
467 | ||
468 | printf ("{\n"); | |
469 | for (i = 0; sets[i]; i++) | |
470 | { | |
471 | int n; | |
472 | ||
473 | if (duplicate[i] >= 0) | |
474 | continue; | |
475 | printf (" /* %s */\n", names[i]); | |
476 | for (n = 0; n < sets[i]->num; n++) | |
477 | { | |
478 | if (n % 8 == 0) | |
479 | printf (" "); | |
480 | printf (" 0x%04x,", FcCharSetNumbers (sets[i])[n]); | |
481 | if (n % 8 == 7) | |
482 | printf ("\n"); | |
483 | } | |
484 | if (n % 8 != 0) | |
485 | printf ("\n"); | |
486 | } | |
487 | printf ("},\n"); | |
488 | ||
489 | /* langIndices */ | |
490 | printf ("{\n"); | |
491 | for (i = 0; sets[i]; i++) | |
492 | { | |
493 | printf (" %d, /* %s */\n", entries[i].id, names[i]); | |
494 | } | |
495 | printf ("},\n"); | |
496 | ||
497 | /* langIndicesInv */ | |
498 | printf ("{\n"); | |
499 | { | |
500 | static int entries_inv[MAX_LANG]; | |
501 | for (i = 0; sets[i]; i++) | |
502 | entries_inv[entries[i].id] = i; | |
503 | for (i = 0; sets[i]; i++) | |
504 | printf (" %d, /* %s */\n", entries_inv[i], names[entries_inv[i]]); | |
505 | } | |
506 | printf ("}\n"); | |
507 | ||
508 | printf ("};\n\n"); | |
509 | ||
510 | printf ("#define NUM_LANG_CHAR_SET %d\n", i); | |
511 | num_lang_set_map = (i + 31) / 32; | |
512 | printf ("#define NUM_LANG_SET_MAP %d\n", num_lang_set_map); | |
513 | /* | |
514 | * Dump indices with country codes | |
515 | */ | |
516 | if (ncountry) | |
517 | { | |
518 | int c; | |
519 | int ncountry_ent = 0; | |
520 | printf ("\n"); | |
521 | printf ("static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {\n"); | |
522 | for (c = 0; c < ncountry; c++) | |
523 | { | |
524 | i = country[c]; | |
525 | if (i >= 0) | |
526 | { | |
527 | int lang = strchr (langs[i], '-') - langs[i]; | |
528 | int d, k; | |
529 | ||
530 | for (k = 0; k < num_lang_set_map; k++) | |
531 | map[k] = 0; | |
532 | ||
533 | BitSet (map, i); | |
534 | for (d = c + 1; d < ncountry; d++) | |
535 | { | |
536 | int j = country[d]; | |
537 | if (j >= 0 && !strncmp (langs[j], langs[i], lang + 1)) | |
538 | { | |
539 | BitSet(map, j); | |
540 | country[d] = -1; | |
541 | } | |
542 | } | |
543 | printf (" {"); | |
544 | for (k = 0; k < num_lang_set_map; k++) | |
545 | printf (" 0x%08x,", map[k]); | |
546 | printf (" }, /* %*.*s */\n", | |
547 | lang, lang, langs[i]); | |
548 | ++ncountry_ent; | |
549 | } | |
550 | } | |
551 | printf ("};\n\n"); | |
552 | printf ("#define NUM_COUNTRY_SET %d\n", ncountry_ent); | |
553 | } | |
554 | ||
555 | ||
556 | /* | |
557 | * Find ranges for each letter for faster searching | |
558 | */ | |
559 | setRangeChar = 'a'; | |
560 | memset(setRangeStart, '\0', sizeof (setRangeStart)); | |
561 | memset(setRangeEnd, '\0', sizeof (setRangeEnd)); | |
562 | for (i = 0; sets[i]; i++) | |
563 | { | |
564 | char c = names[i][0]; | |
565 | ||
566 | while (setRangeChar <= c && c <= 'z') | |
567 | setRangeStart[setRangeChar++ - 'a'] = i; | |
568 | } | |
569 | for (setRangeChar = 'a'; setRangeChar < 'z'; setRangeChar++) | |
570 | setRangeEnd[setRangeChar - 'a'] = setRangeStart[setRangeChar+1-'a'] - 1; | |
571 | setRangeEnd[setRangeChar - 'a'] = i - 1; | |
572 | ||
573 | /* | |
574 | * Dump sets start/finish for the fastpath | |
575 | */ | |
576 | printf ("\n"); | |
577 | printf ("static const FcLangCharSetRange fcLangCharSetRanges[] = {\n"); | |
578 | printf ("\n"); | |
579 | for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++) | |
580 | { | |
581 | printf (" { %d, %d }, /* %c */\n", | |
582 | setRangeStart[setRangeChar - 'a'], | |
583 | setRangeEnd[setRangeChar - 'a'], setRangeChar); | |
584 | } | |
585 | printf ("};\n\n"); | |
586 | ||
587 | while (fgets (line, sizeof (line), stdin)) | |
588 | fputs (line, stdout); | |
589 | ||
590 | fflush (stdout); | |
591 | exit (ferror (stdout)); | |
592 | } |