]>
Commit | Line | Data |
---|---|---|
c1382a3d | 1 | /* |
0eadb052 | 2 | * $RCSId: xc/lib/fontconfig/fc-lang/fc-lang.c,v 1.3 2002/08/22 07:36:43 keithp Exp $ |
c1382a3d | 3 | * |
46b51147 | 4 | * Copyright © 2002 Keith Packard |
c1382a3d KP |
5 | * |
6 | * Permission to use, copy, modify, distribute, and sell this software and its | |
7 | * documentation for any purpose is hereby granted without fee, provided that | |
8 | * the above copyright notice appear in all copies and that both that | |
9 | * copyright notice and this permission notice appear in supporting | |
10 | * documentation, and that the name of Keith Packard not be used in | |
11 | * advertising or publicity pertaining to distribution of the software without | |
12 | * specific, written prior permission. Keith Packard makes no | |
13 | * representations about the suitability of this software for any purpose. It | |
14 | * is provided "as is" without express or implied warranty. | |
15 | * | |
16 | * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
17 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
18 | * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
19 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, | |
20 | * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
21 | * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
22 | * PERFORMANCE OF THIS SOFTWARE. | |
23 | */ | |
24 | ||
25 | #include "fcint.h" | |
c647f6f1 KP |
26 | #include "fccharset.c" |
27 | #include "fcstr.c" | |
e3096d90 | 28 | #include "fcserialize.c" |
c1382a3d KP |
29 | |
30 | /* | |
31 | * fc-lang | |
32 | * | |
33 | * Read a set of language orthographies and build C declarations for | |
34 | * charsets which can then be used to identify which languages are | |
c647f6f1 KP |
35 | * supported by a given font. Note that this uses some utilities |
36 | * from the fontconfig library, so the necessary file is simply | |
37 | * included in this compilation. A couple of extra utility | |
38 | * functions are also needed in slightly modified form | |
c1382a3d KP |
39 | */ |
40 | ||
c647f6f1 KP |
/*
 * Do-nothing implementation of FcMemAlloc for this tool.
 *
 * Both arguments are ignored.  Presumably this satisfies references made
 * by the library sources that are #included into this file -- confirm
 * against those sources.
 */
void
FcMemAlloc (int kind, int size)
{
    (void) kind;
    (void) size;
}
45 | ||
/*
 * Do-nothing implementation of FcMemFree for this tool.
 *
 * Both arguments are ignored.  Presumably this satisfies references made
 * by the library sources that are #included into this file -- confirm
 * against those sources.
 */
void
FcMemFree (int kind, int size)
{
    (void) kind;
    (void) size;
}
50 | ||
afe5a671 KP |
51 | FcPrivate void |
52 | FcCacheObjectReference (void *object) | |
53 | { | |
54 | } | |
55 | ||
56 | FcPrivate void | |
57 | FcCacheObjectDereference (void *object) | |
58 | { | |
59 | } | |
60 | ||
18b6857c KP |
/*
 * Definition of FcDebugVal.  Nothing in this file assigns it, so it keeps
 * the zero value of a file-scope object.  Presumably it is referenced by
 * the library sources #included above -- confirm.
 */
int FcDebugVal;
62 | ||
ff3f1f98 KP |
63 | FcChar8 * |
64 | FcConfigHome (void) | |
65 | { | |
8245771d | 66 | return (FcChar8 *) getenv ("HOME"); |
ff3f1f98 KP |
67 | } |
68 | ||
/*
 * Print an error message on stderr and terminate with exit status 1.
 *
 * file   - name printed as the message prefix
 * lineno - line number printed after the name; 0 means "no line number"
 * msg    - text of the message
 *
 * This function does not return.
 */
static void
fatal (const char *file, int lineno, const char *msg)
{
    if (lineno == 0)
	fprintf (stderr, "%s: %s\n", file, msg);
    else
	fprintf (stderr, "%s:%d: %s\n", file, lineno, msg);
    exit (1);
}
78 | ||
/*
 * Read the next meaningful line from f into line.
 *
 * Everything from the first '#' onwards is discarded as a comment and
 * trailing whitespace (including the line terminator) is removed.  Lines
 * that are empty after that, or that start with a carriage return or a
 * Ctrl-Z character, are skipped.
 *
 * f      - stream to read from
 * line   - caller's buffer; it must hold at least 1024 bytes
 * lineno - incremented once for every physical line consumed, including
 *          the skipped ones
 *
 * Returns line, or a null pointer at end of file or on a read error.
 */
static char *
get_line (FILE *f, char *line, int *lineno)
{
    /* Loop rather than recurse so a long run of comments costs no stack. */
    for (;;)
    {
	char	*hash;
	size_t	end;

	if (!fgets (line, 1024, f))
	    return 0;
	++(*lineno);

	hash = strchr (line, '#');
	if (hash)
	    *hash = '\0';

	/* <ctype.h> functions need an unsigned char value, not a plain char. */
	end = strlen (line);
	while (end > 0 && isspace ((unsigned char) line[end - 1]))
	    line[--end] = '\0';

	/*
	 * A leading '\n' cannot survive the trimming above, so only the
	 * empty line, '\r' and Ctrl-Z remain to be filtered out.
	 */
	if (line[0] != '\0' && line[0] != '\032' && line[0] != '\r')
	    return line;
    }
}
99 | ||
/* Fallback directory for scanopen(); set from the -d option, 0 if unset. */
static char *dir = 0;

/*
 * Open an orthography file for reading.
 *
 * The name is first tried exactly as given.  If that fails and a fallback
 * directory has been set, "<dir>/<file>" is tried as well.
 *
 * Returns the open stream, or a null pointer if neither attempt succeeds
 * or if the combined path does not fit in the local buffer.
 */
static FILE *
scanopen (char *file)
{
    FILE *f;

    f = fopen (file, "r");
    if (!f && dir)
    {
	char	path[1024];
	int	len;

	/*
	 * Build the path with a bounded formatter; an over-long dir or
	 * file name is reported as "cannot open" instead of overflowing
	 * the buffer.
	 */
	len = snprintf (path, sizeof (path), "%s/%s", dir, file);
	if (len < 0 || (size_t) len >= sizeof (path))
	    return 0;
	f = fopen (path, "r");
    }
    return f;
}
119 | ||
c1382a3d KP |
120 | /* |
121 | * build a single charset from a source file | |
122 | * | |
123 | * The file format is quite simple, either | |
124 | * a single hex value or a pair separated with a dash | |
125 | * | |
126 | * Comments begin with '#' | |
127 | */ | |
128 | ||
18b6857c KP |
129 | static const FcCharSet * |
130 | scan (FILE *f, char *file, FcCharSetFreezer *freezer) | |
c1382a3d | 131 | { |
18b6857c KP |
132 | FcCharSet *c = 0; |
133 | const FcCharSet *n; | |
134 | int start, end, ucs4; | |
135 | char line[1024]; | |
136 | int lineno = 0; | |
c1382a3d KP |
137 | |
138 | while (get_line (f, line, &lineno)) | |
139 | { | |
140 | if (!strncmp (line, "include", 7)) | |
141 | { | |
142 | file = strchr (line, ' '); | |
04f7d3e7 PL |
143 | if (!file) |
144 | fatal (line, lineno, | |
145 | "invalid syntax, expected: include filename"); | |
cf5cf4ca | 146 | while (isspace(*file)) |
c1382a3d | 147 | file++; |
394b2bf0 | 148 | f = scanopen (file); |
c1382a3d KP |
149 | if (!f) |
150 | fatal (file, 0, "can't open"); | |
18b6857c | 151 | n = scan (f, file, freezer); |
c1382a3d | 152 | fclose (f); |
18b6857c | 153 | return n; |
c1382a3d KP |
154 | } |
155 | if (strchr (line, '-')) | |
156 | { | |
157 | if (sscanf (line, "%x-%x", &start, &end) != 2) | |
158 | fatal (file, lineno, "parse error"); | |
159 | } | |
160 | else | |
161 | { | |
162 | if (sscanf (line, "%x", &start) != 1) | |
163 | fatal (file, lineno, "parse error"); | |
164 | end = start; | |
165 | } | |
166 | if (!c) | |
167 | c = FcCharSetCreate (); | |
168 | for (ucs4 = start; ucs4 <= end; ucs4++) | |
169 | { | |
170 | if (!FcCharSetAddChar (c, ucs4)) | |
171 | fatal (file, lineno, "out of memory"); | |
172 | } | |
173 | } | |
18b6857c | 174 | n = FcCharSetFreeze (freezer, c); |
c1382a3d KP |
175 | FcCharSetDestroy (c); |
176 | return n; | |
177 | } | |
178 | ||
179 | /* | |
180 | * Convert a file name into a name suitable for C declarations | |
181 | */ | |
/*
 * Derive the C identifier stem for a file: everything before the first
 * '.' in file, or the whole string when it contains no '.'.
 *
 * Returns a newly allocated, NUL-terminated string owned by the caller.
 * If memory cannot be obtained, an "out of memory" message in the same
 * "<file>: <message>" form used by fatal() is printed and the program
 * exits with status 1.
 */
static char *
get_name (char *file)
{
    const char	*dot;
    char	*name;
    size_t	len;

    dot = strchr (file, '.');
    len = dot ? (size_t) (dot - file) : strlen (file);

    name = malloc (len + 1);
    if (!name)
    {
	fprintf (stderr, "%s: %s\n", file, "out of memory");
	exit (1);
    }
    /* The length is known, so a plain bounded copy is enough. */
    memcpy (name, file, len);
    name[len] = '\0';
    return name;
}
196 | ||
197 | /* | |
198 | * Convert a C name into a language name | |
199 | */ | |
/*
 * Convert a C name into a language tag: upper-case letters are folded to
 * lower case, '_' becomes '-', and spaces are dropped.
 *
 * Returns a newly allocated, NUL-terminated string owned by the caller.
 * If memory cannot be obtained, an "out of memory" message in the same
 * "<name>: <message>" form used by fatal() is printed and the program
 * exits with status 1.
 */
static char *
get_lang (char *name)
{
    char    *lang = malloc (strlen (name) + 1);
    char    *l = lang;
    char    c;

    if (!lang)
    {
	fprintf (stderr, "%s: %s\n", name, "out of memory");
	exit (1);
    }

    while ((c = *name++))
    {
	if (isupper ((int) (unsigned char) c))
	    c = tolower ((int) (unsigned char) c);
	if (c == '_')
	    c = '-';
	if (c == ' ')
	    continue;
	*l++ = c;
    }
    *l = '\0';
    return lang;
}
220 | ||
d8d73958 KP |
221 | static int compare (const void *a, const void *b) |
222 | { | |
223 | const FcChar8 *const *as = a, *const *bs = b; | |
224 | return FcStrCmpIgnoreCase (*as, *bs); | |
225 | } | |
226 | ||
234397b4 DD |
/* Capacity of the per-language tables in main(). */
#define MAX_LANG		1024
/* Number of 32-bit words needed to hold one bit per language. */
#define MAX_LANG_SET_MAP	((MAX_LANG + 31) / 32)

/*
 * Bit-map helpers: map is an array of 32-bit words, id is a bit index.
 * BitSet sets bit id; BitGet evaluates to 1 if bit id is set, else 0.
 */
#define BitSet(map, id)   ((map)[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define BitGet(map, id)   (((map)[(id)>>5] >> ((id) & 0x1f)) & 1)
232 | ||
c1382a3d KP |
233 | int |
234 | main (int argc, char **argv) | |
235 | { | |
69a3fc78 | 236 | static char *files[MAX_LANG]; |
18b6857c | 237 | static const FcCharSet *sets[MAX_LANG]; |
69a3fc78 | 238 | static int duplicate[MAX_LANG]; |
69a3fc78 PL |
239 | static int country[MAX_LANG]; |
240 | static char *names[MAX_LANG]; | |
241 | static char *langs[MAX_LANG]; | |
7ce19673 | 242 | static int off[MAX_LANG]; |
c1382a3d | 243 | FILE *f; |
234397b4 | 244 | int ncountry = 0; |
c1382a3d | 245 | int i = 0; |
7ce19673 | 246 | int nsets = 0; |
67accef4 | 247 | int argi; |
cd2ec1a9 | 248 | FcCharLeaf **leaves; |
c1382a3d | 249 | int total_leaves = 0; |
7ce19673 | 250 | int l, sl, tl, tn; |
69a3fc78 PL |
251 | static char line[1024]; |
252 | static FcChar32 map[MAX_LANG_SET_MAP]; | |
234397b4 | 253 | int num_lang_set_map; |
0eadb052 KP |
254 | int setRangeStart[26]; |
255 | int setRangeEnd[26]; | |
256 | FcChar8 setRangeChar; | |
18b6857c | 257 | FcCharSetFreezer *freezer; |
c1382a3d | 258 | |
18b6857c KP |
259 | freezer = FcCharSetFreezerCreate (); |
260 | if (!freezer) | |
261 | fatal (argv[0], 0, "out of memory"); | |
67accef4 PL |
262 | argi = 1; |
263 | while (argv[argi]) | |
234397b4 | 264 | { |
67accef4 | 265 | if (!strcmp (argv[argi], "-d")) |
394b2bf0 | 266 | { |
67accef4 PL |
267 | argi++; |
268 | dir = argv[argi++]; | |
394b2bf0 KP |
269 | continue; |
270 | } | |
234397b4 | 271 | if (i == MAX_LANG) |
67accef4 PL |
272 | fatal (argv[0], 0, "Too many languages"); |
273 | files[i++] = argv[argi++]; | |
234397b4 | 274 | } |
d8d73958 KP |
275 | files[i] = 0; |
276 | qsort (files, i, sizeof (char *), compare); | |
277 | i = 0; | |
278 | while (files[i]) | |
c1382a3d | 279 | { |
394b2bf0 | 280 | f = scanopen (files[i]); |
c1382a3d | 281 | if (!f) |
d8d73958 | 282 | fatal (files[i], 0, strerror (errno)); |
18b6857c | 283 | sets[i] = scan (f, files[i], freezer); |
d8d73958 | 284 | names[i] = get_name (files[i]); |
234397b4 DD |
285 | langs[i] = get_lang(names[i]); |
286 | if (strchr (langs[i], '-')) | |
287 | country[ncountry++] = i; | |
288 | ||
c1382a3d KP |
289 | total_leaves += sets[i]->num; |
290 | i++; | |
291 | fclose (f); | |
292 | } | |
7ce19673 | 293 | nsets = i; |
c1382a3d KP |
294 | sets[i] = 0; |
295 | leaves = malloc (total_leaves * sizeof (FcCharLeaf *)); | |
296 | tl = 0; | |
297 | /* | |
298 | * Find unique leaves | |
299 | */ | |
300 | for (i = 0; sets[i]; i++) | |
301 | { | |
c1382a3d KP |
302 | for (sl = 0; sl < sets[i]->num; sl++) |
303 | { | |
304 | for (l = 0; l < tl; l++) | |
7ce19673 | 305 | if (leaves[l] == FcCharSetLeaf(sets[i], sl)) |
c1382a3d KP |
306 | break; |
307 | if (l == tl) | |
7ce19673 | 308 | leaves[tl++] = FcCharSetLeaf(sets[i], sl); |
c1382a3d KP |
309 | } |
310 | } | |
311 | ||
312 | /* | |
313 | * Scan the input until the marker is found | |
314 | */ | |
315 | ||
316 | while (fgets (line, sizeof (line), stdin)) | |
317 | { | |
318 | if (!strncmp (line, "@@@", 3)) | |
319 | break; | |
320 | fputs (line, stdout); | |
321 | } | |
322 | ||
323 | printf ("/* total size: %d unique leaves: %d */\n\n", | |
324 | total_leaves, tl); | |
2903c146 KP |
325 | |
326 | /* | |
327 | * Find duplicate charsets | |
328 | */ | |
329 | duplicate[0] = -1; | |
330 | for (i = 1; sets[i]; i++) | |
331 | { | |
332 | int j; | |
333 | ||
334 | duplicate[i] = -1; | |
335 | for (j = 0; j < i; j++) | |
336 | if (sets[j] == sets[i]) | |
337 | { | |
338 | duplicate[i] = j; | |
339 | break; | |
340 | } | |
341 | } | |
342 | ||
7ce19673 KP |
343 | tn = 0; |
344 | for (i = 0; sets[i]; i++) { | |
345 | if (duplicate[i] >= 0) | |
346 | continue; | |
347 | off[i] = tn; | |
348 | tn += sets[i]->num; | |
349 | } | |
350 | ||
351 | printf ("#define LEAF0 (%d * sizeof (FcLangCharSet))\n", nsets); | |
352 | printf ("#define OFF0 (LEAF0 + %d * sizeof (FcCharLeaf))\n", tl); | |
353 | printf ("#define NUM0 (OFF0 + %d * sizeof (intptr_t))\n", tn); | |
354 | printf ("#define SET(n) (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))\n"); | |
355 | printf ("#define OFF(s,o) (OFF0 + o * sizeof (intptr_t) - SET(s))\n"); | |
356 | printf ("#define NUM(s,n) (NUM0 + n * sizeof (FcChar16) - SET(s))\n"); | |
357 | printf ("#define LEAF(o,l) (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))\n"); | |
358 | printf ("#define fcLangCharSets (fcLangData.langCharSets)\n"); | |
359 | printf ("\n"); | |
360 | ||
361 | printf ("static const struct {\n" | |
362 | " FcLangCharSet langCharSets[%d];\n" | |
363 | " FcCharLeaf leaves[%d];\n" | |
364 | " intptr_t leaf_offsets[%d];\n" | |
365 | " FcChar16 numbers[%d];\n" | |
366 | "} fcLangData = {\n", | |
367 | nsets, tl, tn, tn); | |
368 | ||
0eadb052 | 369 | /* |
7ce19673 | 370 | * Dump sets |
0eadb052 | 371 | */ |
7ce19673 KP |
372 | |
373 | printf ("{\n"); | |
0eadb052 KP |
374 | for (i = 0; sets[i]; i++) |
375 | { | |
7ce19673 KP |
376 | int j = duplicate[i]; |
377 | ||
378 | if (j < 0) | |
379 | j = i; | |
380 | ||
381 | printf (" { (FcChar8 *) \"%s\", " | |
382 | " { FC_REF_CONSTANT, %d, OFF(%d,%d), NUM(%d,%d) } }, /* %d */\n", | |
383 | langs[i], | |
384 | sets[j]->num, i, off[j], i, off[j], i); | |
0eadb052 | 385 | } |
7ce19673 | 386 | printf ("},\n"); |
0eadb052 | 387 | |
c1382a3d | 388 | /* |
7ce19673 | 389 | * Dump leaves |
c1382a3d | 390 | */ |
7ce19673 KP |
391 | printf ("{\n"); |
392 | for (l = 0; l < tl; l++) | |
c1382a3d | 393 | { |
7ce19673 KP |
394 | printf (" { { /* %d */", l); |
395 | for (i = 0; i < 256/32; i++) | |
82f35f8b | 396 | { |
7ce19673 KP |
397 | if (i % 4 == 0) |
398 | printf ("\n "); | |
399 | printf (" 0x%08x,", leaves[l]->map[i]); | |
82f35f8b | 400 | } |
7ce19673 | 401 | printf ("\n } },\n"); |
82f35f8b | 402 | } |
7ce19673 | 403 | printf ("},\n"); |
82f35f8b | 404 | |
7ce19673 KP |
405 | /* |
406 | * Dump leaves | |
407 | */ | |
408 | printf ("{\n"); | |
82f35f8b PL |
409 | for (i = 0; sets[i]; i++) |
410 | { | |
411 | int n; | |
412 | ||
413 | if (duplicate[i] >= 0) | |
414 | continue; | |
7ce19673 | 415 | printf (" /* %s */\n", names[i]); |
c1382a3d KP |
416 | for (n = 0; n < sets[i]->num; n++) |
417 | { | |
7ce19673 | 418 | if (n % 4 == 0) |
c1382a3d KP |
419 | printf (" "); |
420 | for (l = 0; l < tl; l++) | |
7ce19673 | 421 | if (leaves[l] == FcCharSetLeaf(sets[i], n)) |
c1382a3d KP |
422 | break; |
423 | if (l == tl) | |
424 | fatal (names[i], 0, "can't find leaf"); | |
7ce19673 KP |
425 | printf (" LEAF(%3d,%3d),", off[i], l); |
426 | if (n % 4 == 3) | |
c1382a3d KP |
427 | printf ("\n"); |
428 | } | |
7ce19673 | 429 | if (n % 4 != 0) |
c1382a3d | 430 | printf ("\n"); |
82f35f8b | 431 | } |
7ce19673 KP |
432 | printf ("},\n"); |
433 | ||
82f35f8b | 434 | |
7ce19673 | 435 | printf ("{\n"); |
82f35f8b PL |
436 | for (i = 0; sets[i]; i++) |
437 | { | |
438 | int n; | |
7ce19673 | 439 | |
a151aced PL |
440 | if (duplicate[i] >= 0) |
441 | continue; | |
7ce19673 | 442 | printf (" /* %s */\n", names[i]); |
c1382a3d KP |
443 | for (n = 0; n < sets[i]->num; n++) |
444 | { | |
445 | if (n % 8 == 0) | |
446 | printf (" "); | |
7ce19673 | 447 | printf (" 0x%04x,", FcCharSetNumbers (sets[i])[n]); |
c1382a3d KP |
448 | if (n % 8 == 7) |
449 | printf ("\n"); | |
450 | } | |
451 | if (n % 8 != 0) | |
452 | printf ("\n"); | |
c1382a3d | 453 | } |
7ce19673 | 454 | printf ("}\n"); |
0eadb052 | 455 | |
c1382a3d | 456 | printf ("};\n\n"); |
7ce19673 | 457 | |
234397b4 DD |
458 | printf ("#define NUM_LANG_CHAR_SET %d\n", i); |
459 | num_lang_set_map = (i + 31) / 32; | |
460 | printf ("#define NUM_LANG_SET_MAP %d\n", num_lang_set_map); | |
461 | /* | |
462 | * Dump indices with country codes | |
463 | */ | |
464 | if (ncountry) | |
465 | { | |
0d745819 | 466 | int c; |
234397b4 DD |
467 | int ncountry_ent = 0; |
468 | printf ("\n"); | |
469 | printf ("static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {\n"); | |
470 | for (c = 0; c < ncountry; c++) | |
471 | { | |
472 | i = country[c]; | |
473 | if (i >= 0) | |
474 | { | |
0d745819 | 475 | int lang = strchr (langs[i], '-') - langs[i]; |
234397b4 DD |
476 | int d, k; |
477 | ||
478 | for (k = 0; k < num_lang_set_map; k++) | |
479 | map[k] = 0; | |
480 | ||
481 | BitSet (map, i); | |
482 | for (d = c + 1; d < ncountry; d++) | |
483 | { | |
484 | int j = country[d]; | |
485 | if (j >= 0 && !strncmp (langs[j], langs[i], l)) | |
486 | { | |
487 | BitSet(map, j); | |
488 | country[d] = -1; | |
489 | } | |
490 | } | |
491 | printf (" {"); | |
492 | for (k = 0; k < num_lang_set_map; k++) | |
493 | printf (" 0x%08x,", map[k]); | |
494 | printf (" }, /* %*.*s */\n", | |
0d745819 | 495 | lang, lang, langs[i]); |
234397b4 DD |
496 | ++ncountry_ent; |
497 | } | |
498 | } | |
499 | printf ("};\n\n"); | |
500 | printf ("#define NUM_COUNTRY_SET %d\n", ncountry_ent); | |
501 | } | |
502 | ||
0eadb052 | 503 | |
7ce19673 KP |
504 | /* |
505 | * Find ranges for each letter for faster searching | |
506 | */ | |
507 | setRangeChar = 'a'; | |
508 | memset(setRangeStart, '\0', sizeof (setRangeStart)); | |
509 | memset(setRangeEnd, '\0', sizeof (setRangeEnd)); | |
510 | for (i = 0; sets[i]; i++) | |
511 | { | |
512 | char c = names[i][0]; | |
513 | ||
514 | while (setRangeChar <= c && c <= 'z') | |
515 | setRangeStart[setRangeChar++ - 'a'] = i; | |
516 | } | |
517 | for (setRangeChar = 'a'; setRangeChar < 'z'; setRangeChar++) | |
518 | setRangeEnd[setRangeChar - 'a'] = setRangeStart[setRangeChar+1-'a'] - 1; | |
519 | setRangeEnd[setRangeChar - 'a'] = i - 1; | |
520 | ||
0eadb052 KP |
521 | /* |
522 | * Dump sets start/finish for the fastpath | |
523 | */ | |
524 | printf ("static const FcLangCharSetRange fcLangCharSetRanges[] = {\n"); | |
525 | for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++) | |
526 | { | |
527 | printf (" { %d, %d }, /* %c */\n", | |
528 | setRangeStart[setRangeChar - 'a'], | |
529 | setRangeEnd[setRangeChar - 'a'], setRangeChar); | |
530 | } | |
531 | printf ("};\n\n"); | |
532 | ||
c1382a3d KP |
533 | while (fgets (line, sizeof (line), stdin)) |
534 | fputs (line, stdout); | |
535 | ||
536 | fflush (stdout); | |
537 | exit (ferror (stdout)); | |
538 | } |