]>
Commit | Line | Data |
---|---|---|
c1382a3d | 1 | /* |
0eadb052 | 2 | * $RCSId: xc/lib/fontconfig/fc-lang/fc-lang.c,v 1.3 2002/08/22 07:36:43 keithp Exp $ |
c1382a3d | 3 | * |
46b51147 | 4 | * Copyright © 2002 Keith Packard |
c1382a3d KP |
5 | * |
6 | * Permission to use, copy, modify, distribute, and sell this software and its | |
7 | * documentation for any purpose is hereby granted without fee, provided that | |
8 | * the above copyright notice appear in all copies and that both that | |
9 | * copyright notice and this permission notice appear in supporting | |
10 | * documentation, and that the name of Keith Packard not be used in | |
11 | * advertising or publicity pertaining to distribution of the software without | |
12 | * specific, written prior permission. Keith Packard makes no | |
13 | * representations about the suitability of this software for any purpose. It | |
14 | * is provided "as is" without express or implied warranty. | |
15 | * | |
16 | * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
17 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | |
18 | * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
19 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, | |
20 | * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
21 | * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
22 | * PERFORMANCE OF THIS SOFTWARE. | |
23 | */ | |
24 | ||
25 | #include "fcint.h" | |
c647f6f1 KP |
26 | #include "fccharset.c" |
27 | #include "fcstr.c" | |
c1382a3d KP |
28 | |
/*
 * fc-lang
 *
 * Read a set of language orthographies and build C declarations for
 * charsets which can then be used to identify which languages are
 * supported by a given font.  Note that this uses some utilities
 * from the fontconfig library, so the necessary files are simply
 * included in this compilation.  A couple of extra utility
 * functions are also needed in slightly modified form.
 */
39 | ||
c647f6f1 KP |
/*
 * Stand-in for the library's FcMemAlloc; this build of the tool does
 * nothing with the reported kind and size.
 */
void
FcMemAlloc (int kind, int size)
{
    (void) kind;
    (void) size;
}
44 | ||
/*
 * Stand-in for the library's FcMemFree; this build of the tool does
 * nothing with the reported kind and size.
 */
void
FcMemFree (int kind, int size)
{
    (void) kind;
    (void) size;
}
49 | ||
212c9f43 PL |
/*
 * Stand-in for the library's FcCacheNextOffset; always reports -1
 * regardless of the descriptor passed in.
 */
int
FcCacheNextOffset (int fd)
{
    const int no_offset = -1;

    (void) fd;
    return no_offset;
}
55 | ||
ff3f1f98 KP |
56 | FcChar8 * |
57 | FcConfigHome (void) | |
58 | { | |
59 | return getenv ("HOME"); | |
60 | } | |
61 | ||
c1382a3d KP |
/*
 * Report an error on stderr and terminate the program with status 1.
 *
 * file   - name of the input the error relates to
 * lineno - line number within that input, or 0 when no particular line
 *          applies (the line number is then left out of the message)
 * msg    - description of the problem
 *
 * Does not return.
 */
static void
fatal (const char *file, int lineno, const char *msg)
{
    if (lineno)
	fprintf (stderr, "%s:%d: %s\n", file, lineno, msg);
    else
	fprintf (stderr, "%s: %s\n", file, msg);
    exit (1);
}
68 | ||
/*
 * Read the next meaningful line from `f' into `line'.
 *
 * `line' must have room for at least 1024 bytes.  Everything from the
 * first '#' onwards is cut off as a comment.  Lines that are then empty,
 * or that begin with a newline, carriage return or ^Z (octal 032), are
 * skipped.  *lineno is incremented for every line read, including the
 * skipped ones.
 *
 * Returns `line', or a null pointer at end of file or on a read error.
 *
 * Skipping is done with a loop rather than by recursion so that a long
 * run of comment or blank lines cannot exhaust the stack.
 */
static char *
get_line (FILE *f, char *line, int *lineno)
{
    for (;;)
    {
	char	*hash;

	if (!fgets (line, 1024, f))
	    return NULL;
	++(*lineno);

	/* strip a trailing comment */
	hash = strchr (line, '#');
	if (hash)
	    *hash = '\0';

	if (line[0] != '\0' && line[0] != '\n' &&
	    line[0] != '\032' && line[0] != '\r')
	    return line;
    }
}
83 | ||
394b2bf0 KP |
/*
 * Optional fallback directory for scanopen(); main() sets it from the
 * argument following "-d".  Null means no fallback directory.
 */
char *dir = 0;

/*
 * Open `file' for reading.
 *
 * The name is tried as given first; if that fails and a fallback
 * directory has been set, "<dir>/<file>" is tried as well.
 *
 * Returns the open stream, or a null pointer if neither attempt worked.
 * When the combined path does not fit the local buffer the function
 * fails with errno set to ENAMETOOLONG instead of writing past the end
 * of the buffer.
 */
static FILE *
scanopen (char *file)
{
    FILE    *f;

    f = fopen (file, "r");
    if (!f && dir)
    {
	char	path[1024];
	int	n;

	n = snprintf (path, sizeof (path), "%s/%s", dir, file);
	if (n < 0 || (size_t) n >= sizeof (path))
	{
	    errno = ENAMETOOLONG;
	    return NULL;
	}
	f = fopen (path, "r");
    }
    return f;
}
103 | ||
c1382a3d KP |
104 | /* |
105 | * build a single charset from a source file | |
106 | * | |
107 | * The file format is quite simple, either | |
108 | * a single hex value or a pair separated with a dash | |
109 | * | |
110 | * Comments begin with '#' | |
111 | */ | |
112 | ||
113 | static FcCharSet * | |
114 | scan (FILE *f, char *file) | |
115 | { | |
116 | FcCharSet *c = 0; | |
117 | FcCharSet *n; | |
118 | int start, end, ucs4; | |
119 | char line[1024]; | |
120 | int lineno = 0; | |
121 | ||
122 | while (get_line (f, line, &lineno)) | |
123 | { | |
124 | if (!strncmp (line, "include", 7)) | |
125 | { | |
126 | file = strchr (line, ' '); | |
127 | while (*file == ' ') | |
128 | file++; | |
129 | end = strlen (file); | |
130 | if (file[end-1] == '\n') | |
131 | file[end-1] = '\0'; | |
394b2bf0 | 132 | f = scanopen (file); |
c1382a3d KP |
133 | if (!f) |
134 | fatal (file, 0, "can't open"); | |
135 | c = scan (f, file); | |
136 | fclose (f); | |
137 | return c; | |
138 | } | |
139 | if (strchr (line, '-')) | |
140 | { | |
141 | if (sscanf (line, "%x-%x", &start, &end) != 2) | |
142 | fatal (file, lineno, "parse error"); | |
143 | } | |
144 | else | |
145 | { | |
146 | if (sscanf (line, "%x", &start) != 1) | |
147 | fatal (file, lineno, "parse error"); | |
148 | end = start; | |
149 | } | |
150 | if (!c) | |
151 | c = FcCharSetCreate (); | |
152 | for (ucs4 = start; ucs4 <= end; ucs4++) | |
153 | { | |
154 | if (!FcCharSetAddChar (c, ucs4)) | |
155 | fatal (file, lineno, "out of memory"); | |
156 | } | |
157 | } | |
158 | n = FcCharSetFreeze (c); | |
159 | FcCharSetDestroy (c); | |
160 | return n; | |
161 | } | |
162 | ||
/*
 * Convert a file name into a name suitable for C declarations
 *
 * The result is the part of `file' before its first '.', or all of
 * `file' when it contains no '.'.  It is returned in freshly allocated
 * memory owned by the caller.  If the allocation fails, a message is
 * printed on stderr and the program exits with status 1.
 */
static char *
get_name (const char *file)
{
    const char	*dot = strchr (file, '.');
    size_t	len = dot ? (size_t) (dot - file) : strlen (file);
    char	*name = malloc (len + 1);

    if (!name)
    {
	fprintf (stderr, "%s: out of memory\n", file);
	exit (1);
    }
    memcpy (name, file, len);
    name[len] = '\0';
    return name;
}
180 | ||
/*
 * Convert a C name into a language name
 *
 * Upper-case letters are folded to lower case, every '_' becomes '-'
 * and spaces are dropped.  The result is returned in freshly allocated
 * memory owned by the caller.  If the allocation fails, a message is
 * printed on stderr and the program exits with status 1.
 */
static char *
get_lang (const char *name)
{
    char    *lang = malloc (strlen (name) + 1);
    char    *out = lang;

    if (!lang)
    {
	fprintf (stderr, "%s: out of memory\n", name);
	exit (1);
    }
    for (; *name; name++)
    {
	/* <ctype.h> functions need an unsigned char value */
	unsigned char	ch = (unsigned char) *name;

	if (isupper (ch))
	    ch = (unsigned char) tolower (ch);
	if (ch == '_')
	    ch = '-';
	if (ch == ' ')
	    continue;
	*out++ = (char) ch;
    }
    *out = '\0';
    return lang;
}
204 | ||
d8d73958 KP |
205 | static int compare (const void *a, const void *b) |
206 | { | |
207 | const FcChar8 *const *as = a, *const *bs = b; | |
208 | return FcStrCmpIgnoreCase (*as, *bs); | |
209 | } | |
210 | ||
234397b4 DD |
/* Largest number of orthography files main() accepts */
#define MAX_LANG	    1024
/* Words needed for a bitmap with one bit per language, 32 bits per word */
#define MAX_LANG_SET_MAP    ((MAX_LANG + 31) / 32)

/*
 * Bitmap helpers: bit `id' lives in word id >> 5 at position id & 0x1f.
 * BitSet turns the bit on; BitGet evaluates to 1 if it is on, else 0.
 */
#define BitSet(map, id)   ((map)[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define BitGet(map, id)   (((map)[(id)>>5] >> ((id) & 0x1f)) & 1)
216 | ||
c1382a3d KP |
217 | int |
218 | main (int argc, char **argv) | |
219 | { | |
234397b4 DD |
220 | char *files[MAX_LANG]; |
221 | FcCharSet *sets[MAX_LANG]; | |
222 | int duplicate[MAX_LANG]; | |
223 | int country[MAX_LANG]; | |
224 | char *names[MAX_LANG]; | |
225 | char *langs[MAX_LANG]; | |
c1382a3d | 226 | FILE *f; |
234397b4 | 227 | int ncountry = 0; |
c1382a3d | 228 | int i = 0; |
cd2ec1a9 | 229 | FcCharLeaf **leaves; |
c1382a3d KP |
230 | int total_leaves = 0; |
231 | int l, sl, tl; | |
234397b4 | 232 | int c; |
c1382a3d | 233 | char line[1024]; |
234397b4 DD |
234 | FcChar32 map[MAX_LANG_SET_MAP]; |
235 | int num_lang_set_map; | |
0eadb052 KP |
236 | int setRangeStart[26]; |
237 | int setRangeEnd[26]; | |
238 | FcChar8 setRangeChar; | |
c1382a3d KP |
239 | |
240 | while (*++argv) | |
234397b4 | 241 | { |
394b2bf0 KP |
242 | if (!strcmp (*argv, "-d")) |
243 | { | |
244 | dir = *++argv; | |
245 | continue; | |
246 | } | |
234397b4 DD |
247 | if (i == MAX_LANG) |
248 | fatal (*argv, 0, "Too many languages"); | |
d8d73958 | 249 | files[i++] = *argv; |
234397b4 | 250 | } |
d8d73958 KP |
251 | files[i] = 0; |
252 | qsort (files, i, sizeof (char *), compare); | |
253 | i = 0; | |
254 | while (files[i]) | |
c1382a3d | 255 | { |
394b2bf0 | 256 | f = scanopen (files[i]); |
c1382a3d | 257 | if (!f) |
d8d73958 KP |
258 | fatal (files[i], 0, strerror (errno)); |
259 | sets[i] = scan (f, files[i]); | |
260 | names[i] = get_name (files[i]); | |
234397b4 DD |
261 | langs[i] = get_lang(names[i]); |
262 | if (strchr (langs[i], '-')) | |
263 | country[ncountry++] = i; | |
264 | ||
c1382a3d KP |
265 | total_leaves += sets[i]->num; |
266 | i++; | |
267 | fclose (f); | |
268 | } | |
269 | sets[i] = 0; | |
270 | leaves = malloc (total_leaves * sizeof (FcCharLeaf *)); | |
271 | tl = 0; | |
272 | /* | |
273 | * Find unique leaves | |
274 | */ | |
275 | for (i = 0; sets[i]; i++) | |
276 | { | |
c1382a3d KP |
277 | for (sl = 0; sl < sets[i]->num; sl++) |
278 | { | |
279 | for (l = 0; l < tl; l++) | |
cd2ec1a9 | 280 | if (leaves[l] == FcCharSetGetLeaf(sets[i], sl)) |
c1382a3d KP |
281 | break; |
282 | if (l == tl) | |
cd2ec1a9 | 283 | leaves[tl++] = FcCharSetGetLeaf(sets[i], sl); |
c1382a3d KP |
284 | } |
285 | } | |
286 | ||
287 | /* | |
288 | * Scan the input until the marker is found | |
289 | */ | |
290 | ||
291 | while (fgets (line, sizeof (line), stdin)) | |
292 | { | |
293 | if (!strncmp (line, "@@@", 3)) | |
294 | break; | |
295 | fputs (line, stdout); | |
296 | } | |
297 | ||
298 | printf ("/* total size: %d unique leaves: %d */\n\n", | |
299 | total_leaves, tl); | |
300 | /* | |
301 | * Dump leaves | |
302 | */ | |
303 | printf ("static const FcCharLeaf leaves[%d] = {\n", tl); | |
304 | for (l = 0; l < tl; l++) | |
305 | { | |
306 | printf (" { { /* %d */", l); | |
307 | for (i = 0; i < 256/32; i++) | |
308 | { | |
309 | if (i % 4 == 0) | |
310 | printf ("\n "); | |
311 | printf (" 0x%08x,", leaves[l]->map[i]); | |
312 | } | |
313 | printf ("\n } },\n"); | |
314 | } | |
315 | printf ("};\n\n"); | |
316 | printf ("#define L(n) ((FcCharLeaf *) &leaves[n])\n\n"); | |
2903c146 KP |
317 | |
318 | /* | |
319 | * Find duplicate charsets | |
320 | */ | |
321 | duplicate[0] = -1; | |
322 | for (i = 1; sets[i]; i++) | |
323 | { | |
324 | int j; | |
325 | ||
326 | duplicate[i] = -1; | |
327 | for (j = 0; j < i; j++) | |
328 | if (sets[j] == sets[i]) | |
329 | { | |
330 | duplicate[i] = j; | |
331 | break; | |
332 | } | |
333 | } | |
334 | ||
0eadb052 KP |
335 | /* |
336 | * Find ranges for each letter for faster searching | |
337 | */ | |
338 | setRangeChar = 'a'; | |
339 | for (i = 0; sets[i]; i++) | |
340 | { | |
341 | char c = names[i][0]; | |
342 | ||
343 | while (setRangeChar <= c && c <= 'z') | |
344 | setRangeStart[setRangeChar++ - 'a'] = i; | |
345 | } | |
346 | for (setRangeChar = 'a'; setRangeChar < 'z'; setRangeChar++) | |
347 | setRangeEnd[setRangeChar - 'a'] = setRangeStart[setRangeChar+1-'a'] - 1; | |
348 | setRangeEnd[setRangeChar - 'a'] = i - 1; | |
349 | ||
c1382a3d KP |
350 | /* |
351 | * Dump arrays | |
352 | */ | |
353 | for (i = 0; sets[i]; i++) | |
354 | { | |
355 | int n; | |
356 | ||
2903c146 KP |
357 | if (duplicate[i] >= 0) |
358 | continue; | |
c1382a3d KP |
359 | printf ("static const FcCharLeaf *leaves_%s[%d] = {\n", |
360 | names[i], sets[i]->num); | |
361 | for (n = 0; n < sets[i]->num; n++) | |
362 | { | |
363 | if (n % 8 == 0) | |
364 | printf (" "); | |
365 | for (l = 0; l < tl; l++) | |
cd2ec1a9 | 366 | if (leaves[l] == FcCharSetGetLeaf(sets[i], n)) |
c1382a3d KP |
367 | break; |
368 | if (l == tl) | |
369 | fatal (names[i], 0, "can't find leaf"); | |
370 | printf (" L(%3d),", l); | |
371 | if (n % 8 == 7) | |
372 | printf ("\n"); | |
373 | } | |
374 | if (n % 8 != 0) | |
375 | printf ("\n"); | |
376 | printf ("};\n\n"); | |
377 | ||
378 | ||
379 | printf ("static const FcChar16 numbers_%s[%d] = {\n", | |
380 | names[i], sets[i]->num); | |
381 | for (n = 0; n < sets[i]->num; n++) | |
382 | { | |
383 | if (n % 8 == 0) | |
384 | printf (" "); | |
cd2ec1a9 | 385 | printf (" 0x%04x,", FcCharSetGetNumbers(sets[i])[n]); |
c1382a3d KP |
386 | if (n % 8 == 7) |
387 | printf ("\n"); | |
388 | } | |
389 | if (n % 8 != 0) | |
390 | printf ("\n"); | |
391 | printf ("};\n\n"); | |
392 | } | |
393 | printf ("#undef L\n\n"); | |
0eadb052 | 394 | |
c1382a3d KP |
395 | /* |
396 | * Dump sets | |
397 | */ | |
0eadb052 | 398 | |
c1382a3d KP |
399 | printf ("static const FcLangCharSet fcLangCharSets[] = {\n"); |
400 | for (i = 0; sets[i]; i++) | |
401 | { | |
2903c146 | 402 | int j = duplicate[i]; |
0eadb052 | 403 | |
2903c146 KP |
404 | if (j < 0) |
405 | j = i; | |
c1382a3d | 406 | printf (" { (FcChar8 *) \"%s\",\n" |
cd2ec1a9 PL |
407 | " { FC_REF_CONSTANT, %d, FcStorageDynamic, " |
408 | "{ { (FcCharLeaf **) leaves_%s, " | |
409 | "(FcChar16 *) numbers_%s } } } },\n", | |
234397b4 | 410 | langs[i], |
2903c146 | 411 | sets[j]->num, names[j], names[j]); |
c1382a3d KP |
412 | } |
413 | printf ("};\n\n"); | |
234397b4 DD |
414 | printf ("#define NUM_LANG_CHAR_SET %d\n", i); |
415 | num_lang_set_map = (i + 31) / 32; | |
416 | printf ("#define NUM_LANG_SET_MAP %d\n", num_lang_set_map); | |
417 | /* | |
418 | * Dump indices with country codes | |
419 | */ | |
420 | if (ncountry) | |
421 | { | |
422 | int ncountry_ent = 0; | |
423 | printf ("\n"); | |
424 | printf ("static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {\n"); | |
425 | for (c = 0; c < ncountry; c++) | |
426 | { | |
427 | i = country[c]; | |
428 | if (i >= 0) | |
429 | { | |
430 | int l = strchr (langs[i], '-') - langs[i]; | |
431 | int d, k; | |
432 | ||
433 | for (k = 0; k < num_lang_set_map; k++) | |
434 | map[k] = 0; | |
435 | ||
436 | BitSet (map, i); | |
437 | for (d = c + 1; d < ncountry; d++) | |
438 | { | |
439 | int j = country[d]; | |
440 | if (j >= 0 && !strncmp (langs[j], langs[i], l)) | |
441 | { | |
442 | BitSet(map, j); | |
443 | country[d] = -1; | |
444 | } | |
445 | } | |
446 | printf (" {"); | |
447 | for (k = 0; k < num_lang_set_map; k++) | |
448 | printf (" 0x%08x,", map[k]); | |
449 | printf (" }, /* %*.*s */\n", | |
450 | l, l, langs[i]); | |
451 | ++ncountry_ent; | |
452 | } | |
453 | } | |
454 | printf ("};\n\n"); | |
455 | printf ("#define NUM_COUNTRY_SET %d\n", ncountry_ent); | |
456 | } | |
457 | ||
0eadb052 KP |
458 | |
459 | /* | |
460 | * Dump sets start/finish for the fastpath | |
461 | */ | |
462 | printf ("static const FcLangCharSetRange fcLangCharSetRanges[] = {\n"); | |
463 | for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++) | |
464 | { | |
465 | printf (" { %d, %d }, /* %c */\n", | |
466 | setRangeStart[setRangeChar - 'a'], | |
467 | setRangeEnd[setRangeChar - 'a'], setRangeChar); | |
468 | } | |
469 | printf ("};\n\n"); | |
470 | ||
c1382a3d KP |
471 | while (fgets (line, sizeof (line), stdin)) |
472 | fputs (line, stdout); | |
473 | ||
474 | fflush (stdout); | |
475 | exit (ferror (stdout)); | |
476 | } |