]> git.wh0rd.org - fontconfig.git/blob - fc-lang/fc-lang.c
b72893b944f2aa0a73a0fbf2ca7f6450fa124fa9
[fontconfig.git] / fc-lang / fc-lang.c
1 /*
2 * $RCSId: xc/lib/fontconfig/fc-lang/fc-lang.c,v 1.3 2002/08/22 07:36:43 keithp Exp $
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26 #include "fccharset.c"
27 #include "fcstr.c"
28
/*
 * fc-lang
 *
 * Read a set of language orthographies and build C declarations for
 * charsets which can then be used to identify which languages are
 * supported by a given font.  Note that this uses some utilities
 * from the fontconfig library, so the necessary files are simply
 * included in this compilation.  A couple of extra utility
 * functions are also needed in slightly modified form.
 */
39
40 const FcChar16 langBankNumbers[1]; /* place holders so that externs resolve */
41 const FcCharLeaf langBankLeaves[1];
42 const int langBankLeafIdx[1];
43
/*
 * No-op allocation hook: this tool does not account for memory, so both
 * arguments are ignored.  (Presumably required by the included library
 * sources -- confirm against fccharset.c / fcstr.c.)
 */
void
FcMemAlloc (int kind, int size)
{
    (void) kind;
    (void) size;
}
48
/*
 * No-op release hook matching FcMemAlloc: both arguments are ignored.
 */
void
FcMemFree (int kind, int size)
{
    (void) kind;
    (void) size;
}
53
54 int* _fcBankId = 0;
55 int* _fcBankIdx = 0;
56 FcValueList ** _fcValueLists = 0;
57 FcPatternElt ** _fcPatternElts = 0;
58 int FcDebugVal = 0;
59
/*
 * Stub: always answers -1, whatever bank is asked for.
 */
int
FcCacheBankToIndexMTF (int bank)
{
    (void) bank;
    return -1;
}
65
66 FcChar8 *
67 FcConfigHome (void)
68 {
69 return (FcChar8 *) getenv ("HOME");
70 }
71
/*
 * Report an error on stderr and terminate with exit status 1.
 *
 * file    name printed in front of the message
 * lineno  line number to print after the name; 0 means "no line number"
 * msg     the message text
 *
 * Never returns.
 */
static void
fatal (const char *file, int lineno, const char *msg)
{
    if (lineno == 0)
        fprintf (stderr, "%s: %s\n", file, msg);
    else
        fprintf (stderr, "%s:%d: %s\n", file, lineno, msg);
    exit (1);
}
81
/*
 * Read the next meaningful line from f.
 *
 * Everything from the first '#' on is dropped as a comment and trailing
 * white space is removed.  Lines that end up empty, or that start with
 * a newline, carriage return or Ctrl-Z ('\032'), are skipped.
 *
 * f       stream to read from
 * line    caller's buffer; must have room for at least 1024 bytes
 * lineno  incremented once for every fgets() read, skipped ones included
 *
 * Returns line, or a null pointer at end of file or on a read error.
 */
static char *
get_line (FILE *f, char *line, int *lineno)
{
    /*
     * Loop rather than recurse for skipped lines, so a long run of
     * comments or blank lines cannot exhaust the stack.
     */
    while (fgets (line, 1024, f))
    {
        char    *hash;
        size_t  end;

        ++(*lineno);
        hash = strchr (line, '#');
        if (hash)
            *hash = '\0';

        end = strlen (line);
        /* <ctype.h> functions need an unsigned char value */
        while (end > 0 && isspace ((unsigned char) line[end - 1]))
            line[--end] = '\0';

        if (line[0] == '\0' || line[0] == '\n' ||
            line[0] == '\032' || line[0] == '\r')
            continue;
        return line;
    }
    return 0;
}
102
/* Fallback directory for input files; null when none was given (-d option) */
static char *dir = 0;

/*
 * Open an input file for reading.
 *
 * The name is tried as given first.  If that fails and a fallback
 * directory is set, "<dir>/<file>" is tried as well.
 *
 * Returns the open stream, or a null pointer when the file cannot be
 * opened or the combined path does not fit the internal 1024-byte
 * buffer (errno is then set to ENAMETOOLONG where that is available).
 */
static FILE *
scanopen (char *file)
{
    FILE    *f;

    f = fopen (file, "r");
    if (!f && dir)
    {
        char    path[1024];
        size_t  dlen = strlen (dir);
        size_t  flen = strlen (file);

        /* dir + '/' + file + terminating NUL must fit in path */
        if (dlen + 1 + flen >= sizeof (path))
        {
#ifdef ENAMETOOLONG
            errno = ENAMETOOLONG;
#endif
            return 0;
        }
        memcpy (path, dir, dlen);
        path[dlen] = '/';
        memcpy (path + dlen + 1, file, flen + 1);
        f = fopen (path, "r");
    }
    return f;
}
122
123 /*
124 * build a single charset from a source file
125 *
126 * The file format is quite simple, either
127 * a single hex value or a pair separated with a dash
128 *
129 * Comments begin with '#'
130 */
131
132 static FcCharSet *
133 scan (FILE *f, char *file)
134 {
135 FcCharSet *c = 0;
136 FcCharSet *n;
137 int start, end, ucs4;
138 char line[1024];
139 int lineno = 0;
140
141 while (get_line (f, line, &lineno))
142 {
143 if (!strncmp (line, "include", 7))
144 {
145 file = strchr (line, ' ');
146 if (!file)
147 fatal (line, lineno,
148 "invalid syntax, expected: include filename");
149 while (isspace(*file))
150 file++;
151 f = scanopen (file);
152 if (!f)
153 fatal (file, 0, "can't open");
154 c = scan (f, file);
155 fclose (f);
156 return c;
157 }
158 if (strchr (line, '-'))
159 {
160 if (sscanf (line, "%x-%x", &start, &end) != 2)
161 fatal (file, lineno, "parse error");
162 }
163 else
164 {
165 if (sscanf (line, "%x", &start) != 1)
166 fatal (file, lineno, "parse error");
167 end = start;
168 }
169 if (!c)
170 c = FcCharSetCreate ();
171 for (ucs4 = start; ucs4 <= end; ucs4++)
172 {
173 if (!FcCharSetAddChar (c, ucs4))
174 fatal (file, lineno, "out of memory");
175 }
176 }
177 n = FcCharSetFreeze (c);
178 FcCharSetDestroy (c);
179 return n;
180 }
181
/*
 * Convert a file name into a name suitable for C declarations.
 *
 * The name is the part of file before its first '.', or the whole
 * string when it contains no '.'.
 *
 * Returns a malloc'ed copy that the caller owns, or a null pointer when
 * memory cannot be allocated.
 */
static char *
get_name (char *file)
{
    const char  *dot = strchr (file, '.');
    size_t      len = dot ? (size_t) (dot - file) : strlen (file);
    char        *name = malloc (len + 1);

    if (!name)
        return 0;
    memcpy (name, file, len);
    name[len] = '\0';
    return name;
}
199
/*
 * Convert a C name into a language name.
 *
 * Upper-case letters are folded to lower case, '_' becomes '-' and
 * spaces are dropped.
 *
 * Returns a malloc'ed string that the caller owns, or a null pointer
 * when name is null or memory cannot be allocated.
 */
static char *
get_lang (char *name)
{
    char    *lang;
    char    *l;
    char    c;

    if (!name)
        return 0;
    /* the result is never longer than the input */
    lang = malloc (strlen (name) + 1);
    if (!lang)
        return 0;

    l = lang;
    while ((c = *name++))
    {
        if (isupper ((int) (unsigned char) c))
            c = tolower ((int) (unsigned char) c);
        if (c == '_')
            c = '-';
        if (c == ' ')
            continue;
        *l++ = c;
    }
    *l = '\0';
    return lang;
}
223
224 static int compare (const void *a, const void *b)
225 {
226 const FcChar8 *const *as = a, *const *bs = b;
227 return FcStrCmpIgnoreCase (*as, *bs);
228 }
229
/* Largest number of languages the tool accepts */
#define MAX_LANG            1024
/* Number of 32-bit words in a bitmap holding one bit per language */
#define MAX_LANG_SET_MAP    ((MAX_LANG + 31) / 32)

/* Set / read bit number id in an array of 32-bit words */
#define BitSet(map, id)     ((map)[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define BitGet(map, id)     (((map)[(id)>>5] >> ((id) & 0x1f)) & 1)
235
236 int
237 main (int argc, char **argv)
238 {
239 static char *files[MAX_LANG];
240 static FcCharSet *sets[MAX_LANG];
241 static int duplicate[MAX_LANG];
242 static int offsets[MAX_LANG];
243 static int country[MAX_LANG];
244 static char *names[MAX_LANG];
245 static char *langs[MAX_LANG];
246 FILE *f;
247 int offset = 0;
248 int ncountry = 0;
249 int i = 0;
250 int argi;
251 FcCharLeaf **leaves;
252 int total_leaves = 0;
253 int offset_count = 0;
254 int l, sl, tl;
255 static char line[1024];
256 static FcChar32 map[MAX_LANG_SET_MAP];
257 int num_lang_set_map;
258 int setRangeStart[26];
259 int setRangeEnd[26];
260 FcChar8 setRangeChar;
261
262 argi = 1;
263 while (argv[argi])
264 {
265 if (!strcmp (argv[argi], "-d"))
266 {
267 argi++;
268 dir = argv[argi++];
269 continue;
270 }
271 if (i == MAX_LANG)
272 fatal (argv[0], 0, "Too many languages");
273 files[i++] = argv[argi++];
274 }
275 files[i] = 0;
276 qsort (files, i, sizeof (char *), compare);
277 i = 0;
278 while (files[i])
279 {
280 f = scanopen (files[i]);
281 if (!f)
282 fatal (files[i], 0, strerror (errno));
283 sets[i] = scan (f, files[i]);
284 names[i] = get_name (files[i]);
285 langs[i] = get_lang(names[i]);
286 if (strchr (langs[i], '-'))
287 country[ncountry++] = i;
288
289 total_leaves += sets[i]->num;
290 i++;
291 fclose (f);
292 }
293 sets[i] = 0;
294 leaves = malloc (total_leaves * sizeof (FcCharLeaf *));
295 tl = 0;
296 /*
297 * Find unique leaves
298 */
299 for (i = 0; sets[i]; i++)
300 {
301 for (sl = 0; sl < sets[i]->num; sl++)
302 {
303 for (l = 0; l < tl; l++)
304 if (leaves[l] == FcCharSetGetLeaf(sets[i], sl))
305 break;
306 if (l == tl)
307 leaves[tl++] = FcCharSetGetLeaf(sets[i], sl);
308 }
309 }
310
311 /*
312 * Scan the input until the marker is found
313 */
314
315 while (fgets (line, sizeof (line), stdin))
316 {
317 if (!strncmp (line, "@@@", 3))
318 break;
319 fputs (line, stdout);
320 }
321
322 printf ("/* total size: %d unique leaves: %d */\n\n",
323 total_leaves, tl);
324 /*
325 * Dump leaves
326 */
327 printf ("const FcCharLeaf langBankLeaves[%d] = {\n", tl);
328 for (l = 0; l < tl; l++)
329 {
330 printf (" { { /* %d */", l);
331 for (i = 0; i < 256/32; i++)
332 {
333 if (i % 4 == 0)
334 printf ("\n ");
335 printf (" 0x%08x,", leaves[l]->map[i]);
336 }
337 printf ("\n } },\n");
338 }
339 printf ("};\n\n");
340
341 /*
342 * Find duplicate charsets
343 */
344 duplicate[0] = -1;
345 for (i = 1; sets[i]; i++)
346 {
347 int j;
348
349 duplicate[i] = -1;
350 for (j = 0; j < i; j++)
351 if (sets[j] == sets[i])
352 {
353 duplicate[i] = j;
354 break;
355 }
356 }
357
358 /*
359 * Find ranges for each letter for faster searching
360 */
361 setRangeChar = 'a';
362 for (i = 0; sets[i]; i++)
363 {
364 char c = names[i][0];
365
366 while (setRangeChar <= c && c <= 'z')
367 setRangeStart[setRangeChar++ - 'a'] = i;
368 }
369 for (setRangeChar = 'a'; setRangeChar < 'z'; setRangeChar++)
370 setRangeEnd[setRangeChar - 'a'] = setRangeStart[setRangeChar+1-'a'] - 1;
371 setRangeEnd[setRangeChar - 'a'] = i - 1;
372
373 /*
374 * Dump arrays
375 */
376 for (i = 0; sets[i]; i++)
377 {
378 int n;
379
380 if (duplicate[i] >= 0)
381 continue;
382
383 for (n = 0; n < sets[i]->num; n++)
384 {
385 for (l = 0; l < tl; l++)
386 if (leaves[l] == FcCharSetGetLeaf(sets[i], n))
387 break;
388 if (l == tl)
389 fatal (names[i], 0, "can't find leaf");
390 offset_count++;
391 }
392 offsets[i] = offset;
393 offset += sets[i]->num;
394 }
395
396 printf ("const int langBankLeafIdx[%d] = {\n",
397 offset_count);
398 for (i = 0; sets[i]; i++)
399 {
400 int n;
401
402 if (duplicate[i] >= 0)
403 continue;
404 for (n = 0; n < sets[i]->num; n++)
405 {
406 if (n % 8 == 0)
407 printf (" ");
408 for (l = 0; l < tl; l++)
409 if (leaves[l] == FcCharSetGetLeaf(sets[i], n))
410 break;
411 if (l == tl)
412 fatal (names[i], 0, "can't find leaf");
413 printf (" %3d,", l);
414 if (n % 8 == 7)
415 printf ("\n");
416 }
417 if (n % 8 != 0)
418 printf ("\n");
419 }
420 printf ("};\n\n");
421
422 printf ("const FcChar16 langBankNumbers[%d] = {\n",
423 offset_count);
424
425 for (i = 0; sets[i]; i++)
426 {
427 int n;
428
429 if (duplicate[i] >= 0)
430 continue;
431 for (n = 0; n < sets[i]->num; n++)
432 {
433 if (n % 8 == 0)
434 printf (" ");
435 printf (" 0x%04x,", FcCharSetGetNumbers(sets[i])[n]);
436 if (n % 8 == 7)
437 printf ("\n");
438 }
439 if (n % 8 != 0)
440 printf ("\n");
441 }
442 printf ("};\n\n");
443
444 /*
445 * Dump sets
446 */
447
448 printf ("const FcLangCharSet fcLangCharSets[] = {\n");
449 for (i = 0; sets[i]; i++)
450 {
451 int j = duplicate[i];
452
453 if (j < 0)
454 j = i;
455
456 printf (" { (FcChar8 *) \"%s\",\n"
457 " { FC_REF_CONSTANT, %d, FC_BANK_LANGS, "
458 "{ { %d, %d } } } }, /* %d */\n",
459 langs[i],
460 sets[j]->num, offsets[j], offsets[j], j);
461 }
462 printf ("};\n\n");
463 printf ("#define NUM_LANG_CHAR_SET %d\n", i);
464 num_lang_set_map = (i + 31) / 32;
465 printf ("#define NUM_LANG_SET_MAP %d\n", num_lang_set_map);
466 /*
467 * Dump indices with country codes
468 */
469 if (ncountry)
470 {
471 int c;
472 int ncountry_ent = 0;
473 printf ("\n");
474 printf ("static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {\n");
475 for (c = 0; c < ncountry; c++)
476 {
477 i = country[c];
478 if (i >= 0)
479 {
480 int lang = strchr (langs[i], '-') - langs[i];
481 int d, k;
482
483 for (k = 0; k < num_lang_set_map; k++)
484 map[k] = 0;
485
486 BitSet (map, i);
487 for (d = c + 1; d < ncountry; d++)
488 {
489 int j = country[d];
490 if (j >= 0 && !strncmp (langs[j], langs[i], l))
491 {
492 BitSet(map, j);
493 country[d] = -1;
494 }
495 }
496 printf (" {");
497 for (k = 0; k < num_lang_set_map; k++)
498 printf (" 0x%08x,", map[k]);
499 printf (" }, /* %*.*s */\n",
500 lang, lang, langs[i]);
501 ++ncountry_ent;
502 }
503 }
504 printf ("};\n\n");
505 printf ("#define NUM_COUNTRY_SET %d\n", ncountry_ent);
506 }
507
508
509 /*
510 * Dump sets start/finish for the fastpath
511 */
512 printf ("static const FcLangCharSetRange fcLangCharSetRanges[] = {\n");
513 for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++)
514 {
515 printf (" { %d, %d }, /* %c */\n",
516 setRangeStart[setRangeChar - 'a'],
517 setRangeEnd[setRangeChar - 'a'], setRangeChar);
518 }
519 printf ("};\n\n");
520
521 while (fgets (line, sizeof (line), stdin))
522 fputs (line, stdout);
523
524 fflush (stdout);
525 exit (ferror (stdout));
526 }