]> git.wh0rd.org - fontconfig.git/blame - fc-lang/fc-lang.c
#ifdef out old cache stuff, replace with first version of new mmapping
[fontconfig.git] / fc-lang / fc-lang.c
CommitLineData
c1382a3d 1/*
0eadb052 2 * $RCSId: xc/lib/fontconfig/fc-lang/fc-lang.c,v 1.3 2002/08/22 07:36:43 keithp Exp $
c1382a3d 3 *
46b51147 4 * Copyright © 2002 Keith Packard
c1382a3d
KP
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25#include "fcint.h"
c647f6f1
KP
26#include "fccharset.c"
27#include "fcstr.c"
c1382a3d
KP
28
/*
 * fc-lang
 *
 * Read a set of language orthographies and build C declarations for
 * charsets which can then be used to identify which languages are
 * supported by a given font.  Note that this uses some utilities
 * from the fontconfig library, so the necessary files are simply
 * included in this compilation.  A couple of extra utility
 * functions are also needed in slightly modified form
 */
39
c647f6f1
KP
/*
 * No-op version of FcMemAlloc for this tool: both arguments are
 * accepted and ignored.
 */
void
FcMemAlloc (int kind, int size)
{
    (void) kind;
    (void) size;
}
44
/*
 * No-op version of FcMemFree for this tool: both arguments are
 * accepted and ignored.
 */
void
FcMemFree (int kind, int size)
{
    (void) kind;
    (void) size;
}
49
212c9f43
PL
/*
 * Stub version of FcCacheNextOffset for this tool: the descriptor is
 * ignored and -1 is always returned.
 */
int
FcCacheNextOffset (int fd)
{
    const int no_offset = -1;

    (void) fd;
    return no_offset;
}
55
ff3f1f98
KP
56FcChar8 *
57FcConfigHome (void)
58{
59 return getenv ("HOME");
60}
61
c1382a3d
KP
/*
 * Print "<file>:<lineno>: <msg>" followed by a newline on stderr and
 * terminate the program with exit status 1.  Never returns.
 */
static void
fatal (char *file, int lineno, char *msg)
{
    FILE *err = stderr;

    fprintf (err, "%s:%d: %s\n", file, lineno, msg);
    exit (1);
}
68
/*
 * Read the next meaningful line from f into line.
 *
 * Everything from the first '#' onwards is cut off.  Lines that are
 * then empty, or that start with a newline, carriage return or ^Z
 * (\032), are skipped.  *lineno is incremented for every line read,
 * including the skipped ones.
 *
 * line must have room for at least 1024 bytes.
 *
 * Returns line, or 0 at end of file / on read error.
 */
static char *
get_line (FILE *f, char *line, int *lineno)
{
    /*
     * Iterate rather than recurse so that a long run of blank or
     * comment lines cannot grow the stack.
     */
    for (;;)
    {
        char *hash;

        if (!fgets (line, 1024, f))
            return 0;
        ++(*lineno);

        hash = strchr (line, '#');
        if (hash)
            *hash = '\0';

        if (line[0] != '\0' && line[0] != '\n' &&
            line[0] != '\032' && line[0] != '\r')
            return line;
    }
}
83
394b2bf0
KP
/*
 * Optional fallback directory used by scanopen(); NULL when none has
 * been configured.
 */
char *dir = 0;

/*
 * Open an orthography file for reading.
 *
 * file is first tried as given.  If that fails and dir is set,
 * "<dir>/<file>" is tried instead.
 *
 * Returns the open stream, or 0 when the file cannot be opened or the
 * combined path does not fit in the local path buffer.
 */
static FILE *
scanopen (char *file)
{
    FILE    *f;
    char    path[1024];
    int     len;

    f = fopen (file, "r");
    if (f || !dir)
        return f;

    /* bounded formatting: never write past the end of path */
    len = snprintf (path, sizeof (path), "%s/%s", dir, file);
    if (len < 0 || (size_t) len >= sizeof (path))
        return 0;
    return fopen (path, "r");
}
103
c1382a3d
KP
104/*
105 * build a single charset from a source file
106 *
107 * The file format is quite simple, either
108 * a single hex value or a pair separated with a dash
109 *
110 * Comments begin with '#'
111 */
112
113static FcCharSet *
114scan (FILE *f, char *file)
115{
116 FcCharSet *c = 0;
117 FcCharSet *n;
118 int start, end, ucs4;
119 char line[1024];
120 int lineno = 0;
121
122 while (get_line (f, line, &lineno))
123 {
124 if (!strncmp (line, "include", 7))
125 {
126 file = strchr (line, ' ');
127 while (*file == ' ')
128 file++;
129 end = strlen (file);
130 if (file[end-1] == '\n')
131 file[end-1] = '\0';
394b2bf0 132 f = scanopen (file);
c1382a3d
KP
133 if (!f)
134 fatal (file, 0, "can't open");
135 c = scan (f, file);
136 fclose (f);
137 return c;
138 }
139 if (strchr (line, '-'))
140 {
141 if (sscanf (line, "%x-%x", &start, &end) != 2)
142 fatal (file, lineno, "parse error");
143 }
144 else
145 {
146 if (sscanf (line, "%x", &start) != 1)
147 fatal (file, lineno, "parse error");
148 end = start;
149 }
150 if (!c)
151 c = FcCharSetCreate ();
152 for (ucs4 = start; ucs4 <= end; ucs4++)
153 {
154 if (!FcCharSetAddChar (c, ucs4))
155 fatal (file, lineno, "out of memory");
156 }
157 }
158 n = FcCharSetFreeze (c);
159 FcCharSetDestroy (c);
160 return n;
161}
162
/*
 * Convert a file name into a name suitable for C declarations
 *
 * Returns a newly allocated copy of file cut off at its first '.'
 * (the whole string when there is none).  The caller owns the result.
 * Prints a message and exits with status 1 if allocation fails.
 */
static char *
get_name (char *file)
{
    char    *dot = strchr (file, '.');
    size_t  len = dot ? (size_t) (dot - file) : strlen (file);
    char    *name = malloc (len + 1);

    if (!name)
    {
        fprintf (stderr, "%s:%d: %s\n", file, 0, "out of memory");
        exit (1);
    }
    memcpy (name, file, len);
    name[len] = '\0';
    return name;
}
180
/*
 * Convert a C name into a language name
 *
 * Upper-case letters are lowered, '_' becomes '-' and spaces are
 * dropped.  Returns a newly allocated string owned by the caller.
 * Prints a message and exits with status 1 if allocation fails.
 */
static char *
get_lang (char *name)
{
    /* the result is never longer than the input */
    char    *lang = malloc (strlen (name) + 1);
    char    *out = lang;
    char    c;

    if (!lang)
    {
        fprintf (stderr, "%s:%d: %s\n", name, 0, "out of memory");
        exit (1);
    }
    while ((c = *name++))
    {
        if (isupper ((int) (unsigned char) c))
            c = tolower ((int) (unsigned char) c);
        if (c == '_')
            c = '-';
        if (c == ' ')
            continue;
        *out++ = c;
    }
    *out = '\0';
    return lang;
}
204
d8d73958
KP
205static int compare (const void *a, const void *b)
206{
207 const FcChar8 *const *as = a, *const *bs = b;
208 return FcStrCmpIgnoreCase (*as, *bs);
209}
210
234397b4
DD
/* Upper bound on the number of language files handled in one run */
#define MAX_LANG 1024
/* Number of 32-bit words needed to hold one bit per language */
#define MAX_LANG_SET_MAP ((MAX_LANG + 31) / 32)

/* Set / read bit number id in an array of 32-bit words */
#define BitSet(map, id)   ((map)[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define BitGet(map, id)   (((map)[(id)>>5] >> ((id) & 0x1f)) & 1)
216
c1382a3d
KP
217int
218main (int argc, char **argv)
219{
234397b4
DD
220 char *files[MAX_LANG];
221 FcCharSet *sets[MAX_LANG];
222 int duplicate[MAX_LANG];
223 int country[MAX_LANG];
224 char *names[MAX_LANG];
225 char *langs[MAX_LANG];
c1382a3d 226 FILE *f;
234397b4 227 int ncountry = 0;
c1382a3d 228 int i = 0;
cd2ec1a9 229 FcCharLeaf **leaves;
c1382a3d
KP
230 int total_leaves = 0;
231 int l, sl, tl;
234397b4 232 int c;
c1382a3d 233 char line[1024];
234397b4
DD
234 FcChar32 map[MAX_LANG_SET_MAP];
235 int num_lang_set_map;
0eadb052
KP
236 int setRangeStart[26];
237 int setRangeEnd[26];
238 FcChar8 setRangeChar;
c1382a3d
KP
239
240 while (*++argv)
234397b4 241 {
394b2bf0
KP
242 if (!strcmp (*argv, "-d"))
243 {
244 dir = *++argv;
245 continue;
246 }
234397b4
DD
247 if (i == MAX_LANG)
248 fatal (*argv, 0, "Too many languages");
d8d73958 249 files[i++] = *argv;
234397b4 250 }
d8d73958
KP
251 files[i] = 0;
252 qsort (files, i, sizeof (char *), compare);
253 i = 0;
254 while (files[i])
c1382a3d 255 {
394b2bf0 256 f = scanopen (files[i]);
c1382a3d 257 if (!f)
d8d73958
KP
258 fatal (files[i], 0, strerror (errno));
259 sets[i] = scan (f, files[i]);
260 names[i] = get_name (files[i]);
234397b4
DD
261 langs[i] = get_lang(names[i]);
262 if (strchr (langs[i], '-'))
263 country[ncountry++] = i;
264
c1382a3d
KP
265 total_leaves += sets[i]->num;
266 i++;
267 fclose (f);
268 }
269 sets[i] = 0;
270 leaves = malloc (total_leaves * sizeof (FcCharLeaf *));
271 tl = 0;
272 /*
273 * Find unique leaves
274 */
275 for (i = 0; sets[i]; i++)
276 {
c1382a3d
KP
277 for (sl = 0; sl < sets[i]->num; sl++)
278 {
279 for (l = 0; l < tl; l++)
cd2ec1a9 280 if (leaves[l] == FcCharSetGetLeaf(sets[i], sl))
c1382a3d
KP
281 break;
282 if (l == tl)
cd2ec1a9 283 leaves[tl++] = FcCharSetGetLeaf(sets[i], sl);
c1382a3d
KP
284 }
285 }
286
287 /*
288 * Scan the input until the marker is found
289 */
290
291 while (fgets (line, sizeof (line), stdin))
292 {
293 if (!strncmp (line, "@@@", 3))
294 break;
295 fputs (line, stdout);
296 }
297
298 printf ("/* total size: %d unique leaves: %d */\n\n",
299 total_leaves, tl);
300 /*
301 * Dump leaves
302 */
303 printf ("static const FcCharLeaf leaves[%d] = {\n", tl);
304 for (l = 0; l < tl; l++)
305 {
306 printf (" { { /* %d */", l);
307 for (i = 0; i < 256/32; i++)
308 {
309 if (i % 4 == 0)
310 printf ("\n ");
311 printf (" 0x%08x,", leaves[l]->map[i]);
312 }
313 printf ("\n } },\n");
314 }
315 printf ("};\n\n");
316 printf ("#define L(n) ((FcCharLeaf *) &leaves[n])\n\n");
2903c146
KP
317
318 /*
319 * Find duplicate charsets
320 */
321 duplicate[0] = -1;
322 for (i = 1; sets[i]; i++)
323 {
324 int j;
325
326 duplicate[i] = -1;
327 for (j = 0; j < i; j++)
328 if (sets[j] == sets[i])
329 {
330 duplicate[i] = j;
331 break;
332 }
333 }
334
0eadb052
KP
335 /*
336 * Find ranges for each letter for faster searching
337 */
338 setRangeChar = 'a';
339 for (i = 0; sets[i]; i++)
340 {
341 char c = names[i][0];
342
343 while (setRangeChar <= c && c <= 'z')
344 setRangeStart[setRangeChar++ - 'a'] = i;
345 }
346 for (setRangeChar = 'a'; setRangeChar < 'z'; setRangeChar++)
347 setRangeEnd[setRangeChar - 'a'] = setRangeStart[setRangeChar+1-'a'] - 1;
348 setRangeEnd[setRangeChar - 'a'] = i - 1;
349
c1382a3d
KP
350 /*
351 * Dump arrays
352 */
353 for (i = 0; sets[i]; i++)
354 {
355 int n;
356
2903c146
KP
357 if (duplicate[i] >= 0)
358 continue;
c1382a3d
KP
359 printf ("static const FcCharLeaf *leaves_%s[%d] = {\n",
360 names[i], sets[i]->num);
361 for (n = 0; n < sets[i]->num; n++)
362 {
363 if (n % 8 == 0)
364 printf (" ");
365 for (l = 0; l < tl; l++)
cd2ec1a9 366 if (leaves[l] == FcCharSetGetLeaf(sets[i], n))
c1382a3d
KP
367 break;
368 if (l == tl)
369 fatal (names[i], 0, "can't find leaf");
370 printf (" L(%3d),", l);
371 if (n % 8 == 7)
372 printf ("\n");
373 }
374 if (n % 8 != 0)
375 printf ("\n");
376 printf ("};\n\n");
377
378
379 printf ("static const FcChar16 numbers_%s[%d] = {\n",
380 names[i], sets[i]->num);
381 for (n = 0; n < sets[i]->num; n++)
382 {
383 if (n % 8 == 0)
384 printf (" ");
cd2ec1a9 385 printf (" 0x%04x,", FcCharSetGetNumbers(sets[i])[n]);
c1382a3d
KP
386 if (n % 8 == 7)
387 printf ("\n");
388 }
389 if (n % 8 != 0)
390 printf ("\n");
391 printf ("};\n\n");
392 }
393 printf ("#undef L\n\n");
0eadb052 394
c1382a3d
KP
395 /*
396 * Dump sets
397 */
0eadb052 398
c1382a3d
KP
399 printf ("static const FcLangCharSet fcLangCharSets[] = {\n");
400 for (i = 0; sets[i]; i++)
401 {
2903c146 402 int j = duplicate[i];
0eadb052 403
2903c146
KP
404 if (j < 0)
405 j = i;
c1382a3d 406 printf (" { (FcChar8 *) \"%s\",\n"
cd2ec1a9
PL
407 " { FC_REF_CONSTANT, %d, FcStorageDynamic, "
408 "{ { (FcCharLeaf **) leaves_%s, "
409 "(FcChar16 *) numbers_%s } } } },\n",
234397b4 410 langs[i],
2903c146 411 sets[j]->num, names[j], names[j]);
c1382a3d
KP
412 }
413 printf ("};\n\n");
234397b4
DD
414 printf ("#define NUM_LANG_CHAR_SET %d\n", i);
415 num_lang_set_map = (i + 31) / 32;
416 printf ("#define NUM_LANG_SET_MAP %d\n", num_lang_set_map);
417 /*
418 * Dump indices with country codes
419 */
420 if (ncountry)
421 {
422 int ncountry_ent = 0;
423 printf ("\n");
424 printf ("static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {\n");
425 for (c = 0; c < ncountry; c++)
426 {
427 i = country[c];
428 if (i >= 0)
429 {
430 int l = strchr (langs[i], '-') - langs[i];
431 int d, k;
432
433 for (k = 0; k < num_lang_set_map; k++)
434 map[k] = 0;
435
436 BitSet (map, i);
437 for (d = c + 1; d < ncountry; d++)
438 {
439 int j = country[d];
440 if (j >= 0 && !strncmp (langs[j], langs[i], l))
441 {
442 BitSet(map, j);
443 country[d] = -1;
444 }
445 }
446 printf (" {");
447 for (k = 0; k < num_lang_set_map; k++)
448 printf (" 0x%08x,", map[k]);
449 printf (" }, /* %*.*s */\n",
450 l, l, langs[i]);
451 ++ncountry_ent;
452 }
453 }
454 printf ("};\n\n");
455 printf ("#define NUM_COUNTRY_SET %d\n", ncountry_ent);
456 }
457
0eadb052
KP
458
459 /*
460 * Dump sets start/finish for the fastpath
461 */
462 printf ("static const FcLangCharSetRange fcLangCharSetRanges[] = {\n");
463 for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++)
464 {
465 printf (" { %d, %d }, /* %c */\n",
466 setRangeStart[setRangeChar - 'a'],
467 setRangeEnd[setRangeChar - 'a'], setRangeChar);
468 }
469 printf ("};\n\n");
470
c1382a3d
KP
471 while (fgets (line, sizeof (line), stdin))
472 fputs (line, stdout);
473
474 fflush (stdout);
475 exit (ferror (stdout));
476}