]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
Add FcLangSetContains for font listing, add first-letter table for language
[fontconfig.git] / src / fclang.c
1 /*
2 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26
27 typedef struct {
28 FcChar8 *lang;
29 FcCharSet charset;
30 } FcLangCharSet;
31
32 typedef struct {
33 int begin;
34 int end;
35 } FcLangCharSetRange;
36
37 #include "../fc-lang/fclang.h"
38
39 struct _FcLangSet {
40 FcChar32 map[NUM_LANG_SET_MAP];
41 FcStrSet *extra;
42 };
43
44 #define FcLangSetBitSet(ls, id) ((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
45 #define FcLangSetBitGet(ls, id) (((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)
46
47 FcLangSet *
48 FcFreeTypeLangSet (const FcCharSet *charset,
49 const FcChar8 *exclusiveLang)
50 {
51 int i;
52 FcChar32 missing;
53 const FcCharSet *exclusiveCharset = 0;
54 FcLangSet *ls;
55
56
57 if (exclusiveLang)
58 exclusiveCharset = FcCharSetForLang (exclusiveLang);
59 ls = FcLangSetCreate ();
60 if (!ls)
61 return 0;
62 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
63 {
64 /*
65 * Check for Han charsets to make fonts
66 * which advertise support for a single language
67 * not support other Han languages
68 */
69 if (exclusiveCharset &&
70 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang) &&
71 fcLangCharSets[i].charset.leaves != exclusiveCharset->leaves)
72 {
73 continue;
74 }
75 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
76 if (FcDebug() & FC_DBG_SCANV)
77 {
78 if (missing && missing < 10)
79 {
80 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
81 charset);
82 FcChar32 ucs4;
83 FcChar32 map[FC_CHARSET_MAP_SIZE];
84 FcChar32 next;
85
86 printf ("\n%s(%d) ", fcLangCharSets[i].lang, missing);
87 printf ("{");
88 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
89 ucs4 != FC_CHARSET_DONE;
90 ucs4 = FcCharSetNextPage (missed, map, &next))
91 {
92 int i, j;
93 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
94 if (map[i])
95 {
96 for (j = 0; j < 32; j++)
97 if (map[i] & (1 << j))
98 printf (" %04x", ucs4 + i * 32 + j);
99 }
100 }
101 printf (" }\n\t");
102 FcCharSetDestroy (missed);
103 }
104 else
105 printf ("%s(%d) ", fcLangCharSets[i].lang, missing);
106 }
107 if (!missing)
108 FcLangSetBitSet (ls, i);
109 }
110
111 if (FcDebug() & FC_DBG_SCANV)
112 printf ("\n");
113
114
115 return ls;
116 }
117
118 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
119
120 FcLangResult
121 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
122 {
123 FcChar8 c1, c2;
124 FcLangResult result = FcLangDifferentLang;
125
126 for (;;)
127 {
128 c1 = *s1++;
129 c2 = *s2++;
130
131 c1 = FcToLower (c1);
132 c2 = FcToLower (c2);
133 if (c1 != c2)
134 {
135 if (FcLangEnd (c1) && FcLangEnd (c2))
136 result = FcLangDifferentCountry;
137 return result;
138 }
139 else if (!c1)
140 return FcLangEqual;
141 else if (c1 == '-')
142 result = FcLangDifferentCountry;
143 }
144 }
145
146 /*
147 * Return FcTrue when s1 contains s2.
148 *
149 * s1 contains s2 if s1 equals s2 or if s1 is a
150 * language with a country and s2 is just a language
151 */
152
153 static FcBool
154 FcLangContains (const FcChar8 *s1, const FcChar8 *s2)
155 {
156 FcChar8 c1, c2;
157
158 for (;;)
159 {
160 c1 = *s1++;
161 c2 = *s2++;
162
163 c1 = FcToLower (c1);
164 c2 = FcToLower (c2);
165 if (c1 != c2)
166 {
167 /* see if s1 has a country while s2 is mising one */
168 if (c1 == '-' && c2 == '\0')
169 return FcTrue;
170 return FcFalse;
171 }
172 else if (!c1)
173 return FcTrue;
174 }
175 }
176
177 const FcCharSet *
178 FcCharSetForLang (const FcChar8 *lang)
179 {
180 int i;
181 int country = -1;
182 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
183 {
184 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
185 case FcLangEqual:
186 return &fcLangCharSets[i].charset;
187 case FcLangDifferentCountry:
188 if (country == -1)
189 country = i;
190 default:
191 break;
192 }
193 }
194 if (country == -1)
195 return 0;
196 return &fcLangCharSets[i].charset;
197 }
198
199 FcLangSet *
200 FcLangSetCreate (void)
201 {
202 FcLangSet *ls;
203
204 ls = malloc (sizeof (FcLangSet));
205 if (!ls)
206 return 0;
207 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
208 memset (ls->map, '\0', sizeof (ls->map));
209 ls->extra = 0;
210 return ls;
211 }
212
213 void
214 FcLangSetDestroy (FcLangSet *ls)
215 {
216 if (ls->extra)
217 FcStrSetDestroy (ls->extra);
218 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
219 free (ls);
220 }
221
222 FcLangSet *
223 FcLangSetCopy (const FcLangSet *ls)
224 {
225 FcLangSet *new;
226
227 new = FcLangSetCreate ();
228 if (!new)
229 goto bail0;
230 memcpy (new->map, ls->map, sizeof (new->map));
231 if (ls->extra)
232 {
233 FcStrList *list;
234 FcChar8 *extra;
235
236 new->extra = FcStrSetCreate ();
237 if (!new->extra)
238 goto bail1;
239
240 list = FcStrListCreate (ls->extra);
241 if (!list)
242 goto bail1;
243
244 while ((extra = FcStrListNext (list)))
245 if (!FcStrSetAdd (new->extra, extra))
246 {
247 FcStrListDone (list);
248 goto bail1;
249 }
250 FcStrListDone (list);
251 }
252 return new;
253 bail1:
254 FcLangSetDestroy (new);
255 bail0:
256 return 0;
257 }
258
259 static int
260 FcLangSetIndex (const FcChar8 *lang)
261 {
262 int low, high, mid;
263 int cmp;
264 FcChar8 firstChar = FcToLower(lang[0]);
265
266 if (firstChar < 'a')
267 {
268 low = 0;
269 high = fcLangCharSetRanges[0].begin;
270 }
271 else if(firstChar > 'z')
272 {
273 low = fcLangCharSetRanges[25].begin;
274 high = NUM_LANG_CHAR_SET - 1;
275 }
276 else
277 {
278 low = fcLangCharSetRanges[firstChar - 'a'].begin;
279 high = fcLangCharSetRanges[firstChar - 'a'].end;
280 /* no matches */
281 if (low > high)
282 return -low; /* next entry after where it would be */
283 }
284
285 while (low <= high)
286 {
287 mid = (high + low) >> 1;
288 if(fcLangCharSets[mid].lang[0] != firstChar)
289 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
290 else
291 { /* fast path for resolving 2-letter languages (by far the most common) after
292 * finding the first char (probably already true because of the hash table) */
293 FcChar8 secondChar = FcToLower(lang[1]);
294 if (fcLangCharSets[mid].lang[1] > secondChar) // check second chars
295 {
296 high = mid - 1;
297 continue;
298 }
299 else if (fcLangCharSets[mid].lang[1] < secondChar)
300 {
301 low = mid + 1;
302 continue;
303 }
304 else if (fcLangCharSets[mid].lang[2] == '\0' && lang[2] == '\0')
305 return mid;
306
307 else /* identical through the first two charcters, but at least one string didn't end there */
308 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2, lang+2);
309 }
310 if (cmp == 0)
311 return mid;
312 if (cmp < 0)
313 low = mid + 1;
314 else
315 high = mid - 1;
316 }
317 if (cmp < 0)
318 mid++;
319 return -(mid + 1);
320 }
321
322 FcBool
323 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
324 {
325 int id;
326
327 id = FcLangSetIndex (lang);
328 if (id >= 0)
329 {
330 FcLangSetBitSet (ls, id);
331 return FcTrue;
332 }
333 if (!ls->extra)
334 {
335 ls->extra = FcStrSetCreate ();
336 if (!ls->extra)
337 return FcFalse;
338 }
339 return FcStrSetAdd (ls->extra, lang);
340 }
341
342 FcLangResult
343 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
344 {
345 int id;
346 FcLangResult best, r;
347 int i;
348
349 id = FcLangSetIndex (lang);
350 if (id < 0)
351 id = -id - 1;
352 else if (FcLangSetBitGet (ls, id))
353 return FcLangEqual;
354 best = FcLangDifferentLang;
355 for (i = id - 1; i >= 0; i--)
356 {
357 r = FcLangCompare (lang, fcLangCharSets[i].lang);
358 if (r == FcLangDifferentLang)
359 break;
360 if (FcLangSetBitGet (ls, i) && r < best)
361 best = r;
362 }
363 for (i = id; i < NUM_LANG_CHAR_SET; i++)
364 {
365 r = FcLangCompare (lang, fcLangCharSets[i].lang);
366 if (r == FcLangDifferentLang)
367 break;
368 if (FcLangSetBitGet (ls, i) && r < best)
369 best = r;
370 }
371 if (ls->extra)
372 {
373 FcStrList *list = FcStrListCreate (ls->extra);
374 FcChar8 *extra;
375 FcLangResult r;
376
377 if (list)
378 {
379 while (best > FcLangEqual && (extra = FcStrListNext (list)))
380 {
381 r = FcLangCompare (lang, extra);
382 if (r < best)
383 best = r;
384 }
385 FcStrListDone (list);
386 }
387 }
388 return best;
389 }
390
391 static FcLangResult
392 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
393 {
394 FcStrList *list = FcStrListCreate (set);
395 FcLangResult r, best = FcLangDifferentLang;
396 FcChar8 *extra;
397
398 if (list)
399 {
400 while (best > FcLangEqual && (extra = FcStrListNext (list)))
401 {
402 r = FcLangSetHasLang (ls, extra);
403 if (r < best)
404 best = r;
405 }
406 FcStrListDone (list);
407 }
408 return best;
409 }
410
411 FcLangResult
412 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
413 {
414 int i, j;
415 FcLangResult best, r;
416
417 for (i = 0; i < NUM_LANG_SET_MAP; i++)
418 if (lsa->map[i] & lsb->map[i])
419 return FcLangEqual;
420 best = FcLangDifferentLang;
421 for (j = 0; j < NUM_COUNTRY_SET; j++)
422 for (i = 0; i < NUM_LANG_SET_MAP; i++)
423 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
424 (lsb->map[i] & fcLangCountrySets[j][i]))
425 {
426 best = FcLangDifferentCountry;
427 break;
428 }
429 if (lsa->extra)
430 {
431 r = FcLangSetCompareStrSet (lsb, lsa->extra);
432 if (r < best)
433 best = r;
434 }
435 if (best > FcLangEqual && lsb->extra)
436 {
437 r = FcLangSetCompareStrSet (lsa, lsb->extra);
438 if (r < best)
439 best = r;
440 }
441 return best;
442 }
443
444 /*
445 * Used in computing values -- mustn't allocate any storage
446 */
447 FcLangSet *
448 FcLangSetPromote (const FcChar8 *lang)
449 {
450 static FcLangSet ls;
451 static FcStrSet strs;
452 static FcChar8 *str;
453 int id;
454
455 memset (ls.map, '\0', sizeof (ls.map));
456 ls.extra = 0;
457 id = FcLangSetIndex (lang);
458 if (id > 0)
459 {
460 FcLangSetBitSet (&ls, id);
461 }
462 else
463 {
464 ls.extra = &strs;
465 strs.num = 1;
466 strs.size = 1;
467 strs.strs = &str;
468 strs.ref = 1;
469 str = (FcChar8 *) lang;
470 }
471 return &ls;
472 }
473
474 FcChar32
475 FcLangSetHash (const FcLangSet *ls)
476 {
477 FcChar32 h = 0;
478 int i;
479
480 for (i = 0; i < NUM_LANG_SET_MAP; i++)
481 h ^= ls->map[i];
482 if (ls->extra)
483 h ^= ls->extra->num;
484 return h;
485 }
486
487 FcLangSet *
488 FcNameParseLangSet (const FcChar8 *string)
489 {
490 FcChar8 lang[32],c;
491 int i;
492 FcLangSet *ls;
493
494 ls = FcLangSetCreate ();
495 if (!ls)
496 goto bail0;
497
498 for(;;)
499 {
500 for(i = 0; i < 31;i++)
501 {
502 c = *string++;
503 if(c == '\0' || c == '|')
504 break; /* end of this code */
505 lang[i] = c;
506 }
507 lang[i] = '\0';
508 if (!FcLangSetAdd (ls, lang))
509 goto bail1;
510 if(c == '\0')
511 break;
512 }
513 return ls;
514 bail1:
515 FcLangSetDestroy (ls);
516 bail0:
517 return 0;
518 }
519
520 FcBool
521 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
522 {
523 int i, bit;
524 FcChar32 bits;
525 FcBool first = FcTrue;
526
527 for (i = 0; i < NUM_LANG_SET_MAP; i++)
528 {
529 if ((bits = ls->map[i]))
530 {
531 for (bit = 0; bit <= 31; bit++)
532 if (bits & (1 << bit))
533 {
534 int id = (i << 5) | bit;
535 if (!first)
536 if (!FcStrBufChar (buf, '|'))
537 return FcFalse;
538 if (!FcStrBufString (buf, fcLangCharSets[id].lang))
539 return FcFalse;
540 first = FcFalse;
541 }
542 }
543 }
544 if (ls->extra)
545 {
546 FcStrList *list = FcStrListCreate (ls->extra);
547 FcChar8 *extra;
548
549 if (!list)
550 return FcFalse;
551 while ((extra = FcStrListNext (list)))
552 {
553 if (!first)
554 if (!FcStrBufChar (buf, '|'))
555 return FcFalse;
556 if (!FcStrBufString (buf, extra))
557 return FcFalse;
558 first = FcFalse;
559 }
560 }
561 return FcTrue;
562 }
563
564 FcBool
565 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
566 {
567 int i;
568
569 for (i = 0; i < NUM_LANG_SET_MAP; i++)
570 {
571 if (lsa->map[i] != lsb->map[i])
572 return FcFalse;
573 }
574 if (!lsa->extra && !lsb->extra)
575 return FcTrue;
576 if (lsa->extra && lsb->extra)
577 return FcStrSetEqual (lsa->extra, lsb->extra);
578 return FcFalse;
579 }
580
581 static FcBool
582 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
583 {
584 int id;
585 FcLangResult r;
586 int i;
587
588 id = FcLangSetIndex (lang);
589 if (id < 0)
590 id = -id - 1;
591 else if (FcLangSetBitGet (ls, id))
592 return FcTrue;
593 /*
594 * search up and down among equal languages for a match
595 */
596 for (i = id - 1; i >= 0; i--)
597 {
598 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
599 break;
600 if (FcLangSetBitGet (ls, i) &&
601 FcLangContains (fcLangCharSets[i].lang, lang))
602 return FcTrue;
603 }
604 for (i = id; i < NUM_LANG_CHAR_SET; i++)
605 {
606 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
607 break;
608 if (FcLangSetBitGet (ls, i) &&
609 FcLangContains (fcLangCharSets[i].lang, lang))
610 return FcTrue;
611 }
612 if (ls->extra)
613 {
614 FcStrList *list = FcStrListCreate (ls->extra);
615 FcChar8 *extra;
616 FcLangResult r;
617
618 if (list)
619 {
620 while ((extra = FcStrListNext (list)))
621 {
622 if (FcLangContains (extra, lang))
623 break;
624 }
625 FcStrListDone (list);
626 if (extra)
627 return FcTrue;
628 }
629 }
630 return FcFalse;
631 }
632
633 /*
634 * return FcTrue if lsa contains every language in lsb
635 */
636 FcBool
637 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
638 {
639 int i, j;
640 FcChar32 missing;
641
642 if (FcDebug() & FC_DBG_MATCHV)
643 {
644 printf ("FcLangSet "); FcLangSetPrint (lsa);
645 printf (" contains "); FcLangSetPrint (lsb);
646 printf ("\n");
647 }
648 /*
649 * check bitmaps for missing language support
650 */
651 for (i = 0; i < NUM_LANG_SET_MAP; i++)
652 {
653 missing = lsb->map[i] & ~lsa->map[i];
654 if (missing)
655 {
656 for (j = 0; j < 32; j++)
657 if (missing & (1 << j))
658 {
659 if (!FcLangSetContainsLang (lsa,
660 fcLangCharSets[i*32 + j].lang))
661 {
662 if (FcDebug() & FC_DBG_MATCHV)
663 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
664 return FcFalse;
665 }
666 }
667 }
668 }
669 if (lsb->extra)
670 {
671 FcStrList *list = FcStrListCreate (lsb->extra);
672 FcChar8 *extra;
673
674 if (list)
675 {
676 while ((extra = FcStrListNext (list)))
677 {
678 if (!FcLangSetContainsLang (lsa, extra))
679 {
680 if (FcDebug() & FC_DBG_MATCHV)
681 printf ("\tMissing string %s\n", extra);
682 break;
683 }
684 }
685 FcStrListDone (list);
686 if (extra)
687 return FcFalse;
688 }
689 }
690 return FcTrue;
691 }