]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
552253d2dee366db68685f1670e5ea8ab5dc41e3
[fontconfig.git] / src / fclang.c
1 /*
2 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26
27 typedef struct {
28 const FcChar8 *lang;
29 const FcCharSet charset;
30 } FcLangCharSet;
31
/*
 * Inclusive index range into fcLangCharSets.  FcLangSetIndex looks one
 * of these up per leading letter 'a'..'z' to bound its binary search;
 * begin > end denotes an empty range.
 */
typedef struct {
    int begin;  /* first table index in the range */
    int end;    /* last table index in the range */
} FcLangCharSetRange;
36
37 #include "../fc-lang/fclang.h"
38
39 struct _FcLangSet {
40 FcChar32 map[NUM_LANG_SET_MAP];
41 FcStrSet *extra;
42 };
43
44 #define FcLangSetBitSet(ls, id) ((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
45 #define FcLangSetBitGet(ls, id) (((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)
46
47 static FcBool langsets_populated = FcFalse;
48
49 FcLangSet *
50 FcFreeTypeLangSet (const FcCharSet *charset,
51 const FcChar8 *exclusiveLang)
52 {
53 int i, j;
54 FcChar32 missing;
55 const FcCharSet *exclusiveCharset = 0;
56 FcLangSet *ls;
57
58 if (!langsets_populated)
59 {
60 FcLangCharSetPopulate ();
61 langsets_populated = FcTrue;
62 }
63
64 if (exclusiveLang)
65 exclusiveCharset = FcCharSetForLang (exclusiveLang);
66 ls = FcLangSetCreate ();
67 if (!ls)
68 return 0;
69 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
70 {
71 /*
72 * Check for Han charsets to make fonts
73 * which advertise support for a single language
74 * not support other Han languages
75 */
76 if (exclusiveCharset &&
77 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
78 {
79 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
80 continue;
81
82 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
83 if (FcCharSetGetLeaf(&fcLangCharSets[i].charset, j) !=
84 FcCharSetGetLeaf(exclusiveCharset, j))
85 continue;
86 }
87 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
88 if (FcDebug() & FC_DBG_SCANV)
89 {
90 if (missing && missing < 10)
91 {
92 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
93 charset);
94 FcChar32 ucs4;
95 FcChar32 map[FC_CHARSET_MAP_SIZE];
96 FcChar32 next;
97
98 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
99 printf ("{");
100 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
101 ucs4 != FC_CHARSET_DONE;
102 ucs4 = FcCharSetNextPage (missed, map, &next))
103 {
104 int i, j;
105 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
106 if (map[i])
107 {
108 for (j = 0; j < 32; j++)
109 if (map[i] & (1 << j))
110 printf (" %04x", ucs4 + i * 32 + j);
111 }
112 }
113 printf (" }\n\t");
114 FcCharSetDestroy (missed);
115 }
116 else
117 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
118 }
119 if (!missing)
120 FcLangSetBitSet (ls, i);
121 }
122
123 if (FcDebug() & FC_DBG_SCANV)
124 printf ("\n");
125
126
127 return ls;
128 }
129
130 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
131
132 FcLangResult
133 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
134 {
135 FcChar8 c1, c2;
136 FcLangResult result = FcLangDifferentLang;
137
138 for (;;)
139 {
140 c1 = *s1++;
141 c2 = *s2++;
142
143 c1 = FcToLower (c1);
144 c2 = FcToLower (c2);
145 if (c1 != c2)
146 {
147 if (FcLangEnd (c1) && FcLangEnd (c2))
148 result = FcLangDifferentCountry;
149 return result;
150 }
151 else if (!c1)
152 return FcLangEqual;
153 else if (c1 == '-')
154 result = FcLangDifferentCountry;
155 }
156 }
157
158 /*
159 * Return FcTrue when super contains sub.
160 *
161 * super contains sub if super and sub have the same
162 * language and either the same country or one
163 * is missing the country
164 */
165
166 static FcBool
167 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
168 {
169 FcChar8 c1, c2;
170
171 for (;;)
172 {
173 c1 = *super++;
174 c2 = *sub++;
175
176 c1 = FcToLower (c1);
177 c2 = FcToLower (c2);
178 if (c1 != c2)
179 {
180 /* see if super has a country while sub is mising one */
181 if (c1 == '-' && c2 == '\0')
182 return FcTrue;
183 /* see if sub has a country while super is mising one */
184 if (c1 == '\0' && c2 == '-')
185 return FcTrue;
186 return FcFalse;
187 }
188 else if (!c1)
189 return FcTrue;
190 }
191 }
192
193 const FcCharSet *
194 FcCharSetForLang (const FcChar8 *lang)
195 {
196 int i;
197 int country = -1;
198
199 if (!langsets_populated)
200 {
201 FcLangCharSetPopulate ();
202 langsets_populated = FcTrue;
203 }
204
205 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
206 {
207 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
208 case FcLangEqual:
209 return &fcLangCharSets[i].charset;
210 case FcLangDifferentCountry:
211 if (country == -1)
212 country = i;
213 default:
214 break;
215 }
216 }
217 if (country == -1)
218 return 0;
219 return &fcLangCharSets[country].charset;
220 }
221
222 FcLangSet *
223 FcLangSetCreate (void)
224 {
225 FcLangSet *ls;
226
227 ls = malloc (sizeof (FcLangSet));
228 if (!ls)
229 return 0;
230 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
231 memset (ls->map, '\0', sizeof (ls->map));
232 ls->extra = 0;
233 return ls;
234 }
235
236 void
237 FcLangSetDestroy (FcLangSet *ls)
238 {
239 if (ls->extra)
240 FcStrSetDestroy (ls->extra);
241 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
242 free (ls);
243 }
244
245 FcLangSet *
246 FcLangSetCopy (const FcLangSet *ls)
247 {
248 FcLangSet *new;
249
250 new = FcLangSetCreate ();
251 if (!new)
252 goto bail0;
253 memcpy (new->map, ls->map, sizeof (new->map));
254 if (ls->extra)
255 {
256 FcStrList *list;
257 FcChar8 *extra;
258
259 new->extra = FcStrSetCreate ();
260 if (!new->extra)
261 goto bail1;
262
263 list = FcStrListCreate (ls->extra);
264 if (!list)
265 goto bail1;
266
267 while ((extra = FcStrListNext (list)))
268 if (!FcStrSetAdd (new->extra, extra))
269 {
270 FcStrListDone (list);
271 goto bail1;
272 }
273 FcStrListDone (list);
274 }
275 return new;
276 bail1:
277 FcLangSetDestroy (new);
278 bail0:
279 return 0;
280 }
281
282 static int
283 FcLangSetIndex (const FcChar8 *lang)
284 {
285 int low, high, mid = 0;
286 int cmp = 0;
287 FcChar8 firstChar = FcToLower(lang[0]);
288 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
289
290 if (firstChar < 'a')
291 {
292 low = 0;
293 high = fcLangCharSetRanges[0].begin;
294 }
295 else if(firstChar > 'z')
296 {
297 low = fcLangCharSetRanges[25].begin;
298 high = NUM_LANG_CHAR_SET - 1;
299 }
300 else
301 {
302 low = fcLangCharSetRanges[firstChar - 'a'].begin;
303 high = fcLangCharSetRanges[firstChar - 'a'].end;
304 /* no matches */
305 if (low > high)
306 return -low; /* next entry after where it would be */
307 }
308
309 while (low <= high)
310 {
311 mid = (high + low) >> 1;
312 if(fcLangCharSets[mid].lang[0] != firstChar)
313 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
314 else
315 { /* fast path for resolving 2-letter languages (by far the most common) after
316 * finding the first char (probably already true because of the hash table) */
317 cmp = fcLangCharSets[mid].lang[1] - secondChar;
318 if (cmp == 0 &&
319 (fcLangCharSets[mid].lang[2] != '\0' ||
320 lang[2] != '\0'))
321 {
322 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
323 lang+2);
324 }
325 }
326 if (cmp == 0)
327 return mid;
328 if (cmp < 0)
329 low = mid + 1;
330 else
331 high = mid - 1;
332 }
333 if (cmp < 0)
334 mid++;
335 return -(mid + 1);
336 }
337
338 FcBool
339 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
340 {
341 int id;
342
343 id = FcLangSetIndex (lang);
344 if (id >= 0)
345 {
346 FcLangSetBitSet (ls, id);
347 return FcTrue;
348 }
349 if (!ls->extra)
350 {
351 ls->extra = FcStrSetCreate ();
352 if (!ls->extra)
353 return FcFalse;
354 }
355 return FcStrSetAdd (ls->extra, lang);
356 }
357
358 FcLangResult
359 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
360 {
361 int id;
362 FcLangResult best, r;
363 int i;
364
365 id = FcLangSetIndex (lang);
366 if (id < 0)
367 id = -id - 1;
368 else if (FcLangSetBitGet (ls, id))
369 return FcLangEqual;
370 best = FcLangDifferentLang;
371 for (i = id - 1; i >= 0; i--)
372 {
373 r = FcLangCompare (lang, fcLangCharSets[i].lang);
374 if (r == FcLangDifferentLang)
375 break;
376 if (FcLangSetBitGet (ls, i) && r < best)
377 best = r;
378 }
379 for (i = id; i < NUM_LANG_CHAR_SET; i++)
380 {
381 r = FcLangCompare (lang, fcLangCharSets[i].lang);
382 if (r == FcLangDifferentLang)
383 break;
384 if (FcLangSetBitGet (ls, i) && r < best)
385 best = r;
386 }
387 if (ls->extra)
388 {
389 FcStrList *list = FcStrListCreate (ls->extra);
390 FcChar8 *extra;
391
392 if (list)
393 {
394 while (best > FcLangEqual && (extra = FcStrListNext (list)))
395 {
396 r = FcLangCompare (lang, extra);
397 if (r < best)
398 best = r;
399 }
400 FcStrListDone (list);
401 }
402 }
403 return best;
404 }
405
406 static FcLangResult
407 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
408 {
409 FcStrList *list = FcStrListCreate (set);
410 FcLangResult r, best = FcLangDifferentLang;
411 FcChar8 *extra;
412
413 if (list)
414 {
415 while (best > FcLangEqual && (extra = FcStrListNext (list)))
416 {
417 r = FcLangSetHasLang (ls, extra);
418 if (r < best)
419 best = r;
420 }
421 FcStrListDone (list);
422 }
423 return best;
424 }
425
426 FcLangResult
427 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
428 {
429 int i, j;
430 FcLangResult best, r;
431
432 for (i = 0; i < NUM_LANG_SET_MAP; i++)
433 if (lsa->map[i] & lsb->map[i])
434 return FcLangEqual;
435 best = FcLangDifferentLang;
436 for (j = 0; j < NUM_COUNTRY_SET; j++)
437 for (i = 0; i < NUM_LANG_SET_MAP; i++)
438 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
439 (lsb->map[i] & fcLangCountrySets[j][i]))
440 {
441 best = FcLangDifferentCountry;
442 break;
443 }
444 if (lsa->extra)
445 {
446 r = FcLangSetCompareStrSet (lsb, lsa->extra);
447 if (r < best)
448 best = r;
449 }
450 if (best > FcLangEqual && lsb->extra)
451 {
452 r = FcLangSetCompareStrSet (lsa, lsb->extra);
453 if (r < best)
454 best = r;
455 }
456 return best;
457 }
458
459 /*
460 * Used in computing values -- mustn't allocate any storage
461 */
462 FcLangSet *
463 FcLangSetPromote (const FcChar8 *lang)
464 {
465 static FcLangSet ls;
466 static FcStrSet strs;
467 static FcChar8 *str;
468 int id;
469
470 memset (ls.map, '\0', sizeof (ls.map));
471 ls.extra = 0;
472 id = FcLangSetIndex (lang);
473 if (id > 0)
474 {
475 FcLangSetBitSet (&ls, id);
476 }
477 else
478 {
479 ls.extra = &strs;
480 strs.num = 1;
481 strs.size = 1;
482 strs.strs = &str;
483 strs.ref = 1;
484 str = (FcChar8 *) lang;
485 }
486 return &ls;
487 }
488
489 FcChar32
490 FcLangSetHash (const FcLangSet *ls)
491 {
492 FcChar32 h = 0;
493 int i;
494
495 for (i = 0; i < NUM_LANG_SET_MAP; i++)
496 h ^= ls->map[i];
497 if (ls->extra)
498 h ^= ls->extra->num;
499 return h;
500 }
501
502 FcLangSet *
503 FcNameParseLangSet (const FcChar8 *string)
504 {
505 FcChar8 lang[32],c;
506 int i;
507 FcLangSet *ls;
508
509 ls = FcLangSetCreate ();
510 if (!ls)
511 goto bail0;
512
513 for(;;)
514 {
515 for(i = 0; i < 31;i++)
516 {
517 c = *string++;
518 if(c == '\0' || c == '|')
519 break; /* end of this code */
520 lang[i] = c;
521 }
522 lang[i] = '\0';
523 if (!FcLangSetAdd (ls, lang))
524 goto bail1;
525 if(c == '\0')
526 break;
527 }
528 return ls;
529 bail1:
530 FcLangSetDestroy (ls);
531 bail0:
532 return 0;
533 }
534
535 FcBool
536 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
537 {
538 int i, bit;
539 FcChar32 bits;
540 FcBool first = FcTrue;
541
542 for (i = 0; i < NUM_LANG_SET_MAP; i++)
543 {
544 if ((bits = ls->map[i]))
545 {
546 for (bit = 0; bit <= 31; bit++)
547 if (bits & (1 << bit))
548 {
549 int id = (i << 5) | bit;
550 if (!first)
551 if (!FcStrBufChar (buf, '|'))
552 return FcFalse;
553 if (!FcStrBufString (buf, fcLangCharSets[id].lang))
554 return FcFalse;
555 first = FcFalse;
556 }
557 }
558 }
559 if (ls->extra)
560 {
561 FcStrList *list = FcStrListCreate (ls->extra);
562 FcChar8 *extra;
563
564 if (!list)
565 return FcFalse;
566 while ((extra = FcStrListNext (list)))
567 {
568 if (!first)
569 if (!FcStrBufChar (buf, '|'))
570 {
571 FcStrListDone (list);
572 return FcFalse;
573 }
574 if (!FcStrBufString (buf, extra))
575 {
576 FcStrListDone (list);
577 return FcFalse;
578 }
579 first = FcFalse;
580 }
581 FcStrListDone (list);
582 }
583 return FcTrue;
584 }
585
586 FcBool
587 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
588 {
589 int i;
590
591 for (i = 0; i < NUM_LANG_SET_MAP; i++)
592 {
593 if (lsa->map[i] != lsb->map[i])
594 return FcFalse;
595 }
596 if (!lsa->extra && !lsb->extra)
597 return FcTrue;
598 if (lsa->extra && lsb->extra)
599 return FcStrSetEqual (lsa->extra, lsb->extra);
600 return FcFalse;
601 }
602
603 static FcBool
604 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
605 {
606 int id;
607 int i;
608
609 id = FcLangSetIndex (lang);
610 if (id < 0)
611 id = -id - 1;
612 else if (FcLangSetBitGet (ls, id))
613 return FcTrue;
614 /*
615 * search up and down among equal languages for a match
616 */
617 for (i = id - 1; i >= 0; i--)
618 {
619 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
620 break;
621 if (FcLangSetBitGet (ls, i) &&
622 FcLangContains (fcLangCharSets[i].lang, lang))
623 return FcTrue;
624 }
625 for (i = id; i < NUM_LANG_CHAR_SET; i++)
626 {
627 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
628 break;
629 if (FcLangSetBitGet (ls, i) &&
630 FcLangContains (fcLangCharSets[i].lang, lang))
631 return FcTrue;
632 }
633 if (ls->extra)
634 {
635 FcStrList *list = FcStrListCreate (ls->extra);
636 FcChar8 *extra;
637
638 if (list)
639 {
640 while ((extra = FcStrListNext (list)))
641 {
642 if (FcLangContains (extra, lang))
643 break;
644 }
645 FcStrListDone (list);
646 if (extra)
647 return FcTrue;
648 }
649 }
650 return FcFalse;
651 }
652
653 /*
654 * return FcTrue if lsa contains every language in lsb
655 */
656 FcBool
657 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
658 {
659 int i, j;
660 FcChar32 missing;
661
662 if (FcDebug() & FC_DBG_MATCHV)
663 {
664 printf ("FcLangSet "); FcLangSetPrint (lsa);
665 printf (" contains "); FcLangSetPrint (lsb);
666 printf ("\n");
667 }
668 /*
669 * check bitmaps for missing language support
670 */
671 for (i = 0; i < NUM_LANG_SET_MAP; i++)
672 {
673 missing = lsb->map[i] & ~lsa->map[i];
674 if (missing)
675 {
676 for (j = 0; j < 32; j++)
677 if (missing & (1 << j))
678 {
679 if (!FcLangSetContainsLang (lsa,
680 fcLangCharSets[i*32 + j].lang))
681 {
682 if (FcDebug() & FC_DBG_MATCHV)
683 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
684 return FcFalse;
685 }
686 }
687 }
688 }
689 if (lsb->extra)
690 {
691 FcStrList *list = FcStrListCreate (lsb->extra);
692 FcChar8 *extra;
693
694 if (list)
695 {
696 while ((extra = FcStrListNext (list)))
697 {
698 if (!FcLangSetContainsLang (lsa, extra))
699 {
700 if (FcDebug() & FC_DBG_MATCHV)
701 printf ("\tMissing string %s\n", extra);
702 break;
703 }
704 }
705 FcStrListDone (list);
706 if (extra)
707 return FcFalse;
708 }
709 }
710 return FcTrue;
711 }
712
713 static FcLangSet ** langsets = 0;
714 static int langset_bank_count = 0, langset_ptr = 0, langset_count = 0;
715
716 void
717 FcLangSetNewBank (void)
718 {
719 langset_count = 0;
720 }
721
722 /* ideally, should only write one copy of any particular FcLangSet */
723 int
724 FcLangSetNeededBytes (const FcLangSet *l)
725 {
726 langset_count++;
727 return sizeof (FcLangSet);
728 }
729
730 int
731 FcLangSetNeededBytesAlign (void)
732 {
733 return fc_alignof (FcLangSet);
734 }
735
736 static FcBool
737 FcLangSetEnsureBank (int bi)
738 {
739 if (!langsets || bi >= langset_bank_count)
740 {
741 int new_count = langset_bank_count + 2;
742 int i;
743 FcLangSet** tt;
744 tt = realloc(langsets, new_count * sizeof(FcLangSet *));
745 if (!tt)
746 return FcFalse;
747
748 langsets = tt;
749 for (i = langset_bank_count; i < new_count; i++)
750 langsets[i] = 0;
751 langset_bank_count = new_count;
752 }
753
754 return FcTrue;
755 }
756
757 void *
758 FcLangSetDistributeBytes (FcCache * metadata, void * block_ptr)
759 {
760 int bi = FcCacheBankToIndex(metadata->bank);
761 if (!FcLangSetEnsureBank(bi))
762 return 0;
763
764 block_ptr = ALIGN(block_ptr, FcLangSet);
765 langsets[bi] = block_ptr;
766 block_ptr = (void *)((char *)block_ptr +
767 langset_count * sizeof(FcLangSet));
768 langset_ptr = 0;
769 metadata->langset_count = langset_count;
770 return block_ptr;
771 }
772
773 FcLangSet *
774 FcLangSetSerialize(int bank, FcLangSet *l)
775 {
776 int p = langset_ptr, bi = FcCacheBankToIndex(bank);
777
778 if (!l) return 0;
779
780 langsets[bi][langset_ptr] = *l;
781 langsets[bi][langset_ptr].extra = 0;
782 langset_ptr++;
783 return &langsets[bi][p];
784 }
785
786 void *
787 FcLangSetUnserialize (FcCache * metadata, void *block_ptr)
788 {
789 int bi = FcCacheBankToIndex(metadata->bank);
790 if (!FcLangSetEnsureBank(bi))
791 return 0;
792
793 FcMemAlloc (FC_MEM_LANGSET, metadata->langset_count * sizeof(FcLangSet));
794 block_ptr = ALIGN(block_ptr, FcLangSet);
795 langsets[bi] = (FcLangSet *)block_ptr;
796 block_ptr = (void *)((char *)block_ptr +
797 metadata->langset_count * sizeof(FcLangSet));
798 return block_ptr;
799 }