]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
ab7ae5381b6d020da2305594d3dd543e491088b4
[fontconfig.git] / src / fclang.c
1 /*
2 * fontconfig/src/fclang.c
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26 #include "fcftint.h"
27
/*
 * One entry of the built-in language table (fcLangCharSets).
 */
typedef struct {
    const FcChar8 lang[8];      /* language tag, used as a NUL-terminated string */
    const FcCharSet charset;    /* characters checked against a font to decide coverage */
} FcLangCharSet;
32
/*
 * Range of fcLangCharSets indices searched by FcLangSetIndex for one
 * initial letter ('a'..'z').  The range is empty when begin > end.
 */
typedef struct {
    int begin;  /* first index of the range */
    int end;    /* last index of the range (inclusive) */
} FcLangCharSetRange;
37
38 #include "../fc-lang/fclang.h"
39
/*
 * A set of languages: a bitmap for the languages of the built-in table
 * plus an optional string set for everything else.
 */
struct _FcLangSet {
    FcChar32 map[NUM_LANG_SET_MAP];     /* one bit per built-in language, addressed through fcLangCharSetIndices */
    FcStrSet *extra;                    /* languages not found in the built-in table; 0 when there are none */
};
44
/*
 * Set / test the bit of the language whose fcLangCharSets index is 'id'.
 * The index is first translated through fcLangCharSetIndices; the result
 * selects a 32-bit word (>> 5) and a bit within it (& 0x1f).
 */
#define FcLangSetBitSet(ls, id) ((ls)->map[(fcLangCharSetIndices[id])>>5] |= ((FcChar32) 1 << ((fcLangCharSetIndices[id]) & 0x1f)))
#define FcLangSetBitGet(ls, id) (((ls)->map[(fcLangCharSetIndices[id])>>5] >> ((fcLangCharSetIndices[id]) & 0x1f)) & 1)
47
48 FcLangSet *
49 FcFreeTypeLangSet (const FcCharSet *charset,
50 const FcChar8 *exclusiveLang)
51 {
52 int i, j;
53 FcChar32 missing;
54 const FcCharSet *exclusiveCharset = 0;
55 FcLangSet *ls;
56
57 if (exclusiveLang)
58 exclusiveCharset = FcLangGetCharSet (exclusiveLang);
59 ls = FcLangSetCreate ();
60 if (!ls)
61 return 0;
62 if (FcDebug() & FC_DBG_LANGSET)
63 {
64 printf ("font charset");
65 FcCharSetPrint (charset);
66 printf ("\n");
67 }
68 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
69 {
70 if (FcDebug() & FC_DBG_LANGSET)
71 {
72 printf ("%s charset", fcLangCharSets[i].lang);
73 FcCharSetPrint (&fcLangCharSets[i].charset);
74 printf ("\n");
75 }
76
77 /*
78 * Check for Han charsets to make fonts
79 * which advertise support for a single language
80 * not support other Han languages
81 */
82 if (exclusiveCharset &&
83 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
84 {
85 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
86 continue;
87
88 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
89 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
90 FcCharSetLeaf(exclusiveCharset, j))
91 continue;
92 }
93 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
94 if (FcDebug() & FC_DBG_SCANV)
95 {
96 if (missing && missing < 10)
97 {
98 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
99 charset);
100 FcChar32 ucs4;
101 FcChar32 map[FC_CHARSET_MAP_SIZE];
102 FcChar32 next;
103
104 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
105 printf ("{");
106 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
107 ucs4 != FC_CHARSET_DONE;
108 ucs4 = FcCharSetNextPage (missed, map, &next))
109 {
110 int i, j;
111 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
112 if (map[i])
113 {
114 for (j = 0; j < 32; j++)
115 if (map[i] & (1 << j))
116 printf (" %04x", ucs4 + i * 32 + j);
117 }
118 }
119 printf (" }\n\t");
120 FcCharSetDestroy (missed);
121 }
122 else
123 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
124 }
125 if (!missing)
126 FcLangSetBitSet (ls, i);
127 }
128
129 if (FcDebug() & FC_DBG_SCANV)
130 printf ("\n");
131
132
133 return ls;
134 }
135
/* True when 'c' ends the language part of a tag: a '-' separator or the terminating NUL. */
#define FcLangEnd(c) ((c) == '-' || (c) == '\0')
137
138 FcLangResult
139 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
140 {
141 FcChar8 c1, c2;
142 FcLangResult result = FcLangDifferentLang;
143
144 for (;;)
145 {
146 c1 = *s1++;
147 c2 = *s2++;
148
149 c1 = FcToLower (c1);
150 c2 = FcToLower (c2);
151 if (c1 != c2)
152 {
153 if (FcLangEnd (c1) && FcLangEnd (c2))
154 result = FcLangDifferentTerritory;
155 return result;
156 }
157 else if (!c1)
158 return FcLangEqual;
159 else if (c1 == '-')
160 result = FcLangDifferentTerritory;
161 }
162 }
163
164 /*
165 * Return FcTrue when super contains sub.
166 *
167 * super contains sub if super and sub have the same
168 * language and either the same country or one
169 * is missing the country
170 */
171
172 static FcBool
173 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
174 {
175 FcChar8 c1, c2;
176
177 for (;;)
178 {
179 c1 = *super++;
180 c2 = *sub++;
181
182 c1 = FcToLower (c1);
183 c2 = FcToLower (c2);
184 if (c1 != c2)
185 {
186 /* see if super has a country while sub is mising one */
187 if (c1 == '-' && c2 == '\0')
188 return FcTrue;
189 /* see if sub has a country while super is mising one */
190 if (c1 == '\0' && c2 == '-')
191 return FcTrue;
192 return FcFalse;
193 }
194 else if (!c1)
195 return FcTrue;
196 }
197 }
198
199 const FcCharSet *
200 FcLangGetCharSet (const FcChar8 *lang)
201 {
202 int i;
203 int country = -1;
204
205 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
206 {
207 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
208 case FcLangEqual:
209 return &fcLangCharSets[i].charset;
210 case FcLangDifferentTerritory:
211 if (country == -1)
212 country = i;
213 default:
214 break;
215 }
216 }
217 if (country == -1)
218 return 0;
219 return &fcLangCharSets[country].charset;
220 }
221
222 FcStrSet *
223 FcGetLangs (void)
224 {
225 FcStrSet *langs;
226 int i;
227
228 langs = FcStrSetCreate();
229 if (!langs)
230 return 0;
231
232 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
233 FcStrSetAdd (langs, fcLangCharSets[i].lang);
234
235 return langs;
236 }
237
238 FcLangSet *
239 FcLangSetCreate (void)
240 {
241 FcLangSet *ls;
242
243 ls = malloc (sizeof (FcLangSet));
244 if (!ls)
245 return 0;
246 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
247 memset (ls->map, '\0', sizeof (ls->map));
248 ls->extra = 0;
249 return ls;
250 }
251
252 void
253 FcLangSetDestroy (FcLangSet *ls)
254 {
255 if (ls->extra)
256 FcStrSetDestroy (ls->extra);
257 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
258 free (ls);
259 }
260
261 FcLangSet *
262 FcLangSetCopy (const FcLangSet *ls)
263 {
264 FcLangSet *new;
265
266 new = FcLangSetCreate ();
267 if (!new)
268 goto bail0;
269 memcpy (new->map, ls->map, sizeof (new->map));
270 if (ls->extra)
271 {
272 FcStrList *list;
273 FcChar8 *extra;
274
275 new->extra = FcStrSetCreate ();
276 if (!new->extra)
277 goto bail1;
278
279 list = FcStrListCreate (ls->extra);
280 if (!list)
281 goto bail1;
282
283 while ((extra = FcStrListNext (list)))
284 if (!FcStrSetAdd (new->extra, extra))
285 {
286 FcStrListDone (list);
287 goto bail1;
288 }
289 FcStrListDone (list);
290 }
291 return new;
292 bail1:
293 FcLangSetDestroy (new);
294 bail0:
295 return 0;
296 }
297
298 static int
299 FcLangSetIndex (const FcChar8 *lang)
300 {
301 int low, high, mid = 0;
302 int cmp = 0;
303 FcChar8 firstChar = FcToLower(lang[0]);
304 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
305
306 if (firstChar < 'a')
307 {
308 low = 0;
309 high = fcLangCharSetRanges[0].begin;
310 }
311 else if(firstChar > 'z')
312 {
313 low = fcLangCharSetRanges[25].begin;
314 high = NUM_LANG_CHAR_SET - 1;
315 }
316 else
317 {
318 low = fcLangCharSetRanges[firstChar - 'a'].begin;
319 high = fcLangCharSetRanges[firstChar - 'a'].end;
320 /* no matches */
321 if (low > high)
322 return -low; /* next entry after where it would be */
323 }
324
325 while (low <= high)
326 {
327 mid = (high + low) >> 1;
328 if(fcLangCharSets[mid].lang[0] != firstChar)
329 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
330 else
331 { /* fast path for resolving 2-letter languages (by far the most common) after
332 * finding the first char (probably already true because of the hash table) */
333 cmp = fcLangCharSets[mid].lang[1] - secondChar;
334 if (cmp == 0 &&
335 (fcLangCharSets[mid].lang[2] != '\0' ||
336 lang[2] != '\0'))
337 {
338 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
339 lang+2);
340 }
341 }
342 if (cmp == 0)
343 return mid;
344 if (cmp < 0)
345 low = mid + 1;
346 else
347 high = mid - 1;
348 }
349 if (cmp < 0)
350 mid++;
351 return -(mid + 1);
352 }
353
354 FcBool
355 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
356 {
357 int id;
358
359 id = FcLangSetIndex (lang);
360 if (id >= 0)
361 {
362 FcLangSetBitSet (ls, id);
363 return FcTrue;
364 }
365 if (!ls->extra)
366 {
367 ls->extra = FcStrSetCreate ();
368 if (!ls->extra)
369 return FcFalse;
370 }
371 return FcStrSetAdd (ls->extra, lang);
372 }
373
374 FcLangResult
375 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
376 {
377 int id;
378 FcLangResult best, r;
379 int i;
380
381 id = FcLangSetIndex (lang);
382 if (id < 0)
383 id = -id - 1;
384 else if (FcLangSetBitGet (ls, id))
385 return FcLangEqual;
386 best = FcLangDifferentLang;
387 for (i = id - 1; i >= 0; i--)
388 {
389 r = FcLangCompare (lang, fcLangCharSets[i].lang);
390 if (r == FcLangDifferentLang)
391 break;
392 if (FcLangSetBitGet (ls, i) && r < best)
393 best = r;
394 }
395 for (i = id; i < NUM_LANG_CHAR_SET; i++)
396 {
397 r = FcLangCompare (lang, fcLangCharSets[i].lang);
398 if (r == FcLangDifferentLang)
399 break;
400 if (FcLangSetBitGet (ls, i) && r < best)
401 best = r;
402 }
403 if (ls->extra)
404 {
405 FcStrList *list = FcStrListCreate (ls->extra);
406 FcChar8 *extra;
407
408 if (list)
409 {
410 while (best > FcLangEqual && (extra = FcStrListNext (list)))
411 {
412 r = FcLangCompare (lang, extra);
413 if (r < best)
414 best = r;
415 }
416 FcStrListDone (list);
417 }
418 }
419 return best;
420 }
421
422 static FcLangResult
423 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
424 {
425 FcStrList *list = FcStrListCreate (set);
426 FcLangResult r, best = FcLangDifferentLang;
427 FcChar8 *extra;
428
429 if (list)
430 {
431 while (best > FcLangEqual && (extra = FcStrListNext (list)))
432 {
433 r = FcLangSetHasLang (ls, extra);
434 if (r < best)
435 best = r;
436 }
437 FcStrListDone (list);
438 }
439 return best;
440 }
441
442 FcLangResult
443 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
444 {
445 int i, j;
446 FcLangResult best, r;
447
448 for (i = 0; i < NUM_LANG_SET_MAP; i++)
449 if (lsa->map[i] & lsb->map[i])
450 return FcLangEqual;
451 best = FcLangDifferentLang;
452 for (j = 0; j < NUM_COUNTRY_SET; j++)
453 for (i = 0; i < NUM_LANG_SET_MAP; i++)
454 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
455 (lsb->map[i] & fcLangCountrySets[j][i]))
456 {
457 best = FcLangDifferentTerritory;
458 break;
459 }
460 if (lsa->extra)
461 {
462 r = FcLangSetCompareStrSet (lsb, lsa->extra);
463 if (r < best)
464 best = r;
465 }
466 if (best > FcLangEqual && lsb->extra)
467 {
468 r = FcLangSetCompareStrSet (lsa, lsb->extra);
469 if (r < best)
470 best = r;
471 }
472 return best;
473 }
474
475 /*
476 * Used in computing values -- mustn't allocate any storage
477 */
478 FcLangSet *
479 FcLangSetPromote (const FcChar8 *lang)
480 {
481 static FcLangSet ls;
482 static FcStrSet strs;
483 static FcChar8 *str;
484 int id;
485
486 memset (ls.map, '\0', sizeof (ls.map));
487 ls.extra = 0;
488 id = FcLangSetIndex (lang);
489 if (id > 0)
490 {
491 FcLangSetBitSet (&ls, id);
492 }
493 else
494 {
495 ls.extra = &strs;
496 strs.num = 1;
497 strs.size = 1;
498 strs.strs = &str;
499 strs.ref = 1;
500 str = (FcChar8 *) lang;
501 }
502 return &ls;
503 }
504
505 FcChar32
506 FcLangSetHash (const FcLangSet *ls)
507 {
508 FcChar32 h = 0;
509 int i;
510
511 for (i = 0; i < NUM_LANG_SET_MAP; i++)
512 h ^= ls->map[i];
513 if (ls->extra)
514 h ^= ls->extra->num;
515 return h;
516 }
517
518 FcLangSet *
519 FcNameParseLangSet (const FcChar8 *string)
520 {
521 FcChar8 lang[32],c;
522 int i;
523 FcLangSet *ls;
524
525 ls = FcLangSetCreate ();
526 if (!ls)
527 goto bail0;
528
529 for(;;)
530 {
531 for(i = 0; i < 31;i++)
532 {
533 c = *string++;
534 if(c == '\0' || c == '|')
535 break; /* end of this code */
536 lang[i] = c;
537 }
538 lang[i] = '\0';
539 if (!FcLangSetAdd (ls, lang))
540 goto bail1;
541 if(c == '\0')
542 break;
543 }
544 return ls;
545 bail1:
546 FcLangSetDestroy (ls);
547 bail0:
548 return 0;
549 }
550
551 FcBool
552 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
553 {
554 int i, bit;
555 FcChar32 bits;
556 FcBool first = FcTrue;
557
558 for (i = 0; i < NUM_LANG_SET_MAP; i++)
559 {
560 if ((bits = ls->map[i]))
561 {
562 for (bit = 0; bit <= 31; bit++)
563 if (bits & (1 << bit))
564 {
565 int id = (i << 5) | bit;
566 if (!first)
567 if (!FcStrBufChar (buf, '|'))
568 return FcFalse;
569 if (!FcStrBufString (buf, fcLangCharSets[fcLangCharSetIndicesInv[id]].lang))
570 return FcFalse;
571 first = FcFalse;
572 }
573 }
574 }
575 if (ls->extra)
576 {
577 FcStrList *list = FcStrListCreate (ls->extra);
578 FcChar8 *extra;
579
580 if (!list)
581 return FcFalse;
582 while ((extra = FcStrListNext (list)))
583 {
584 if (!first)
585 if (!FcStrBufChar (buf, '|'))
586 {
587 FcStrListDone (list);
588 return FcFalse;
589 }
590 if (!FcStrBufString (buf, extra))
591 {
592 FcStrListDone (list);
593 return FcFalse;
594 }
595 first = FcFalse;
596 }
597 FcStrListDone (list);
598 }
599 return FcTrue;
600 }
601
602 FcBool
603 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
604 {
605 int i;
606
607 for (i = 0; i < NUM_LANG_SET_MAP; i++)
608 {
609 if (lsa->map[i] != lsb->map[i])
610 return FcFalse;
611 }
612 if (!lsa->extra && !lsb->extra)
613 return FcTrue;
614 if (lsa->extra && lsb->extra)
615 return FcStrSetEqual (lsa->extra, lsb->extra);
616 return FcFalse;
617 }
618
619 static FcBool
620 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
621 {
622 int id;
623 int i;
624
625 id = FcLangSetIndex (lang);
626 if (id < 0)
627 id = -id - 1;
628 else if (FcLangSetBitGet (ls, id))
629 return FcTrue;
630 /*
631 * search up and down among equal languages for a match
632 */
633 for (i = id - 1; i >= 0; i--)
634 {
635 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
636 break;
637 if (FcLangSetBitGet (ls, i) &&
638 FcLangContains (fcLangCharSets[i].lang, lang))
639 return FcTrue;
640 }
641 for (i = id; i < NUM_LANG_CHAR_SET; i++)
642 {
643 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
644 break;
645 if (FcLangSetBitGet (ls, i) &&
646 FcLangContains (fcLangCharSets[i].lang, lang))
647 return FcTrue;
648 }
649 if (ls->extra)
650 {
651 FcStrList *list = FcStrListCreate (ls->extra);
652 FcChar8 *extra;
653
654 if (list)
655 {
656 while ((extra = FcStrListNext (list)))
657 {
658 if (FcLangContains (extra, lang))
659 break;
660 }
661 FcStrListDone (list);
662 if (extra)
663 return FcTrue;
664 }
665 }
666 return FcFalse;
667 }
668
669 /*
670 * return FcTrue if lsa contains every language in lsb
671 */
672 FcBool
673 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
674 {
675 int i, j;
676 FcChar32 missing;
677
678 if (FcDebug() & FC_DBG_MATCHV)
679 {
680 printf ("FcLangSet "); FcLangSetPrint (lsa);
681 printf (" contains "); FcLangSetPrint (lsb);
682 printf ("\n");
683 }
684 /*
685 * check bitmaps for missing language support
686 */
687 for (i = 0; i < NUM_LANG_SET_MAP; i++)
688 {
689 missing = lsb->map[i] & ~lsa->map[i];
690 if (missing)
691 {
692 for (j = 0; j < 32; j++)
693 if (missing & (1 << j))
694 {
695 if (!FcLangSetContainsLang (lsa,
696 fcLangCharSets[fcLangCharSetIndicesInv[i*32 + j]].lang))
697 {
698 if (FcDebug() & FC_DBG_MATCHV)
699 printf ("\tMissing bitmap %s\n", fcLangCharSets[fcLangCharSetIndicesInv[i*32+j]].lang);
700 return FcFalse;
701 }
702 }
703 }
704 }
705 if (lsb->extra)
706 {
707 FcStrList *list = FcStrListCreate (lsb->extra);
708 FcChar8 *extra;
709
710 if (list)
711 {
712 while ((extra = FcStrListNext (list)))
713 {
714 if (!FcLangSetContainsLang (lsa, extra))
715 {
716 if (FcDebug() & FC_DBG_MATCHV)
717 printf ("\tMissing string %s\n", extra);
718 break;
719 }
720 }
721 FcStrListDone (list);
722 if (extra)
723 return FcFalse;
724 }
725 }
726 return FcTrue;
727 }
728
729 FcBool
730 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
731 {
732 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
733 return FcFalse;
734 return FcTrue;
735 }
736
737 FcLangSet *
738 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
739 {
740 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
741
742 if (!l_serialize)
743 return NULL;
744 *l_serialize = *l;
745 return l_serialize;
746 }
747
748 FcStrSet *
749 FcLangSetGetLangs (const FcLangSet *ls)
750 {
751 FcStrSet *langs;
752 int i;
753
754 langs = FcStrSetCreate();
755 if (!langs)
756 return 0;
757
758 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
759 if (FcLangSetBitGet (ls, i))
760 FcStrSetAdd (langs, fcLangCharSets[i].lang);
761
762 if (ls->extra)
763 {
764 FcStrList *list = FcStrListCreate (ls->extra);
765 FcChar8 *extra;
766
767 if (list)
768 {
769 while ((extra = FcStrListNext (list)))
770 FcStrSetAdd (langs, extra);
771
772 FcStrListDone (list);
773 }
774 }
775
776 return langs;
777 }
778
779 #define __fclang__
780 #include "fcaliastail.h"
781 #include "fcftaliastail.h"
782 #undef __fclang__