]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
Fix FcNameUnparseLangSet()
[fontconfig.git] / src / fclang.c
1 /*
2 * fontconfig/src/fclang.c
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26 #include "fcftint.h"
27
/*
 * One entry of the fcLangCharSets table: a language tag together with
 * a charset.  FcFreeTypeLangSet() marks a font as supporting `lang`
 * when no character of `charset` is missing from the font's charset.
 */
typedef struct {
    const FcChar8 lang[8];      /* NUL-terminated language tag */
    const FcCharSet charset;    /* charset associated with the tag */
} FcLangCharSet;
32
/*
 * Span of fcLangCharSets indices, used by FcLangSetIndex() as the
 * binary-search bounds for tags starting with a given letter
 * (fcLangCharSetRanges[letter - 'a']).  Both ends are inclusive;
 * begin > end means no tag starts with that letter.
 */
typedef struct {
    int begin;  /* first index of the span */
    int end;    /* last index of the span */
} FcLangCharSetRange;
37
38 #include "../fc-lang/fclang.h"
39
/*
 * A set of languages.  Languages present in fcLangCharSets are kept as
 * bits in `map`; any other tag is stored as a string in `extra`, which
 * stays null until the first such tag is added (see FcLangSetAdd).
 */
struct _FcLangSet {
    FcChar32 map[NUM_LANG_SET_MAP];     /* one bit per known language */
    FcStrSet *extra;                    /* tags without a bit, or null */
};
44
/*
 * Bit accessors for FcLangSet.map.  `id` is an index into
 * fcLangCharSets; fcLangCharSetIndices[id] yields the bit number.  The
 * bit number shifted right by five selects the 32-bit word and its low
 * five bits select the bit inside that word.
 *
 * FcLangSetBitSet turns the bit on; FcLangSetBitGet evaluates to 1 when
 * the bit is on and to 0 otherwise.
 */
#define FcLangSetBitSet(ls, id) \
    ((ls)->map[(fcLangCharSetIndices[id]) >> 5] |= \
     ((FcChar32) 1 << ((fcLangCharSetIndices[id]) & 0x1f)))
#define FcLangSetBitGet(ls, id) \
    (((ls)->map[(fcLangCharSetIndices[id]) >> 5] >> \
      ((fcLangCharSetIndices[id]) & 0x1f)) & 1)
47
48 FcLangSet *
49 FcFreeTypeLangSet (const FcCharSet *charset,
50 const FcChar8 *exclusiveLang)
51 {
52 int i, j;
53 FcChar32 missing;
54 const FcCharSet *exclusiveCharset = 0;
55 FcLangSet *ls;
56
57 if (exclusiveLang)
58 exclusiveCharset = FcLangGetCharSet (exclusiveLang);
59 ls = FcLangSetCreate ();
60 if (!ls)
61 return 0;
62 if (FcDebug() & FC_DBG_LANGSET)
63 {
64 printf ("font charset\n");
65 FcCharSetPrint (charset);
66 printf ("\n");
67 }
68 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
69 {
70 if (FcDebug() & FC_DBG_LANGSET)
71 {
72 printf ("%s charset\n", fcLangCharSets[i].lang);
73 FcCharSetPrint (&fcLangCharSets[i].charset);
74 printf ("\n");
75 }
76
77 /*
78 * Check for Han charsets to make fonts
79 * which advertise support for a single language
80 * not support other Han languages
81 */
82 if (exclusiveCharset &&
83 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
84 {
85 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
86 continue;
87
88 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
89 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
90 FcCharSetLeaf(exclusiveCharset, j))
91 continue;
92 }
93 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
94 if (FcDebug() & FC_DBG_SCANV)
95 {
96 if (missing && missing < 10)
97 {
98 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
99 charset);
100 FcChar32 ucs4;
101 FcChar32 map[FC_CHARSET_MAP_SIZE];
102 FcChar32 next;
103
104 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
105 printf ("{");
106 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
107 ucs4 != FC_CHARSET_DONE;
108 ucs4 = FcCharSetNextPage (missed, map, &next))
109 {
110 int i, j;
111 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
112 if (map[i])
113 {
114 for (j = 0; j < 32; j++)
115 if (map[i] & (1 << j))
116 printf (" %04x", ucs4 + i * 32 + j);
117 }
118 }
119 printf (" }\n\t");
120 FcCharSetDestroy (missed);
121 }
122 else
123 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
124 }
125 if (!missing)
126 FcLangSetBitSet (ls, i);
127 }
128
129 if (FcDebug() & FC_DBG_SCANV)
130 printf ("\n");
131
132
133 return ls;
134 }
135
/* True when c ends the language part of a tag: a '-' separator or the terminating NUL. */
#define FcLangEnd(c) ((c) == '-' || (c) == '\0')
137
138 FcLangResult
139 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
140 {
141 FcChar8 c1, c2;
142 FcLangResult result = FcLangDifferentLang;
143
144 for (;;)
145 {
146 c1 = *s1++;
147 c2 = *s2++;
148
149 c1 = FcToLower (c1);
150 c2 = FcToLower (c2);
151 if (c1 != c2)
152 {
153 if (FcLangEnd (c1) && FcLangEnd (c2))
154 result = FcLangDifferentTerritory;
155 return result;
156 }
157 else if (!c1)
158 return FcLangEqual;
159 else if (c1 == '-')
160 result = FcLangDifferentTerritory;
161 }
162 }
163
164 /*
165 * Return FcTrue when super contains sub.
166 *
167 * super contains sub if super and sub have the same
168 * language and either the same country or one
169 * is missing the country
170 */
171
172 static FcBool
173 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
174 {
175 FcChar8 c1, c2;
176
177 for (;;)
178 {
179 c1 = *super++;
180 c2 = *sub++;
181
182 c1 = FcToLower (c1);
183 c2 = FcToLower (c2);
184 if (c1 != c2)
185 {
186 /* see if super has a country while sub is mising one */
187 if (c1 == '-' && c2 == '\0')
188 return FcTrue;
189 /* see if sub has a country while super is mising one */
190 if (c1 == '\0' && c2 == '-')
191 return FcTrue;
192 return FcFalse;
193 }
194 else if (!c1)
195 return FcTrue;
196 }
197 }
198
199 const FcCharSet *
200 FcLangGetCharSet (const FcChar8 *lang)
201 {
202 int i;
203 int country = -1;
204
205 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
206 {
207 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
208 case FcLangEqual:
209 return &fcLangCharSets[i].charset;
210 case FcLangDifferentTerritory:
211 if (country == -1)
212 country = i;
213 default:
214 break;
215 }
216 }
217 if (country == -1)
218 return 0;
219 return &fcLangCharSets[country].charset;
220 }
221
222 FcStrSet *
223 FcGetLangs (void)
224 {
225 FcStrSet *langs;
226 int i;
227
228 langs = FcStrSetCreate();
229 if (!langs)
230 return 0;
231
232 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
233 FcStrSetAdd (langs, fcLangCharSets[i].lang);
234
235 return langs;
236 }
237
238 FcLangSet *
239 FcLangSetCreate (void)
240 {
241 FcLangSet *ls;
242
243 ls = malloc (sizeof (FcLangSet));
244 if (!ls)
245 return 0;
246 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
247 memset (ls->map, '\0', sizeof (ls->map));
248 ls->extra = 0;
249 return ls;
250 }
251
252 void
253 FcLangSetDestroy (FcLangSet *ls)
254 {
255 if (ls->extra)
256 FcStrSetDestroy (ls->extra);
257 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
258 free (ls);
259 }
260
261 FcLangSet *
262 FcLangSetCopy (const FcLangSet *ls)
263 {
264 FcLangSet *new;
265
266 new = FcLangSetCreate ();
267 if (!new)
268 goto bail0;
269 memcpy (new->map, ls->map, sizeof (new->map));
270 if (ls->extra)
271 {
272 FcStrList *list;
273 FcChar8 *extra;
274
275 new->extra = FcStrSetCreate ();
276 if (!new->extra)
277 goto bail1;
278
279 list = FcStrListCreate (ls->extra);
280 if (!list)
281 goto bail1;
282
283 while ((extra = FcStrListNext (list)))
284 if (!FcStrSetAdd (new->extra, extra))
285 {
286 FcStrListDone (list);
287 goto bail1;
288 }
289 FcStrListDone (list);
290 }
291 return new;
292 bail1:
293 FcLangSetDestroy (new);
294 bail0:
295 return 0;
296 }
297
298 static int
299 FcLangSetIndex (const FcChar8 *lang)
300 {
301 int low, high, mid = 0;
302 int cmp = 0;
303 FcChar8 firstChar = FcToLower(lang[0]);
304 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
305
306 if (firstChar < 'a')
307 {
308 low = 0;
309 high = fcLangCharSetRanges[0].begin;
310 }
311 else if(firstChar > 'z')
312 {
313 low = fcLangCharSetRanges[25].begin;
314 high = NUM_LANG_CHAR_SET - 1;
315 }
316 else
317 {
318 low = fcLangCharSetRanges[firstChar - 'a'].begin;
319 high = fcLangCharSetRanges[firstChar - 'a'].end;
320 /* no matches */
321 if (low > high)
322 return -low; /* next entry after where it would be */
323 }
324
325 while (low <= high)
326 {
327 mid = (high + low) >> 1;
328 if(fcLangCharSets[mid].lang[0] != firstChar)
329 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
330 else
331 { /* fast path for resolving 2-letter languages (by far the most common) after
332 * finding the first char (probably already true because of the hash table) */
333 cmp = fcLangCharSets[mid].lang[1] - secondChar;
334 if (cmp == 0 &&
335 (fcLangCharSets[mid].lang[2] != '\0' ||
336 lang[2] != '\0'))
337 {
338 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
339 lang+2);
340 }
341 }
342 if (cmp == 0)
343 return mid;
344 if (cmp < 0)
345 low = mid + 1;
346 else
347 high = mid - 1;
348 }
349 if (cmp < 0)
350 mid++;
351 return -(mid + 1);
352 }
353
354 FcBool
355 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
356 {
357 int id;
358
359 id = FcLangSetIndex (lang);
360 if (id >= 0)
361 {
362 FcLangSetBitSet (ls, id);
363 return FcTrue;
364 }
365 if (!ls->extra)
366 {
367 ls->extra = FcStrSetCreate ();
368 if (!ls->extra)
369 return FcFalse;
370 }
371 return FcStrSetAdd (ls->extra, lang);
372 }
373
374 FcLangResult
375 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
376 {
377 int id;
378 FcLangResult best, r;
379 int i;
380
381 id = FcLangSetIndex (lang);
382 if (id < 0)
383 id = -id - 1;
384 else if (FcLangSetBitGet (ls, id))
385 return FcLangEqual;
386 best = FcLangDifferentLang;
387 for (i = id - 1; i >= 0; i--)
388 {
389 r = FcLangCompare (lang, fcLangCharSets[i].lang);
390 if (r == FcLangDifferentLang)
391 break;
392 if (FcLangSetBitGet (ls, i) && r < best)
393 best = r;
394 }
395 for (i = id; i < NUM_LANG_CHAR_SET; i++)
396 {
397 r = FcLangCompare (lang, fcLangCharSets[i].lang);
398 if (r == FcLangDifferentLang)
399 break;
400 if (FcLangSetBitGet (ls, i) && r < best)
401 best = r;
402 }
403 if (ls->extra)
404 {
405 FcStrList *list = FcStrListCreate (ls->extra);
406 FcChar8 *extra;
407
408 if (list)
409 {
410 while (best > FcLangEqual && (extra = FcStrListNext (list)))
411 {
412 r = FcLangCompare (lang, extra);
413 if (r < best)
414 best = r;
415 }
416 FcStrListDone (list);
417 }
418 }
419 return best;
420 }
421
422 static FcLangResult
423 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
424 {
425 FcStrList *list = FcStrListCreate (set);
426 FcLangResult r, best = FcLangDifferentLang;
427 FcChar8 *extra;
428
429 if (list)
430 {
431 while (best > FcLangEqual && (extra = FcStrListNext (list)))
432 {
433 r = FcLangSetHasLang (ls, extra);
434 if (r < best)
435 best = r;
436 }
437 FcStrListDone (list);
438 }
439 return best;
440 }
441
442 FcLangResult
443 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
444 {
445 int i, j;
446 FcLangResult best, r;
447
448 for (i = 0; i < NUM_LANG_SET_MAP; i++)
449 if (lsa->map[i] & lsb->map[i])
450 return FcLangEqual;
451 best = FcLangDifferentLang;
452 for (j = 0; j < NUM_COUNTRY_SET; j++)
453 for (i = 0; i < NUM_LANG_SET_MAP; i++)
454 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
455 (lsb->map[i] & fcLangCountrySets[j][i]))
456 {
457 best = FcLangDifferentTerritory;
458 break;
459 }
460 if (lsa->extra)
461 {
462 r = FcLangSetCompareStrSet (lsb, lsa->extra);
463 if (r < best)
464 best = r;
465 }
466 if (best > FcLangEqual && lsb->extra)
467 {
468 r = FcLangSetCompareStrSet (lsa, lsb->extra);
469 if (r < best)
470 best = r;
471 }
472 return best;
473 }
474
475 /*
476 * Used in computing values -- mustn't allocate any storage
477 */
478 FcLangSet *
479 FcLangSetPromote (const FcChar8 *lang)
480 {
481 static FcLangSet ls;
482 static FcStrSet strs;
483 static FcChar8 *str;
484 int id;
485
486 memset (ls.map, '\0', sizeof (ls.map));
487 ls.extra = 0;
488 id = FcLangSetIndex (lang);
489 if (id > 0)
490 {
491 FcLangSetBitSet (&ls, id);
492 }
493 else
494 {
495 ls.extra = &strs;
496 strs.num = 1;
497 strs.size = 1;
498 strs.strs = &str;
499 strs.ref = 1;
500 str = (FcChar8 *) lang;
501 }
502 return &ls;
503 }
504
505 FcChar32
506 FcLangSetHash (const FcLangSet *ls)
507 {
508 FcChar32 h = 0;
509 int i;
510
511 for (i = 0; i < NUM_LANG_SET_MAP; i++)
512 h ^= ls->map[i];
513 if (ls->extra)
514 h ^= ls->extra->num;
515 return h;
516 }
517
518 FcLangSet *
519 FcNameParseLangSet (const FcChar8 *string)
520 {
521 FcChar8 lang[32],c;
522 int i;
523 FcLangSet *ls;
524
525 ls = FcLangSetCreate ();
526 if (!ls)
527 goto bail0;
528
529 for(;;)
530 {
531 for(i = 0; i < 31;i++)
532 {
533 c = *string++;
534 if(c == '\0' || c == '|')
535 break; /* end of this code */
536 lang[i] = c;
537 }
538 lang[i] = '\0';
539 if (!FcLangSetAdd (ls, lang))
540 goto bail1;
541 if(c == '\0')
542 break;
543 }
544 return ls;
545 bail1:
546 FcLangSetDestroy (ls);
547 bail0:
548 return 0;
549 }
550
551 FcBool
552 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
553 {
554 int i;
555 FcBool first = FcTrue;
556
557 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
558 if (FcLangSetBitGet (ls, i))
559 {
560 if (!first)
561 if (!FcStrBufChar (buf, '|'))
562 return FcFalse;
563 if (!FcStrBufString (buf, fcLangCharSets[i].lang))
564 return FcFalse;
565 first = FcFalse;
566 }
567
568 if (ls->extra)
569 {
570 FcStrList *list = FcStrListCreate (ls->extra);
571 FcChar8 *extra;
572
573 if (!list)
574 return FcFalse;
575 while ((extra = FcStrListNext (list)))
576 {
577 if (!first)
578 if (!FcStrBufChar (buf, '|'))
579 {
580 FcStrListDone (list);
581 return FcFalse;
582 }
583 if (!FcStrBufString (buf, extra))
584 {
585 FcStrListDone (list);
586 return FcFalse;
587 }
588 first = FcFalse;
589 }
590 FcStrListDone (list);
591 }
592 return FcTrue;
593 }
594
595 FcBool
596 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
597 {
598 int i;
599
600 for (i = 0; i < NUM_LANG_SET_MAP; i++)
601 {
602 if (lsa->map[i] != lsb->map[i])
603 return FcFalse;
604 }
605 if (!lsa->extra && !lsb->extra)
606 return FcTrue;
607 if (lsa->extra && lsb->extra)
608 return FcStrSetEqual (lsa->extra, lsb->extra);
609 return FcFalse;
610 }
611
612 static FcBool
613 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
614 {
615 int id;
616 int i;
617
618 id = FcLangSetIndex (lang);
619 if (id < 0)
620 id = -id - 1;
621 else if (FcLangSetBitGet (ls, id))
622 return FcTrue;
623 /*
624 * search up and down among equal languages for a match
625 */
626 for (i = id - 1; i >= 0; i--)
627 {
628 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
629 break;
630 if (FcLangSetBitGet (ls, i) &&
631 FcLangContains (fcLangCharSets[i].lang, lang))
632 return FcTrue;
633 }
634 for (i = id; i < NUM_LANG_CHAR_SET; i++)
635 {
636 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
637 break;
638 if (FcLangSetBitGet (ls, i) &&
639 FcLangContains (fcLangCharSets[i].lang, lang))
640 return FcTrue;
641 }
642 if (ls->extra)
643 {
644 FcStrList *list = FcStrListCreate (ls->extra);
645 FcChar8 *extra;
646
647 if (list)
648 {
649 while ((extra = FcStrListNext (list)))
650 {
651 if (FcLangContains (extra, lang))
652 break;
653 }
654 FcStrListDone (list);
655 if (extra)
656 return FcTrue;
657 }
658 }
659 return FcFalse;
660 }
661
662 /*
663 * return FcTrue if lsa contains every language in lsb
664 */
665 FcBool
666 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
667 {
668 int i, j;
669 FcChar32 missing;
670
671 if (FcDebug() & FC_DBG_MATCHV)
672 {
673 printf ("FcLangSet "); FcLangSetPrint (lsa);
674 printf (" contains "); FcLangSetPrint (lsb);
675 printf ("\n");
676 }
677 /*
678 * check bitmaps for missing language support
679 */
680 for (i = 0; i < NUM_LANG_SET_MAP; i++)
681 {
682 missing = lsb->map[i] & ~lsa->map[i];
683 if (missing)
684 {
685 for (j = 0; j < 32; j++)
686 if (missing & (1 << j))
687 {
688 if (!FcLangSetContainsLang (lsa,
689 fcLangCharSets[i*32 + j].lang))
690 {
691 if (FcDebug() & FC_DBG_MATCHV)
692 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
693 return FcFalse;
694 }
695 }
696 }
697 }
698 if (lsb->extra)
699 {
700 FcStrList *list = FcStrListCreate (lsb->extra);
701 FcChar8 *extra;
702
703 if (list)
704 {
705 while ((extra = FcStrListNext (list)))
706 {
707 if (!FcLangSetContainsLang (lsa, extra))
708 {
709 if (FcDebug() & FC_DBG_MATCHV)
710 printf ("\tMissing string %s\n", extra);
711 break;
712 }
713 }
714 FcStrListDone (list);
715 if (extra)
716 return FcFalse;
717 }
718 }
719 return FcTrue;
720 }
721
722 FcBool
723 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
724 {
725 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
726 return FcFalse;
727 return FcTrue;
728 }
729
730 FcLangSet *
731 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
732 {
733 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
734
735 if (!l_serialize)
736 return NULL;
737 *l_serialize = *l;
738 return l_serialize;
739 }
740
741 FcStrSet *
742 FcLangSetGetLangs (const FcLangSet *ls)
743 {
744 FcStrSet *langs;
745 int i;
746
747 langs = FcStrSetCreate();
748 if (!langs)
749 return 0;
750
751 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
752 if (FcLangSetBitGet (ls, i))
753 FcStrSetAdd (langs, fcLangCharSets[i].lang);
754
755 if (ls->extra)
756 {
757 FcStrList *list = FcStrListCreate (ls->extra);
758 FcChar8 *extra;
759
760 if (list)
761 {
762 while ((extra = FcStrListNext (list)))
763 FcStrSetAdd (langs, extra);
764
765 FcStrListDone (list);
766 }
767 }
768
769 return langs;
770 }
771
772 #define __fclang__
773 #include "fcaliastail.h"
774 #include "fcftaliastail.h"
775 #undef __fclang__