]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
[arch] Try to ensure proper FcLangSet alignment in arch
[fontconfig.git] / src / fclang.c
1 /*
2 * fontconfig/src/fclang.c
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26 #include "fcftint.h"
27
28 typedef struct {
29 const FcChar8 lang[8];
30 const FcCharSet charset;
31 } FcLangCharSet;
32
/*
 * Inclusive span [begin, end] of language-table positions.
 * FcLangSetIndex looks one of these up per first letter ('a'..'z')
 * and treats begin > end as "no entries".
 */
typedef struct {
    int begin;  /* first table position in the span */
    int end;    /* last table position in the span */
} FcLangCharSetRange;
37
38 #include "../fc-lang/fclang.h"
39
40 struct _FcLangSet {
41 FcStrSet *extra;
42 FcChar32 map_size;
43 FcChar32 map[NUM_LANG_SET_MAP];
44 };
45
46 static void
47 FcLangSetBitSet (FcLangSet *ls,
48 unsigned int id)
49 {
50 int bucket;
51
52 id = fcLangCharSetIndices[id];
53 bucket = id >> 5;
54 if (bucket >= ls->map_size)
55 return; /* shouldn't happen really */
56
57 ls->map[bucket] |= ((FcChar32) 1 << (id & 0x1f));
58 }
59
60 static FcBool
61 FcLangSetBitGet (const FcLangSet *ls,
62 unsigned int id)
63 {
64 int bucket;
65
66 id = fcLangCharSetIndices[id];
67 bucket = id >> 5;
68 if (bucket >= ls->map_size)
69 return FcFalse;
70
71 return ((ls->map[bucket] >> (id & 0x1f)) & 1) ? FcTrue : FcFalse;
72 }
73
74 FcLangSet *
75 FcFreeTypeLangSet (const FcCharSet *charset,
76 const FcChar8 *exclusiveLang)
77 {
78 int i, j;
79 FcChar32 missing;
80 const FcCharSet *exclusiveCharset = 0;
81 FcLangSet *ls;
82
83 if (exclusiveLang)
84 exclusiveCharset = FcLangGetCharSet (exclusiveLang);
85 ls = FcLangSetCreate ();
86 if (!ls)
87 return 0;
88 if (FcDebug() & FC_DBG_LANGSET)
89 {
90 printf ("font charset");
91 FcCharSetPrint (charset);
92 printf ("\n");
93 }
94 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
95 {
96 if (FcDebug() & FC_DBG_LANGSET)
97 {
98 printf ("%s charset", fcLangCharSets[i].lang);
99 FcCharSetPrint (&fcLangCharSets[i].charset);
100 printf ("\n");
101 }
102
103 /*
104 * Check for Han charsets to make fonts
105 * which advertise support for a single language
106 * not support other Han languages
107 */
108 if (exclusiveCharset &&
109 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
110 {
111 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
112 continue;
113
114 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
115 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
116 FcCharSetLeaf(exclusiveCharset, j))
117 continue;
118 }
119 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
120 if (FcDebug() & FC_DBG_SCANV)
121 {
122 if (missing && missing < 10)
123 {
124 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
125 charset);
126 FcChar32 ucs4;
127 FcChar32 map[FC_CHARSET_MAP_SIZE];
128 FcChar32 next;
129
130 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
131 printf ("{");
132 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
133 ucs4 != FC_CHARSET_DONE;
134 ucs4 = FcCharSetNextPage (missed, map, &next))
135 {
136 int i, j;
137 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
138 if (map[i])
139 {
140 for (j = 0; j < 32; j++)
141 if (map[i] & (1 << j))
142 printf (" %04x", ucs4 + i * 32 + j);
143 }
144 }
145 printf (" }\n\t");
146 FcCharSetDestroy (missed);
147 }
148 else
149 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
150 }
151 if (!missing)
152 FcLangSetBitSet (ls, i);
153 }
154
155 if (FcDebug() & FC_DBG_SCANV)
156 printf ("\n");
157
158
159 return ls;
160 }
161
162 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
163
164 FcLangResult
165 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
166 {
167 FcChar8 c1, c2;
168 FcLangResult result = FcLangDifferentLang;
169
170 for (;;)
171 {
172 c1 = *s1++;
173 c2 = *s2++;
174
175 c1 = FcToLower (c1);
176 c2 = FcToLower (c2);
177 if (c1 != c2)
178 {
179 if (FcLangEnd (c1) && FcLangEnd (c2))
180 result = FcLangDifferentTerritory;
181 return result;
182 }
183 else if (!c1)
184 return FcLangEqual;
185 else if (c1 == '-')
186 result = FcLangDifferentTerritory;
187 }
188 }
189
190 /*
191 * Return FcTrue when super contains sub.
192 *
193 * super contains sub if super and sub have the same
194 * language and either the same country or one
195 * is missing the country
196 */
197
198 static FcBool
199 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
200 {
201 FcChar8 c1, c2;
202
203 for (;;)
204 {
205 c1 = *super++;
206 c2 = *sub++;
207
208 c1 = FcToLower (c1);
209 c2 = FcToLower (c2);
210 if (c1 != c2)
211 {
212 /* see if super has a country while sub is mising one */
213 if (c1 == '-' && c2 == '\0')
214 return FcTrue;
215 /* see if sub has a country while super is mising one */
216 if (c1 == '\0' && c2 == '-')
217 return FcTrue;
218 return FcFalse;
219 }
220 else if (!c1)
221 return FcTrue;
222 }
223 }
224
225 const FcCharSet *
226 FcLangGetCharSet (const FcChar8 *lang)
227 {
228 int i;
229 int country = -1;
230
231 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
232 {
233 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
234 case FcLangEqual:
235 return &fcLangCharSets[i].charset;
236 case FcLangDifferentTerritory:
237 if (country == -1)
238 country = i;
239 case FcLangDifferentLang:
240 default:
241 break;
242 }
243 }
244 if (country == -1)
245 return 0;
246 return &fcLangCharSets[country].charset;
247 }
248
249 FcStrSet *
250 FcGetLangs (void)
251 {
252 FcStrSet *langs;
253 int i;
254
255 langs = FcStrSetCreate();
256 if (!langs)
257 return 0;
258
259 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
260 FcStrSetAdd (langs, fcLangCharSets[i].lang);
261
262 return langs;
263 }
264
265 FcLangSet *
266 FcLangSetCreate (void)
267 {
268 FcLangSet *ls;
269
270 ls = malloc (sizeof (FcLangSet));
271 if (!ls)
272 return 0;
273 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
274 memset (ls->map, '\0', sizeof (ls->map));
275 ls->map_size = NUM_LANG_SET_MAP;
276 ls->extra = 0;
277 return ls;
278 }
279
280 void
281 FcLangSetDestroy (FcLangSet *ls)
282 {
283 if (ls->extra)
284 FcStrSetDestroy (ls->extra);
285 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
286 free (ls);
287 }
288
289 FcLangSet *
290 FcLangSetCopy (const FcLangSet *ls)
291 {
292 FcLangSet *new;
293
294 new = FcLangSetCreate ();
295 if (!new)
296 goto bail0;
297 memset (new->map, '\0', sizeof (new->map));
298 memcpy (new->map, ls->map, FC_MIN (sizeof (new->map), ls->map_size * sizeof (ls->map[0])));
299 if (ls->extra)
300 {
301 FcStrList *list;
302 FcChar8 *extra;
303
304 new->extra = FcStrSetCreate ();
305 if (!new->extra)
306 goto bail1;
307
308 list = FcStrListCreate (ls->extra);
309 if (!list)
310 goto bail1;
311
312 while ((extra = FcStrListNext (list)))
313 if (!FcStrSetAdd (new->extra, extra))
314 {
315 FcStrListDone (list);
316 goto bail1;
317 }
318 FcStrListDone (list);
319 }
320 return new;
321 bail1:
322 FcLangSetDestroy (new);
323 bail0:
324 return 0;
325 }
326
327 static int
328 FcLangSetIndex (const FcChar8 *lang)
329 {
330 int low, high, mid = 0;
331 int cmp = 0;
332 FcChar8 firstChar = FcToLower(lang[0]);
333 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
334
335 if (firstChar < 'a')
336 {
337 low = 0;
338 high = fcLangCharSetRanges[0].begin;
339 }
340 else if(firstChar > 'z')
341 {
342 low = fcLangCharSetRanges[25].begin;
343 high = NUM_LANG_CHAR_SET - 1;
344 }
345 else
346 {
347 low = fcLangCharSetRanges[firstChar - 'a'].begin;
348 high = fcLangCharSetRanges[firstChar - 'a'].end;
349 /* no matches */
350 if (low > high)
351 return -low; /* next entry after where it would be */
352 }
353
354 while (low <= high)
355 {
356 mid = (high + low) >> 1;
357 if(fcLangCharSets[mid].lang[0] != firstChar)
358 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
359 else
360 { /* fast path for resolving 2-letter languages (by far the most common) after
361 * finding the first char (probably already true because of the hash table) */
362 cmp = fcLangCharSets[mid].lang[1] - secondChar;
363 if (cmp == 0 &&
364 (fcLangCharSets[mid].lang[2] != '\0' ||
365 lang[2] != '\0'))
366 {
367 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
368 lang+2);
369 }
370 }
371 if (cmp == 0)
372 return mid;
373 if (cmp < 0)
374 low = mid + 1;
375 else
376 high = mid - 1;
377 }
378 if (cmp < 0)
379 mid++;
380 return -(mid + 1);
381 }
382
383 FcBool
384 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
385 {
386 int id;
387
388 id = FcLangSetIndex (lang);
389 if (id >= 0)
390 {
391 FcLangSetBitSet (ls, id);
392 return FcTrue;
393 }
394 if (!ls->extra)
395 {
396 ls->extra = FcStrSetCreate ();
397 if (!ls->extra)
398 return FcFalse;
399 }
400 return FcStrSetAdd (ls->extra, lang);
401 }
402
403 FcLangResult
404 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
405 {
406 int id;
407 FcLangResult best, r;
408 int i;
409
410 id = FcLangSetIndex (lang);
411 if (id < 0)
412 id = -id - 1;
413 else if (FcLangSetBitGet (ls, id))
414 return FcLangEqual;
415 best = FcLangDifferentLang;
416 for (i = id - 1; i >= 0; i--)
417 {
418 r = FcLangCompare (lang, fcLangCharSets[i].lang);
419 if (r == FcLangDifferentLang)
420 break;
421 if (FcLangSetBitGet (ls, i) && r < best)
422 best = r;
423 }
424 for (i = id; i < NUM_LANG_CHAR_SET; i++)
425 {
426 r = FcLangCompare (lang, fcLangCharSets[i].lang);
427 if (r == FcLangDifferentLang)
428 break;
429 if (FcLangSetBitGet (ls, i) && r < best)
430 best = r;
431 }
432 if (ls->extra)
433 {
434 FcStrList *list = FcStrListCreate (ls->extra);
435 FcChar8 *extra;
436
437 if (list)
438 {
439 while (best > FcLangEqual && (extra = FcStrListNext (list)))
440 {
441 r = FcLangCompare (lang, extra);
442 if (r < best)
443 best = r;
444 }
445 FcStrListDone (list);
446 }
447 }
448 return best;
449 }
450
451 static FcLangResult
452 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
453 {
454 FcStrList *list = FcStrListCreate (set);
455 FcLangResult r, best = FcLangDifferentLang;
456 FcChar8 *extra;
457
458 if (list)
459 {
460 while (best > FcLangEqual && (extra = FcStrListNext (list)))
461 {
462 r = FcLangSetHasLang (ls, extra);
463 if (r < best)
464 best = r;
465 }
466 FcStrListDone (list);
467 }
468 return best;
469 }
470
471 FcLangResult
472 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
473 {
474 int i, j, count;
475 FcLangResult best, r;
476
477 count = FC_MIN (lsa->map_size, lsb->map_size);
478 count = FC_MIN (NUM_LANG_SET_MAP, count);
479 for (i = 0; i < count; i++)
480 if (lsa->map[i] & lsb->map[i])
481 return FcLangEqual;
482 best = FcLangDifferentLang;
483 for (j = 0; j < NUM_COUNTRY_SET; j++)
484 for (i = 0; i < count; i++)
485 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
486 (lsb->map[i] & fcLangCountrySets[j][i]))
487 {
488 best = FcLangDifferentTerritory;
489 break;
490 }
491 if (lsa->extra)
492 {
493 r = FcLangSetCompareStrSet (lsb, lsa->extra);
494 if (r < best)
495 best = r;
496 }
497 if (best > FcLangEqual && lsb->extra)
498 {
499 r = FcLangSetCompareStrSet (lsa, lsb->extra);
500 if (r < best)
501 best = r;
502 }
503 return best;
504 }
505
506 /*
507 * Used in computing values -- mustn't allocate any storage
508 */
509 FcLangSet *
510 FcLangSetPromote (const FcChar8 *lang)
511 {
512 static FcLangSet ls;
513 static FcStrSet strs;
514 static FcChar8 *str;
515 int id;
516
517 memset (ls.map, '\0', sizeof (ls.map));
518 ls.extra = 0;
519 id = FcLangSetIndex (lang);
520 if (id > 0)
521 {
522 FcLangSetBitSet (&ls, id);
523 }
524 else
525 {
526 ls.extra = &strs;
527 strs.num = 1;
528 strs.size = 1;
529 strs.strs = &str;
530 strs.ref = 1;
531 str = (FcChar8 *) lang;
532 }
533 return &ls;
534 }
535
536 FcChar32
537 FcLangSetHash (const FcLangSet *ls)
538 {
539 FcChar32 h = 0;
540 int i;
541
542 for (i = 0; i < ls->map_size; i++)
543 h ^= ls->map[i];
544 if (ls->extra)
545 h ^= ls->extra->num;
546 return h;
547 }
548
549 FcLangSet *
550 FcNameParseLangSet (const FcChar8 *string)
551 {
552 FcChar8 lang[32], c = 0;
553 int i;
554 FcLangSet *ls;
555
556 ls = FcLangSetCreate ();
557 if (!ls)
558 goto bail0;
559
560 for(;;)
561 {
562 for(i = 0; i < 31;i++)
563 {
564 c = *string++;
565 if(c == '\0' || c == '|')
566 break; /* end of this code */
567 lang[i] = c;
568 }
569 lang[i] = '\0';
570 if (!FcLangSetAdd (ls, lang))
571 goto bail1;
572 if(c == '\0')
573 break;
574 }
575 return ls;
576 bail1:
577 FcLangSetDestroy (ls);
578 bail0:
579 return 0;
580 }
581
582 FcBool
583 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
584 {
585 int i, bit, count;
586 FcChar32 bits;
587 FcBool first = FcTrue;
588
589 count = FC_MIN (ls->map_size, NUM_LANG_SET_MAP);
590 for (i = 0; i < count; i++)
591 {
592 if ((bits = ls->map[i]))
593 {
594 for (bit = 0; bit <= 31; bit++)
595 if (bits & (1 << bit))
596 {
597 int id = (i << 5) | bit;
598 if (!first)
599 if (!FcStrBufChar (buf, '|'))
600 return FcFalse;
601 if (!FcStrBufString (buf, fcLangCharSets[fcLangCharSetIndicesInv[id]].lang))
602 return FcFalse;
603 first = FcFalse;
604 }
605 }
606 }
607 if (ls->extra)
608 {
609 FcStrList *list = FcStrListCreate (ls->extra);
610 FcChar8 *extra;
611
612 if (!list)
613 return FcFalse;
614 while ((extra = FcStrListNext (list)))
615 {
616 if (!first)
617 if (!FcStrBufChar (buf, '|'))
618 {
619 FcStrListDone (list);
620 return FcFalse;
621 }
622 if (!FcStrBufString (buf, extra))
623 {
624 FcStrListDone (list);
625 return FcFalse;
626 }
627 first = FcFalse;
628 }
629 FcStrListDone (list);
630 }
631 return FcTrue;
632 }
633
634 FcBool
635 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
636 {
637 int i, count;
638
639 count = FC_MIN (lsa->map_size, lsb->map_size);
640 count = FC_MIN (NUM_LANG_SET_MAP, count);
641 for (i = 0; i < count; i++)
642 {
643 if (lsa->map[i] != lsb->map[i])
644 return FcFalse;
645 }
646 if (!lsa->extra && !lsb->extra)
647 return FcTrue;
648 if (lsa->extra && lsb->extra)
649 return FcStrSetEqual (lsa->extra, lsb->extra);
650 return FcFalse;
651 }
652
653 static FcBool
654 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
655 {
656 int id;
657 int i;
658
659 id = FcLangSetIndex (lang);
660 if (id < 0)
661 id = -id - 1;
662 else if (FcLangSetBitGet (ls, id))
663 return FcTrue;
664 /*
665 * search up and down among equal languages for a match
666 */
667 for (i = id - 1; i >= 0; i--)
668 {
669 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
670 break;
671 if (FcLangSetBitGet (ls, i) &&
672 FcLangContains (fcLangCharSets[i].lang, lang))
673 return FcTrue;
674 }
675 for (i = id; i < NUM_LANG_CHAR_SET; i++)
676 {
677 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
678 break;
679 if (FcLangSetBitGet (ls, i) &&
680 FcLangContains (fcLangCharSets[i].lang, lang))
681 return FcTrue;
682 }
683 if (ls->extra)
684 {
685 FcStrList *list = FcStrListCreate (ls->extra);
686 FcChar8 *extra;
687
688 if (list)
689 {
690 while ((extra = FcStrListNext (list)))
691 {
692 if (FcLangContains (extra, lang))
693 break;
694 }
695 FcStrListDone (list);
696 if (extra)
697 return FcTrue;
698 }
699 }
700 return FcFalse;
701 }
702
703 /*
704 * return FcTrue if lsa contains every language in lsb
705 */
706 FcBool
707 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
708 {
709 int i, j, count;
710 FcChar32 missing;
711
712 if (FcDebug() & FC_DBG_MATCHV)
713 {
714 printf ("FcLangSet "); FcLangSetPrint (lsa);
715 printf (" contains "); FcLangSetPrint (lsb);
716 printf ("\n");
717 }
718 /*
719 * check bitmaps for missing language support
720 */
721 count = FC_MIN (lsa->map_size, lsb->map_size);
722 count = FC_MIN (NUM_LANG_SET_MAP, count);
723 for (i = 0; i < count; i++)
724 {
725 missing = lsb->map[i] & ~lsa->map[i];
726 if (missing)
727 {
728 for (j = 0; j < 32; j++)
729 if (missing & (1 << j))
730 {
731 if (!FcLangSetContainsLang (lsa,
732 fcLangCharSets[fcLangCharSetIndicesInv[i*32 + j]].lang))
733 {
734 if (FcDebug() & FC_DBG_MATCHV)
735 printf ("\tMissing bitmap %s\n", fcLangCharSets[fcLangCharSetIndicesInv[i*32+j]].lang);
736 return FcFalse;
737 }
738 }
739 }
740 }
741 if (lsb->extra)
742 {
743 FcStrList *list = FcStrListCreate (lsb->extra);
744 FcChar8 *extra;
745
746 if (list)
747 {
748 while ((extra = FcStrListNext (list)))
749 {
750 if (!FcLangSetContainsLang (lsa, extra))
751 {
752 if (FcDebug() & FC_DBG_MATCHV)
753 printf ("\tMissing string %s\n", extra);
754 break;
755 }
756 }
757 FcStrListDone (list);
758 if (extra)
759 return FcFalse;
760 }
761 }
762 return FcTrue;
763 }
764
765 FcBool
766 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
767 {
768 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
769 return FcFalse;
770 return FcTrue;
771 }
772
773 FcLangSet *
774 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
775 {
776 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
777
778 if (!l_serialize)
779 return NULL;
780 *l_serialize = *l;
781 l_serialize->extra = NULL; /* We don't serialize ls->extra */
782 return l_serialize;
783 }
784
785 FcStrSet *
786 FcLangSetGetLangs (const FcLangSet *ls)
787 {
788 FcStrSet *langs;
789 int i;
790
791 langs = FcStrSetCreate();
792 if (!langs)
793 return 0;
794
795 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
796 if (FcLangSetBitGet (ls, i))
797 FcStrSetAdd (langs, fcLangCharSets[i].lang);
798
799 if (ls->extra)
800 {
801 FcStrList *list = FcStrListCreate (ls->extra);
802 FcChar8 *extra;
803
804 if (list)
805 {
806 while ((extra = FcStrListNext (list)))
807 FcStrSetAdd (langs, extra);
808
809 FcStrListDone (list);
810 }
811 }
812
813 return langs;
814 }
815
816 #define __fclang__
817 #include "fcaliastail.h"
818 #include "fcftaliastail.h"
819 #undef __fclang__