git.wh0rd.org - fontconfig.git/blob - src/fclang.c
Use FcLangDifferentTerritory instead of FcLangDifferentCountry.
[fontconfig.git] / src / fclang.c
1 /*
2 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26
27 typedef struct {
28 const FcChar8 lang[8];
29 const FcCharSet charset;
30 } FcLangCharSet;
31
/*
 * Inclusive index range into fcLangCharSets.  FcLangSetIndex reads one
 * such range per first letter 'a'..'z' to bound its binary search.
 */
typedef struct {
    int begin;  /* first table index of the range */
    int end;    /* last table index of the range (inclusive) */
} FcLangCharSetRange;
36
37 #include "../fc-lang/fclang.h"
38
39 struct _FcLangSet {
40 FcChar32 map[NUM_LANG_SET_MAP];
41 FcStrSet *extra;
42 };
43
/*
 * Bitmap accessors: language index 'id' lives in word (id >> 5),
 * bit (id & 0x1f) of ls->map.
 */
/* Mark language 'id' as present in 'ls'. */
#define FcLangSetBitSet(ls, id) \
    ((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
/* Evaluate to 1 when language 'id' is present in 'ls', otherwise 0. */
#define FcLangSetBitGet(ls, id) \
    (((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)
46
47 FcLangSet *
48 FcFreeTypeLangSet (const FcCharSet *charset,
49 const FcChar8 *exclusiveLang)
50 {
51 int i, j;
52 FcChar32 missing;
53 const FcCharSet *exclusiveCharset = 0;
54 FcLangSet *ls;
55
56 if (exclusiveLang)
57 exclusiveCharset = FcCharSetForLang (exclusiveLang);
58 ls = FcLangSetCreate ();
59 if (!ls)
60 return 0;
61 if (FcDebug() & FC_DBG_LANGSET)
62 {
63 printf ("font charset\n");
64 FcCharSetPrint (charset);
65 printf ("\n");
66 }
67 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
68 {
69 if (FcDebug() & FC_DBG_LANGSET)
70 {
71 printf ("%s charset\n", fcLangCharSets[i].lang);
72 FcCharSetPrint (&fcLangCharSets[i].charset);
73 printf ("\n");
74 }
75
76 /*
77 * Check for Han charsets to make fonts
78 * which advertise support for a single language
79 * not support other Han languages
80 */
81 if (exclusiveCharset &&
82 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
83 {
84 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
85 continue;
86
87 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
88 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
89 FcCharSetLeaf(exclusiveCharset, j))
90 continue;
91 }
92 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
93 if (FcDebug() & FC_DBG_SCANV)
94 {
95 if (missing && missing < 10)
96 {
97 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
98 charset);
99 FcChar32 ucs4;
100 FcChar32 map[FC_CHARSET_MAP_SIZE];
101 FcChar32 next;
102
103 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
104 printf ("{");
105 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
106 ucs4 != FC_CHARSET_DONE;
107 ucs4 = FcCharSetNextPage (missed, map, &next))
108 {
109 int i, j;
110 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
111 if (map[i])
112 {
113 for (j = 0; j < 32; j++)
114 if (map[i] & (1 << j))
115 printf (" %04x", ucs4 + i * 32 + j);
116 }
117 }
118 printf (" }\n\t");
119 FcCharSetDestroy (missed);
120 }
121 else
122 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
123 }
124 if (!missing)
125 FcLangSetBitSet (ls, i);
126 }
127
128 if (FcDebug() & FC_DBG_SCANV)
129 printf ("\n");
130
131
132 return ls;
133 }
134
135 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
136
137 FcLangResult
138 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
139 {
140 FcChar8 c1, c2;
141 FcLangResult result = FcLangDifferentLang;
142
143 for (;;)
144 {
145 c1 = *s1++;
146 c2 = *s2++;
147
148 c1 = FcToLower (c1);
149 c2 = FcToLower (c2);
150 if (c1 != c2)
151 {
152 if (FcLangEnd (c1) && FcLangEnd (c2))
153 result = FcLangDifferentTerritory;
154 return result;
155 }
156 else if (!c1)
157 return FcLangEqual;
158 else if (c1 == '-')
159 result = FcLangDifferentTerritory;
160 }
161 }
162
163 /*
164 * Return FcTrue when super contains sub.
165 *
166 * super contains sub if super and sub have the same
167 * language and either the same country or one
168 * is missing the country
169 */
170
171 static FcBool
172 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
173 {
174 FcChar8 c1, c2;
175
176 for (;;)
177 {
178 c1 = *super++;
179 c2 = *sub++;
180
181 c1 = FcToLower (c1);
182 c2 = FcToLower (c2);
183 if (c1 != c2)
184 {
185 /* see if super has a country while sub is mising one */
186 if (c1 == '-' && c2 == '\0')
187 return FcTrue;
188 /* see if sub has a country while super is mising one */
189 if (c1 == '\0' && c2 == '-')
190 return FcTrue;
191 return FcFalse;
192 }
193 else if (!c1)
194 return FcTrue;
195 }
196 }
197
198 const FcCharSet *
199 FcCharSetForLang (const FcChar8 *lang)
200 {
201 int i;
202 int country = -1;
203
204 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
205 {
206 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
207 case FcLangEqual:
208 return &fcLangCharSets[i].charset;
209 case FcLangDifferentTerritory:
210 if (country == -1)
211 country = i;
212 default:
213 break;
214 }
215 }
216 if (country == -1)
217 return 0;
218 return &fcLangCharSets[country].charset;
219 }
220
221 FcLangSet *
222 FcLangSetCreate (void)
223 {
224 FcLangSet *ls;
225
226 ls = malloc (sizeof (FcLangSet));
227 if (!ls)
228 return 0;
229 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
230 memset (ls->map, '\0', sizeof (ls->map));
231 ls->extra = 0;
232 return ls;
233 }
234
235 void
236 FcLangSetDestroy (FcLangSet *ls)
237 {
238 if (ls->extra)
239 FcStrSetDestroy (ls->extra);
240 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
241 free (ls);
242 }
243
244 FcLangSet *
245 FcLangSetCopy (const FcLangSet *ls)
246 {
247 FcLangSet *new;
248
249 new = FcLangSetCreate ();
250 if (!new)
251 goto bail0;
252 memcpy (new->map, ls->map, sizeof (new->map));
253 if (ls->extra)
254 {
255 FcStrList *list;
256 FcChar8 *extra;
257
258 new->extra = FcStrSetCreate ();
259 if (!new->extra)
260 goto bail1;
261
262 list = FcStrListCreate (ls->extra);
263 if (!list)
264 goto bail1;
265
266 while ((extra = FcStrListNext (list)))
267 if (!FcStrSetAdd (new->extra, extra))
268 {
269 FcStrListDone (list);
270 goto bail1;
271 }
272 FcStrListDone (list);
273 }
274 return new;
275 bail1:
276 FcLangSetDestroy (new);
277 bail0:
278 return 0;
279 }
280
281 static int
282 FcLangSetIndex (const FcChar8 *lang)
283 {
284 int low, high, mid = 0;
285 int cmp = 0;
286 FcChar8 firstChar = FcToLower(lang[0]);
287 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
288
289 if (firstChar < 'a')
290 {
291 low = 0;
292 high = fcLangCharSetRanges[0].begin;
293 }
294 else if(firstChar > 'z')
295 {
296 low = fcLangCharSetRanges[25].begin;
297 high = NUM_LANG_CHAR_SET - 1;
298 }
299 else
300 {
301 low = fcLangCharSetRanges[firstChar - 'a'].begin;
302 high = fcLangCharSetRanges[firstChar - 'a'].end;
303 /* no matches */
304 if (low > high)
305 return -low; /* next entry after where it would be */
306 }
307
308 while (low <= high)
309 {
310 mid = (high + low) >> 1;
311 if(fcLangCharSets[mid].lang[0] != firstChar)
312 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
313 else
314 { /* fast path for resolving 2-letter languages (by far the most common) after
315 * finding the first char (probably already true because of the hash table) */
316 cmp = fcLangCharSets[mid].lang[1] - secondChar;
317 if (cmp == 0 &&
318 (fcLangCharSets[mid].lang[2] != '\0' ||
319 lang[2] != '\0'))
320 {
321 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
322 lang+2);
323 }
324 }
325 if (cmp == 0)
326 return mid;
327 if (cmp < 0)
328 low = mid + 1;
329 else
330 high = mid - 1;
331 }
332 if (cmp < 0)
333 mid++;
334 return -(mid + 1);
335 }
336
337 FcBool
338 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
339 {
340 int id;
341
342 id = FcLangSetIndex (lang);
343 if (id >= 0)
344 {
345 FcLangSetBitSet (ls, id);
346 return FcTrue;
347 }
348 if (!ls->extra)
349 {
350 ls->extra = FcStrSetCreate ();
351 if (!ls->extra)
352 return FcFalse;
353 }
354 return FcStrSetAdd (ls->extra, lang);
355 }
356
357 FcLangResult
358 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
359 {
360 int id;
361 FcLangResult best, r;
362 int i;
363
364 id = FcLangSetIndex (lang);
365 if (id < 0)
366 id = -id - 1;
367 else if (FcLangSetBitGet (ls, id))
368 return FcLangEqual;
369 best = FcLangDifferentLang;
370 for (i = id - 1; i >= 0; i--)
371 {
372 r = FcLangCompare (lang, fcLangCharSets[i].lang);
373 if (r == FcLangDifferentLang)
374 break;
375 if (FcLangSetBitGet (ls, i) && r < best)
376 best = r;
377 }
378 for (i = id; i < NUM_LANG_CHAR_SET; i++)
379 {
380 r = FcLangCompare (lang, fcLangCharSets[i].lang);
381 if (r == FcLangDifferentLang)
382 break;
383 if (FcLangSetBitGet (ls, i) && r < best)
384 best = r;
385 }
386 if (ls->extra)
387 {
388 FcStrList *list = FcStrListCreate (ls->extra);
389 FcChar8 *extra;
390
391 if (list)
392 {
393 while (best > FcLangEqual && (extra = FcStrListNext (list)))
394 {
395 r = FcLangCompare (lang, extra);
396 if (r < best)
397 best = r;
398 }
399 FcStrListDone (list);
400 }
401 }
402 return best;
403 }
404
405 static FcLangResult
406 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
407 {
408 FcStrList *list = FcStrListCreate (set);
409 FcLangResult r, best = FcLangDifferentLang;
410 FcChar8 *extra;
411
412 if (list)
413 {
414 while (best > FcLangEqual && (extra = FcStrListNext (list)))
415 {
416 r = FcLangSetHasLang (ls, extra);
417 if (r < best)
418 best = r;
419 }
420 FcStrListDone (list);
421 }
422 return best;
423 }
424
425 FcLangResult
426 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
427 {
428 int i, j;
429 FcLangResult best, r;
430
431 for (i = 0; i < NUM_LANG_SET_MAP; i++)
432 if (lsa->map[i] & lsb->map[i])
433 return FcLangEqual;
434 best = FcLangDifferentLang;
435 for (j = 0; j < NUM_COUNTRY_SET; j++)
436 for (i = 0; i < NUM_LANG_SET_MAP; i++)
437 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
438 (lsb->map[i] & fcLangCountrySets[j][i]))
439 {
440 best = FcLangDifferentTerritory;
441 break;
442 }
443 if (lsa->extra)
444 {
445 r = FcLangSetCompareStrSet (lsb, lsa->extra);
446 if (r < best)
447 best = r;
448 }
449 if (best > FcLangEqual && lsb->extra)
450 {
451 r = FcLangSetCompareStrSet (lsa, lsb->extra);
452 if (r < best)
453 best = r;
454 }
455 return best;
456 }
457
458 /*
459 * Used in computing values -- mustn't allocate any storage
460 */
461 FcLangSet *
462 FcLangSetPromote (const FcChar8 *lang)
463 {
464 static FcLangSet ls;
465 static FcStrSet strs;
466 static FcChar8 *str;
467 int id;
468
469 memset (ls.map, '\0', sizeof (ls.map));
470 ls.extra = 0;
471 id = FcLangSetIndex (lang);
472 if (id > 0)
473 {
474 FcLangSetBitSet (&ls, id);
475 }
476 else
477 {
478 ls.extra = &strs;
479 strs.num = 1;
480 strs.size = 1;
481 strs.strs = &str;
482 strs.ref = 1;
483 str = (FcChar8 *) lang;
484 }
485 return &ls;
486 }
487
488 FcChar32
489 FcLangSetHash (const FcLangSet *ls)
490 {
491 FcChar32 h = 0;
492 int i;
493
494 for (i = 0; i < NUM_LANG_SET_MAP; i++)
495 h ^= ls->map[i];
496 if (ls->extra)
497 h ^= ls->extra->num;
498 return h;
499 }
500
501 FcLangSet *
502 FcNameParseLangSet (const FcChar8 *string)
503 {
504 FcChar8 lang[32],c;
505 int i;
506 FcLangSet *ls;
507
508 ls = FcLangSetCreate ();
509 if (!ls)
510 goto bail0;
511
512 for(;;)
513 {
514 for(i = 0; i < 31;i++)
515 {
516 c = *string++;
517 if(c == '\0' || c == '|')
518 break; /* end of this code */
519 lang[i] = c;
520 }
521 lang[i] = '\0';
522 if (!FcLangSetAdd (ls, lang))
523 goto bail1;
524 if(c == '\0')
525 break;
526 }
527 return ls;
528 bail1:
529 FcLangSetDestroy (ls);
530 bail0:
531 return 0;
532 }
533
534 FcBool
535 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
536 {
537 int i, bit;
538 FcChar32 bits;
539 FcBool first = FcTrue;
540
541 for (i = 0; i < NUM_LANG_SET_MAP; i++)
542 {
543 if ((bits = ls->map[i]))
544 {
545 for (bit = 0; bit <= 31; bit++)
546 if (bits & (1 << bit))
547 {
548 int id = (i << 5) | bit;
549 if (!first)
550 if (!FcStrBufChar (buf, '|'))
551 return FcFalse;
552 if (!FcStrBufString (buf, fcLangCharSets[id].lang))
553 return FcFalse;
554 first = FcFalse;
555 }
556 }
557 }
558 if (ls->extra)
559 {
560 FcStrList *list = FcStrListCreate (ls->extra);
561 FcChar8 *extra;
562
563 if (!list)
564 return FcFalse;
565 while ((extra = FcStrListNext (list)))
566 {
567 if (!first)
568 if (!FcStrBufChar (buf, '|'))
569 {
570 FcStrListDone (list);
571 return FcFalse;
572 }
573 if (!FcStrBufString (buf, extra))
574 {
575 FcStrListDone (list);
576 return FcFalse;
577 }
578 first = FcFalse;
579 }
580 FcStrListDone (list);
581 }
582 return FcTrue;
583 }
584
585 FcBool
586 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
587 {
588 int i;
589
590 for (i = 0; i < NUM_LANG_SET_MAP; i++)
591 {
592 if (lsa->map[i] != lsb->map[i])
593 return FcFalse;
594 }
595 if (!lsa->extra && !lsb->extra)
596 return FcTrue;
597 if (lsa->extra && lsb->extra)
598 return FcStrSetEqual (lsa->extra, lsb->extra);
599 return FcFalse;
600 }
601
602 static FcBool
603 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
604 {
605 int id;
606 int i;
607
608 id = FcLangSetIndex (lang);
609 if (id < 0)
610 id = -id - 1;
611 else if (FcLangSetBitGet (ls, id))
612 return FcTrue;
613 /*
614 * search up and down among equal languages for a match
615 */
616 for (i = id - 1; i >= 0; i--)
617 {
618 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
619 break;
620 if (FcLangSetBitGet (ls, i) &&
621 FcLangContains (fcLangCharSets[i].lang, lang))
622 return FcTrue;
623 }
624 for (i = id; i < NUM_LANG_CHAR_SET; i++)
625 {
626 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
627 break;
628 if (FcLangSetBitGet (ls, i) &&
629 FcLangContains (fcLangCharSets[i].lang, lang))
630 return FcTrue;
631 }
632 if (ls->extra)
633 {
634 FcStrList *list = FcStrListCreate (ls->extra);
635 FcChar8 *extra;
636
637 if (list)
638 {
639 while ((extra = FcStrListNext (list)))
640 {
641 if (FcLangContains (extra, lang))
642 break;
643 }
644 FcStrListDone (list);
645 if (extra)
646 return FcTrue;
647 }
648 }
649 return FcFalse;
650 }
651
652 /*
653 * return FcTrue if lsa contains every language in lsb
654 */
655 FcBool
656 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
657 {
658 int i, j;
659 FcChar32 missing;
660
661 if (FcDebug() & FC_DBG_MATCHV)
662 {
663 printf ("FcLangSet "); FcLangSetPrint (lsa);
664 printf (" contains "); FcLangSetPrint (lsb);
665 printf ("\n");
666 }
667 /*
668 * check bitmaps for missing language support
669 */
670 for (i = 0; i < NUM_LANG_SET_MAP; i++)
671 {
672 missing = lsb->map[i] & ~lsa->map[i];
673 if (missing)
674 {
675 for (j = 0; j < 32; j++)
676 if (missing & (1 << j))
677 {
678 if (!FcLangSetContainsLang (lsa,
679 fcLangCharSets[i*32 + j].lang))
680 {
681 if (FcDebug() & FC_DBG_MATCHV)
682 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
683 return FcFalse;
684 }
685 }
686 }
687 }
688 if (lsb->extra)
689 {
690 FcStrList *list = FcStrListCreate (lsb->extra);
691 FcChar8 *extra;
692
693 if (list)
694 {
695 while ((extra = FcStrListNext (list)))
696 {
697 if (!FcLangSetContainsLang (lsa, extra))
698 {
699 if (FcDebug() & FC_DBG_MATCHV)
700 printf ("\tMissing string %s\n", extra);
701 break;
702 }
703 }
704 FcStrListDone (list);
705 if (extra)
706 return FcFalse;
707 }
708 }
709 return FcTrue;
710 }
711
712 FcBool
713 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
714 {
715 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
716 return FcFalse;
717 return FcTrue;
718 }
719
720 FcLangSet *
721 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
722 {
723 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
724
725 if (!l_serialize)
726 return NULL;
727 *l_serialize = *l;
728 return l_serialize;
729 }
730 #define __fclang__
731 #include "fcaliastail.h"
732 #undef __fclang__