]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
Add FcGetLangs() and FcLangGetCharSet().
[fontconfig.git] / src / fclang.c
1 /*
2 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26
27 typedef struct {
28 const FcChar8 lang[8];
29 const FcCharSet charset;
30 } FcLangCharSet;
31
/*
 * Index range into the language table.  FcLangSetIndex() looks these up
 * by first letter ('a'..'z') and treats begin > end as "no entries".
 */
typedef struct {
    int begin;  /* first table index of the range */
    int end;    /* last table index of the range (used as an inclusive bound) */
} FcLangCharSetRange;
36
37 #include "../fc-lang/fclang.h"
38
39 struct _FcLangSet {
40 FcChar32 map[NUM_LANG_SET_MAP];
41 FcStrSet *extra;
42 };
43
/*
 * Bitmap accessors for FcLangSet.  `id' is a language-table index: the
 * upper bits (id >> 5) select the 32-bit word, the low five bits select
 * the bit within that word.  Both macros evaluate `id' twice.
 */
#define FcLangSetBitSet(ls, id)	((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define FcLangSetBitGet(ls, id)	(((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)
46
47 FcLangSet *
48 FcFreeTypeLangSet (const FcCharSet *charset,
49 const FcChar8 *exclusiveLang)
50 {
51 int i, j;
52 FcChar32 missing;
53 const FcCharSet *exclusiveCharset = 0;
54 FcLangSet *ls;
55
56 if (exclusiveLang)
57 exclusiveCharset = FcLangGetCharSet (exclusiveLang);
58 ls = FcLangSetCreate ();
59 if (!ls)
60 return 0;
61 if (FcDebug() & FC_DBG_LANGSET)
62 {
63 printf ("font charset\n");
64 FcCharSetPrint (charset);
65 printf ("\n");
66 }
67 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
68 {
69 if (FcDebug() & FC_DBG_LANGSET)
70 {
71 printf ("%s charset\n", fcLangCharSets[i].lang);
72 FcCharSetPrint (&fcLangCharSets[i].charset);
73 printf ("\n");
74 }
75
76 /*
77 * Check for Han charsets to make fonts
78 * which advertise support for a single language
79 * not support other Han languages
80 */
81 if (exclusiveCharset &&
82 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
83 {
84 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
85 continue;
86
87 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
88 if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) !=
89 FcCharSetLeaf(exclusiveCharset, j))
90 continue;
91 }
92 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
93 if (FcDebug() & FC_DBG_SCANV)
94 {
95 if (missing && missing < 10)
96 {
97 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
98 charset);
99 FcChar32 ucs4;
100 FcChar32 map[FC_CHARSET_MAP_SIZE];
101 FcChar32 next;
102
103 printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
104 printf ("{");
105 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
106 ucs4 != FC_CHARSET_DONE;
107 ucs4 = FcCharSetNextPage (missed, map, &next))
108 {
109 int i, j;
110 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
111 if (map[i])
112 {
113 for (j = 0; j < 32; j++)
114 if (map[i] & (1 << j))
115 printf (" %04x", ucs4 + i * 32 + j);
116 }
117 }
118 printf (" }\n\t");
119 FcCharSetDestroy (missed);
120 }
121 else
122 printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
123 }
124 if (!missing)
125 FcLangSetBitSet (ls, i);
126 }
127
128 if (FcDebug() & FC_DBG_SCANV)
129 printf ("\n");
130
131
132 return ls;
133 }
134
135 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
136
137 FcLangResult
138 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
139 {
140 FcChar8 c1, c2;
141 FcLangResult result = FcLangDifferentLang;
142
143 for (;;)
144 {
145 c1 = *s1++;
146 c2 = *s2++;
147
148 c1 = FcToLower (c1);
149 c2 = FcToLower (c2);
150 if (c1 != c2)
151 {
152 if (FcLangEnd (c1) && FcLangEnd (c2))
153 result = FcLangDifferentTerritory;
154 return result;
155 }
156 else if (!c1)
157 return FcLangEqual;
158 else if (c1 == '-')
159 result = FcLangDifferentTerritory;
160 }
161 }
162
163 /*
164 * Return FcTrue when super contains sub.
165 *
166 * super contains sub if super and sub have the same
167 * language and either the same country or one
168 * is missing the country
169 */
170
171 static FcBool
172 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
173 {
174 FcChar8 c1, c2;
175
176 for (;;)
177 {
178 c1 = *super++;
179 c2 = *sub++;
180
181 c1 = FcToLower (c1);
182 c2 = FcToLower (c2);
183 if (c1 != c2)
184 {
185 /* see if super has a country while sub is mising one */
186 if (c1 == '-' && c2 == '\0')
187 return FcTrue;
188 /* see if sub has a country while super is mising one */
189 if (c1 == '\0' && c2 == '-')
190 return FcTrue;
191 return FcFalse;
192 }
193 else if (!c1)
194 return FcTrue;
195 }
196 }
197
198 const FcCharSet *
199 FcLangGetCharSet (const FcChar8 *lang)
200 {
201 int i;
202 int country = -1;
203
204 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
205 {
206 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
207 case FcLangEqual:
208 return &fcLangCharSets[i].charset;
209 case FcLangDifferentTerritory:
210 if (country == -1)
211 country = i;
212 default:
213 break;
214 }
215 }
216 if (country == -1)
217 return 0;
218 return &fcLangCharSets[country].charset;
219 }
220
221 FcStrSet *
222 FcGetLangs (void)
223 {
224 FcStrSet *langs;
225 int i;
226
227 langs = FcStrSetCreate();
228 if (!langs)
229 return 0;
230
231 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
232 FcStrSetAdd (langs, fcLangCharSets[i].lang);
233
234 return langs;
235 }
236
237 FcLangSet *
238 FcLangSetCreate (void)
239 {
240 FcLangSet *ls;
241
242 ls = malloc (sizeof (FcLangSet));
243 if (!ls)
244 return 0;
245 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
246 memset (ls->map, '\0', sizeof (ls->map));
247 ls->extra = 0;
248 return ls;
249 }
250
251 void
252 FcLangSetDestroy (FcLangSet *ls)
253 {
254 if (ls->extra)
255 FcStrSetDestroy (ls->extra);
256 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
257 free (ls);
258 }
259
260 FcLangSet *
261 FcLangSetCopy (const FcLangSet *ls)
262 {
263 FcLangSet *new;
264
265 new = FcLangSetCreate ();
266 if (!new)
267 goto bail0;
268 memcpy (new->map, ls->map, sizeof (new->map));
269 if (ls->extra)
270 {
271 FcStrList *list;
272 FcChar8 *extra;
273
274 new->extra = FcStrSetCreate ();
275 if (!new->extra)
276 goto bail1;
277
278 list = FcStrListCreate (ls->extra);
279 if (!list)
280 goto bail1;
281
282 while ((extra = FcStrListNext (list)))
283 if (!FcStrSetAdd (new->extra, extra))
284 {
285 FcStrListDone (list);
286 goto bail1;
287 }
288 FcStrListDone (list);
289 }
290 return new;
291 bail1:
292 FcLangSetDestroy (new);
293 bail0:
294 return 0;
295 }
296
297 static int
298 FcLangSetIndex (const FcChar8 *lang)
299 {
300 int low, high, mid = 0;
301 int cmp = 0;
302 FcChar8 firstChar = FcToLower(lang[0]);
303 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
304
305 if (firstChar < 'a')
306 {
307 low = 0;
308 high = fcLangCharSetRanges[0].begin;
309 }
310 else if(firstChar > 'z')
311 {
312 low = fcLangCharSetRanges[25].begin;
313 high = NUM_LANG_CHAR_SET - 1;
314 }
315 else
316 {
317 low = fcLangCharSetRanges[firstChar - 'a'].begin;
318 high = fcLangCharSetRanges[firstChar - 'a'].end;
319 /* no matches */
320 if (low > high)
321 return -low; /* next entry after where it would be */
322 }
323
324 while (low <= high)
325 {
326 mid = (high + low) >> 1;
327 if(fcLangCharSets[mid].lang[0] != firstChar)
328 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
329 else
330 { /* fast path for resolving 2-letter languages (by far the most common) after
331 * finding the first char (probably already true because of the hash table) */
332 cmp = fcLangCharSets[mid].lang[1] - secondChar;
333 if (cmp == 0 &&
334 (fcLangCharSets[mid].lang[2] != '\0' ||
335 lang[2] != '\0'))
336 {
337 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
338 lang+2);
339 }
340 }
341 if (cmp == 0)
342 return mid;
343 if (cmp < 0)
344 low = mid + 1;
345 else
346 high = mid - 1;
347 }
348 if (cmp < 0)
349 mid++;
350 return -(mid + 1);
351 }
352
353 FcBool
354 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
355 {
356 int id;
357
358 id = FcLangSetIndex (lang);
359 if (id >= 0)
360 {
361 FcLangSetBitSet (ls, id);
362 return FcTrue;
363 }
364 if (!ls->extra)
365 {
366 ls->extra = FcStrSetCreate ();
367 if (!ls->extra)
368 return FcFalse;
369 }
370 return FcStrSetAdd (ls->extra, lang);
371 }
372
373 FcLangResult
374 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
375 {
376 int id;
377 FcLangResult best, r;
378 int i;
379
380 id = FcLangSetIndex (lang);
381 if (id < 0)
382 id = -id - 1;
383 else if (FcLangSetBitGet (ls, id))
384 return FcLangEqual;
385 best = FcLangDifferentLang;
386 for (i = id - 1; i >= 0; i--)
387 {
388 r = FcLangCompare (lang, fcLangCharSets[i].lang);
389 if (r == FcLangDifferentLang)
390 break;
391 if (FcLangSetBitGet (ls, i) && r < best)
392 best = r;
393 }
394 for (i = id; i < NUM_LANG_CHAR_SET; i++)
395 {
396 r = FcLangCompare (lang, fcLangCharSets[i].lang);
397 if (r == FcLangDifferentLang)
398 break;
399 if (FcLangSetBitGet (ls, i) && r < best)
400 best = r;
401 }
402 if (ls->extra)
403 {
404 FcStrList *list = FcStrListCreate (ls->extra);
405 FcChar8 *extra;
406
407 if (list)
408 {
409 while (best > FcLangEqual && (extra = FcStrListNext (list)))
410 {
411 r = FcLangCompare (lang, extra);
412 if (r < best)
413 best = r;
414 }
415 FcStrListDone (list);
416 }
417 }
418 return best;
419 }
420
421 static FcLangResult
422 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
423 {
424 FcStrList *list = FcStrListCreate (set);
425 FcLangResult r, best = FcLangDifferentLang;
426 FcChar8 *extra;
427
428 if (list)
429 {
430 while (best > FcLangEqual && (extra = FcStrListNext (list)))
431 {
432 r = FcLangSetHasLang (ls, extra);
433 if (r < best)
434 best = r;
435 }
436 FcStrListDone (list);
437 }
438 return best;
439 }
440
441 FcLangResult
442 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
443 {
444 int i, j;
445 FcLangResult best, r;
446
447 for (i = 0; i < NUM_LANG_SET_MAP; i++)
448 if (lsa->map[i] & lsb->map[i])
449 return FcLangEqual;
450 best = FcLangDifferentLang;
451 for (j = 0; j < NUM_COUNTRY_SET; j++)
452 for (i = 0; i < NUM_LANG_SET_MAP; i++)
453 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
454 (lsb->map[i] & fcLangCountrySets[j][i]))
455 {
456 best = FcLangDifferentTerritory;
457 break;
458 }
459 if (lsa->extra)
460 {
461 r = FcLangSetCompareStrSet (lsb, lsa->extra);
462 if (r < best)
463 best = r;
464 }
465 if (best > FcLangEqual && lsb->extra)
466 {
467 r = FcLangSetCompareStrSet (lsa, lsb->extra);
468 if (r < best)
469 best = r;
470 }
471 return best;
472 }
473
474 /*
475 * Used in computing values -- mustn't allocate any storage
476 */
477 FcLangSet *
478 FcLangSetPromote (const FcChar8 *lang)
479 {
480 static FcLangSet ls;
481 static FcStrSet strs;
482 static FcChar8 *str;
483 int id;
484
485 memset (ls.map, '\0', sizeof (ls.map));
486 ls.extra = 0;
487 id = FcLangSetIndex (lang);
488 if (id > 0)
489 {
490 FcLangSetBitSet (&ls, id);
491 }
492 else
493 {
494 ls.extra = &strs;
495 strs.num = 1;
496 strs.size = 1;
497 strs.strs = &str;
498 strs.ref = 1;
499 str = (FcChar8 *) lang;
500 }
501 return &ls;
502 }
503
504 FcChar32
505 FcLangSetHash (const FcLangSet *ls)
506 {
507 FcChar32 h = 0;
508 int i;
509
510 for (i = 0; i < NUM_LANG_SET_MAP; i++)
511 h ^= ls->map[i];
512 if (ls->extra)
513 h ^= ls->extra->num;
514 return h;
515 }
516
517 FcLangSet *
518 FcNameParseLangSet (const FcChar8 *string)
519 {
520 FcChar8 lang[32],c;
521 int i;
522 FcLangSet *ls;
523
524 ls = FcLangSetCreate ();
525 if (!ls)
526 goto bail0;
527
528 for(;;)
529 {
530 for(i = 0; i < 31;i++)
531 {
532 c = *string++;
533 if(c == '\0' || c == '|')
534 break; /* end of this code */
535 lang[i] = c;
536 }
537 lang[i] = '\0';
538 if (!FcLangSetAdd (ls, lang))
539 goto bail1;
540 if(c == '\0')
541 break;
542 }
543 return ls;
544 bail1:
545 FcLangSetDestroy (ls);
546 bail0:
547 return 0;
548 }
549
550 FcBool
551 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
552 {
553 int i, bit;
554 FcChar32 bits;
555 FcBool first = FcTrue;
556
557 for (i = 0; i < NUM_LANG_SET_MAP; i++)
558 {
559 if ((bits = ls->map[i]))
560 {
561 for (bit = 0; bit <= 31; bit++)
562 if (bits & (1 << bit))
563 {
564 int id = (i << 5) | bit;
565 if (!first)
566 if (!FcStrBufChar (buf, '|'))
567 return FcFalse;
568 if (!FcStrBufString (buf, fcLangCharSets[id].lang))
569 return FcFalse;
570 first = FcFalse;
571 }
572 }
573 }
574 if (ls->extra)
575 {
576 FcStrList *list = FcStrListCreate (ls->extra);
577 FcChar8 *extra;
578
579 if (!list)
580 return FcFalse;
581 while ((extra = FcStrListNext (list)))
582 {
583 if (!first)
584 if (!FcStrBufChar (buf, '|'))
585 {
586 FcStrListDone (list);
587 return FcFalse;
588 }
589 if (!FcStrBufString (buf, extra))
590 {
591 FcStrListDone (list);
592 return FcFalse;
593 }
594 first = FcFalse;
595 }
596 FcStrListDone (list);
597 }
598 return FcTrue;
599 }
600
601 FcBool
602 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
603 {
604 int i;
605
606 for (i = 0; i < NUM_LANG_SET_MAP; i++)
607 {
608 if (lsa->map[i] != lsb->map[i])
609 return FcFalse;
610 }
611 if (!lsa->extra && !lsb->extra)
612 return FcTrue;
613 if (lsa->extra && lsb->extra)
614 return FcStrSetEqual (lsa->extra, lsb->extra);
615 return FcFalse;
616 }
617
618 static FcBool
619 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
620 {
621 int id;
622 int i;
623
624 id = FcLangSetIndex (lang);
625 if (id < 0)
626 id = -id - 1;
627 else if (FcLangSetBitGet (ls, id))
628 return FcTrue;
629 /*
630 * search up and down among equal languages for a match
631 */
632 for (i = id - 1; i >= 0; i--)
633 {
634 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
635 break;
636 if (FcLangSetBitGet (ls, i) &&
637 FcLangContains (fcLangCharSets[i].lang, lang))
638 return FcTrue;
639 }
640 for (i = id; i < NUM_LANG_CHAR_SET; i++)
641 {
642 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
643 break;
644 if (FcLangSetBitGet (ls, i) &&
645 FcLangContains (fcLangCharSets[i].lang, lang))
646 return FcTrue;
647 }
648 if (ls->extra)
649 {
650 FcStrList *list = FcStrListCreate (ls->extra);
651 FcChar8 *extra;
652
653 if (list)
654 {
655 while ((extra = FcStrListNext (list)))
656 {
657 if (FcLangContains (extra, lang))
658 break;
659 }
660 FcStrListDone (list);
661 if (extra)
662 return FcTrue;
663 }
664 }
665 return FcFalse;
666 }
667
668 /*
669 * return FcTrue if lsa contains every language in lsb
670 */
671 FcBool
672 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
673 {
674 int i, j;
675 FcChar32 missing;
676
677 if (FcDebug() & FC_DBG_MATCHV)
678 {
679 printf ("FcLangSet "); FcLangSetPrint (lsa);
680 printf (" contains "); FcLangSetPrint (lsb);
681 printf ("\n");
682 }
683 /*
684 * check bitmaps for missing language support
685 */
686 for (i = 0; i < NUM_LANG_SET_MAP; i++)
687 {
688 missing = lsb->map[i] & ~lsa->map[i];
689 if (missing)
690 {
691 for (j = 0; j < 32; j++)
692 if (missing & (1 << j))
693 {
694 if (!FcLangSetContainsLang (lsa,
695 fcLangCharSets[i*32 + j].lang))
696 {
697 if (FcDebug() & FC_DBG_MATCHV)
698 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
699 return FcFalse;
700 }
701 }
702 }
703 }
704 if (lsb->extra)
705 {
706 FcStrList *list = FcStrListCreate (lsb->extra);
707 FcChar8 *extra;
708
709 if (list)
710 {
711 while ((extra = FcStrListNext (list)))
712 {
713 if (!FcLangSetContainsLang (lsa, extra))
714 {
715 if (FcDebug() & FC_DBG_MATCHV)
716 printf ("\tMissing string %s\n", extra);
717 break;
718 }
719 }
720 FcStrListDone (list);
721 if (extra)
722 return FcFalse;
723 }
724 }
725 return FcTrue;
726 }
727
728 FcBool
729 FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
730 {
731 if (!FcSerializeAlloc (serialize, l, sizeof (FcLangSet)))
732 return FcFalse;
733 return FcTrue;
734 }
735
736 FcLangSet *
737 FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
738 {
739 FcLangSet *l_serialize = FcSerializePtr (serialize, l);
740
741 if (!l_serialize)
742 return NULL;
743 *l_serialize = *l;
744 return l_serialize;
745 }
746 #define __fclang__
747 #include "fcaliastail.h"
748 #undef __fclang__