]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
Overhaul the serialization system to create one mmapable file per directory
[fontconfig.git] / src / fclang.c
1 /*
2 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26
27 typedef struct {
28 FcChar8 *lang;
29 FcCharSet charset;
30 } FcLangCharSet;
31
/*
 * Inclusive index range into fcLangCharSets.  FcLangSetIndex looks up the
 * range by the first letter of a language tag ('a'..'z') and binary
 * searches between begin and end.
 */
typedef struct {
    int begin;	/* first table index for the letter */
    int end;	/* last table index for the letter (inclusive) */
} FcLangCharSetRange;
36
37 #include "../fc-lang/fclang.h"
38
39 struct _FcLangSet {
40 FcChar32 map[NUM_LANG_SET_MAP];
41 FcStrSet *extra;
42 };
43
/*
 * Bitmap helpers: language `id' lives in word id / 32 (id >> 5) at bit
 * position id % 32 (id & 0x1f).  Both macros evaluate `id' twice.
 */
#define FcLangSetBitSet(ls, id) \
    ((ls)->map[(id) >> 5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define FcLangSetBitGet(ls, id) \
    (((ls)->map[(id) >> 5] >> ((id) & 0x1f)) & 1)
46
47 FcLangSet *
48 FcFreeTypeLangSet (const FcCharSet *charset,
49 const FcChar8 *exclusiveLang)
50 {
51 int i, j;
52 FcChar32 missing;
53 const FcCharSet *exclusiveCharset = 0;
54 FcLangSet *ls;
55
56
57 if (exclusiveLang)
58 exclusiveCharset = FcCharSetForLang (exclusiveLang);
59 ls = FcLangSetCreate ();
60 if (!ls)
61 return 0;
62 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
63 {
64 /*
65 * Check for Han charsets to make fonts
66 * which advertise support for a single language
67 * not support other Han languages
68 */
69 if (exclusiveCharset &&
70 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
71 {
72 if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
73 continue;
74
75 for (j = 0; j < fcLangCharSets[i].charset.num; j++)
76 if (FcCharSetGetLeaf(&fcLangCharSets[i].charset, j) !=
77 FcCharSetGetLeaf(exclusiveCharset, j))
78 continue;
79 }
80 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
81 if (FcDebug() & FC_DBG_SCANV)
82 {
83 if (missing && missing < 10)
84 {
85 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
86 charset);
87 FcChar32 ucs4;
88 FcChar32 map[FC_CHARSET_MAP_SIZE];
89 FcChar32 next;
90
91 printf ("\n%s(%d) ", fcLangCharSets[i].lang, missing);
92 printf ("{");
93 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
94 ucs4 != FC_CHARSET_DONE;
95 ucs4 = FcCharSetNextPage (missed, map, &next))
96 {
97 int i, j;
98 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
99 if (map[i])
100 {
101 for (j = 0; j < 32; j++)
102 if (map[i] & (1 << j))
103 printf (" %04x", ucs4 + i * 32 + j);
104 }
105 }
106 printf (" }\n\t");
107 FcCharSetDestroy (missed);
108 }
109 else
110 printf ("%s(%d) ", fcLangCharSets[i].lang, missing);
111 }
112 if (!missing)
113 FcLangSetBitSet (ls, i);
114 }
115
116 if (FcDebug() & FC_DBG_SCANV)
117 printf ("\n");
118
119
120 return ls;
121 }
122
123 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
124
125 FcLangResult
126 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
127 {
128 FcChar8 c1, c2;
129 FcLangResult result = FcLangDifferentLang;
130
131 for (;;)
132 {
133 c1 = *s1++;
134 c2 = *s2++;
135
136 c1 = FcToLower (c1);
137 c2 = FcToLower (c2);
138 if (c1 != c2)
139 {
140 if (FcLangEnd (c1) && FcLangEnd (c2))
141 result = FcLangDifferentCountry;
142 return result;
143 }
144 else if (!c1)
145 return FcLangEqual;
146 else if (c1 == '-')
147 result = FcLangDifferentCountry;
148 }
149 }
150
151 /*
152 * Return FcTrue when super contains sub.
153 *
154 * super contains sub if super and sub have the same
155 * language and either the same country or one
156 * is missing the country
157 */
158
159 static FcBool
160 FcLangContains (const FcChar8 *super, const FcChar8 *sub)
161 {
162 FcChar8 c1, c2;
163
164 for (;;)
165 {
166 c1 = *super++;
167 c2 = *sub++;
168
169 c1 = FcToLower (c1);
170 c2 = FcToLower (c2);
171 if (c1 != c2)
172 {
173 /* see if super has a country while sub is mising one */
174 if (c1 == '-' && c2 == '\0')
175 return FcTrue;
176 /* see if sub has a country while super is mising one */
177 if (c1 == '\0' && c2 == '-')
178 return FcTrue;
179 return FcFalse;
180 }
181 else if (!c1)
182 return FcTrue;
183 }
184 }
185
186 const FcCharSet *
187 FcCharSetForLang (const FcChar8 *lang)
188 {
189 int i;
190 int country = -1;
191 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
192 {
193 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
194 case FcLangEqual:
195 return &fcLangCharSets[i].charset;
196 case FcLangDifferentCountry:
197 if (country == -1)
198 country = i;
199 default:
200 break;
201 }
202 }
203 if (country == -1)
204 return 0;
205 return &fcLangCharSets[i].charset;
206 }
207
208 FcLangSet *
209 FcLangSetCreate (void)
210 {
211 FcLangSet *ls;
212
213 ls = malloc (sizeof (FcLangSet));
214 if (!ls)
215 return 0;
216 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
217 memset (ls->map, '\0', sizeof (ls->map));
218 ls->extra = 0;
219 return ls;
220 }
221
222 void
223 FcLangSetDestroy (FcLangSet *ls)
224 {
225 if (ls->extra)
226 FcStrSetDestroy (ls->extra);
227 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
228 free (ls);
229 }
230
231 FcLangSet *
232 FcLangSetCopy (const FcLangSet *ls)
233 {
234 FcLangSet *new;
235
236 new = FcLangSetCreate ();
237 if (!new)
238 goto bail0;
239 memcpy (new->map, ls->map, sizeof (new->map));
240 if (ls->extra)
241 {
242 FcStrList *list;
243 FcChar8 *extra;
244
245 new->extra = FcStrSetCreate ();
246 if (!new->extra)
247 goto bail1;
248
249 list = FcStrListCreate (ls->extra);
250 if (!list)
251 goto bail1;
252
253 while ((extra = FcStrListNext (list)))
254 if (!FcStrSetAdd (new->extra, extra))
255 {
256 FcStrListDone (list);
257 goto bail1;
258 }
259 FcStrListDone (list);
260 }
261 return new;
262 bail1:
263 FcLangSetDestroy (new);
264 bail0:
265 return 0;
266 }
267
268 static int
269 FcLangSetIndex (const FcChar8 *lang)
270 {
271 int low, high, mid = 0;
272 int cmp = 0;
273 FcChar8 firstChar = FcToLower(lang[0]);
274 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
275
276 if (firstChar < 'a')
277 {
278 low = 0;
279 high = fcLangCharSetRanges[0].begin;
280 }
281 else if(firstChar > 'z')
282 {
283 low = fcLangCharSetRanges[25].begin;
284 high = NUM_LANG_CHAR_SET - 1;
285 }
286 else
287 {
288 low = fcLangCharSetRanges[firstChar - 'a'].begin;
289 high = fcLangCharSetRanges[firstChar - 'a'].end;
290 /* no matches */
291 if (low > high)
292 return -low; /* next entry after where it would be */
293 }
294
295 while (low <= high)
296 {
297 mid = (high + low) >> 1;
298 if(fcLangCharSets[mid].lang[0] != firstChar)
299 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
300 else
301 { /* fast path for resolving 2-letter languages (by far the most common) after
302 * finding the first char (probably already true because of the hash table) */
303 cmp = fcLangCharSets[mid].lang[1] - secondChar;
304 if (cmp == 0 &&
305 (fcLangCharSets[mid].lang[2] != '\0' ||
306 lang[2] != '\0'))
307 {
308 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
309 lang+2);
310 }
311 }
312 if (cmp == 0)
313 return mid;
314 if (cmp < 0)
315 low = mid + 1;
316 else
317 high = mid - 1;
318 }
319 if (cmp < 0)
320 mid++;
321 return -(mid + 1);
322 }
323
324 FcBool
325 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
326 {
327 int id;
328
329 id = FcLangSetIndex (lang);
330 if (id >= 0)
331 {
332 FcLangSetBitSet (ls, id);
333 return FcTrue;
334 }
335 if (!ls->extra)
336 {
337 ls->extra = FcStrSetCreate ();
338 if (!ls->extra)
339 return FcFalse;
340 }
341 return FcStrSetAdd (ls->extra, lang);
342 }
343
344 FcLangResult
345 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
346 {
347 int id;
348 FcLangResult best, r;
349 int i;
350
351 id = FcLangSetIndex (lang);
352 if (id < 0)
353 id = -id - 1;
354 else if (FcLangSetBitGet (ls, id))
355 return FcLangEqual;
356 best = FcLangDifferentLang;
357 for (i = id - 1; i >= 0; i--)
358 {
359 r = FcLangCompare (lang, fcLangCharSets[i].lang);
360 if (r == FcLangDifferentLang)
361 break;
362 if (FcLangSetBitGet (ls, i) && r < best)
363 best = r;
364 }
365 for (i = id; i < NUM_LANG_CHAR_SET; i++)
366 {
367 r = FcLangCompare (lang, fcLangCharSets[i].lang);
368 if (r == FcLangDifferentLang)
369 break;
370 if (FcLangSetBitGet (ls, i) && r < best)
371 best = r;
372 }
373 if (ls->extra)
374 {
375 FcStrList *list = FcStrListCreate (ls->extra);
376 FcChar8 *extra;
377 FcLangResult r;
378
379 if (list)
380 {
381 while (best > FcLangEqual && (extra = FcStrListNext (list)))
382 {
383 r = FcLangCompare (lang, extra);
384 if (r < best)
385 best = r;
386 }
387 FcStrListDone (list);
388 }
389 }
390 return best;
391 }
392
393 static FcLangResult
394 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
395 {
396 FcStrList *list = FcStrListCreate (set);
397 FcLangResult r, best = FcLangDifferentLang;
398 FcChar8 *extra;
399
400 if (list)
401 {
402 while (best > FcLangEqual && (extra = FcStrListNext (list)))
403 {
404 r = FcLangSetHasLang (ls, extra);
405 if (r < best)
406 best = r;
407 }
408 FcStrListDone (list);
409 }
410 return best;
411 }
412
413 FcLangResult
414 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
415 {
416 int i, j;
417 FcLangResult best, r;
418
419 for (i = 0; i < NUM_LANG_SET_MAP; i++)
420 if (lsa->map[i] & lsb->map[i])
421 return FcLangEqual;
422 best = FcLangDifferentLang;
423 for (j = 0; j < NUM_COUNTRY_SET; j++)
424 for (i = 0; i < NUM_LANG_SET_MAP; i++)
425 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
426 (lsb->map[i] & fcLangCountrySets[j][i]))
427 {
428 best = FcLangDifferentCountry;
429 break;
430 }
431 if (lsa->extra)
432 {
433 r = FcLangSetCompareStrSet (lsb, lsa->extra);
434 if (r < best)
435 best = r;
436 }
437 if (best > FcLangEqual && lsb->extra)
438 {
439 r = FcLangSetCompareStrSet (lsa, lsb->extra);
440 if (r < best)
441 best = r;
442 }
443 return best;
444 }
445
446 /*
447 * Used in computing values -- mustn't allocate any storage
448 */
449 FcLangSet *
450 FcLangSetPromote (const FcChar8 *lang)
451 {
452 static FcLangSet ls;
453 static FcStrSet strs;
454 static FcChar8 *str;
455 int id;
456
457 memset (ls.map, '\0', sizeof (ls.map));
458 ls.extra = 0;
459 id = FcLangSetIndex (lang);
460 if (id > 0)
461 {
462 FcLangSetBitSet (&ls, id);
463 }
464 else
465 {
466 ls.extra = &strs;
467 strs.num = 1;
468 strs.size = 1;
469 strs.strs = &str;
470 strs.ref = 1;
471 str = (FcChar8 *) lang;
472 }
473 return &ls;
474 }
475
476 FcChar32
477 FcLangSetHash (const FcLangSet *ls)
478 {
479 FcChar32 h = 0;
480 int i;
481
482 for (i = 0; i < NUM_LANG_SET_MAP; i++)
483 h ^= ls->map[i];
484 if (ls->extra)
485 h ^= ls->extra->num;
486 return h;
487 }
488
489 FcLangSet *
490 FcNameParseLangSet (const FcChar8 *string)
491 {
492 FcChar8 lang[32],c;
493 int i;
494 FcLangSet *ls;
495
496 ls = FcLangSetCreate ();
497 if (!ls)
498 goto bail0;
499
500 for(;;)
501 {
502 for(i = 0; i < 31;i++)
503 {
504 c = *string++;
505 if(c == '\0' || c == '|')
506 break; /* end of this code */
507 lang[i] = c;
508 }
509 lang[i] = '\0';
510 if (!FcLangSetAdd (ls, lang))
511 goto bail1;
512 if(c == '\0')
513 break;
514 }
515 return ls;
516 bail1:
517 FcLangSetDestroy (ls);
518 bail0:
519 return 0;
520 }
521
522 FcBool
523 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
524 {
525 int i, bit;
526 FcChar32 bits;
527 FcBool first = FcTrue;
528
529 for (i = 0; i < NUM_LANG_SET_MAP; i++)
530 {
531 if ((bits = ls->map[i]))
532 {
533 for (bit = 0; bit <= 31; bit++)
534 if (bits & (1 << bit))
535 {
536 int id = (i << 5) | bit;
537 if (!first)
538 if (!FcStrBufChar (buf, '|'))
539 return FcFalse;
540 if (!FcStrBufString (buf, fcLangCharSets[id].lang))
541 return FcFalse;
542 first = FcFalse;
543 }
544 }
545 }
546 if (ls->extra)
547 {
548 FcStrList *list = FcStrListCreate (ls->extra);
549 FcChar8 *extra;
550
551 if (!list)
552 return FcFalse;
553 while ((extra = FcStrListNext (list)))
554 {
555 if (!first)
556 if (!FcStrBufChar (buf, '|'))
557 return FcFalse;
558 if (!FcStrBufString (buf, extra))
559 return FcFalse;
560 first = FcFalse;
561 }
562 }
563 return FcTrue;
564 }
565
566 FcBool
567 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
568 {
569 int i;
570
571 for (i = 0; i < NUM_LANG_SET_MAP; i++)
572 {
573 if (lsa->map[i] != lsb->map[i])
574 return FcFalse;
575 }
576 if (!lsa->extra && !lsb->extra)
577 return FcTrue;
578 if (lsa->extra && lsb->extra)
579 return FcStrSetEqual (lsa->extra, lsb->extra);
580 return FcFalse;
581 }
582
583 static FcBool
584 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
585 {
586 int id;
587 int i;
588
589 id = FcLangSetIndex (lang);
590 if (id < 0)
591 id = -id - 1;
592 else if (FcLangSetBitGet (ls, id))
593 return FcTrue;
594 /*
595 * search up and down among equal languages for a match
596 */
597 for (i = id - 1; i >= 0; i--)
598 {
599 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
600 break;
601 if (FcLangSetBitGet (ls, i) &&
602 FcLangContains (fcLangCharSets[i].lang, lang))
603 return FcTrue;
604 }
605 for (i = id; i < NUM_LANG_CHAR_SET; i++)
606 {
607 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
608 break;
609 if (FcLangSetBitGet (ls, i) &&
610 FcLangContains (fcLangCharSets[i].lang, lang))
611 return FcTrue;
612 }
613 if (ls->extra)
614 {
615 FcStrList *list = FcStrListCreate (ls->extra);
616 FcChar8 *extra;
617
618 if (list)
619 {
620 while ((extra = FcStrListNext (list)))
621 {
622 if (FcLangContains (extra, lang))
623 break;
624 }
625 FcStrListDone (list);
626 if (extra)
627 return FcTrue;
628 }
629 }
630 return FcFalse;
631 }
632
633 /*
634 * return FcTrue if lsa contains every language in lsb
635 */
636 FcBool
637 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
638 {
639 int i, j;
640 FcChar32 missing;
641
642 if (FcDebug() & FC_DBG_MATCHV)
643 {
644 printf ("FcLangSet "); FcLangSetPrint (lsa);
645 printf (" contains "); FcLangSetPrint (lsb);
646 printf ("\n");
647 }
648 /*
649 * check bitmaps for missing language support
650 */
651 for (i = 0; i < NUM_LANG_SET_MAP; i++)
652 {
653 missing = lsb->map[i] & ~lsa->map[i];
654 if (missing)
655 {
656 for (j = 0; j < 32; j++)
657 if (missing & (1 << j))
658 {
659 if (!FcLangSetContainsLang (lsa,
660 fcLangCharSets[i*32 + j].lang))
661 {
662 if (FcDebug() & FC_DBG_MATCHV)
663 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
664 return FcFalse;
665 }
666 }
667 }
668 }
669 if (lsb->extra)
670 {
671 FcStrList *list = FcStrListCreate (lsb->extra);
672 FcChar8 *extra;
673
674 if (list)
675 {
676 while ((extra = FcStrListNext (list)))
677 {
678 if (!FcLangSetContainsLang (lsa, extra))
679 {
680 if (FcDebug() & FC_DBG_MATCHV)
681 printf ("\tMissing string %s\n", extra);
682 break;
683 }
684 }
685 FcStrListDone (list);
686 if (extra)
687 return FcFalse;
688 }
689 }
690 return FcTrue;
691 }
692
693 static FcLangSet ** langsets = 0;
694 static int langset_bank_count = 0, langset_ptr = 0, langset_count = 0;
695
696 void
697 FcLangSetNewBank (void)
698 {
699 langset_count = 0;
700 }
701
702 /* ideally, should only write one copy of any particular FcLangSet */
703 int
704 FcLangSetNeededBytes (const FcLangSet *l)
705 {
706 langset_count++;
707 return sizeof (FcLangSet);
708 }
709
710 static FcBool
711 FcLangSetEnsureBank (int bi)
712 {
713 if (!langsets || bi >= langset_bank_count)
714 {
715 int new_count = langset_bank_count + 2;
716 int i;
717 FcLangSet** tt;
718 tt = realloc(langsets, new_count * sizeof(FcLangSet *));
719 if (!tt)
720 return FcFalse;
721
722 langsets = tt;
723 for (i = langset_bank_count; i < new_count; i++)
724 langsets[i] = 0;
725 langset_bank_count = new_count;
726 }
727
728 return FcTrue;
729 }
730
731 void *
732 FcLangSetDistributeBytes (FcCache * metadata, void * block_ptr)
733 {
734 int bi = FcCacheBankToIndex(metadata->bank);
735 if (!FcLangSetEnsureBank(bi))
736 return 0;
737
738 langsets[bi] = block_ptr;
739 block_ptr = (void *)((char *)block_ptr +
740 langset_count * sizeof(FcLangSet));
741 langset_ptr = 0;
742 metadata->langset_count = langset_count;
743 return block_ptr;
744 }
745
746 FcLangSet *
747 FcLangSetSerialize(int bank, FcLangSet *l)
748 {
749 int p = langset_ptr, bi = FcCacheBankToIndex(bank);
750
751 if (!l) return 0;
752
753 langsets[bi][langset_ptr] = *l;
754 langsets[bi][langset_ptr].extra = 0;
755 langset_ptr++;
756 return &langsets[bi][p];
757 }
758
759 void *
760 FcLangSetUnserialize (FcCache metadata, void *block_ptr)
761 {
762 int bi = FcCacheBankToIndex(metadata.bank);
763 if (!FcLangSetEnsureBank(bi))
764 return 0;
765
766 FcMemAlloc (FC_MEM_LANGSET, metadata.langset_count * sizeof(FcLangSet));
767 langsets[bi] = (FcLangSet *)block_ptr;
768 block_ptr = (void *)((char *)block_ptr +
769 metadata.langset_count * sizeof(FcLangSet));
770 return block_ptr;
771 }