]> git.wh0rd.org - fontconfig.git/blob - src/fclang.c
Optimization in FcLangSetIndex was broken, occasionally returning a pointer
[fontconfig.git] / src / fclang.c
1 /*
2 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
3 *
4 * Copyright © 2002 Keith Packard
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25 #include "fcint.h"
26
/*
 * One entry of the language table (fcLangCharSets): a language tag and
 * the charset attached to it.  FcFreeTypeLangSet marks a language as
 * supported when no character of this charset is missing from the
 * font's charset.
 */
27 typedef struct {
28 FcChar8 *lang;
29 FcCharSet charset;
30 } FcLangCharSet;
31
/*
 * Inclusive span of fcLangCharSets indices.  FcLangSetIndex reads one
 * span per first letter 'a'..'z' (fcLangCharSetRanges) and uses begin
 * and end as the low/high bounds of its binary search; begin > end is
 * treated there as "no entries for this letter".
 */
32 typedef struct {
33 int begin;
34 int end;
35 } FcLangCharSetRange;
36
37 #include "../fc-lang/fclang.h"
38
/*
 * A set of languages.
 *
 * map   - bitmap with one bit per fcLangCharSets index: language `id'
 *         lives in word id >> 5 at bit position id & 0x1f.
 * extra - string set holding languages that FcLangSetIndex did not
 *         find in the table (see FcLangSetAdd); 0 when there are none.
 */
39 struct _FcLangSet {
40 FcChar32 map[NUM_LANG_SET_MAP];
41 FcStrSet *extra;
42 };
43
/* Set the bitmap bit for table index `id' in language set `ls'. */
44 #define FcLangSetBitSet(ls, id) ((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
/* Evaluate to 1 when the bitmap bit for table index `id' is set, else 0. */
45 #define FcLangSetBitGet(ls, id) (((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)
46
47 FcLangSet *
48 FcFreeTypeLangSet (const FcCharSet *charset,
49 const FcChar8 *exclusiveLang)
50 {
51 int i;
52 FcChar32 missing;
53 const FcCharSet *exclusiveCharset = 0;
54 FcLangSet *ls;
55
56
57 if (exclusiveLang)
58 exclusiveCharset = FcCharSetForLang (exclusiveLang);
59 ls = FcLangSetCreate ();
60 if (!ls)
61 return 0;
62 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
63 {
64 /*
65 * Check for Han charsets to make fonts
66 * which advertise support for a single language
67 * not support other Han languages
68 */
69 if (exclusiveCharset &&
70 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang) &&
71 fcLangCharSets[i].charset.leaves != exclusiveCharset->leaves)
72 {
73 continue;
74 }
75 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
76 if (FcDebug() & FC_DBG_SCANV)
77 {
78 if (missing && missing < 10)
79 {
80 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
81 charset);
82 FcChar32 ucs4;
83 FcChar32 map[FC_CHARSET_MAP_SIZE];
84 FcChar32 next;
85
86 printf ("\n%s(%d) ", fcLangCharSets[i].lang, missing);
87 printf ("{");
88 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
89 ucs4 != FC_CHARSET_DONE;
90 ucs4 = FcCharSetNextPage (missed, map, &next))
91 {
92 int i, j;
93 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
94 if (map[i])
95 {
96 for (j = 0; j < 32; j++)
97 if (map[i] & (1 << j))
98 printf (" %04x", ucs4 + i * 32 + j);
99 }
100 }
101 printf (" }\n\t");
102 FcCharSetDestroy (missed);
103 }
104 else
105 printf ("%s(%d) ", fcLangCharSets[i].lang, missing);
106 }
107 if (!missing)
108 FcLangSetBitSet (ls, i);
109 }
110
111 if (FcDebug() & FC_DBG_SCANV)
112 printf ("\n");
113
114
115 return ls;
116 }
117
118 #define FcLangEnd(c) ((c) == '-' || (c) == '\0')
119
120 FcLangResult
121 FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
122 {
123 FcChar8 c1, c2;
124 FcLangResult result = FcLangDifferentLang;
125
126 for (;;)
127 {
128 c1 = *s1++;
129 c2 = *s2++;
130
131 c1 = FcToLower (c1);
132 c2 = FcToLower (c2);
133 if (c1 != c2)
134 {
135 if (FcLangEnd (c1) && FcLangEnd (c2))
136 result = FcLangDifferentCountry;
137 return result;
138 }
139 else if (!c1)
140 return FcLangEqual;
141 else if (c1 == '-')
142 result = FcLangDifferentCountry;
143 }
144 }
145
146 /*
147 * Return FcTrue when s1 contains s2.
148 *
149 * s1 contains s2 if s1 equals s2 or if s1 is a
150 * language with a country and s2 is just a language
151 */
152
153 static FcBool
154 FcLangContains (const FcChar8 *s1, const FcChar8 *s2)
155 {
156 FcChar8 c1, c2;
157
158 for (;;)
159 {
160 c1 = *s1++;
161 c2 = *s2++;
162
163 c1 = FcToLower (c1);
164 c2 = FcToLower (c2);
165 if (c1 != c2)
166 {
167 /* see if s1 has a country while s2 is mising one */
168 if (c1 == '-' && c2 == '\0')
169 return FcTrue;
170 return FcFalse;
171 }
172 else if (!c1)
173 return FcTrue;
174 }
175 }
176
177 const FcCharSet *
178 FcCharSetForLang (const FcChar8 *lang)
179 {
180 int i;
181 int country = -1;
182 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
183 {
184 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
185 case FcLangEqual:
186 return &fcLangCharSets[i].charset;
187 case FcLangDifferentCountry:
188 if (country == -1)
189 country = i;
190 default:
191 break;
192 }
193 }
194 if (country == -1)
195 return 0;
196 return &fcLangCharSets[i].charset;
197 }
198
199 FcLangSet *
200 FcLangSetCreate (void)
201 {
202 FcLangSet *ls;
203
204 ls = malloc (sizeof (FcLangSet));
205 if (!ls)
206 return 0;
207 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
208 memset (ls->map, '\0', sizeof (ls->map));
209 ls->extra = 0;
210 return ls;
211 }
212
213 void
214 FcLangSetDestroy (FcLangSet *ls)
215 {
216 if (ls->extra)
217 FcStrSetDestroy (ls->extra);
218 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
219 free (ls);
220 }
221
222 FcLangSet *
223 FcLangSetCopy (const FcLangSet *ls)
224 {
225 FcLangSet *new;
226
227 new = FcLangSetCreate ();
228 if (!new)
229 goto bail0;
230 memcpy (new->map, ls->map, sizeof (new->map));
231 if (ls->extra)
232 {
233 FcStrList *list;
234 FcChar8 *extra;
235
236 new->extra = FcStrSetCreate ();
237 if (!new->extra)
238 goto bail1;
239
240 list = FcStrListCreate (ls->extra);
241 if (!list)
242 goto bail1;
243
244 while ((extra = FcStrListNext (list)))
245 if (!FcStrSetAdd (new->extra, extra))
246 {
247 FcStrListDone (list);
248 goto bail1;
249 }
250 FcStrListDone (list);
251 }
252 return new;
253 bail1:
254 FcLangSetDestroy (new);
255 bail0:
256 return 0;
257 }
258
259 static int
260 FcLangSetIndex (const FcChar8 *lang)
261 {
262 int low, high, mid = 0;
263 int cmp = 0;
264 FcChar8 firstChar = FcToLower(lang[0]);
265 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
266
267 if (firstChar < 'a')
268 {
269 low = 0;
270 high = fcLangCharSetRanges[0].begin;
271 }
272 else if(firstChar > 'z')
273 {
274 low = fcLangCharSetRanges[25].begin;
275 high = NUM_LANG_CHAR_SET - 1;
276 }
277 else
278 {
279 low = fcLangCharSetRanges[firstChar - 'a'].begin;
280 high = fcLangCharSetRanges[firstChar - 'a'].end;
281 /* no matches */
282 if (low > high)
283 return -low; /* next entry after where it would be */
284 }
285
286 while (low <= high)
287 {
288 mid = (high + low) >> 1;
289 if(fcLangCharSets[mid].lang[0] != firstChar)
290 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
291 else
292 { /* fast path for resolving 2-letter languages (by far the most common) after
293 * finding the first char (probably already true because of the hash table) */
294 cmp = fcLangCharSets[mid].lang[1] - secondChar;
295 if (cmp == 0 &&
296 (fcLangCharSets[mid].lang[2] != '\0' ||
297 lang[2] != '\0'))
298 {
299 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
300 lang+2);
301 }
302 }
303 if (cmp == 0)
304 return mid;
305 if (cmp < 0)
306 low = mid + 1;
307 else
308 high = mid - 1;
309 }
310 if (cmp < 0)
311 mid++;
312 return -(mid + 1);
313 }
314
315 FcBool
316 FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
317 {
318 int id;
319
320 id = FcLangSetIndex (lang);
321 if (id >= 0)
322 {
323 FcLangSetBitSet (ls, id);
324 return FcTrue;
325 }
326 if (!ls->extra)
327 {
328 ls->extra = FcStrSetCreate ();
329 if (!ls->extra)
330 return FcFalse;
331 }
332 return FcStrSetAdd (ls->extra, lang);
333 }
334
335 FcLangResult
336 FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
337 {
338 int id;
339 FcLangResult best, r;
340 int i;
341
342 id = FcLangSetIndex (lang);
343 if (id < 0)
344 id = -id - 1;
345 else if (FcLangSetBitGet (ls, id))
346 return FcLangEqual;
347 best = FcLangDifferentLang;
348 for (i = id - 1; i >= 0; i--)
349 {
350 r = FcLangCompare (lang, fcLangCharSets[i].lang);
351 if (r == FcLangDifferentLang)
352 break;
353 if (FcLangSetBitGet (ls, i) && r < best)
354 best = r;
355 }
356 for (i = id; i < NUM_LANG_CHAR_SET; i++)
357 {
358 r = FcLangCompare (lang, fcLangCharSets[i].lang);
359 if (r == FcLangDifferentLang)
360 break;
361 if (FcLangSetBitGet (ls, i) && r < best)
362 best = r;
363 }
364 if (ls->extra)
365 {
366 FcStrList *list = FcStrListCreate (ls->extra);
367 FcChar8 *extra;
368 FcLangResult r;
369
370 if (list)
371 {
372 while (best > FcLangEqual && (extra = FcStrListNext (list)))
373 {
374 r = FcLangCompare (lang, extra);
375 if (r < best)
376 best = r;
377 }
378 FcStrListDone (list);
379 }
380 }
381 return best;
382 }
383
384 static FcLangResult
385 FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
386 {
387 FcStrList *list = FcStrListCreate (set);
388 FcLangResult r, best = FcLangDifferentLang;
389 FcChar8 *extra;
390
391 if (list)
392 {
393 while (best > FcLangEqual && (extra = FcStrListNext (list)))
394 {
395 r = FcLangSetHasLang (ls, extra);
396 if (r < best)
397 best = r;
398 }
399 FcStrListDone (list);
400 }
401 return best;
402 }
403
404 FcLangResult
405 FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
406 {
407 int i, j;
408 FcLangResult best, r;
409
410 for (i = 0; i < NUM_LANG_SET_MAP; i++)
411 if (lsa->map[i] & lsb->map[i])
412 return FcLangEqual;
413 best = FcLangDifferentLang;
414 for (j = 0; j < NUM_COUNTRY_SET; j++)
415 for (i = 0; i < NUM_LANG_SET_MAP; i++)
416 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
417 (lsb->map[i] & fcLangCountrySets[j][i]))
418 {
419 best = FcLangDifferentCountry;
420 break;
421 }
422 if (lsa->extra)
423 {
424 r = FcLangSetCompareStrSet (lsb, lsa->extra);
425 if (r < best)
426 best = r;
427 }
428 if (best > FcLangEqual && lsb->extra)
429 {
430 r = FcLangSetCompareStrSet (lsa, lsb->extra);
431 if (r < best)
432 best = r;
433 }
434 return best;
435 }
436
437 /*
438 * Used in computing values -- mustn't allocate any storage
439 */
440 FcLangSet *
441 FcLangSetPromote (const FcChar8 *lang)
442 {
443 static FcLangSet ls;
444 static FcStrSet strs;
445 static FcChar8 *str;
446 int id;
447
448 memset (ls.map, '\0', sizeof (ls.map));
449 ls.extra = 0;
450 id = FcLangSetIndex (lang);
451 if (id > 0)
452 {
453 FcLangSetBitSet (&ls, id);
454 }
455 else
456 {
457 ls.extra = &strs;
458 strs.num = 1;
459 strs.size = 1;
460 strs.strs = &str;
461 strs.ref = 1;
462 str = (FcChar8 *) lang;
463 }
464 return &ls;
465 }
466
467 FcChar32
468 FcLangSetHash (const FcLangSet *ls)
469 {
470 FcChar32 h = 0;
471 int i;
472
473 for (i = 0; i < NUM_LANG_SET_MAP; i++)
474 h ^= ls->map[i];
475 if (ls->extra)
476 h ^= ls->extra->num;
477 return h;
478 }
479
480 FcLangSet *
481 FcNameParseLangSet (const FcChar8 *string)
482 {
483 FcChar8 lang[32],c;
484 int i;
485 FcLangSet *ls;
486
487 ls = FcLangSetCreate ();
488 if (!ls)
489 goto bail0;
490
491 for(;;)
492 {
493 for(i = 0; i < 31;i++)
494 {
495 c = *string++;
496 if(c == '\0' || c == '|')
497 break; /* end of this code */
498 lang[i] = c;
499 }
500 lang[i] = '\0';
501 if (!FcLangSetAdd (ls, lang))
502 goto bail1;
503 if(c == '\0')
504 break;
505 }
506 return ls;
507 bail1:
508 FcLangSetDestroy (ls);
509 bail0:
510 return 0;
511 }
512
513 FcBool
514 FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
515 {
516 int i, bit;
517 FcChar32 bits;
518 FcBool first = FcTrue;
519
520 for (i = 0; i < NUM_LANG_SET_MAP; i++)
521 {
522 if ((bits = ls->map[i]))
523 {
524 for (bit = 0; bit <= 31; bit++)
525 if (bits & (1 << bit))
526 {
527 int id = (i << 5) | bit;
528 if (!first)
529 if (!FcStrBufChar (buf, '|'))
530 return FcFalse;
531 if (!FcStrBufString (buf, fcLangCharSets[id].lang))
532 return FcFalse;
533 first = FcFalse;
534 }
535 }
536 }
537 if (ls->extra)
538 {
539 FcStrList *list = FcStrListCreate (ls->extra);
540 FcChar8 *extra;
541
542 if (!list)
543 return FcFalse;
544 while ((extra = FcStrListNext (list)))
545 {
546 if (!first)
547 if (!FcStrBufChar (buf, '|'))
548 return FcFalse;
549 if (!FcStrBufString (buf, extra))
550 return FcFalse;
551 first = FcFalse;
552 }
553 }
554 return FcTrue;
555 }
556
557 FcBool
558 FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
559 {
560 int i;
561
562 for (i = 0; i < NUM_LANG_SET_MAP; i++)
563 {
564 if (lsa->map[i] != lsb->map[i])
565 return FcFalse;
566 }
567 if (!lsa->extra && !lsb->extra)
568 return FcTrue;
569 if (lsa->extra && lsb->extra)
570 return FcStrSetEqual (lsa->extra, lsb->extra);
571 return FcFalse;
572 }
573
574 static FcBool
575 FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
576 {
577 int id;
578 int i;
579
580 id = FcLangSetIndex (lang);
581 if (id < 0)
582 id = -id - 1;
583 else if (FcLangSetBitGet (ls, id))
584 return FcTrue;
585 /*
586 * search up and down among equal languages for a match
587 */
588 for (i = id - 1; i >= 0; i--)
589 {
590 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
591 break;
592 if (FcLangSetBitGet (ls, i) &&
593 FcLangContains (fcLangCharSets[i].lang, lang))
594 return FcTrue;
595 }
596 for (i = id; i < NUM_LANG_CHAR_SET; i++)
597 {
598 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
599 break;
600 if (FcLangSetBitGet (ls, i) &&
601 FcLangContains (fcLangCharSets[i].lang, lang))
602 return FcTrue;
603 }
604 if (ls->extra)
605 {
606 FcStrList *list = FcStrListCreate (ls->extra);
607 FcChar8 *extra;
608
609 if (list)
610 {
611 while ((extra = FcStrListNext (list)))
612 {
613 if (FcLangContains (extra, lang))
614 break;
615 }
616 FcStrListDone (list);
617 if (extra)
618 return FcTrue;
619 }
620 }
621 return FcFalse;
622 }
623
624 /*
625 * return FcTrue if lsa contains every language in lsb
626 */
627 FcBool
628 FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
629 {
630 int i, j;
631 FcChar32 missing;
632
633 if (FcDebug() & FC_DBG_MATCHV)
634 {
635 printf ("FcLangSet "); FcLangSetPrint (lsa);
636 printf (" contains "); FcLangSetPrint (lsb);
637 printf ("\n");
638 }
639 /*
640 * check bitmaps for missing language support
641 */
642 for (i = 0; i < NUM_LANG_SET_MAP; i++)
643 {
644 missing = lsb->map[i] & ~lsa->map[i];
645 if (missing)
646 {
647 for (j = 0; j < 32; j++)
648 if (missing & (1 << j))
649 {
650 if (!FcLangSetContainsLang (lsa,
651 fcLangCharSets[i*32 + j].lang))
652 {
653 if (FcDebug() & FC_DBG_MATCHV)
654 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
655 return FcFalse;
656 }
657 }
658 }
659 }
660 if (lsb->extra)
661 {
662 FcStrList *list = FcStrListCreate (lsb->extra);
663 FcChar8 *extra;
664
665 if (list)
666 {
667 while ((extra = FcStrListNext (list)))
668 {
669 if (!FcLangSetContainsLang (lsa, extra))
670 {
671 if (FcDebug() & FC_DBG_MATCHV)
672 printf ("\tMissing string %s\n", extra);
673 break;
674 }
675 }
676 FcStrListDone (list);
677 if (extra)
678 return FcFalse;
679 }
680 }
681 return FcTrue;
682 }