]> git.wh0rd.org - fontconfig.git/blame - src/fclang.c
Make FcOpNotContains use FcStrStr for strings so that it matches semantics
[fontconfig.git] / src / fclang.c
CommitLineData
3de8881e 1/*
793e946c 2 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
3de8881e 3 *
46b51147 4 * Copyright © 2002 Keith Packard
3de8881e
KP
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
15 *
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
23 */
24
25#include "fcint.h"
26
27typedef struct {
28 FcChar8 *lang;
29 FcCharSet charset;
30} FcLangCharSet;
31
793e946c
KP
/*
 * A [begin, end] pair of indices into fcLangCharSets.  FcLangSetIndex
 * looks these up by the first letter of a language tag and treats
 * begin > end as "no entries for this letter".
 */
typedef struct {
    int		begin;		/* first index of the range */
    int		end;		/* last index of the range (inclusive) */
} FcLangCharSetRange;
36
e50b9ae7 37#include "../fc-lang/fclang.h"
3de8881e 38
d8d73958
KP
39struct _FcLangSet {
40 FcChar32 map[NUM_LANG_SET_MAP];
41 FcStrSet *extra;
42};
43
/* Turn on the bit for table index `id' in ls->map (32 indices per word). */
#define FcLangSetBitSet(ls, id)	((ls)->map[(id) >> 5] |= ((FcChar32) 1 << ((id) & 31)))
/* Evaluate to 1 when the bit for table index `id' is on in ls->map, else 0. */
#define FcLangSetBitGet(ls, id)	(((ls)->map[(id) >> 5] >> ((id) & 31)) & 1)
46
47FcLangSet *
48FcFreeTypeLangSet (const FcCharSet *charset,
e50b9ae7 49 const FcChar8 *exclusiveLang)
3de8881e 50{
e50b9ae7
KP
51 int i;
52 FcChar32 missing;
e50b9ae7 53 const FcCharSet *exclusiveCharset = 0;
d8d73958
KP
54 FcLangSet *ls;
55
3de8881e 56
e50b9ae7
KP
57 if (exclusiveLang)
58 exclusiveCharset = FcCharSetForLang (exclusiveLang);
d8d73958
KP
59 ls = FcLangSetCreate ();
60 if (!ls)
61 return 0;
3de8881e
KP
62 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
63 {
e50b9ae7
KP
64 /*
65 * Check for Han charsets to make fonts
66 * which advertise support for a single language
67 * not support other Han languages
68 */
69 if (exclusiveCharset &&
70 FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang) &&
71 fcLangCharSets[i].charset.leaves != exclusiveCharset->leaves)
72 {
73 continue;
74 }
3de8881e
KP
75 missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
76 if (FcDebug() & FC_DBG_SCANV)
c80d2ac4
KP
77 {
78 if (missing && missing < 10)
79 {
80 FcCharSet *missed = FcCharSetSubtract (&fcLangCharSets[i].charset,
81 charset);
82 FcChar32 ucs4;
83 FcChar32 map[FC_CHARSET_MAP_SIZE];
84 FcChar32 next;
85
86 printf ("\n%s(%d) ", fcLangCharSets[i].lang, missing);
87 printf ("{");
88 for (ucs4 = FcCharSetFirstPage (missed, map, &next);
89 ucs4 != FC_CHARSET_DONE;
90 ucs4 = FcCharSetNextPage (missed, map, &next))
91 {
92 int i, j;
93 for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
94 if (map[i])
95 {
96 for (j = 0; j < 32; j++)
97 if (map[i] & (1 << j))
98 printf (" %04x", ucs4 + i * 32 + j);
99 }
100 }
101 printf (" }\n\t");
102 FcCharSetDestroy (missed);
103 }
104 else
105 printf ("%s(%d) ", fcLangCharSets[i].lang, missing);
106 }
e50b9ae7 107 if (!missing)
d8d73958 108 FcLangSetBitSet (ls, i);
3de8881e 109 }
e50b9ae7 110
3de8881e
KP
111 if (FcDebug() & FC_DBG_SCANV)
112 printf ("\n");
d8d73958
KP
113
114
115 return ls;
3de8881e
KP
116}
117
/* True when `c' terminates the language part of a tag: end of string or '-'. */
#define FcLangEnd(c)	((c) == '\0' || (c) == '-')
3de8881e
KP
119
120FcLangResult
121FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
122{
3de8881e 123 FcChar8 c1, c2;
d8d73958
KP
124 FcLangResult result = FcLangDifferentLang;
125
3de8881e
KP
126 for (;;)
127 {
128 c1 = *s1++;
129 c2 = *s2++;
d8d73958 130
3de8881e
KP
131 c1 = FcToLower (c1);
132 c2 = FcToLower (c2);
133 if (c1 != c2)
d8d73958
KP
134 {
135 if (FcLangEnd (c1) && FcLangEnd (c2))
136 result = FcLangDifferentCountry;
137 return result;
138 }
139 else if (!c1)
140 return FcLangEqual;
141 else if (c1 == '-')
142 result = FcLangDifferentCountry;
3de8881e 143 }
3de8881e
KP
144}
145
793e946c 146/*
74a623e0 147 * Return FcTrue when super contains sub.
793e946c 148 *
74a623e0
KP
149 * super contains sub if super and sub have the same
150 * language and either the same country or one
151 * is missing the country
793e946c
KP
152 */
153
154static FcBool
74a623e0 155FcLangContains (const FcChar8 *super, const FcChar8 *sub)
793e946c
KP
156{
157 FcChar8 c1, c2;
158
159 for (;;)
160 {
74a623e0
KP
161 c1 = *super++;
162 c2 = *sub++;
793e946c
KP
163
164 c1 = FcToLower (c1);
165 c2 = FcToLower (c2);
166 if (c1 != c2)
167 {
74a623e0 168 /* see if super has a country while sub is mising one */
793e946c
KP
169 if (c1 == '-' && c2 == '\0')
170 return FcTrue;
74a623e0
KP
171 /* see if sub has a country while super is mising one */
172 if (c1 == '\0' && c2 == '-')
173 return FcTrue;
793e946c
KP
174 return FcFalse;
175 }
176 else if (!c1)
177 return FcTrue;
178 }
179}
180
3de8881e
KP
181const FcCharSet *
182FcCharSetForLang (const FcChar8 *lang)
183{
184 int i;
185 int country = -1;
186 for (i = 0; i < NUM_LANG_CHAR_SET; i++)
187 {
188 switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
189 case FcLangEqual:
190 return &fcLangCharSets[i].charset;
191 case FcLangDifferentCountry:
192 if (country == -1)
193 country = i;
194 default:
195 break;
196 }
197 }
198 if (country == -1)
199 return 0;
200 return &fcLangCharSets[i].charset;
201}
d8d73958
KP
202
203FcLangSet *
204FcLangSetCreate (void)
205{
206 FcLangSet *ls;
207
208 ls = malloc (sizeof (FcLangSet));
209 if (!ls)
210 return 0;
211 FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
212 memset (ls->map, '\0', sizeof (ls->map));
213 ls->extra = 0;
214 return ls;
215}
216
217void
218FcLangSetDestroy (FcLangSet *ls)
219{
220 if (ls->extra)
221 FcStrSetDestroy (ls->extra);
222 FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
223 free (ls);
224}
225
226FcLangSet *
227FcLangSetCopy (const FcLangSet *ls)
228{
229 FcLangSet *new;
230
231 new = FcLangSetCreate ();
232 if (!new)
233 goto bail0;
234 memcpy (new->map, ls->map, sizeof (new->map));
235 if (ls->extra)
236 {
237 FcStrList *list;
238 FcChar8 *extra;
239
240 new->extra = FcStrSetCreate ();
241 if (!new->extra)
242 goto bail1;
243
244 list = FcStrListCreate (ls->extra);
245 if (!list)
246 goto bail1;
247
248 while ((extra = FcStrListNext (list)))
249 if (!FcStrSetAdd (new->extra, extra))
250 {
251 FcStrListDone (list);
252 goto bail1;
253 }
254 FcStrListDone (list);
255 }
256 return new;
257bail1:
258 FcLangSetDestroy (new);
259bail0:
260 return 0;
261}
262
263static int
264FcLangSetIndex (const FcChar8 *lang)
265{
12d49d3c
CW
266 int low, high, mid = 0;
267 int cmp = 0;
793e946c 268 FcChar8 firstChar = FcToLower(lang[0]);
947afeb5 269 FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
793e946c
KP
270
271 if (firstChar < 'a')
272 {
273 low = 0;
274 high = fcLangCharSetRanges[0].begin;
275 }
276 else if(firstChar > 'z')
277 {
278 low = fcLangCharSetRanges[25].begin;
279 high = NUM_LANG_CHAR_SET - 1;
280 }
281 else
282 {
283 low = fcLangCharSetRanges[firstChar - 'a'].begin;
284 high = fcLangCharSetRanges[firstChar - 'a'].end;
285 /* no matches */
286 if (low > high)
287 return -low; /* next entry after where it would be */
288 }
d8d73958 289
d8d73958
KP
290 while (low <= high)
291 {
292 mid = (high + low) >> 1;
793e946c
KP
293 if(fcLangCharSets[mid].lang[0] != firstChar)
294 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
295 else
296 { /* fast path for resolving 2-letter languages (by far the most common) after
297 * finding the first char (probably already true because of the hash table) */
947afeb5
KP
298 cmp = fcLangCharSets[mid].lang[1] - secondChar;
299 if (cmp == 0 &&
300 (fcLangCharSets[mid].lang[2] != '\0' ||
301 lang[2] != '\0'))
793e946c 302 {
947afeb5
KP
303 cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2,
304 lang+2);
793e946c 305 }
793e946c
KP
306 }
307 if (cmp == 0)
d8d73958
KP
308 return mid;
309 if (cmp < 0)
310 low = mid + 1;
311 else
312 high = mid - 1;
313 }
314 if (cmp < 0)
315 mid++;
316 return -(mid + 1);
317}
318
319FcBool
320FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
321{
322 int id;
323
324 id = FcLangSetIndex (lang);
325 if (id >= 0)
326 {
327 FcLangSetBitSet (ls, id);
328 return FcTrue;
329 }
330 if (!ls->extra)
331 {
332 ls->extra = FcStrSetCreate ();
333 if (!ls->extra)
334 return FcFalse;
335 }
336 return FcStrSetAdd (ls->extra, lang);
337}
338
339FcLangResult
340FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
341{
342 int id;
343 FcLangResult best, r;
344 int i;
345
346 id = FcLangSetIndex (lang);
2458a6d8
KP
347 if (id < 0)
348 id = -id - 1;
349 else if (FcLangSetBitGet (ls, id))
d8d73958 350 return FcLangEqual;
d8d73958
KP
351 best = FcLangDifferentLang;
352 for (i = id - 1; i >= 0; i--)
353 {
354 r = FcLangCompare (lang, fcLangCharSets[i].lang);
355 if (r == FcLangDifferentLang)
356 break;
357 if (FcLangSetBitGet (ls, i) && r < best)
358 best = r;
359 }
360 for (i = id; i < NUM_LANG_CHAR_SET; i++)
361 {
362 r = FcLangCompare (lang, fcLangCharSets[i].lang);
363 if (r == FcLangDifferentLang)
364 break;
365 if (FcLangSetBitGet (ls, i) && r < best)
366 best = r;
367 }
368 if (ls->extra)
369 {
370 FcStrList *list = FcStrListCreate (ls->extra);
371 FcChar8 *extra;
372 FcLangResult r;
373
374 if (list)
375 {
376 while (best > FcLangEqual && (extra = FcStrListNext (list)))
377 {
378 r = FcLangCompare (lang, extra);
379 if (r < best)
380 best = r;
381 }
382 FcStrListDone (list);
383 }
384 }
385 return best;
386}
387
388static FcLangResult
389FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
390{
391 FcStrList *list = FcStrListCreate (set);
392 FcLangResult r, best = FcLangDifferentLang;
393 FcChar8 *extra;
394
395 if (list)
396 {
397 while (best > FcLangEqual && (extra = FcStrListNext (list)))
398 {
399 r = FcLangSetHasLang (ls, extra);
400 if (r < best)
401 best = r;
402 }
403 FcStrListDone (list);
404 }
405 return best;
406}
407
408FcLangResult
409FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
410{
234397b4 411 int i, j;
d8d73958
KP
412 FcLangResult best, r;
413
414 for (i = 0; i < NUM_LANG_SET_MAP; i++)
415 if (lsa->map[i] & lsb->map[i])
416 return FcLangEqual;
417 best = FcLangDifferentLang;
234397b4
DD
418 for (j = 0; j < NUM_COUNTRY_SET; j++)
419 for (i = 0; i < NUM_LANG_SET_MAP; i++)
420 if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
421 (lsb->map[i] & fcLangCountrySets[j][i]))
422 {
423 best = FcLangDifferentCountry;
424 break;
425 }
d8d73958
KP
426 if (lsa->extra)
427 {
428 r = FcLangSetCompareStrSet (lsb, lsa->extra);
429 if (r < best)
430 best = r;
431 }
432 if (best > FcLangEqual && lsb->extra)
433 {
434 r = FcLangSetCompareStrSet (lsa, lsb->extra);
435 if (r < best)
436 best = r;
437 }
438 return best;
439}
440
441/*
442 * Used in computing values -- mustn't allocate any storage
443 */
444FcLangSet *
445FcLangSetPromote (const FcChar8 *lang)
446{
447 static FcLangSet ls;
448 static FcStrSet strs;
449 static FcChar8 *str;
450 int id;
451
452 memset (ls.map, '\0', sizeof (ls.map));
453 ls.extra = 0;
454 id = FcLangSetIndex (lang);
455 if (id > 0)
456 {
457 FcLangSetBitSet (&ls, id);
458 }
459 else
460 {
461 ls.extra = &strs;
462 strs.num = 1;
463 strs.size = 1;
464 strs.strs = &str;
47d4f950 465 strs.ref = 1;
d8d73958
KP
466 str = (FcChar8 *) lang;
467 }
468 return &ls;
469}
470
471FcChar32
472FcLangSetHash (const FcLangSet *ls)
473{
474 FcChar32 h = 0;
475 int i;
476
477 for (i = 0; i < NUM_LANG_SET_MAP; i++)
478 h ^= ls->map[i];
479 if (ls->extra)
480 h ^= ls->extra->num;
481 return h;
482}
483
484FcLangSet *
485FcNameParseLangSet (const FcChar8 *string)
486{
793e946c
KP
487 FcChar8 lang[32],c;
488 int i;
d8d73958
KP
489 FcLangSet *ls;
490
491 ls = FcLangSetCreate ();
492 if (!ls)
493 goto bail0;
494
793e946c 495 for(;;)
d8d73958 496 {
793e946c 497 for(i = 0; i < 31;i++)
d8d73958 498 {
793e946c
KP
499 c = *string++;
500 if(c == '\0' || c == '|')
501 break; /* end of this code */
502 lang[i] = c;
d8d73958 503 }
793e946c
KP
504 lang[i] = '\0';
505 if (!FcLangSetAdd (ls, lang))
506 goto bail1;
507 if(c == '\0')
508 break;
d8d73958
KP
509 }
510 return ls;
511bail1:
512 FcLangSetDestroy (ls);
513bail0:
514 return 0;
515}
516
517FcBool
518FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
519{
520 int i, bit;
521 FcChar32 bits;
522 FcBool first = FcTrue;
523
524 for (i = 0; i < NUM_LANG_SET_MAP; i++)
525 {
526 if ((bits = ls->map[i]))
527 {
528 for (bit = 0; bit <= 31; bit++)
529 if (bits & (1 << bit))
530 {
531 int id = (i << 5) | bit;
532 if (!first)
533 if (!FcStrBufChar (buf, '|'))
534 return FcFalse;
535 if (!FcStrBufString (buf, fcLangCharSets[id].lang))
536 return FcFalse;
537 first = FcFalse;
538 }
539 }
540 }
541 if (ls->extra)
542 {
543 FcStrList *list = FcStrListCreate (ls->extra);
544 FcChar8 *extra;
545
546 if (!list)
547 return FcFalse;
548 while ((extra = FcStrListNext (list)))
549 {
550 if (!first)
551 if (!FcStrBufChar (buf, '|'))
552 return FcFalse;
793e946c 553 if (!FcStrBufString (buf, extra))
d8d73958
KP
554 return FcFalse;
555 first = FcFalse;
556 }
557 }
558 return FcTrue;
559}
560
561FcBool
562FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
563{
564 int i;
565
566 for (i = 0; i < NUM_LANG_SET_MAP; i++)
567 {
568 if (lsa->map[i] != lsb->map[i])
569 return FcFalse;
570 }
571 if (!lsa->extra && !lsb->extra)
572 return FcTrue;
573 if (lsa->extra && lsb->extra)
574 return FcStrSetEqual (lsa->extra, lsb->extra);
575 return FcFalse;
576}
793e946c
KP
577
578static FcBool
579FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
580{
581 int id;
793e946c
KP
582 int i;
583
584 id = FcLangSetIndex (lang);
585 if (id < 0)
586 id = -id - 1;
587 else if (FcLangSetBitGet (ls, id))
588 return FcTrue;
589 /*
590 * search up and down among equal languages for a match
591 */
592 for (i = id - 1; i >= 0; i--)
593 {
594 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
595 break;
596 if (FcLangSetBitGet (ls, i) &&
597 FcLangContains (fcLangCharSets[i].lang, lang))
598 return FcTrue;
599 }
600 for (i = id; i < NUM_LANG_CHAR_SET; i++)
601 {
602 if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
603 break;
604 if (FcLangSetBitGet (ls, i) &&
605 FcLangContains (fcLangCharSets[i].lang, lang))
606 return FcTrue;
607 }
608 if (ls->extra)
609 {
610 FcStrList *list = FcStrListCreate (ls->extra);
611 FcChar8 *extra;
793e946c
KP
612
613 if (list)
614 {
615 while ((extra = FcStrListNext (list)))
616 {
617 if (FcLangContains (extra, lang))
618 break;
619 }
620 FcStrListDone (list);
621 if (extra)
622 return FcTrue;
623 }
624 }
625 return FcFalse;
626}
627
628/*
629 * return FcTrue if lsa contains every language in lsb
630 */
631FcBool
632FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
633{
634 int i, j;
635 FcChar32 missing;
636
637 if (FcDebug() & FC_DBG_MATCHV)
638 {
639 printf ("FcLangSet "); FcLangSetPrint (lsa);
640 printf (" contains "); FcLangSetPrint (lsb);
641 printf ("\n");
642 }
643 /*
644 * check bitmaps for missing language support
645 */
646 for (i = 0; i < NUM_LANG_SET_MAP; i++)
647 {
648 missing = lsb->map[i] & ~lsa->map[i];
649 if (missing)
650 {
651 for (j = 0; j < 32; j++)
652 if (missing & (1 << j))
653 {
654 if (!FcLangSetContainsLang (lsa,
655 fcLangCharSets[i*32 + j].lang))
656 {
657 if (FcDebug() & FC_DBG_MATCHV)
658 printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
659 return FcFalse;
660 }
661 }
662 }
663 }
664 if (lsb->extra)
665 {
666 FcStrList *list = FcStrListCreate (lsb->extra);
667 FcChar8 *extra;
668
669 if (list)
670 {
671 while ((extra = FcStrListNext (list)))
672 {
673 if (!FcLangSetContainsLang (lsa, extra))
674 {
675 if (FcDebug() & FC_DBG_MATCHV)
676 printf ("\tMissing string %s\n", extra);
677 break;
678 }
679 }
680 FcStrListDone (list);
681 if (extra)
682 return FcFalse;
683 }
684 }
685 return FcTrue;
686}