/*
- * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.4 2002/05/31 23:21:25 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.8 2002/07/09 02:28:29 keithp Exp $
*
* Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc.
*
#include <freetype/freetype.h>
#include <freetype/internal/ftobjs.h>
#include <freetype/tttables.h>
-#include "fcknownsets.h"
+#include <freetype/ftsnames.h>
+#include <freetype/ttnameid.h>
+
+/*
+ * Keep Han languages separated by eliminating languages
+ * that the codePageRange bits say aren't supported
+ */
static const struct {
- int bit;
- FcChar8 *name;
+ int bit; /* bit position; presumably tested against the font's code page range bits -- confirm in FcFreeTypeQuery */
+ const FcChar8 *lang; /* language tag for that bit; compared in FcFreeTypeIsExclusiveLang and stored as exclusiveLang */
} FcCodePageRange[] = {
- { 0, (FcChar8 *) FC_LANG_LATIN_1 },
- { 1, (FcChar8 *) FC_LANG_LATIN_2 },
- { 2, (FcChar8 *) FC_LANG_CYRILLIC },
- { 3, (FcChar8 *) FC_LANG_GREEK },
- { 4, (FcChar8 *) FC_LANG_TURKISH },
- { 5, (FcChar8 *) FC_LANG_HEBREW },
- { 6, (FcChar8 *) FC_LANG_ARABIC },
- { 7, (FcChar8 *) FC_LANG_WINDOWS_BALTIC },
- { 8, (FcChar8 *) FC_LANG_VIETNAMESE },
-/* 9-15 reserved for Alternate ANSI */
- { 16, (FcChar8 *) FC_LANG_THAI },
- { 17, (FcChar8 *) FC_LANG_JAPANESE },
- { 18, (FcChar8 *) FC_LANG_SIMPLIFIED_CHINESE },
- { 19, (FcChar8 *) FC_LANG_KOREAN_WANSUNG },
- { 20, (FcChar8 *) FC_LANG_TRADITIONAL_CHINESE },
- { 21, (FcChar8 *) FC_LANG_KOREAN_JOHAB },
-/* 22-28 reserved for Alternate ANSI & OEM */
- { 29, (FcChar8 *) FC_LANG_MACINTOSH },
- { 30, (FcChar8 *) FC_LANG_OEM },
- { 31, (FcChar8 *) FC_LANG_SYMBOL },
-/* 32-47 reserved for OEM */
- { 48, (FcChar8 *) FC_LANG_IBM_GREEK },
- { 49, (FcChar8 *) FC_LANG_MSDOS_RUSSIAN },
- { 50, (FcChar8 *) FC_LANG_MSDOS_NORDIC },
- { 51, (FcChar8 *) FC_LANG_ARABIC_864 },
- { 52, (FcChar8 *) FC_LANG_MSDOS_CANADIAN_FRENCH },
- { 53, (FcChar8 *) FC_LANG_HEBREW_862 },
- { 54, (FcChar8 *) FC_LANG_MSDOS_ICELANDIC },
- { 55, (FcChar8 *) FC_LANG_MSDOS_PORTUGUESE },
- { 56, (FcChar8 *) FC_LANG_IBM_TURKISH },
- { 57, (FcChar8 *) FC_LANG_IBM_CYRILLIC },
- { 58, (FcChar8 *) FC_LANG_LATIN_2 },
- { 59, (FcChar8 *) FC_LANG_MSDOS_BALTIC },
- { 60, (FcChar8 *) FC_LANG_GREEK_437_G },
- { 61, (FcChar8 *) FC_LANG_ARABIC_ASMO_708 },
- { 62, (FcChar8 *) FC_LANG_WE_LATIN_1 },
- { 63, (FcChar8 *) FC_LANG_US },
+ { 17, (const FcChar8 *) "ja" },
+ { 18, (const FcChar8 *) "zh-cn" },
+ { 19, (const FcChar8 *) "ko" },
+ { 20, (const FcChar8 *) "zh-tw" },
};
#define NUM_CODE_PAGE_RANGE (sizeof FcCodePageRange / sizeof FcCodePageRange[0]) /* number of entries in FcCodePageRange */
-static const struct {
- const FcCharSet *set;
- FcChar32 size;
- FcChar32 missing_tolerance;
- FcChar8 *name;
-} FcCodePageSet[] = {
- {
- &fcCharSet_Latin1_1252,
- fcCharSet_Latin1_1252_size,
- 8,
- (FcChar8 *) FC_LANG_LATIN_1,
- },
- {
- &fcCharSet_Latin2_1250,
- fcCharSet_Latin2_1250_size,
- 8,
- (FcChar8 *) FC_LANG_LATIN_2,
- },
- {
- &fcCharSet_Cyrillic_1251,
- fcCharSet_Cyrillic_1251_size,
- 16,
- (FcChar8 *) FC_LANG_CYRILLIC,
- },
- {
- &fcCharSet_Greek_1253,
- fcCharSet_Greek_1253_size,
- 8,
- (FcChar8 *) FC_LANG_GREEK,
- },
- {
- &fcCharSet_Turkish_1254,
- fcCharSet_Turkish_1254_size,
- 16,
- (FcChar8 *) FC_LANG_TURKISH,
- },
- {
- &fcCharSet_Hebrew_1255,
- fcCharSet_Hebrew_1255_size,
- 16,
- (FcChar8 *) FC_LANG_HEBREW,
- },
- {
- &fcCharSet_Arabic_1256,
- fcCharSet_Arabic_1256_size,
- 16,
- (FcChar8 *) FC_LANG_ARABIC,
- },
- {
- &fcCharSet_Windows_Baltic_1257,
- fcCharSet_Windows_Baltic_1257_size,
- 16,
- (FcChar8 *) FC_LANG_WINDOWS_BALTIC,
- },
- {
- &fcCharSet_Thai_874,
- fcCharSet_Thai_874_size,
- 16,
- (FcChar8 *) FC_LANG_THAI,
- },
- {
- &fcCharSet_Japanese_932,
- fcCharSet_Japanese_932_size,
- 500,
- (FcChar8 *) FC_LANG_JAPANESE,
- },
- {
- &fcCharSet_SimplifiedChinese_936,
- fcCharSet_SimplifiedChinese_936_size,
- 500,
- (FcChar8 *) FC_LANG_SIMPLIFIED_CHINESE,
- },
- {
- &fcCharSet_Korean_949,
- fcCharSet_Korean_949_size,
- 500,
- (FcChar8 *) FC_LANG_KOREAN_WANSUNG,
- },
+FcBool /* FcTrue when lang matches any language listed in FcCodePageRange, FcFalse otherwise */
+FcFreeTypeIsExclusiveLang (const FcChar8 *lang)
+{
+ int i;
+
+ for (i = 0; i < NUM_CODE_PAGE_RANGE; i++) /* linear scan over every table entry */
{
- &fcCharSet_TraditionalChinese_950,
- fcCharSet_TraditionalChinese_950_size,
- 500,
- (FcChar8 *) FC_LANG_TRADITIONAL_CHINESE,
- },
-};
+ if (FcLangCompare (lang, FcCodePageRange[i].lang) != FcLangDifferentLang) /* any result other than "different language" counts as a match */
+ return FcTrue;
+ }
+ return FcFalse;
+}
+
+#define FC_NAME_PRIO_LANG 0x0f00 /* mask for the language field of a name priority; candidates are ranked by comparing whole priority values */
+#define FC_NAME_PRIO_LANG_ENGLISH 0x0200 /* name record is tagged English, or is assumed English (Apple Unicode platform) */
+#define FC_NAME_PRIO_LANG_LATIN 0x0100 /* no language bits set, but the converted text passes FcUtf8IsLatin */
+#define FC_NAME_PRIO_LANG_NONE 0x0000 /* no language preference established */
+
+#define FC_NAME_PRIO_ENC 0x00f0 /* bit field covering the encoding values below */
+#define FC_NAME_PRIO_ENC_UNICODE 0x0010 /* name record was stored as UTF-16 */
+#define FC_NAME_PRIO_ENC_NONE 0x0000 /* any other accepted encoding (Apple Roman, Latin-1) */
+
+#define FC_NAME_PRIO_NAME 0x000f /* bit field covering the name-kind values below */
+#define FC_NAME_PRIO_NAME_FAMILY 0x0002 /* set for TT_NAME_ID_FONT_FAMILY records */
+#define FC_NAME_PRIO_NAME_PS 0x0001 /* set for TT_NAME_ID_PS_NAME records */
+#define FC_NAME_PRIO_NAME_NONE 0x0000 /* nothing added, e.g. for subfamily (style) records */
-#define NUM_CODE_PAGE_SET (sizeof FcCodePageSet / sizeof FcCodePageSet[0])
+static FcBool /* FcTrue when ucs4 lies in one of the 256-codepoint pages accepted below */
+FcUcs4IsLatin (FcChar32 ucs4)
+{
+ FcChar32 page = ucs4 >> 8; /* drop the low 8 bits, leaving the index of the 256-codepoint page */
+
+ if (page <= 2) /* U+0000..U+02FF */
+ return FcTrue;
+ if (page == 0x1e) /* U+1E00..U+1EFF */
+ return FcTrue;
+ if (0x20 <= page && page <= 0x23) /* U+2000..U+23FF */
+ return FcTrue;
+ if (page == 0xfb) /* U+FB00..U+FBFF */
+ return FcTrue;
+ if (page == 0xff) /* U+FF00..U+FFFF */
+ return FcTrue;
+ return FcFalse; /* every other page is treated as non-Latin */
+}
+
+static FcBool /* FcTrue when all len bytes of str decode and every code point passes FcUcs4IsLatin */
+FcUtf8IsLatin (FcChar8 *str, int len)
+{
+ while (len) /* len == 0 (empty string) skips the loop and yields FcTrue */
+ {
+ FcChar32 ucs4;
+ int clen = FcUtf8ToUcs4 (str, &ucs4, len); /* used below as the number of bytes consumed by this code point */
+ if (clen <= 0) /* a non-positive result is treated as a decode failure: reject the string */
+ return FcFalse;
+ if (!FcUcs4IsLatin (ucs4)) /* a single non-Latin code point rejects the whole string */
+ return FcFalse;
+ len -= clen; /* step past the bytes just decoded */
+ str += clen;
+ }
+ return FcTrue;
+}
FcPattern *
FcFreeTypeQuery (const FcChar8 *file,
int i;
FcCharSet *cs;
FT_Library ftLibrary;
- const FcChar8 *family;
+ FcChar8 *family;
+ FcChar8 *style;
TT_OS2 *os2;
- FcBool hasLang = FcFalse;
- FcChar32 codepoints;
- FcBool matchCodePage[NUM_CODE_PAGE_SET];
+ const FcChar8 *exclusiveLang = 0;
+ FT_SfntName sname;
+ FT_UInt snamei, snamec;
+ FcBool family_allocated = FcFalse;
+ FcBool style_allocated = FcFalse;
+ int family_prio = 0;
+ int style_prio = 0;
if (FT_Init_FreeType (&ftLibrary))
return 0;
if (!FcPatternAddInteger (pat, FC_WEIGHT, weight))
goto bail1;
- family = (FcChar8 *) face->family_name;
+ /*
+ * Grub through the name table looking for family
+ * and style names. FreeType makes quite a hash
+ * of them
+ */
+ family = 0;
+ style = 0;
+ snamec = FT_Get_Sfnt_Name_Count (face);
+ for (snamei = 0; snamei < snamec; snamei++)
+ {
+ FcChar8 *utf8;
+ int len;
+ int wchar;
+ FcChar8 *src;
+ int src_len;
+ FcChar8 *u8;
+ FcChar32 ucs4;
+ int ilen, olen;
+ int prio = 0;
+
+ const FcCharMap *map;
+ enum {
+ FcNameEncodingUtf16,
+ FcNameEncodingAppleRoman,
+ FcNameEncodingLatin1
+ } encoding;
+
+
+ if (FT_Get_Sfnt_Name (face, snamei, &sname) != 0)
+ break;
+
+ /*
+ * Look for Unicode strings
+ */
+ switch (sname.platform_id) {
+ case TT_PLATFORM_APPLE_UNICODE:
+ /*
+ * All APPLE_UNICODE encodings are Utf16 BE
+ *
+ * Because there's no language id for Unicode,
+ * assume it's English
+ */
+ prio |= FC_NAME_PRIO_LANG_ENGLISH;
+ prio |= FC_NAME_PRIO_ENC_UNICODE;
+ encoding = FcNameEncodingUtf16;
+ break;
+ case TT_PLATFORM_MACINTOSH:
+ switch (sname.encoding_id) {
+ case TT_MAC_ID_ROMAN:
+ encoding = FcNameEncodingAppleRoman;
+ break;
+ default:
+ continue;
+ }
+ switch (sname.language_id) {
+ case TT_MAC_LANGID_ENGLISH:
+ prio |= FC_NAME_PRIO_LANG_ENGLISH;
+ break;
+ default:
+ /*
+ * Sometimes Microsoft language ids
+ * end up in the macintosh table. This
+ * is often accompanied by data in
+ * some mystic encoding. Ignore these names
+ */
+ if (sname.language_id >= 0x100)
+ continue;
+ break;
+ }
+ break;
+ case TT_PLATFORM_MICROSOFT:
+ switch (sname.encoding_id) {
+ case TT_MS_ID_UNICODE_CS:
+ encoding = FcNameEncodingUtf16;
+ prio |= FC_NAME_PRIO_ENC_UNICODE;
+ break;
+ default:
+ continue;
+ }
+ switch (sname.language_id & 0xff) {
+ case 0x09:
+ prio |= FC_NAME_PRIO_LANG_ENGLISH;
+ break;
+ default:
+ break;
+ }
+ break;
+ case TT_PLATFORM_ISO:
+ switch (sname.encoding_id) {
+ case TT_ISO_ID_10646:
+ encoding = FcNameEncodingUtf16;
+ prio |= FC_NAME_PRIO_ENC_UNICODE;
+ break;
+ case TT_ISO_ID_7BIT_ASCII:
+ case TT_ISO_ID_8859_1:
+ encoding = FcNameEncodingLatin1;
+ break;
+ default:
+ continue;
+ }
+ break;
+ default:
+ continue;
+ }
+
+ /*
+ * Look for family and style names
+ */
+ switch (sname.name_id) {
+ case TT_NAME_ID_FONT_FAMILY:
+ prio |= FC_NAME_PRIO_NAME_FAMILY;
+ break;
+ case TT_NAME_ID_PS_NAME:
+ prio |= FC_NAME_PRIO_NAME_PS;
+ break;
+ case TT_NAME_ID_FONT_SUBFAMILY:
+ break;
+ default:
+ continue;
+ }
+
+ src = (FcChar8 *) sname.string;
+ src_len = sname.string_len;
+
+ switch (encoding) {
+ case FcNameEncodingUtf16:
+ /*
+ * Convert Utf16 to Utf8
+ */
+
+ if (!FcUtf16Len (src, FcEndianBig, src_len, &len, &wchar))
+ continue;
+
+ /*
+ * Allocate plenty of space
+ */
+ utf8 = malloc (len * FC_UTF8_MAX_LEN + 1);
+ if (!utf8)
+ continue;
+
+ u8 = utf8;
+
+ while ((ilen = FcUtf16ToUcs4 (src, FcEndianBig, &ucs4, src_len)) > 0)
+ {
+ src_len -= ilen;
+ src += ilen;
+ olen = FcUcs4ToUtf8 (ucs4, u8);
+ u8 += olen;
+ }
+ *u8 = '\0';
+ break;
+ case FcNameEncodingLatin1:
+ /*
+ * Convert Latin1 to Utf8
+ */
+ utf8 = malloc (src_len * 2 + 1);
+ if (!utf8)
+ continue;
+
+ u8 = utf8;
+ while (src_len > 0)
+ {
+ ucs4 = *src++;
+ src_len--;
+ olen = FcUcs4ToUtf8 (ucs4, u8);
+ u8 += olen;
+ }
+ *u8 = '\0';
+ break;
+ case FcNameEncodingAppleRoman:
+ /*
+ * Convert AppleRoman to Utf8
+ */
+ map = FcFreeTypeGetPrivateMap (ft_encoding_apple_roman);
+ if (!map)
+ continue;
+
+ utf8 = malloc (src_len * 3 + 1);
+ if (!utf8)
+ continue;
+
+ u8 = utf8;
+ while (src_len > 0)
+ {
+ ucs4 = FcFreeTypePrivateToUcs4 (*src++, map);
+ src_len--;
+ olen = FcUcs4ToUtf8 (ucs4, u8);
+ u8 += olen;
+ }
+ *u8 = '\0';
+ break;
+ default:
+ continue;
+ }
+ if ((prio & FC_NAME_PRIO_LANG) == FC_NAME_PRIO_LANG_NONE)
+ if (FcUtf8IsLatin (utf8, strlen ((char *) utf8)))
+ prio |= FC_NAME_PRIO_LANG_LATIN;
+
+ if (FcDebug () & FC_DBG_SCANV)
+ printf ("\nfound name (name %d platform %d encoding %d language 0x%x prio 0x%x) %s\n",
+ sname.name_id, sname.platform_id,
+ sname.encoding_id, sname.language_id,
+ prio, utf8);
+
+ switch (sname.name_id) {
+ case TT_NAME_ID_FONT_FAMILY:
+ case TT_NAME_ID_PS_NAME:
+ if (!family || prio > family_prio)
+ {
+ if (family)
+ free (family);
+ family = utf8;
+ utf8 = 0;
+ family_allocated = FcTrue;
+ family_prio = prio;
+ }
+ break;
+ case TT_NAME_ID_FONT_SUBFAMILY:
+ if (!style || prio > style_prio)
+ {
+ if (style)
+ free (style);
+ style = utf8;
+ utf8 = 0;
+ style_allocated = FcTrue;
+ style_prio = prio;
+ }
+ break;
+ }
+ if (utf8)
+ free (utf8);
+ }
+
+ if (!family)
+ family = (FcChar8 *) face->family_name;
+
+ if (!style)
+ style = (FcChar8 *) face->style_name;
+
if (!family)
{
- family = (FcChar8 *) strrchr ((char *) file, '/');
- if (family)
- family++;
+ FcChar8 *start, *end;
+
+ start = (FcChar8 *) strrchr ((char *) file, '/');
+ if (start)
+ start++;
else
- family = file;
+ start = (FcChar8 *) file;
+ end = (FcChar8 *) strrchr ((char *) start, '.');
+ if (!end)
+ end = start + strlen ((char *) start);
+ family = malloc (end - start + 1);
+ strncpy ((char *) family, (char *) start, end - start);
+ family[end - start] = '\0';
+ family_allocated = FcTrue;
}
+
+ if (FcDebug() & FC_DBG_SCAN)
+ printf ("\"%s\" \"%s\" ", family, style ? style : (FcChar8 *) "<none>");
+
if (!FcPatternAddString (pat, FC_FAMILY, family))
+ {
+ if (family_allocated)
+ free (family);
+ if (style_allocated)
+ free (style);
goto bail1;
+ }
+
+ if (family_allocated)
+ free (family);
- if (face->style_name)
+ if (style)
{
- if (!FcPatternAddString (pat, FC_STYLE, (FcChar8 *) face->style_name))
+ if (!FcPatternAddString (pat, FC_STYLE, style))
+ {
+ if (style_allocated)
+ free (style);
goto bail1;
+ }
+ if (style_allocated)
+ free (style);
}
if (!FcPatternAddString (pat, FC_FILE, file))
}
if (bits & (1 << bit))
{
- if (!FcPatternAddString (pat, FC_LANG,
- FcCodePageRange[i].name))
- goto bail1;
- hasLang = FcTrue;
+ /*
+ * If the font advertises support for multiple
+ * "exclusive" languages, then include support
+ * for any language found to have coverage
+ */
+ if (exclusiveLang)
+ {
+ exclusiveLang = 0;
+ break;
+ }
+ exclusiveLang = FcCodePageRange[i].lang;
}
}
}
if (!cs)
goto bail1;
- codepoints = FcCharSetCount (cs);
/*
* Skip over PCF fonts that have no encoded characters; they're
* usually just Unicode fonts transcoded to some legacy encoding
*/
- if (codepoints == 0)
+ if (FcCharSetCount (cs) == 0)
{
if (!strcmp(FT_MODULE_CLASS(&face->driver->root)->module_name, "pcf"))
goto bail2;
if (!FcPatternAddCharSet (pat, FC_CHARSET, cs))
goto bail2;
- if (!hasLang || (FcDebug() & FC_DBG_SCANV))
- {
- /*
- * Use the Unicode coverage to set lang if it wasn't
- * set from the OS/2 tables
- */
-
- if (FcDebug() & FC_DBG_SCANV)
- printf ("%s: ", family);
- for (i = 0; i < NUM_CODE_PAGE_SET; i++)
- {
- FcChar32 missing;
-
- missing = FcCharSetSubtractCount (FcCodePageSet[i].set,
- cs);
- matchCodePage[i] = missing <= FcCodePageSet[i].missing_tolerance;
- if (FcDebug() & FC_DBG_SCANV)
- printf ("%s(%d) ", FcCodePageSet[i].name, missing);
- }
- if (FcDebug() & FC_DBG_SCANV)
- printf ("\n");
+ if (!FcFreeTypeSetLang (pat, cs, exclusiveLang))
+ goto bail2;
- if (hasLang)
- {
- FcChar8 *lang;
- int j;
- /*
- * Validate the lang selections
- */
- for (i = 0; FcPatternGetString (pat, FC_LANG, i, &lang) == FcResultMatch; i++)
- {
- for (j = 0; j < NUM_CODE_PAGE_SET; j++)
- if (!strcmp ((char *) FcCodePageSet[j].name,
- (char *) lang))
- {
- if (!matchCodePage[j])
- printf ("%s(%s): missing lang %s\n", file, family, lang);
- }
- }
- for (j = 0; j < NUM_CODE_PAGE_SET; j++)
- {
- if (!matchCodePage[j])
- continue;
- lang = 0;
- for (i = 0; FcPatternGetString (pat, FC_LANG, i, &lang) == FcResultMatch; i++)
- {
- if (!strcmp ((char *) FcCodePageSet[j].name, (char *) lang))
- break;
- lang = 0;
- }
- if (!lang)
- printf ("%s(%s): extra lang %s\n", file, family, FcCodePageSet[j].name);
- }
- }
- else
- {
- /*
- * None provided, use the charset derived ones
- */
- for (i = 0; i < NUM_CODE_PAGE_SET; i++)
- if (matchCodePage[i])
- {
- if (!FcPatternAddString (pat, FC_LANG,
- FcCodePageRange[i].name))
- goto bail1;
- hasLang = TRUE;
- }
- }
- }
- if (!hasLang)
- if (!FcPatternAddString (pat, FC_LANG, (FcChar8 *) FC_LANG_UNKNOWN))
- goto bail1;
-
/*
* Drop our reference to the charset
*/