From: Keith Packard Date: Sat, 13 Jul 2002 05:43:25 +0000 (+0000) Subject: Add some Utf16 support, extract font family and style names from name table X-Git-Tag: fcpackage_rc1~5 X-Git-Url: https://git.wh0rd.org/?p=fontconfig.git;a=commitdiff_plain;h=69937bd9416eb3fbefd55b9fa6445d0fe4b4f7f3 Add some Utf16 support, extract font family and style names from name table for sfnt fonts --- diff --git a/fontconfig/fontconfig.h b/fontconfig/fontconfig.h index 060ba84..1939e46 100644 --- a/fontconfig/fontconfig.h +++ b/fontconfig/fontconfig.h @@ -1,5 +1,5 @@ /* - * $XFree86: xc/lib/fontconfig/fontconfig/fontconfig.h,v 1.20 2002/07/06 23:47:43 keithp Exp $ + * $XFree86: xc/lib/fontconfig/fontconfig/fontconfig.h,v 1.21 2002/07/11 02:47:50 keithp Exp $ * * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc. * @@ -187,6 +187,8 @@ typedef struct _FcAtomic FcAtomic; #define _FCFUNCPROTOEND #endif +typedef enum { FcEndianBig, FcEndianLittle } FcEndian; + typedef struct _FcConfig FcConfig; typedef struct _FcFileCache FcFileCache; @@ -651,6 +653,25 @@ FcUtf8Len (FcChar8 *string, int *nchar, int *wchar); +#define FC_UTF8_MAX_LEN 6 + +int +FcUcs4ToUtf8 (FcChar32 ucs4, + FcChar8 dest[FC_UTF8_MAX_LEN]); + +int +FcUtf16ToUcs4 (FcChar8 *src_orig, + FcEndian endian, + FcChar32 *dst, + int len); /* in bytes */ + +FcBool +FcUtf16Len (FcChar8 *string, + FcEndian endian, + int len, /* in bytes */ + int *nchar, + int *wchar); + FcChar8 * FcStrDirname (const FcChar8 *file); diff --git a/src/fccharset.c b/src/fccharset.c index cbd9cc8..c455efb 100644 --- a/src/fccharset.c +++ b/src/fccharset.c @@ -1,5 +1,5 @@ /* - * $XFree86: xc/lib/fontconfig/src/fccharset.c,v 1.15 2002/07/06 23:47:43 keithp Exp $ + * $XFree86: xc/lib/fontconfig/src/fccharset.c,v 1.16 2002/07/09 02:28:29 keithp Exp $ * * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc. 
*
@@ -1173,10 +1173,10 @@ typedef struct _FcCharEnt {
     unsigned char encode;
 } FcCharEnt;
 
-typedef struct _FcCharMap {
+struct _FcCharMap {
     const FcCharEnt *ent;
     int nent;
-} FcCharMap;
+};
 
 typedef struct _FcFontDecode {
     FT_Encoding encoding;
@@ -1625,8 +1625,8 @@ static const FcFontDecode fcFontDecoders[] = {
 
 #define NUM_DECODE (sizeof (fcFontDecoders) / sizeof (fcFontDecoders[0]))
 
-static FT_ULong
-FcFreeTypeMapChar (FcChar32 ucs4, const FcCharMap *map)
+FcChar32
+FcFreeTypeUcs4ToPrivate (FcChar32 ucs4, const FcCharMap *map)
 {
     int low, high, mid;
     FcChar16 bmp;
@@ -1649,6 +1649,28 @@ FcFreeTypeMapChar (FcChar32 ucs4, const FcCharMap *map)
     return ~0;
 }
 
+FcChar32
+FcFreeTypePrivateToUcs4 (FcChar32 private, const FcCharMap *map)
+{
+    int n = 0;
+
+    /* linear scan; the first entry carrying this private code wins */
+    while (n < map->nent && map->ent[n].encode != private)
+        n++;
+    return n < map->nent ? (FcChar32) map->ent[n].bmp : ~0;
+}
+
+const FcCharMap *
+FcFreeTypeGetPrivateMap (FT_Encoding encoding)
+{
+    const FcFontDecode *decoder = fcFontDecoders;
+    /* map of the first decoder listed for this encoding, 0 if none is */
+    for (; decoder < fcFontDecoders + NUM_DECODE; decoder++)
+        if (decoder->encoding == encoding)
+            return decoder->map;
+    return 0;
+}
+
 /*
  * Map a UCS4 glyph to a glyph index. Use all available encoding
  * tables to try and find one that works.
This information is expected @@ -1660,7 +1682,7 @@ FcFreeTypeCharIndex (FT_Face face, FcChar32 ucs4) { int initial, offset, decode; FT_UInt glyphindex; - FT_ULong charcode; + FcChar32 charcode; initial = 0; /* @@ -1685,13 +1707,13 @@ FcFreeTypeCharIndex (FT_Face face, FcChar32 ucs4) continue; if (fcFontDecoders[decode].map) { - charcode = FcFreeTypeMapChar (ucs4, fcFontDecoders[decode].map); + charcode = FcFreeTypeUcs4ToPrivate (ucs4, fcFontDecoders[decode].map); if (charcode == ~0) continue; } else - charcode = (FT_ULong) ucs4; - glyphindex = FT_Get_Char_Index (face, charcode); + charcode = ucs4; + glyphindex = FT_Get_Char_Index (face, (FT_ULong) charcode); if (glyphindex) return glyphindex; } diff --git a/src/fcfreetype.c b/src/fcfreetype.c index 6a7d877..8e0a0a5 100644 --- a/src/fcfreetype.c +++ b/src/fcfreetype.c @@ -1,5 +1,5 @@ /* - * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.7 2002/07/08 07:31:53 keithp Exp $ + * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.8 2002/07/09 02:28:29 keithp Exp $ * * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc. 
*
@@ -29,6 +29,8 @@
 #include
 #include
 #include
+#include
+#include
 
 /*
  * Keep Han languages separated by eliminating languages
@@ -60,6 +62,55 @@ FcFreeTypeIsExclusiveLang (const FcChar8 *lang)
     return FcFalse;
 }
 
+#define FC_NAME_PRIO_LANG           0x0f00
+#define FC_NAME_PRIO_LANG_ENGLISH   0x0200
+#define FC_NAME_PRIO_LANG_LATIN     0x0100
+#define FC_NAME_PRIO_LANG_NONE      0x0000
+
+#define FC_NAME_PRIO_ENC            0x00f0
+#define FC_NAME_PRIO_ENC_UNICODE    0x0010
+#define FC_NAME_PRIO_ENC_NONE       0x0000
+
+#define FC_NAME_PRIO_NAME           0x000f
+#define FC_NAME_PRIO_NAME_FAMILY    0x0002
+#define FC_NAME_PRIO_NAME_PS        0x0001
+#define FC_NAME_PRIO_NAME_NONE      0x0000
+
+/*
+ * Latin pages (ucs4 >> 8): 0x00-0x02, 0x1e, 0x20-0x23, 0xfb and 0xff
+ */
+static FcBool
+FcUcs4IsLatin (FcChar32 ucs4)
+{
+    switch (ucs4 >> 8) {
+    case 0x00: case 0x01: case 0x02:
+    case 0x1e:
+    case 0x20: case 0x21: case 0x22: case 0x23:
+    case 0xfb:
+    case 0xff:
+        return FcTrue;
+    default:
+        return FcFalse;
+    }
+}
+
+/* FcTrue when all len bytes of str decode to code points that pass
+ * FcUcs4IsLatin; a decoding failure counts as not Latin */
+static FcBool
+FcUtf8IsLatin (FcChar8 *str, int len)
+{
+    FcChar32 ucs4;
+    int clen;
+
+    for (; len; len -= clen, str += clen)
+    {
+        clen = FcUtf8ToUcs4 (str, &ucs4, len);
+        if (clen <= 0 || !FcUcs4IsLatin (ucs4))
+            return FcFalse;
+    }
+    return FcTrue;
+}
+
 FcPattern *
 FcFreeTypeQuery (const FcChar8 *file,
                  int id,
@@ -73,9 +124,16 @@ FcFreeTypeQuery (const FcChar8 *file,
     int i;
     FcCharSet *cs;
     FT_Library ftLibrary;
-    const FcChar8 *family;
+    FcChar8 *family;
+    FcChar8 *style;
     TT_OS2 *os2;
     const FcChar8 *exclusiveLang = 0;
+    FT_SfntName sname;
+    FT_UInt snamei, snamec;
+    FcBool family_allocated = FcFalse;
+    FcBool style_allocated = FcFalse;
+    int family_prio = 0;
+    int style_prio = 0;
 
     if (FT_Init_FreeType (&ftLibrary))
         return 0;
@@ -112,22 +170,288 @@ FcFreeTypeQuery (const FcChar8 *file,
     if (!FcPatternAddInteger (pat, FC_WEIGHT, weight))
         goto bail1;
 
-    family = (FcChar8 *) face->family_name;
+    /*
+     * Grub through the name table looking for family
+     * and style names.
FreeType makes quite a hash + * of them + */ + family = 0; + style = 0; + snamec = FT_Get_Sfnt_Name_Count (face); + for (snamei = 0; snamei < snamec; snamei++) + { + FcChar8 *utf8; + int len; + int wchar; + FcChar8 *src; + int src_len; + FcChar8 *u8; + FcChar32 ucs4; + int ilen, olen; + int prio = 0; + + const FcCharMap *map; + enum { + FcNameEncodingUtf16, + FcNameEncodingAppleRoman, + FcNameEncodingLatin1 + } encoding; + + + if (FT_Get_Sfnt_Name (face, snamei, &sname) != 0) + break; + + /* + * Look for Unicode strings + */ + switch (sname.platform_id) { + case TT_PLATFORM_APPLE_UNICODE: + /* + * All APPLE_UNICODE encodings are Utf16 BE + * + * Because there's no language id for Unicode, + * assume it's English + */ + prio |= FC_NAME_PRIO_LANG_ENGLISH; + prio |= FC_NAME_PRIO_ENC_UNICODE; + encoding = FcNameEncodingUtf16; + break; + case TT_PLATFORM_MACINTOSH: + switch (sname.encoding_id) { + case TT_MAC_ID_ROMAN: + encoding = FcNameEncodingAppleRoman; + break; + default: + continue; + } + switch (sname.language_id) { + case TT_MAC_LANGID_ENGLISH: + prio |= FC_NAME_PRIO_LANG_ENGLISH; + break; + default: + /* + * Sometimes Microsoft language ids + * end up in the macintosh table. This + * is often accompanied by data in + * some mystic encoding. 
Ignore these names + */ + if (sname.language_id >= 0x100) + continue; + break; + } + break; + case TT_PLATFORM_MICROSOFT: + switch (sname.encoding_id) { + case TT_MS_ID_UNICODE_CS: + encoding = FcNameEncodingUtf16; + prio |= FC_NAME_PRIO_ENC_UNICODE; + break; + default: + continue; + } + switch (sname.language_id & 0xff) { + case 0x09: + prio |= FC_NAME_PRIO_LANG_ENGLISH; + break; + default: + break; + } + break; + case TT_PLATFORM_ISO: + switch (sname.encoding_id) { + case TT_ISO_ID_10646: + encoding = FcNameEncodingUtf16; + prio |= FC_NAME_PRIO_ENC_UNICODE; + break; + case TT_ISO_ID_7BIT_ASCII: + case TT_ISO_ID_8859_1: + encoding = FcNameEncodingLatin1; + break; + default: + continue; + } + break; + default: + continue; + } + + /* + * Look for family and style names + */ + switch (sname.name_id) { + case TT_NAME_ID_FONT_FAMILY: + prio |= FC_NAME_PRIO_NAME_FAMILY; + break; + case TT_NAME_ID_PS_NAME: + prio |= FC_NAME_PRIO_NAME_PS; + break; + case TT_NAME_ID_FONT_SUBFAMILY: + break; + default: + continue; + } + + src = (FcChar8 *) sname.string; + src_len = sname.string_len; + + switch (encoding) { + case FcNameEncodingUtf16: + /* + * Convert Utf16 to Utf8 + */ + + if (!FcUtf16Len (src, FcEndianBig, src_len, &len, &wchar)) + continue; + + /* + * Allocate plenty of space + */ + utf8 = malloc (len * FC_UTF8_MAX_LEN + 1); + if (!utf8) + continue; + + u8 = utf8; + + while ((ilen = FcUtf16ToUcs4 (src, FcEndianBig, &ucs4, src_len)) > 0) + { + src_len -= ilen; + src += ilen; + olen = FcUcs4ToUtf8 (ucs4, u8); + u8 += olen; + } + *u8 = '\0'; + break; + case FcNameEncodingLatin1: + /* + * Convert Latin1 to Utf8 + */ + utf8 = malloc (src_len * 2 + 1); + if (!utf8) + continue; + + u8 = utf8; + while (src_len > 0) + { + ucs4 = *src++; + src_len--; + olen = FcUcs4ToUtf8 (ucs4, u8); + u8 += olen; + } + *u8 = '\0'; + break; + case FcNameEncodingAppleRoman: + /* + * Convert AppleRoman to Utf8 + */ + map = FcFreeTypeGetPrivateMap (ft_encoding_apple_roman); + if (!map) + continue; + + 
utf8 = malloc (src_len * 3 + 1); + if (!utf8) + continue; + + u8 = utf8; + while (src_len > 0) + { + ucs4 = FcFreeTypePrivateToUcs4 (*src++, map); + src_len--; + olen = FcUcs4ToUtf8 (ucs4, u8); + u8 += olen; + } + *u8 = '\0'; + break; + default: + continue; + } + if ((prio & FC_NAME_PRIO_LANG) == FC_NAME_PRIO_LANG_NONE) + if (FcUtf8IsLatin (utf8, strlen ((char *) utf8))) + prio |= FC_NAME_PRIO_LANG_LATIN; + + if (FcDebug () & FC_DBG_SCANV) + printf ("\nfound name (name %d platform %d encoding %d language 0x%x prio 0x%x) %s\n", + sname.name_id, sname.platform_id, + sname.encoding_id, sname.language_id, + prio, utf8); + + switch (sname.name_id) { + case TT_NAME_ID_FONT_FAMILY: + case TT_NAME_ID_PS_NAME: + if (!family || prio > family_prio) + { + if (family) + free (family); + family = utf8; + utf8 = 0; + family_allocated = FcTrue; + family_prio = prio; + } + break; + case TT_NAME_ID_FONT_SUBFAMILY: + if (!style || prio > style_prio) + { + if (style) + free (style); + style = utf8; + utf8 = 0; + style_allocated = FcTrue; + style_prio = prio; + } + break; + } + if (utf8) + free (utf8); + } + + if (!family) + family = (FcChar8 *) face->family_name; + + if (!style) + style = (FcChar8 *) face->style_name; + if (!family) { - family = (FcChar8 *) strrchr ((char *) file, '/'); - if (family) - family++; + FcChar8 *start, *end; + + start = (FcChar8 *) strrchr ((char *) file, '/'); + if (start) + start++; else - family = file; + start = (FcChar8 *) file; + end = (FcChar8 *) strrchr ((char *) start, '.'); + if (!end) + end = start + strlen ((char *) start); + family = malloc (end - start + 1); + strncpy ((char *) family, (char *) start, end - start); + family[end - start] = '\0'; + family_allocated = FcTrue; } + + if (FcDebug() & FC_DBG_SCAN) + printf ("\"%s\" \"%s\" ", family, style ? 
style : (FcChar8 *) ""); + if (!FcPatternAddString (pat, FC_FAMILY, family)) + { + if (family_allocated) + free (family); + if (style_allocated) + free (style); goto bail1; + } - if (face->style_name) + if (family_allocated) + free (family); + + if (style) { - if (!FcPatternAddString (pat, FC_STYLE, (FcChar8 *) face->style_name)) + if (!FcPatternAddString (pat, FC_STYLE, style)) + { + if (style_allocated) + free (style); goto bail1; + } + if (style_allocated) + free (style); } if (!FcPatternAddString (pat, FC_FILE, file)) diff --git a/src/fcint.h b/src/fcint.h index 750b19c..a40ef88 100644 --- a/src/fcint.h +++ b/src/fcint.h @@ -278,6 +278,8 @@ struct _FcConfig { extern FcConfig *_fcConfig; +typedef struct _FcCharMap FcCharMap; + /* fcblanks.c */ /* fccache.c */ @@ -369,6 +371,15 @@ FcNameUnparseCharSet (FcStrBuf *buf, const FcCharSet *c); FcCharSet * FcNameParseCharSet (FcChar8 *string); +FcChar32 +FcFreeTypeUcs4ToPrivate (FcChar32 ucs4, const FcCharMap *map); + +FcChar32 +FcFreeTypePrivateToUcs4 (FcChar32 private, const FcCharMap *map); + +const FcCharMap * +FcFreeTypeGetPrivateMap (FT_Encoding encoding); + /* fcdbg.c */ void FcValueListPrint (FcValueList *l); diff --git a/src/fcstr.c b/src/fcstr.c index bea05d9..a62cb7b 100644 --- a/src/fcstr.c +++ b/src/fcstr.c @@ -1,5 +1,5 @@ /* - * $XFree86: xc/lib/fontconfig/src/fcstr.c,v 1.5 2002/05/29 22:07:33 keithp Exp $ + * $XFree86: xc/lib/fontconfig/src/fcstr.c,v 1.6 2002/07/06 23:47:44 keithp Exp $ * * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. 
*
@@ -229,6 +229,135 @@ FcUtf8Len (FcChar8 *string,
     return FcTrue;
 }
 
+/*
+ * Encode one UCS4 code point as UTF-8 into dest.
+ *
+ * Returns the number of bytes written (1 to FC_UTF8_MAX_LEN), or 0 when
+ * ucs4 is 0x80000000 or above and cannot be encoded.  dest is not
+ * NUL terminated.
+ */
+int
+FcUcs4ToUtf8 (FcChar32 ucs4,
+              FcChar8 dest[FC_UTF8_MAX_LEN])
+{
+    int bits;
+    FcChar8 *d = dest;
+
+    if      (ucs4 <       0x80) { *d++= ucs4;                         bits= -6; }
+    else if (ucs4 <      0x800) { *d++= ((ucs4 >>  6) & 0x1F) | 0xC0; bits=  0; }
+    else if (ucs4 <    0x10000) { *d++= ((ucs4 >> 12) & 0x0F) | 0xE0; bits=  6; }
+    else if (ucs4 <   0x200000) { *d++= ((ucs4 >> 18) & 0x07) | 0xF0; bits= 12; }
+    else if (ucs4 <  0x4000000) { *d++= ((ucs4 >> 24) & 0x03) | 0xF8; bits= 18; }
+    else if (ucs4 < 0x80000000) { *d++= ((ucs4 >> 30) & 0x01) | 0xFC; bits= 24; }
+    else return 0;
+
+    for ( ; bits >= 0; bits-= 6) {
+        *d++= ((ucs4 >> bits) & 0x3F) | 0x80;
+    }
+    return d - dest;
+}
+
+/*
+ * Fetch one 16-bit code unit from the two bytes at src, in the
+ * byte order named by endian.
+ */
+#define GetUtf16(src,endian) \
+    ((FcChar16) ((src)[(endian) == FcEndianBig ? 0 : 1] << 8) | \
+     (FcChar16) ((src)[(endian) == FcEndianBig ? 1 : 0]))
+
+/*
+ * Decode one UCS4 code point from the UTF-16 data at src_orig.
+ *
+ * len is the number of bytes available.  On success the code point is
+ * stored in *dst and the number of bytes consumed (2, or 4 for a
+ * surrogate pair) is returned.  Returns 0 without touching *dst when
+ * fewer than 2 bytes are available, when a high surrogate is truncated,
+ * or when a high surrogate is not followed by a low surrogate.
+ */
+int
+FcUtf16ToUcs4 (FcChar8 *src_orig,
+               FcEndian endian,
+               FcChar32 *dst,
+               int len) /* in bytes */
+{
+    FcChar8 *src = src_orig;
+    FcChar16 a, b;
+    FcChar32 result;
+
+    if (len < 2)
+        return 0;
+
+    a = GetUtf16 (src, endian); src += 2; len -= 2;
+
+    /*
+     * Check for surrogate
+     */
+    if ((a & 0xfc00) == 0xd800)
+    {
+        if (len < 2)
+            return 0;
+        b = GetUtf16 (src, endian); src += 2; len -= 2;
+        /*
+         * Check for invalid surrogate sequence
+         */
+        if ((b & 0xfc00) != 0xdc00)
+            return 0;
+        /*
+         * The 20 bits carried by the pair are an offset from
+         * U+10000, so the base must be added, not or'ed in
+         */
+        result = ((((FcChar32) a & 0x3ff) << 10) |
+                  ((FcChar32) b & 0x3ff)) + 0x10000;
+    }
+    else
+        result = a;
+    *dst = result;
+    return src - src_orig;
+}
+
+/*
+ * Validate and measure len bytes of UTF-16 data.
+ *
+ * On success returns FcTrue, storing the number of code points in
+ * *nchar and, in *wchar, 1, 2 or 4 according to whether the largest
+ * code point is below 0x100, below 0x10000, or beyond.  Returns
+ * FcFalse without touching *nchar or *wchar when decoding fails.
+ */
+FcBool
+FcUtf16Len (FcChar8 *string,
+            FcEndian endian,
+            int len, /* in bytes */
+            int *nchar,
+            int *wchar)
+{
+    int n;
+    int clen;
+    FcChar32 c;
+    FcChar32 max;
+
+    n = 0;
+    max = 0;
+    while (len)
+    {
+        clen = FcUtf16ToUcs4 (string, endian, &c, len);
+        if (clen <= 0) /* malformed UTF16 string */
+            return FcFalse;
+        if (c > max)
+            max = c;
+        string += clen;
+        len -= clen;
+        n++;
+    }
+    *nchar = n;
+    if (max >= 0x10000)
+        *wchar = 4;
+    else if (max >= 0x100) /* 0x100 itself needs two bytes */
+        *wchar = 2;
+    else
+        *wchar = 1;
+    return FcTrue;
+}
+
 void
 FcStrBufInit (FcStrBuf *buf, FcChar8 *init, int size)
 {