From: Keith Packard <keithp@keithp.com>
Date: Sat, 13 Jul 2002 05:43:25 +0000 (+0000)
Subject: Add some Utf16 support, extract font family and style names from name table
X-Git-Tag: fcpackage_rc1~5
X-Git-Url: https://git.wh0rd.org/?p=fontconfig.git;a=commitdiff_plain;h=69937bd9416eb3fbefd55b9fa6445d0fe4b4f7f3

Add some Utf16 support, extract font family and style names from name table
    for sfnt fonts
---

diff --git a/fontconfig/fontconfig.h b/fontconfig/fontconfig.h
index 060ba84..1939e46 100644
--- a/fontconfig/fontconfig.h
+++ b/fontconfig/fontconfig.h
@@ -1,5 +1,5 @@
 /*
- * $XFree86: xc/lib/fontconfig/fontconfig/fontconfig.h,v 1.20 2002/07/06 23:47:43 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/fontconfig/fontconfig.h,v 1.21 2002/07/11 02:47:50 keithp Exp $
  *
  * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc.
  *
@@ -187,6 +187,8 @@ typedef struct _FcAtomic FcAtomic;
 #define _FCFUNCPROTOEND
 #endif
 
+typedef enum { FcEndianBig, FcEndianLittle } FcEndian;	/* byte order of 16-bit units in UTF-16 input */
+
 typedef struct _FcConfig    FcConfig;
 
 typedef struct _FcFileCache FcFileCache;
@@ -651,6 +653,25 @@ FcUtf8Len (FcChar8	*string,
 	   int		*nchar,
 	   int		*wchar);
 
+#define FC_UTF8_MAX_LEN	6
+
+int
+FcUcs4ToUtf8 (FcChar32	ucs4,
+	      FcChar8	dest[FC_UTF8_MAX_LEN]);
+
+int
+FcUtf16ToUcs4 (FcChar8	*src_orig,
+	       FcEndian	endian,
+	       FcChar32 *dst,
+	       int	len);	    /* in bytes */
+
+FcBool
+FcUtf16Len (FcChar8	*string,
+	    FcEndian	endian,
+	    int		len,	    /* in bytes */
+	    int		*nchar,
+	    int		*wchar);
+
 FcChar8 *
 FcStrDirname (const FcChar8 *file);
 
diff --git a/src/fccharset.c b/src/fccharset.c
index cbd9cc8..c455efb 100644
--- a/src/fccharset.c
+++ b/src/fccharset.c
@@ -1,5 +1,5 @@
 /*
- * $XFree86: xc/lib/fontconfig/src/fccharset.c,v 1.15 2002/07/06 23:47:43 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/src/fccharset.c,v 1.16 2002/07/09 02:28:29 keithp Exp $
  *
  * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc.
  *
@@ -1173,10 +1173,10 @@ typedef struct _FcCharEnt {
     unsigned char   encode;
 } FcCharEnt;
 
-typedef struct _FcCharMap {
+struct _FcCharMap {
     const FcCharEnt *ent;
     int		    nent;
-} FcCharMap;
+};
 
 typedef struct _FcFontDecode {
     FT_Encoding	    encoding;
@@ -1625,8 +1625,8 @@ static const FcFontDecode fcFontDecoders[] = {
 
 #define NUM_DECODE  (sizeof (fcFontDecoders) / sizeof (fcFontDecoders[0]))
 
-static FT_ULong
-FcFreeTypeMapChar (FcChar32 ucs4, const FcCharMap *map)
+FcChar32
+FcFreeTypeUcs4ToPrivate (FcChar32 ucs4, const FcCharMap *map)
 {
     int		low, high, mid;
     FcChar16	bmp;
@@ -1649,6 +1649,28 @@ FcFreeTypeMapChar (FcChar32 ucs4, const FcCharMap *map)
     return ~0;
 }
 
+/* bmp value of the first map entry whose encode equals 'private'; ~0 if none. */
+FcChar32
+FcFreeTypePrivateToUcs4 (FcChar32 private, const FcCharMap *map)
+{
+    int	    k = 0;
+
+    while (k < map->nent && map->ent[k].encode != private)
+	k++;
+    return k < map->nent ? (FcChar32) map->ent[k].bmp : ~0;
+}
+
+/* Map of the first fcFontDecoders entry for 'encoding'; 0 if none (or no map). */
+const FcCharMap *
+FcFreeTypeGetPrivateMap (FT_Encoding encoding)
+{
+    const FcFontDecode *d;
+    for (d = fcFontDecoders; d < fcFontDecoders + NUM_DECODE; d++)
+	if (d->encoding == encoding)
+	    return d->map;
+    return 0;
+}
+
 /*
  * Map a UCS4 glyph to a glyph index.  Use all available encoding
  * tables to try and find one that works.  This information is expected
@@ -1660,7 +1682,7 @@ FcFreeTypeCharIndex (FT_Face face, FcChar32 ucs4)
 {
     int		    initial, offset, decode;
     FT_UInt	    glyphindex;
-    FT_ULong	    charcode;
+    FcChar32	    charcode;
 
     initial = 0;
     /*
@@ -1685,13 +1707,13 @@ FcFreeTypeCharIndex (FT_Face face, FcChar32 ucs4)
 		continue;
 	if (fcFontDecoders[decode].map)
 	{
-	    charcode = FcFreeTypeMapChar (ucs4, fcFontDecoders[decode].map);
+	    charcode = FcFreeTypeUcs4ToPrivate (ucs4, fcFontDecoders[decode].map);
 	    if (charcode == ~0)
 		continue;
 	}
 	else
-	    charcode = (FT_ULong) ucs4;
-	glyphindex = FT_Get_Char_Index (face, charcode);
+	    charcode = ucs4;
+	glyphindex = FT_Get_Char_Index (face, (FT_ULong) charcode);
 	if (glyphindex)
 	    return glyphindex;
     }
diff --git a/src/fcfreetype.c b/src/fcfreetype.c
index 6a7d877..8e0a0a5 100644
--- a/src/fcfreetype.c
+++ b/src/fcfreetype.c
@@ -1,5 +1,5 @@
 /*
- * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.7 2002/07/08 07:31:53 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.8 2002/07/09 02:28:29 keithp Exp $
  *
  * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc.
  *
@@ -29,6 +29,8 @@
 #include <freetype/freetype.h>
 #include <freetype/internal/ftobjs.h>
 #include <freetype/tttables.h>
+#include <freetype/ftsnames.h>
+#include <freetype/ttnameid.h>
 
 /*
  * Keep Han languages separated by eliminating languages
@@ -60,6 +62,55 @@ FcFreeTypeIsExclusiveLang (const FcChar8  *lang)
     return FcFalse;
 }
 
+#define FC_NAME_PRIO_LANG	    0x0f00	/* mask: language bits; highest field, so it dominates when prios are compared */
+#define FC_NAME_PRIO_LANG_ENGLISH   0x0200
+#define FC_NAME_PRIO_LANG_LATIN	    0x0100	/* set when no language bits were assigned but the text passes FcUtf8IsLatin */
+#define FC_NAME_PRIO_LANG_NONE	    0x0000
+
+#define FC_NAME_PRIO_ENC	    0x00f0	/* encoding bits; rank below language */
+#define FC_NAME_PRIO_ENC_UNICODE    0x0010
+#define FC_NAME_PRIO_ENC_NONE	    0x0000
+
+#define FC_NAME_PRIO_NAME	    0x000f	/* name-id bits; lowest field, breaks remaining ties */
+#define FC_NAME_PRIO_NAME_FAMILY    0x0002
+#define FC_NAME_PRIO_NAME_PS	    0x0001
+#define FC_NAME_PRIO_NAME_NONE	    0x0000
+
+/*
+ * True when ucs4 falls in one of the 256-codepoint pages accepted as
+ * Latin text: 0x00-0x02, 0x1e, 0x20-0x23, 0xfb and 0xff.
+ */
+static FcBool
+FcUcs4IsLatin (FcChar32 ucs4)
+{
+    switch (ucs4 >> 8) {
+    case 0x00: case 0x01: case 0x02:
+    case 0x1e:
+    case 0x20: case 0x21: case 0x22: case 0x23:
+    case 0xfb: case 0xff:
+	return FcTrue;
+    default:
+	return FcFalse;
+    }
+}
+
+/*
+ * True when the len-byte UTF-8 string decodes cleanly and each of its
+ * characters passes FcUcs4IsLatin; a zero-length string qualifies.
+ */
+static FcBool
+FcUtf8IsLatin (FcChar8 *str, int len)
+{
+    FcChar32	c;
+    int		used;
+
+    for (; len; len -= used, str += used)
+	if ((used = FcUtf8ToUcs4 (str, &c, len)) <= 0 ||
+	    !FcUcs4IsLatin (c))
+	    return FcFalse;
+    return FcTrue;
+}
+
 FcPattern *
 FcFreeTypeQuery (const FcChar8	*file,
 		 int		id,
@@ -73,9 +124,16 @@ FcFreeTypeQuery (const FcChar8	*file,
     int		    i;
     FcCharSet	    *cs;
     FT_Library	    ftLibrary;
-    const FcChar8   *family;
+    FcChar8	    *family;
+    FcChar8	    *style;
     TT_OS2	    *os2;
     const FcChar8   *exclusiveLang = 0;
+    FT_SfntName	    sname;
+    FT_UInt    	    snamei, snamec;
+    FcBool	    family_allocated = FcFalse;
+    FcBool	    style_allocated = FcFalse;
+    int		    family_prio = 0;
+    int		    style_prio = 0;
 
     if (FT_Init_FreeType (&ftLibrary))
 	return 0;
@@ -112,22 +170,288 @@ FcFreeTypeQuery (const FcChar8	*file,
     if (!FcPatternAddInteger (pat, FC_WEIGHT, weight))
 	goto bail1;
 
-    family = (FcChar8 *) face->family_name;
+    /*
+     * Grub through the name table looking for family
+     * and style names.  FreeType makes quite a hash
+     * of them
+     */
+    family = 0;
+    style = 0;
+    snamec = FT_Get_Sfnt_Name_Count (face);
+    for (snamei = 0; snamei < snamec; snamei++)
+    {
+	FcChar8		*utf8;
+	int		len;
+	int		wchar;
+	FcChar8		*src;
+	int		src_len;
+	FcChar8		*u8;
+	FcChar32	ucs4;
+	int		ilen, olen;
+	int		prio = 0;
+	
+	const FcCharMap	*map;
+	enum {
+	    FcNameEncodingUtf16, 
+	    FcNameEncodingAppleRoman,
+	    FcNameEncodingLatin1 
+	} encoding;
+	
+	
+	if (FT_Get_Sfnt_Name (face, snamei, &sname) != 0)
+	    break;
+	
+	/*
+	 * Look for Unicode strings
+	 */
+	switch (sname.platform_id) {
+	case TT_PLATFORM_APPLE_UNICODE:
+	    /*
+	     * All APPLE_UNICODE encodings are Utf16 BE
+	     *
+	     * Because there's no language id for Unicode,
+	     * assume it's English
+	     */
+	    prio |= FC_NAME_PRIO_LANG_ENGLISH;
+	    prio |= FC_NAME_PRIO_ENC_UNICODE;
+	    encoding = FcNameEncodingUtf16;
+	    break;
+	case TT_PLATFORM_MACINTOSH:
+	    switch (sname.encoding_id) {
+	    case TT_MAC_ID_ROMAN:
+		encoding = FcNameEncodingAppleRoman;
+		break;
+	    default:
+		continue;
+	    }
+	    switch (sname.language_id) {
+	    case TT_MAC_LANGID_ENGLISH:
+		prio |= FC_NAME_PRIO_LANG_ENGLISH;
+		break;
+	    default:
+		/*
+		 * Sometimes Microsoft language ids
+		 * end up in the macintosh table.  This
+		 * is often accompanied by data in
+		 * some mystic encoding.  Ignore these names
+		 */
+		if (sname.language_id >= 0x100)
+		    continue;
+		break;
+	    }
+	    break;
+	case TT_PLATFORM_MICROSOFT:
+	    switch (sname.encoding_id) {
+	    case TT_MS_ID_UNICODE_CS:
+		encoding = FcNameEncodingUtf16;
+		prio |= FC_NAME_PRIO_ENC_UNICODE;
+		break;
+	    default:
+		continue;
+	    }
+	    switch (sname.language_id & 0xff) {
+	    case 0x09:
+		prio |= FC_NAME_PRIO_LANG_ENGLISH;
+		break;
+	    default:
+		break;
+	    }
+	    break;
+	case TT_PLATFORM_ISO:
+	    switch (sname.encoding_id) {
+	    case TT_ISO_ID_10646:
+		encoding = FcNameEncodingUtf16;
+		prio |= FC_NAME_PRIO_ENC_UNICODE;
+		break;
+	    case TT_ISO_ID_7BIT_ASCII:
+	    case TT_ISO_ID_8859_1:
+		encoding = FcNameEncodingLatin1;
+		break;
+	    default:
+		continue;
+	    }
+	    break;
+	default:
+	    continue;
+	}
+	
+	/*
+	 * Look for family and style names 
+	 */
+	switch (sname.name_id) {
+	case TT_NAME_ID_FONT_FAMILY:
+	    prio |= FC_NAME_PRIO_NAME_FAMILY;
+	    break;
+	case TT_NAME_ID_PS_NAME:
+	    prio |= FC_NAME_PRIO_NAME_PS;
+	    break;
+	case TT_NAME_ID_FONT_SUBFAMILY:
+	    break;
+	default:
+	    continue;
+	}
+	    
+        src = (FcChar8 *) sname.string;
+        src_len = sname.string_len;
+	
+	switch (encoding) {
+	case FcNameEncodingUtf16:
+	    /*
+	     * Convert Utf16 to Utf8
+	     */
+	    
+	    if (!FcUtf16Len (src, FcEndianBig, src_len, &len, &wchar))
+		continue;
+    
+	    /*
+	     * Allocate plenty of space
+	     */
+	    utf8 = malloc (len * FC_UTF8_MAX_LEN + 1);
+	    if (!utf8)
+		continue;
+		
+	    u8 = utf8;
+	    
+	    while ((ilen = FcUtf16ToUcs4 (src, FcEndianBig, &ucs4, src_len)) > 0)
+	    {
+		src_len -= ilen;
+		src += ilen;
+		olen = FcUcs4ToUtf8 (ucs4, u8);
+		u8 += olen;
+	    }
+	    *u8 = '\0';
+	    break;
+	case FcNameEncodingLatin1:
+	    /*
+	     * Convert Latin1 to Utf8
+	     */
+	    utf8 = malloc (src_len * 2 + 1);
+	    if (!utf8)
+		continue;
+
+	    u8 = utf8;
+	    while (src_len > 0)
+	    {
+		ucs4 = *src++;
+		src_len--;
+		olen = FcUcs4ToUtf8 (ucs4, u8);
+		u8 += olen;
+	    }
+	    *u8 = '\0';
+	    break;
+	case FcNameEncodingAppleRoman:
+	    /*
+	     * Convert AppleRoman to Utf8
+	     */
+	    map = FcFreeTypeGetPrivateMap (ft_encoding_apple_roman);
+	    if (!map)
+		continue;
+
+	    utf8 = malloc (src_len * 3 + 1);
+	    if (!utf8)
+		continue;
+
+	    u8 = utf8;
+	    while (src_len > 0)
+	    {
+		ucs4 = FcFreeTypePrivateToUcs4 (*src++, map);
+		src_len--;
+		olen = FcUcs4ToUtf8 (ucs4, u8);
+		u8 += olen;
+	    }
+	    *u8 = '\0';
+	    break;
+	default:
+	    continue;
+	}
+	if ((prio & FC_NAME_PRIO_LANG) == FC_NAME_PRIO_LANG_NONE)
+	    if (FcUtf8IsLatin (utf8, strlen ((char *) utf8)))
+		prio |= FC_NAME_PRIO_LANG_LATIN;
+			       
+	if (FcDebug () & FC_DBG_SCANV)
+	    printf ("\nfound name (name %d platform %d encoding %d language 0x%x prio 0x%x) %s\n",
+		    sname.name_id, sname.platform_id,
+		    sname.encoding_id, sname.language_id,
+		    prio, utf8);
+    
+	switch (sname.name_id) {
+	case TT_NAME_ID_FONT_FAMILY:
+	case TT_NAME_ID_PS_NAME:
+	    if (!family || prio > family_prio)
+	    {
+		if (family)
+		    free (family);
+		family = utf8;
+		utf8 = 0;
+		family_allocated = FcTrue;
+		family_prio = prio;
+	    }
+	    break;
+	case TT_NAME_ID_FONT_SUBFAMILY:
+	    if (!style || prio > style_prio)
+	    {
+		if (style)
+		    free (style);
+		style = utf8;
+		utf8 = 0;
+		style_allocated = FcTrue;
+		style_prio = prio;
+	    }
+	    break;
+	}
+	if (utf8)
+	    free (utf8);
+    }
+    
+    if (!family)
+	family = (FcChar8 *) face->family_name;
+    
+    if (!style)
+	style = (FcChar8 *) face->style_name;
+    
     if (!family)
     {
-	family = (FcChar8 *) strrchr ((char *) file, '/');
-	if (family)
-	    family++;
+	FcChar8	*start, *end;
+	
+	start = (FcChar8 *) strrchr ((char *) file, '/');
+	if (start)
+	    start++;
 	else
-	    family = file;
+	    start = (FcChar8 *) file;
+	end = (FcChar8 *) strrchr ((char *) start, '.');
+	if (!end)
+	    end = start + strlen ((char *) start);
+	family = malloc (end - start + 1);
+	strncpy ((char *) family, (char *) start, end - start);
+	family[end - start] = '\0';
+	family_allocated = FcTrue;
     }
+
+    if (FcDebug() & FC_DBG_SCAN)
+	printf ("\"%s\" \"%s\" ", family, style ? style : (FcChar8 *) "<none>");
+
     if (!FcPatternAddString (pat, FC_FAMILY, family))
+    {
+	if (family_allocated)
+	    free (family);
+	if (style_allocated)
+	    free (style);
 	goto bail1;
+    }
 
-    if (face->style_name)
+    if (family_allocated)
+	free (family);
+
+    if (style)
     {
-	if (!FcPatternAddString (pat, FC_STYLE, (FcChar8 *) face->style_name))
+	if (!FcPatternAddString (pat, FC_STYLE, style))
+	{
+	    if (style_allocated)
+		free (style);
 	    goto bail1;
+	}
+	if (style_allocated)
+	    free (style);
     }
 
     if (!FcPatternAddString (pat, FC_FILE, file))
diff --git a/src/fcint.h b/src/fcint.h
index 750b19c..a40ef88 100644
--- a/src/fcint.h
+++ b/src/fcint.h
@@ -278,6 +278,8 @@ struct _FcConfig {
  
 extern FcConfig	*_fcConfig;
 
+typedef struct _FcCharMap FcCharMap;
+
 /* fcblanks.c */
 
 /* fccache.c */
@@ -369,6 +371,15 @@ FcNameUnparseCharSet (FcStrBuf *buf, const FcCharSet *c);
 FcCharSet *
 FcNameParseCharSet (FcChar8 *string);
 
+FcChar32
+FcFreeTypeUcs4ToPrivate (FcChar32 ucs4, const FcCharMap *map);
+
+FcChar32
+FcFreeTypePrivateToUcs4 (FcChar32 private, const FcCharMap *map);
+
+const FcCharMap *
+FcFreeTypeGetPrivateMap (FT_Encoding encoding);
+    
 /* fcdbg.c */
 void
 FcValueListPrint (FcValueList *l);
diff --git a/src/fcstr.c b/src/fcstr.c
index bea05d9..a62cb7b 100644
--- a/src/fcstr.c
+++ b/src/fcstr.c
@@ -1,5 +1,5 @@
 /*
- * $XFree86: xc/lib/fontconfig/src/fcstr.c,v 1.5 2002/05/29 22:07:33 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/src/fcstr.c,v 1.6 2002/07/06 23:47:44 keithp Exp $
  *
  * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
  *
@@ -229,6 +229,103 @@ FcUtf8Len (FcChar8	*string,
     return FcTrue;
 }
 
+int	/* bytes stored in dest (1-6), or 0 when ucs4 >= 0x80000000 */
+FcUcs4ToUtf8 (FcChar32	ucs4,
+	      FcChar8	dest[FC_UTF8_MAX_LEN])	/* output is not NUL-terminated */
+{
+    int	bits;
+    FcChar8 *d = dest;
+    /* Lead byte: length tag | top payload bits; bits = shift for the next byte (-6: none follow) */
+    if      (ucs4 <       0x80) {  *d++=  ucs4;                         bits= -6; }
+    else if (ucs4 <      0x800) {  *d++= ((ucs4 >>  6) & 0x1F) | 0xC0;  bits=  0; }
+    else if (ucs4 <    0x10000) {  *d++= ((ucs4 >> 12) & 0x0F) | 0xE0;  bits=  6; }
+    else if (ucs4 <   0x200000) {  *d++= ((ucs4 >> 18) & 0x07) | 0xF0;  bits= 12; }
+    else if (ucs4 <  0x4000000) {  *d++= ((ucs4 >> 24) & 0x03) | 0xF8;  bits= 18; }
+    else if (ucs4 < 0x80000000) {  *d++= ((ucs4 >> 30) & 0x01) | 0xFC;  bits= 24; }
+    else return 0;
+    /* Continuation bytes: 10xxxxxx, six payload bits each, most significant first */
+    for ( ; bits >= 0; bits-= 6) {
+	*d++= ((ucs4 >> bits) & 0x3F) | 0x80;
+    }
+    return d - dest;
+}
+
+#define GetUtf16(src,endian) /* 16-bit unit at src, bytes taken in order 'endian' */ \
+    ((FcChar16) ((src)[(endian) == FcEndianBig ? 0 : 1] << 8) | \
+     (FcChar16) ((src)[(endian) == FcEndianBig ? 1 : 0]))
+
+/*
+ * Decode one UTF-16 character (one code unit or a surrogate pair) into *dst.
+ * Returns bytes consumed (2 or 4); 0, with *dst untouched, if fewer than 2
+ * bytes remain or a high surrogate lacks its low surrogate.
+ */
+int
+FcUtf16ToUcs4 (FcChar8	*src_orig,
+	       FcEndian	endian,
+	       FcChar32	*dst,
+	       int	len)	/* in bytes */
+{
+    FcChar8	*src = src_orig;
+    FcChar16	a, b;
+    FcChar32	result;
+
+    if (len < 2)
+	return 0;
+    a = GetUtf16 (src, endian); src += 2; len -= 2;
+    /* High surrogate: a second code unit must follow */
+    if ((a & 0xfc00) == 0xd800)
+    {
+	if (len < 2)
+	    return 0;
+	b = GetUtf16 (src, endian); src += 2; len -= 2;
+	/* Reject anything but a low surrogate */
+	if ((b & 0xfc00) != 0xdc00)
+	    return 0;
+	/* The 20 payload bits are an offset from U+10000: add it, OR loses bit 16 */
+	result = ((((FcChar32) a & 0x3ff) << 10) |
+		  ((FcChar32) b & 0x3ff)) + 0x10000;
+    }
+    else
+	result = a;
+    *dst = result;
+    return src - src_orig;
+}
+
+/*
+ * Count the characters in a UTF-16 string of len bytes into *nchar; *wchar
+ * gets the bytes (1, 2 or 4) needed per character.  FcFalse if malformed.
+ */
+FcBool
+FcUtf16Len (FcChar8	*string,
+	    FcEndian	endian,
+	    int		len,	/* in bytes */
+	    int		*nchar,
+	    int		*wchar)
+{
+    int		n = 0, clen;
+    FcChar32	c, max = 0;
+
+    while (len)
+    {
+	clen = FcUtf16ToUcs4 (string, endian, &c, len);
+	if (clen <= 0)	/* malformed UTF16 string; outputs left unset */
+	    return FcFalse;
+	if (c > max)
+	    max = c;
+	string += clen;
+	len -= clen;
+	n++;
+    }
+    *nchar = n;
+    if (max >= 0x10000)
+	*wchar = 4;
+    else if (max >= 0x100)	/* 0x100 itself no longer fits in one byte */
+	*wchar = 2;
+    else
+	*wchar = 1;
+    return FcTrue;
+}
+
 void
 FcStrBufInit (FcStrBuf *buf, FcChar8 *init, int size)
 {