Add some Utf16 support, extract font family and style names from name table

author Keith Packard <keithp@keithp.com>

Sat, 13 Jul 2002 05:43:25 +0000 (05:43 +0000)

committer Keith Packard <keithp@keithp.com>

Sat, 13 Jul 2002 05:43:25 +0000 (05:43 +0000)
author Keith Packard <keithp@keithp.com>
Sat, 13 Jul 2002 05:43:25 +0000 (05:43 +0000)
committer Keith Packard <keithp@keithp.com>
Sat, 13 Jul 2002 05:43:25 +0000 (05:43 +0000)
diff --git a/fontconfig/fontconfig.h b/fontconfig/fontconfig.h

index 060ba84b6ed0db8d2125bdcf9e3eafa70a6e29c4..1939e466b31a6eec075170d940299d74d9000bf7 100644 (file)
--- a/fontconfig/fontconfig.h
+++ b/fontconfig/fontconfig.h
@@ -1,5 +1,5 @@
  /*
- * $XFree86: xc/lib/fontconfig/fontconfig/fontconfig.h,v 1.20 2002/07/06 23:47:43 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/fontconfig/fontconfig.h,v 1.21 2002/07/11 02:47:50 keithp Exp $
   *
   * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc.
   *
@@ -187,6 +187,8 @@ typedef struct _FcAtomic FcAtomic;
  #define _FCFUNCPROTOEND
  #endif
  
+typedef enum { FcEndianBig, FcEndianLittle } FcEndian;
+
  typedef struct _FcConfig    FcConfig;
  
  typedef struct _FcFileCache FcFileCache;
@@ -651,6 +653,25 @@ FcUtf8Len (FcChar8 *string,
            int          *nchar,
            int          *wchar);
  
+#define FC_UTF8_MAX_LEN        6       /* most bytes FcUcs4ToUtf8 stores for one character */
+/* Encode ucs4 as UTF-8 into dest; returns the number of bytes stored, or 0 if ucs4 >= 0x80000000 */
+int
+FcUcs4ToUtf8 (FcChar32 ucs4,
+             FcChar8   dest[FC_UTF8_MAX_LEN]);
+/* Decode one UTF-16 character from src_orig into *dst; returns the bytes consumed, or 0 on truncated/invalid input */
+int
+FcUtf16ToUcs4 (FcChar8 *src_orig,
+              FcEndian endian,
+              FcChar32 *dst,
+              int      len);       /* in bytes */
+/* Scan a UTF-16 string: *nchar gets the character count, *wchar the bytes per character (1, 2 or 4); FcFalse if it cannot be decoded */
+FcBool
+FcUtf16Len (FcChar8    *string,
+           FcEndian    endian,
+           int         len,        /* in bytes */
+           int         *nchar,
+           int         *wchar);
+
  FcChar8 *
  FcStrDirname (const FcChar8 *file);
  
diff --git a/src/fccharset.c b/src/fccharset.c

index cbd9cc8625efeeca6d583e32f1cc6d0f751013b3..c455efb613b6b9e2ef1f0ae913dd6d4db037a08d 100644 (file)
--- a/src/fccharset.c
+++ b/src/fccharset.c
@@ -1,5 +1,5 @@
  /*
- * $XFree86: xc/lib/fontconfig/src/fccharset.c,v 1.15 2002/07/06 23:47:43 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/src/fccharset.c,v 1.16 2002/07/09 02:28:29 keithp Exp $
   *
   * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc.
   *
@@ -1173,10 +1173,10 @@ typedef struct _FcCharEnt {
      unsigned char   encode;
  } FcCharEnt;
  
-typedef struct _FcCharMap {
+struct _FcCharMap {
      const FcCharEnt *ent;
      int                    nent;
-} FcCharMap;
+};
  
  typedef struct _FcFontDecode {
      FT_Encoding            encoding;
@@ -1625,8 +1625,8 @@ static const FcFontDecode fcFontDecoders[] = {
  
  #define NUM_DECODE  (sizeof (fcFontDecoders) / sizeof (fcFontDecoders[0]))
  
-static FT_ULong
-FcFreeTypeMapChar (FcChar32 ucs4, const FcCharMap *map)
+FcChar32
+FcFreeTypeUcs4ToPrivate (FcChar32 ucs4, const FcCharMap *map)
  {
      int                low, high, mid;
      FcChar16   bmp;
@@ -1649,6 +1649,28 @@ FcFreeTypeMapChar (FcChar32 ucs4, const FcCharMap *map)
      return ~0;
  }
  
+/* Scan map linearly for the first entry whose encode field equals
+ * `private`; return that entry's bmp value, or ~0 when none matches. */
+FcChar32
+FcFreeTypePrivateToUcs4 (FcChar32 private, const FcCharMap *map)
+{
+    const FcCharEnt *ent = map->ent;
+    int                    n = map->nent, k = 0;
+    while (k < n && ent[k].encode != private) k++;
+    return k < n ? (FcChar32) ent[k].bmp : (FcChar32) ~0;
+}
+
+/* Return the map of the first fcFontDecoders entry registered for
+ * `encoding` (that map may itself be null), or 0 when no entry matches. */
+const FcCharMap *
+FcFreeTypeGetPrivateMap (FT_Encoding encoding)
+{
+    const FcFontDecode *d = fcFontDecoders, *end = d + NUM_DECODE;
+    for (; d != end; d++)
+       if (d->encoding == encoding) return d->map;
+    return 0;
+}
+
  /*
   * Map a UCS4 glyph to a glyph index.  Use all available encoding
   * tables to try and find one that works.  This information is expected
@@ -1660,7 +1682,7 @@ FcFreeTypeCharIndex (FT_Face face, FcChar32 ucs4)
  {
      int                    initial, offset, decode;
      FT_UInt        glyphindex;
-    FT_ULong       charcode;
+    FcChar32       charcode;
  
      initial = 0;
      /*
@@ -1685,13 +1707,13 @@ FcFreeTypeCharIndex (FT_Face face, FcChar32 ucs4)
                 continue;
         if (fcFontDecoders[decode].map)
         {
-           charcode = FcFreeTypeMapChar (ucs4, fcFontDecoders[decode].map);
+           charcode = FcFreeTypeUcs4ToPrivate (ucs4, fcFontDecoders[decode].map);
             if (charcode == ~0)
                 continue;
         }
         else
-           charcode = (FT_ULong) ucs4;
-       glyphindex = FT_Get_Char_Index (face, charcode);
+           charcode = ucs4;
+       glyphindex = FT_Get_Char_Index (face, (FT_ULong) charcode);
         if (glyphindex)
             return glyphindex;
      }
diff --git a/src/fcfreetype.c b/src/fcfreetype.c

index 6a7d877fe50f038df979823f229724449bdc0c17..8e0a0a5d561c438b0a75504acd44efd46794d1c4 100644 (file)
--- a/src/fcfreetype.c
+++ b/src/fcfreetype.c
@@ -1,5 +1,5 @@
  /*
- * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.7 2002/07/08 07:31:53 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/src/fcfreetype.c,v 1.8 2002/07/09 02:28:29 keithp Exp $
   *
   * Copyright © 2001 Keith Packard, member of The XFree86 Project, Inc.
   *
@@ -29,6 +29,8 @@
  #include <freetype/freetype.h>
  #include <freetype/internal/ftobjs.h>
  #include <freetype/tttables.h>
+#include <freetype/ftsnames.h>
+#include <freetype/ttnameid.h>
  
  /*
   * Keep Han languages separated by eliminating languages
@@ -60,6 +62,55 @@ FcFreeTypeIsExclusiveLang (const FcChar8  *lang)
      return FcFalse;
  }
  
+#define FC_NAME_PRIO_LANG          0x0f00   /* mask of the language bits; priorities are compared numerically, higher wins */
+#define FC_NAME_PRIO_LANG_ENGLISH   0x0200   /* name is tagged (or assumed) English */
+#define FC_NAME_PRIO_LANG_LATIN            0x0100   /* no language priority, but every character passes FcUtf8IsLatin */
+#define FC_NAME_PRIO_LANG_NONE     0x0000
+
+#define FC_NAME_PRIO_ENC           0x00f0   /* bits used for the encoding priority (presumably a mask; not tested in this hunk) */
+#define FC_NAME_PRIO_ENC_UNICODE    0x0010   /* name is stored as UTF-16 */
+#define FC_NAME_PRIO_ENC_NONE      0x0000
+
+#define FC_NAME_PRIO_NAME          0x000f   /* bits used for the name-id priority (presumably a mask; not tested in this hunk) */
+#define FC_NAME_PRIO_NAME_FAMILY    0x0002   /* TT_NAME_ID_FONT_FAMILY entry */
+#define FC_NAME_PRIO_NAME_PS       0x0001   /* TT_NAME_ID_PS_NAME entry */
+#define FC_NAME_PRIO_NAME_NONE     0x0000
+
+/*
+ * Coarse Latin test on the 256-code-point page (ucs4 >> 8): pages
+ * 0x00-0x02, 0x1e, 0x20-0x23, 0xfb and 0xff yield FcTrue, all others FcFalse.
+ */
+static FcBool
+FcUcs4IsLatin (FcChar32 ucs4)
+{
+    switch (ucs4 >> 8) {
+    case 0x00: case 0x01: case 0x02:
+    case 0x1e:
+    case 0x20: case 0x21: case 0x22: case 0x23:
+    case 0xfb: case 0xff:
+       return FcTrue;
+    default:
+       return FcFalse;
+    }
+}
+
+/* FcTrue when all `len` bytes of str decode through FcUtf8ToUcs4 into
+ * characters accepted by FcUcs4IsLatin; FcFalse at the first failure. */
+static FcBool
+FcUtf8IsLatin (FcChar8 *str, int len)
+{
+    FcChar32   ch;
+    int                used;
+
+    for (; len; str += used, len -= used)
+    {
+       used = FcUtf8ToUcs4 (str, &ch, len);
+       if (used <= 0 || !FcUcs4IsLatin (ch))
+           return FcFalse;
+    }
+    return FcTrue;
+}
+
  FcPattern *
  FcFreeTypeQuery (const FcChar8 *file,
                  int            id,
@@ -73,9 +124,16 @@ FcFreeTypeQuery (const FcChar8      *file,
      int                    i;
      FcCharSet      *cs;
      FT_Library     ftLibrary;
-    const FcChar8   *family;
+    FcChar8        *family;
+    FcChar8        *style;
      TT_OS2         *os2;
      const FcChar8   *exclusiveLang = 0;
+    FT_SfntName            sname;
+    FT_UInt                snamei, snamec;
+    FcBool         family_allocated = FcFalse;
+    FcBool         style_allocated = FcFalse;
+    int                    family_prio = 0;
+    int                    style_prio = 0;
  
      if (FT_Init_FreeType (&ftLibrary))
         return 0;
@@ -112,22 +170,288 @@ FcFreeTypeQuery (const FcChar8   *file,
      if (!FcPatternAddInteger (pat, FC_WEIGHT, weight))
         goto bail1;
  
-    family = (FcChar8 *) face->family_name;
+    /*
+     * Grub through the name table looking for family
+     * and style names.  FreeType makes quite a hash
+     * of them
+     */
+    family = 0;
+    style = 0;
+    snamec = FT_Get_Sfnt_Name_Count (face);
+    for (snamei = 0; snamei < snamec; snamei++)
+    {
+       FcChar8         *utf8;
+       int             len;
+       int             wchar;
+       FcChar8         *src;
+       int             src_len;
+       FcChar8         *u8;
+       FcChar32        ucs4;
+       int             ilen, olen;
+       int             prio = 0;
+       
+       const FcCharMap *map;
+       enum {
+           FcNameEncodingUtf16, 
+           FcNameEncodingAppleRoman,
+           FcNameEncodingLatin1 
+       } encoding;
+       
+       
+       if (FT_Get_Sfnt_Name (face, snamei, &sname) != 0)
+           break;
+       
+       /*
+        * Look for Unicode strings
+        */
+       switch (sname.platform_id) {
+       case TT_PLATFORM_APPLE_UNICODE:
+           /*
+            * All APPLE_UNICODE encodings are Utf16 BE
+            *
+            * Because there's no language id for Unicode,
+            * assume it's English
+            */
+           prio |= FC_NAME_PRIO_LANG_ENGLISH;
+           prio |= FC_NAME_PRIO_ENC_UNICODE;
+           encoding = FcNameEncodingUtf16;
+           break;
+       case TT_PLATFORM_MACINTOSH:
+           switch (sname.encoding_id) {
+           case TT_MAC_ID_ROMAN:
+               encoding = FcNameEncodingAppleRoman;
+               break;
+           default:
+               continue;
+           }
+           switch (sname.language_id) {
+           case TT_MAC_LANGID_ENGLISH:
+               prio |= FC_NAME_PRIO_LANG_ENGLISH;
+               break;
+           default:
+               /*
+                * Sometimes Microsoft language ids
+                * end up in the macintosh table.  This
+                * is often accompanied by data in
+                * some mystic encoding.  Ignore these names
+                */
+               if (sname.language_id >= 0x100)
+                   continue;
+               break;
+           }
+           break;
+       case TT_PLATFORM_MICROSOFT:
+           switch (sname.encoding_id) {
+           case TT_MS_ID_UNICODE_CS:
+               encoding = FcNameEncodingUtf16;
+               prio |= FC_NAME_PRIO_ENC_UNICODE;
+               break;
+           default:
+               continue;
+           }
+           switch (sname.language_id & 0xff) {
+           case 0x09:
+               prio |= FC_NAME_PRIO_LANG_ENGLISH;
+               break;
+           default:
+               break;
+           }
+           break;
+       case TT_PLATFORM_ISO:
+           switch (sname.encoding_id) {
+           case TT_ISO_ID_10646:
+               encoding = FcNameEncodingUtf16;
+               prio |= FC_NAME_PRIO_ENC_UNICODE;
+               break;
+           case TT_ISO_ID_7BIT_ASCII:
+           case TT_ISO_ID_8859_1:
+               encoding = FcNameEncodingLatin1;
+               break;
+           default:
+               continue;
+           }
+           break;
+       default:
+           continue;
+       }
+       
+       /*
+        * Look for family and style names 
+        */
+       switch (sname.name_id) {
+       case TT_NAME_ID_FONT_FAMILY:
+           prio |= FC_NAME_PRIO_NAME_FAMILY;
+           break;
+       case TT_NAME_ID_PS_NAME:
+           prio |= FC_NAME_PRIO_NAME_PS;
+           break;
+       case TT_NAME_ID_FONT_SUBFAMILY:
+           break;
+       default:
+           continue;
+       }
+           
+        src = (FcChar8 *) sname.string;
+        src_len = sname.string_len;
+       
+       switch (encoding) {
+       case FcNameEncodingUtf16:
+           /*
+            * Convert Utf16 to Utf8
+            */
+           
+           if (!FcUtf16Len (src, FcEndianBig, src_len, &len, &wchar))
+               continue;
+    
+           /*
+            * Allocate plenty of space
+            */
+           utf8 = malloc (len * FC_UTF8_MAX_LEN + 1);
+           if (!utf8)
+               continue;
+               
+           u8 = utf8;
+           
+           while ((ilen = FcUtf16ToUcs4 (src, FcEndianBig, &ucs4, src_len)) > 0)
+           {
+               src_len -= ilen;
+               src += ilen;
+               olen = FcUcs4ToUtf8 (ucs4, u8);
+               u8 += olen;
+           }
+           *u8 = '\0';
+           break;
+       case FcNameEncodingLatin1:
+           /*
+            * Convert Latin1 to Utf8
+            */
+           utf8 = malloc (src_len * 2 + 1);
+           if (!utf8)
+               continue;
+
+           u8 = utf8;
+           while (src_len > 0)
+           {
+               ucs4 = *src++;
+               src_len--;
+               olen = FcUcs4ToUtf8 (ucs4, u8);
+               u8 += olen;
+           }
+           *u8 = '\0';
+           break;
+       case FcNameEncodingAppleRoman:
+           /*
+            * Convert AppleRoman to Utf8
+            */
+           map = FcFreeTypeGetPrivateMap (ft_encoding_apple_roman);
+           if (!map)
+               continue;
+
+           utf8 = malloc (src_len * 3 + 1);
+           if (!utf8)
+               continue;
+
+           u8 = utf8;
+           while (src_len > 0)
+           {
+               ucs4 = FcFreeTypePrivateToUcs4 (*src++, map);
+               src_len--;
+               olen = FcUcs4ToUtf8 (ucs4, u8);
+               u8 += olen;
+           }
+           *u8 = '\0';
+           break;
+       default:
+           continue;
+       }
+       if ((prio & FC_NAME_PRIO_LANG) == FC_NAME_PRIO_LANG_NONE)
+           if (FcUtf8IsLatin (utf8, strlen ((char *) utf8)))
+               prio |= FC_NAME_PRIO_LANG_LATIN;
+                              
+       if (FcDebug () & FC_DBG_SCANV)
+           printf ("\nfound name (name %d platform %d encoding %d language 0x%x prio 0x%x) %s\n",
+                   sname.name_id, sname.platform_id,
+                   sname.encoding_id, sname.language_id,
+                   prio, utf8);
+    
+       switch (sname.name_id) {
+       case TT_NAME_ID_FONT_FAMILY:
+       case TT_NAME_ID_PS_NAME:
+           if (!family || prio > family_prio)
+           {
+               if (family)
+                   free (family);
+               family = utf8;
+               utf8 = 0;
+               family_allocated = FcTrue;
+               family_prio = prio;
+           }
+           break;
+       case TT_NAME_ID_FONT_SUBFAMILY:
+           if (!style || prio > style_prio)
+           {
+               if (style)
+                   free (style);
+               style = utf8;
+               utf8 = 0;
+               style_allocated = FcTrue;
+               style_prio = prio;
+           }
+           break;
+       }
+       if (utf8)
+           free (utf8);
+    }
+    
+    if (!family)
+       family = (FcChar8 *) face->family_name;
+    
+    if (!style)
+       style = (FcChar8 *) face->style_name;
+    
      if (!family)
      {
-       family = (FcChar8 *) strrchr ((char *) file, '/');
-       if (family)
-           family++;
+       FcChar8 *start, *end;
+       
+       start = (FcChar8 *) strrchr ((char *) file, '/');
+       if (start)
+           start++;
         else
-           family = file;
+           start = (FcChar8 *) file;
+       end = (FcChar8 *) strrchr ((char *) start, '.');
+       if (!end)
+           end = start + strlen ((char *) start);
+       family = malloc (end - start + 1);
+       strncpy ((char *) family, (char *) start, end - start);
+       family[end - start] = '\0';
+       family_allocated = FcTrue;
      }
+
+    if (FcDebug() & FC_DBG_SCAN)
+       printf ("\"%s\" \"%s\" ", family, style ? style : (FcChar8 *) "<none>");
+
      if (!FcPatternAddString (pat, FC_FAMILY, family))
+    {
+       if (family_allocated)
+           free (family);
+       if (style_allocated)
+           free (style);
         goto bail1;
+    }
  
-    if (face->style_name)
+    if (family_allocated)
+       free (family);
+
+    if (style)
      {
-       if (!FcPatternAddString (pat, FC_STYLE, (FcChar8 *) face->style_name))
+       if (!FcPatternAddString (pat, FC_STYLE, style))
+       {
+           if (style_allocated)
+               free (style);
             goto bail1;
+       }
+       if (style_allocated)
+           free (style);
      }
  
      if (!FcPatternAddString (pat, FC_FILE, file))
diff --git a/src/fcint.h b/src/fcint.h

index 750b19ca27d9ea03569e6958aa3d04ef6c81d72f..a40ef88447ac08fd2c56d143c9ff6b462863864c 100644 (file)
--- a/src/fcint.h
+++ b/src/fcint.h
@@ -278,6 +278,8 @@ struct _FcConfig {
   
  extern FcConfig        *_fcConfig;
  
+typedef struct _FcCharMap FcCharMap;
+
  /* fcblanks.c */
  
  /* fccache.c */
@@ -369,6 +371,15 @@ FcNameUnparseCharSet (FcStrBuf *buf, const FcCharSet *c);
  FcCharSet *
  FcNameParseCharSet (FcChar8 *string);
  
+FcChar32
+FcFreeTypeUcs4ToPrivate (FcChar32 ucs4, const FcCharMap *map);
+/* Returns the bmp value of the map entry whose encode equals private, or ~0 if there is none */
+FcChar32
+FcFreeTypePrivateToUcs4 (FcChar32 private, const FcCharMap *map);
+/* Returns the map listed for encoding in fcFontDecoders, or 0 if the encoding is not listed */
+const FcCharMap *
+FcFreeTypeGetPrivateMap (FT_Encoding encoding);
+    
  /* fcdbg.c */
  void
  FcValueListPrint (FcValueList *l);
diff --git a/src/fcstr.c b/src/fcstr.c

index bea05d90af13ea310bc70a34e72e0e7b4405e021..a62cb7b93dc6a92a9e823d4e83439425dc038575 100644 (file)
--- a/src/fcstr.c
+++ b/src/fcstr.c
@@ -1,5 +1,5 @@
  /*
- * $XFree86: xc/lib/fontconfig/src/fcstr.c,v 1.5 2002/05/29 22:07:33 keithp Exp $
+ * $XFree86: xc/lib/fontconfig/src/fcstr.c,v 1.6 2002/07/06 23:47:44 keithp Exp $
   *
   * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
   *
@@ -229,6 +229,103 @@ FcUtf8Len (FcChar8        *string,
      return FcTrue;
  }
  
+/* Encode ucs4 as UTF-8 (1 to 6 byte forms) into dest.  Returns the number
+ * of bytes stored, or 0 without writing anything if ucs4 >= 0x80000000. */
+int
+FcUcs4ToUtf8 (FcChar32 ucs4,
+             FcChar8   dest[FC_UTF8_MAX_LEN])
+{
+    int        n, i;       /* n counts the continuation bytes */
+    FcChar8 lead;          /* length marker bits of the first byte */
+    if      (ucs4 <       0x80) { dest[0] = ucs4; return 1; }
+    else if (ucs4 <      0x800) { n = 1; lead = 0xC0; }
+    else if (ucs4 <    0x10000) { n = 2; lead = 0xE0; }
+    else if (ucs4 <   0x200000) { n = 3; lead = 0xF0; }
+    else if (ucs4 <  0x4000000) { n = 4; lead = 0xF8; }
+    else if (ucs4 < 0x80000000) { n = 5; lead = 0xFC; }
+    else return 0;
+    dest[0] = lead | (ucs4 >> (6 * n));
+    for (i = 1; i <= n; i++)
+       dest[i] = ((ucs4 >> (6 * (n - i))) & 0x3F) | 0x80;
+    return n + 1;
+}
+
+#define GetUtf16(src,endian) \
+    ((FcChar16) ((src)[(endian) == FcEndianBig ? 0 : 1] << 8) | \
+     (FcChar16) ((src)[(endian) == FcEndianBig ? 1 : 0]))
+
+/*
+ * Decode one character from the UTF-16 data at src_orig (len bytes
+ * available, byte order given by endian) into *dst.  Returns the number
+ * of bytes consumed (2 or 4), or 0 if the input is truncated or a high
+ * surrogate is not followed by a low surrogate; *dst is then untouched.
+ */
+int
+FcUtf16ToUcs4 (FcChar8 *src_orig,
+              FcEndian endian,
+              FcChar32 *dst,
+              int      len)    /* in bytes */
+{
+    FcChar8    *src = src_orig;
+    FcChar16   a, b;
+    FcChar32   result;
+
+    if (len < 2)
+       return 0;
+    a = GetUtf16 (src, endian); src += 2; len -= 2;
+    if ((a & 0xfc00) == 0xd800)             /* high surrogate: a pair is required */
+    {
+       if (len < 2)
+           return 0;
+       b = GetUtf16 (src, endian); src += 2; len -= 2;
+       if ((b & 0xfc00) != 0xdc00)          /* not a low surrogate */
+           return 0;
+       /* The pair carries (code point - 0x10000), so the offset must be added;
+        * OR-ing it in fails when bit 16 is set (U+20000 would give U+10000). */
+       result = ((((FcChar32) a & 0x3ff) << 10) |
+                 ((FcChar32) b & 0x3ff)) + 0x10000;
+    }
+    else
+       result = a;
+    *dst = result;
+    return src - src_orig;
+}
+
+/*
+ * Validate len bytes of UTF-16 at string (byte order given by endian).
+ * On success stores the character count in *nchar and the bytes needed
+ * per character (1, 2 or 4) to hold the largest one in *wchar and returns
+ * FcTrue; returns FcFalse, leaving both untouched, if decoding fails.
+ */
+FcBool
+FcUtf16Len (FcChar8    *string,
+           FcEndian    endian,
+           int         len,    /* in bytes */
+           int         *nchar,
+           int         *wchar)
+{
+    int                n = 0;
+    int                clen;
+    FcChar32   c;
+    FcChar32   max = 0;
+
+    while (len)
+    {
+       clen = FcUtf16ToUcs4 (string, endian, &c, len);
+       if (clen <= 0)  /* malformed UTF-16 string */
+           return FcFalse;
+       if (c > max)
+           max = c;
+       string += clen;
+       len -= clen;
+       n++;
+    }
+    *nchar = n;
+    /* U+0100 is the first character that no longer fits in one byte */
+    *wchar = max >= 0x10000 ? 4 : max >= 0x100 ? 2 : 1;
+    return FcTrue;
+}
+
  void
  FcStrBufInit (FcStrBuf *buf, FcChar8 *init, int size)
  {
author	Keith Packard <keithp@keithp.com>
	Sat, 13 Jul 2002 05:43:25 +0000 (05:43 +0000)
committer	Keith Packard <keithp@keithp.com>
	Sat, 13 Jul 2002 05:43:25 +0000 (05:43 +0000)
fontconfig/fontconfig.h		patch \| blob \| blame \| history
src/fccharset.c		patch \| blob \| blame \| history
src/fcfreetype.c		patch \| blob \| blame \| history
src/fcint.h		patch \| blob \| blame \| history
src/fcstr.c		patch \| blob \| blame \| history