Switch fc-match to use FcPatternFormat()

[fontconfig.git] / src / fcformat.c
diff --git a/src/fcformat.c b/src/fcformat.c

index 44730094c85a788b030cac1a427b4a75ccaf453c..4a850ee0944df487d95a77417b1615baba70f744 100644 (file)
--- a/src/fcformat.c
+++ b/src/fcformat.c
@@ -7,15 +7,15 @@
   * documentation for any purpose is hereby granted without fee, provided that
   * the above copyright notice appear in all copies and that both that
   * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Keith Packard not be used in
+ * documentation, and that the name of the author(s) not be used in
   * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Keith Packard makes no
+ * specific, written prior permission.  The authors make no
   * representations about the suitability of this software for any purpose.  It
   * is provided "as is" without express or implied warranty.
   *
- * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
   * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
   * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
   * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
   * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
@@ -28,14 +28,57 @@
  #include <stdarg.h>
  
  
-/*
+/* The language is documented in doc/fcformat.fncs
+ * These are the features implemented:
+ *
+ * simple      %{elt}
+ * width       %width{elt}
+ * index       %{elt[idx]}
+ * name=       %{elt=}
+ * :name=      %{:elt=}
+ * default     %{elt:-word}
+ * count       %{#elt}
+ * subexpr     %{{expr}}
+ * filter-out  %{-elt1,elt2,elt3{expr}}
+ * filter-in   %{+elt1,elt2,elt3{expr}}
+ * conditional %{?elt1,elt2,!elt3{}{}}
+ * enumerate   %{[]elt1,elt2{expr}}
+ * langset     langset enumeration using the same syntax
+ * builtin     %{=blt}
+ * convert     %{elt|conv1|conv2|conv3}
+ *
+ * converters:
+ * basename    FcStrBasename
+ * dirname     FcStrDirname
+ * downcase    FcStrDowncase
+ * shescape
+ * cescape
+ * xmlescape
+ * delete      delete chars
+ * escape      escape chars
+ * translate   translate chars
+ *
+ * builtins:
+ * unparse     FcNameUnparse
+ * fcmatch     fc-match default
+ * fclist      fc-list default
+ * pkgkit      PackageKit package tag format
+ *
+ *
   * Some ideas for future syntax extensions:
   *
+ * - verbose builtin that is like FcPatternPrint
   * - allow indexing subexprs using '%{[idx]elt1,elt2{subexpr}}'
- * - allow indexing simple tags using '%{elt[idx]}'
+ * - allow indexing in +, -, ? filtering?
   * - conditional/filtering/deletion on binding (using '(w)'/'(s)'/'(=)' notation)
   */
  
+
+/* Format strings behind the "fcmatch", "fclist" and "pkgkit" builtins
+ * (see interpret_builtin).
+ *
+ * FCMATCH_FORMAT: basename of the file, then the first family and the first
+ *                 style in double quotes, each with a placeholder default.
+ * FCLIST_FORMAT:  "file: " when the pattern has a file, followed by the
+ *                 unparsed pattern with the file element filtered out.
+ * PKGKIT_FORMAT:  one "font(...)" line per family (downcased, spaces
+ *                 deleted) and one "font(:lang=...)" line per lang
+ *                 (downcased, '_' translated to '-').
+ */
+#define FCMATCH_FORMAT "%{file:-<unknown filename>|basename}: \"%{family[0]:-<unknown family>}\" \"%{style[0]:-<unknown style>}\""
+#define FCLIST_FORMAT  "%{?file{%{file}: }}%{-file{%{=unparse}}}"
+#define PKGKIT_FORMAT  "%{[]family{font(%{family|downcase|delete( )})\n}}%{[]lang{font(:lang=%{lang|downcase|translate(_,-)})\n}}"
+
+
  static void
  message (const char *fmt, ...)
  {
@@ -216,30 +259,63 @@ read_chars (FcFormatContext *c,
      return FcTrue;
  }
  
+/* Forward declaration: interpret_builtin() expands the custom builtin
+ * formats through this function, which is defined further down the file. */
+static FcBool
+FcPatternFormatToBuf (FcPattern     *pat,
+                     const FcChar8 *format,
+                     FcStrBuf      *buf);
+
  static FcBool
  interpret_builtin (FcFormatContext *c,
                    FcPattern       *pat,
                    FcStrBuf        *buf)
  {
-    if (!expect_char (c, '='))
-       return FcFalse;
+    /* Expand a builtin reference: consume the leading '=', read the builtin
+     * name into c->word and append that builtin's rendering of PAT to BUF.
+     * Returns FcFalse on a parse error, when a simple builtin yields no
+     * string, when a custom format fails to expand, or when the name matches
+     * no builtin. */
+    FcChar8       *new_str;
+    FcBool         ret;
  
-    if (!read_word (c))
+    if (!expect_char (c, '=') ||
+       !read_word (c))
         return FcFalse;
+
+    /* try simple builtins first: each one turns PAT into a string that is
+     * appended to BUF and then freed below.  The empty "if (0) { }" gives the
+     * "else if" arms generated by BUILTIN something to chain onto. */
+    if (0) { }
  #define BUILTIN(name, func) \
      else if (0 == strcmp ((const char *) c->word, name))\
-       return func (c, pat, buf)
-#if 0
-    BUILTIN  ("unparse",  FcNameUnparse);
-    BUILTIN  ("verbose",  FcPatternPrint);
-    BUILTIN2 ("fcmatch",  FcStrDirname);
-    BUILTIN2 ("fclist",   FcStrDirname);
-    BUILTIN2 ("pkgkit",   FcStrDirname);
-#endif
-
-    message ("unknown builtin \"%s\"",
-            c->word);
-    return FcFalse;
+       do { new_str = func (pat); ret = FcTrue; } while (0)
+    BUILTIN ("unparse",  FcNameUnparse);
+ /* BUILTIN ("verbose",  FcPatternPrint); XXX */
+#undef BUILTIN
+    else
+       ret = FcFalse;
+
+    /* ret is FcTrue only when a simple builtin matched, which is also the
+     * only case in which new_str has been assigned. */
+    if (ret)
+    {
+       if (new_str)
+       {
+           FcStrBufString (buf, new_str);
+           free (new_str);
+           return FcTrue;
+       }
+       else
+           return FcFalse;
+    }
+
+    /* now try our custom formats: these are ordinary format strings that are
+     * expanded against PAT straight into BUF. */
+    if (0) { }
+#define BUILTIN(name, format) \
+    else if (0 == strcmp ((const char *) c->word, name))\
+       ret = FcPatternFormatToBuf (pat, (const FcChar8 *) format, buf)
+    BUILTIN ("fcmatch",  FCMATCH_FORMAT);
+    BUILTIN ("fclist",   FCLIST_FORMAT);
+    BUILTIN ("pkgkit",   PKGKIT_FORMAT);
+#undef BUILTIN
+    else
+       ret = FcFalse;
+
+    /* NOTE(review): this message is also printed when a known custom format
+     * was matched but failed to expand, not only for unknown names. */
+    if (!ret)
+       message ("unknown builtin \"%s\"",
+                c->word);
+
+    return ret;
  }
  
  static FcBool
@@ -345,9 +421,9 @@ maybe_skip_subexpr (FcFormatContext *c)
  }
  
  static FcBool
-interpret_filter (FcFormatContext *c,
-                 FcPattern       *pat,
-                 FcStrBuf        *buf)
+interpret_filter_in (FcFormatContext *c,
+                    FcPattern       *pat,
+                    FcStrBuf        *buf)
  {
      FcObjectSet  *os;
      FcPattern    *subpat;
@@ -361,6 +437,7 @@ interpret_filter (FcFormatContext *c,
  
      do
      {
+       /* XXX binding */
         if (!read_word (c) ||
             !FcObjectSetAdd (os, (const char *) c->word))
         {
@@ -382,9 +459,9 @@ interpret_filter (FcFormatContext *c,
  }
  
  static FcBool
-interpret_delete (FcFormatContext *c,
-                 FcPattern       *pat,
-                 FcStrBuf        *buf)
+interpret_filter_out (FcFormatContext *c,
+                     FcPattern       *pat,
+                     FcStrBuf        *buf)
  {
      FcPattern    *subpat;
  
@@ -438,9 +515,8 @@ interpret_cond (FcFormatContext *c,
  
         pass = pass &&
                (negate ^
-               (FcResultMatch == FcPatternGet (pat,
-                                               (const char *) c->word,
-                                               0, &v)));
+               (FcResultMatch ==
+                FcPatternGet (pat, (const char *) c->word, 0, &v)));
      }
      while (consume_char (c, ','));
  
@@ -494,6 +570,124 @@ interpret_count (FcFormatContext *c,
      return FcTrue;
  }
  
+/* Expand an enumeration of the form "[]elt1,elt2{expr}".
+ *
+ * The subexpression is interpreted once per value index: on each pass a copy
+ * of PAT carries, for every listed element, only the value found at the
+ * current index (elements with no value at that index are left out), and the
+ * passes stop at the first index where none of the elements has a value.
+ * When a single element is listed and it holds an FcLangSet, the languages
+ * of that set are enumerated instead, each one added to the copy as a string.
+ *
+ * Output is appended to BUF.  Returns FcTrue on success and FcFalse on a
+ * syntax error, on an allocation failure, or when the subexpression fails.
+ */
+static FcBool
+interpret_enumerate (FcFormatContext *c,
+                    FcPattern       *pat,
+                    FcStrBuf        *buf)
+{
+    FcObjectSet   *os;
+    FcPattern     *subpat;
+    const FcChar8 *format_save;
+    int            idx;
+    FcBool         ret, done;
+    FcStrList      *lang_strs;
+
+    if (!expect_char (c, '[') ||
+       !expect_char (c, ']'))
+       return FcFalse;
+
+    os = FcObjectSetCreate ();
+    if (!os)
+       return FcFalse;
+
+    /* Assume failure until the whole enumeration has run, so that every
+     * early exit through the cleanup labels reports the error. */
+    ret = FcFalse;
+    lang_strs = NULL;
+
+    do
+    {
+       if (!read_word (c) ||
+           !FcObjectSetAdd (os, (const char *) c->word))
+           goto bail0;
+    }
+    while (consume_char (c, ','));
+
+    /* If we have one element and it's of type FcLangSet, we want
+     * to enumerate the languages in it. */
+    if (os->nobject == 1)
+    {
+       FcLangSet *langset;
+       if (FcResultMatch ==
+           FcPatternGetLangSet (pat, os->objects[0], 0, &langset))
+       {
+           /* NOTE(review): ss is never released in this function; confirm
+            * whether the list takes over the caller's reference or whether
+            * an FcStrSetDestroy is needed here. */
+           FcStrSet *ss;
+           if (!(ss = FcLangSetGetLangs (langset)) ||
+               !(lang_strs = FcStrListCreate (ss)))
+               goto bail0;
+       }
+    }
+
+    subpat = FcPatternDuplicate (pat);
+    if (!subpat)
+       goto bail0;
+
+    format_save = c->format;
+    idx = 0;
+    do
+    {
+       int i;
+
+       done = FcTrue;
+
+       if (lang_strs)
+       {
+           FcChar8 *lang;
+
+           FcPatternDel (subpat, os->objects[0]);
+           if ((lang = FcStrListNext (lang_strs)))
+           {
+               /* XXX binding? */
+               FcPatternAddString (subpat, os->objects[0], lang);
+               done = FcFalse;
+           }
+       }
+       else
+       {
+           for (i = 0; i < os->nobject; i++)
+           {
+               FcValue v;
+
+               /* XXX this can be optimized by accessing valuelist linked lists
+                * directly and remembering where we were.  Most (all) value lists
+                * in normal uses are pretty short though (language tags are
+                * stored as a LangSet, not separate values). */
+               FcPatternDel (subpat, os->objects[i]);
+               if (FcResultMatch ==
+                   FcPatternGet (pat, os->objects[i], idx, &v))
+               {
+                   /* XXX binding */
+                   FcPatternAdd (subpat, os->objects[i], v, FcFalse);
+                   done = FcFalse;
+               }
+           }
+       }
+
+       if (!done)
+       {
+           /* Rewind so that every pass interprets the same subexpression. */
+           c->format = format_save;
+           if (!interpret_subexpr (c, subpat, buf))
+               goto bail;
+       }
+
+       idx++;
+    } while (!done);
+
+    /* No pass ran, so the subexpression is still unread: step over it. */
+    if (c->format == format_save)
+       skip_subexpr (c);
+
+    ret = FcTrue;
+
+bail:
+    FcPatternDestroy (subpat);
+bail0:
+    if (lang_strs)
+       FcStrListDone (lang_strs);
+    FcObjectSetDestroy (os);
+
+    return ret;
+}
+
  static FcBool
  interpret_simple (FcFormatContext *c,
                   FcPattern       *pat,
@@ -502,6 +696,8 @@ interpret_simple (FcFormatContext *c,
      FcPatternElt *e;
      FcBool        add_colon = FcFalse;
      FcBool        add_elt_name = FcFalse;
+    int           idx;
+    FcChar8      *else_string;
  
      if (consume_char (c, ':'))
         add_colon = FcTrue;
@@ -509,14 +705,47 @@ interpret_simple (FcFormatContext *c,
      if (!read_word (c))
         return FcFalse;
  
+    idx = -1;
+    if (consume_char (c, '['))
+    {
+       idx = strtol ((const char *) c->format, (char **) &c->format, 10);
+       if (idx < 0)
+       {
+           message ("expected non-negative number at %d",
+                    c->format-1 - c->format_orig + 1);
+           return FcFalse;
+       }
+       if (!expect_char (c, ']'))
+           return FcFalse;
+    }
+
      if (consume_char (c, '='))
         add_elt_name = FcTrue;
  
+    /* modifiers */
+    else_string = NULL;
+    if (consume_char (c, ':'))
+    {
+       FcChar8 *orig;
+       /* divert the c->word for now */
+       orig = c->word;
+       c->word = c->word + strlen ((const char *) c->word) + 1;
+       /* for now we just support 'default value' */
+       if (!expect_char (c, '-') ||
+           !read_chars (c, '|'))
+       {
+           c->word = orig;
+           return FcFalse;
+       }
+       else_string = c->word;
+       c->word = orig;
+    }
+
      e = FcPatternObjectFindElt (pat,
                                 FcObjectFromName ((const char *) c->word));
-    if (e)
+    if (e || else_string)
      {
-       FcValueListPtr l;
+       FcValueListPtr l = NULL;
  
         if (add_colon)
             FcStrBufChar (buf, ':');
@@ -526,8 +755,33 @@ interpret_simple (FcFormatContext *c,
             FcStrBufChar (buf, '=');
         }
  
-       l = FcPatternEltValues(e);
-       FcNameUnparseValueList (buf, l, '\0');
+       if (e)
+           l = FcPatternEltValues(e);
+
+       if (idx != -1)
+       {
+           while (l && idx > 0)
+           {
+               l = FcValueListNext(l);
+               idx--;
+           }
+           if (l && idx == 0)
+           {
+               if (!FcNameUnparseValue (buf, &l->value, '\0'))
+                   return FcFalse;
+           }
+           else goto notfound;
+        }
+       else if (l)
+       {
+           FcNameUnparseValueList (buf, l, '\0');
+       }
+       else
+       {
+    notfound:
+           if (else_string)
+               FcStrBufString (buf, else_string);
+       }
      }
  
      return FcTrue;
@@ -535,9 +789,11 @@ interpret_simple (FcFormatContext *c,
  
  static FcBool
  cescape (FcFormatContext *c,
-        FcStrBuf        *buf,
-        const FcChar8   *str)
+        const FcChar8   *str,
+        FcStrBuf        *buf)
  {
+    /* XXX escape \n etc? */
+
      while(*str)
      {
         switch (*str)
@@ -554,8 +810,8 @@ cescape (FcFormatContext *c,
  
  static FcBool
  shescape (FcFormatContext *c,
-         FcStrBuf        *buf,
-         const FcChar8   *str)
+         const FcChar8   *str,
+         FcStrBuf        *buf)
  {
      FcStrBufChar (buf, '\'');
      while(*str)
@@ -572,9 +828,11 @@ shescape (FcFormatContext *c,
  
  static FcBool
  xmlescape (FcFormatContext *c,
-          FcStrBuf        *buf,
-          const FcChar8   *str)
+          const FcChar8   *str,
+          FcStrBuf        *buf)
  {
+    /* XXX escape \n etc? */
+
      while(*str)
      {
         switch (*str)
@@ -591,8 +849,8 @@ xmlescape (FcFormatContext *c,
  
  static FcBool
  delete_chars (FcFormatContext *c,
-             FcStrBuf        *buf,
-             const FcChar8   *str)
+             const FcChar8   *str,
+             FcStrBuf        *buf)
  {
      /* XXX not UTF-8 aware */
  
@@ -624,8 +882,8 @@ delete_chars (FcFormatContext *c,
  
  static FcBool
  escape_chars (FcFormatContext *c,
-             FcStrBuf        *buf,
-             const FcChar8   *str)
+             const FcChar8   *str,
+             FcStrBuf        *buf)
  {
      /* XXX not UTF-8 aware */
  
@@ -659,8 +917,8 @@ escape_chars (FcFormatContext *c,
  
  static FcBool
  translate_chars (FcFormatContext *c,
-                FcStrBuf        *buf,
-                const FcChar8   *str)
+                const FcChar8   *str,
+                FcStrBuf        *buf)
  {
      char *from, *to, repeat;
      int from_len, to_len;
@@ -676,7 +934,7 @@ translate_chars (FcFormatContext *c,
      from_len = strlen (from);
      to = from + from_len + 1;
  
-    /* hack: we temporarily diverge c->word */
+    /* hack: we temporarily divert c->word */
      c->word = (FcChar8 *) to;
      if (!read_chars (c, ')'))
      {
@@ -726,19 +984,17 @@ interpret_convert (FcFormatContext *c,
      FcChar8        buf_static[8192];
      FcBool         ret;
  
-    if (!expect_char (c, '|'))
+    if (!expect_char (c, '|') ||
+       !read_word (c))
         return FcFalse;
  
-    /* nul-terminate the buffer */
+    /* prepare the buffer */
      FcStrBufChar (buf, '\0');
      if (buf->failed)
         return FcFalse;
      str = buf->buf + start;
      buf->len = start;
  
-    if (!read_word (c))
-       return FcFalse;
-
      /* try simple converters first */
      if (0) { }
  #define CONVERTER(name, func) \
@@ -755,7 +1011,6 @@ interpret_convert (FcFormatContext *c,
      {
         if (new_str)
         {
-           /* replace in the buffer */
             FcStrBufString (buf, new_str);
             free (new_str);
             return FcTrue;
@@ -770,7 +1025,7 @@ interpret_convert (FcFormatContext *c,
      if (0) { }
  #define CONVERTER(name, func) \
      else if (0 == strcmp ((const char *) c->word, name))\
-       ret = func (c, &new_buf, str)
+       ret = func (c, str, &new_buf)
      CONVERTER ("cescape",   cescape);
      CONVERTER ("shescape",  shescape);
      CONVERTER ("xmlescape", xmlescape);
@@ -870,13 +1125,14 @@ interpret_percent (FcFormatContext *c,
      start = buf->len;
  
      switch (*c->format) {
-    case '=': ret = interpret_builtin (c, pat, buf); break;
-    case '{': ret = interpret_subexpr (c, pat, buf); break;
-    case '+': ret = interpret_filter  (c, pat, buf); break;
-    case '-': ret = interpret_delete  (c, pat, buf); break;
-    case '?': ret = interpret_cond    (c, pat, buf); break;
-    case '#': ret = interpret_count   (c, pat, buf); break;
-    default:  ret = interpret_simple  (c, pat, buf); break;
+    case '=': ret = interpret_builtin    (c, pat, buf); break;
+    case '{': ret = interpret_subexpr    (c, pat, buf); break;
+    case '+': ret = interpret_filter_in  (c, pat, buf); break;
+    case '-': ret = interpret_filter_out (c, pat, buf); break;
+    case '?': ret = interpret_cond       (c, pat, buf); break;
+    case '#': ret = interpret_count      (c, pat, buf); break;
+    case '[': ret = interpret_enumerate  (c, pat, buf); break;
+    default:  ret = interpret_simple     (c, pat, buf); break;
      }
  
      return ret &&
@@ -910,22 +1166,37 @@ interpret_expr (FcFormatContext *c,
      return FcTrue;
  }
  
+/* Interpret FORMAT against PAT and append the output to BUF.
+ * Returns FcFalse when the format context cannot be initialised or when
+ * interpreting the expression fails; otherwise returns FcTrue. */
+static FcBool
+FcPatternFormatToBuf (FcPattern     *pat,
+                     const FcChar8 *format,
+                     FcStrBuf      *buf)
+{
+    FcChar8         scratch[1024];
+    FcFormatContext ctx;
+    FcBool          ok = FcFalse;
+
+    if (FcFormatContextInit (&ctx, format, scratch, sizeof (scratch)))
+    {
+       /* The context is torn down whether or not interpretation succeeds. */
+       ok = interpret_expr (&ctx, pat, buf, '\0');
+       FcFormatContextDone (&ctx);
+    }
+
+    return ok;
+}
+
  FcChar8 *
-FcPatternFormat (FcPattern *pat, const FcChar8 *format)
+FcPatternFormat (FcPattern *pat,
+                const FcChar8 *format)
  {
      FcStrBuf        buf;
-    FcChar8         word_static[1024];
      FcChar8         buf_static[8192 - 1024];
-    FcFormatContext c;
      FcBool          ret;
  
      FcStrBufInit (&buf, buf_static, sizeof (buf_static));
-    if (!FcFormatContextInit (&c, format, word_static, sizeof (word_static)))
-       return NULL;
  
-    ret = interpret_expr (&c, pat, &buf, '\0');
+    ret = FcPatternFormatToBuf (pat, format, &buf);
  
-    FcFormatContextDone (&c);
      if (ret)
         return FcStrBufDone (&buf);
      else