From caeea376da54a6337bfcb1bb20f50c8c2302458f Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod <behdad@behdad.org>
Date: Fri, 13 Mar 2009 17:26:21 -0400
Subject: [PATCH] Document FcPatternFormat() format

---
 doc/Makefile.am           |   2 +
 doc/fcformat.fncs         | 301 ++++++++++++++++++++++++++++++++++++++
 doc/fcpattern.fncs        |  20 ---
 doc/fontconfig-devel.sgml |   2 +
 src/fcformat.c            |  94 ++++++------
 5 files changed, 353 insertions(+), 66 deletions(-)
 create mode 100644 doc/fcformat.fncs
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 8bf1ba1..9ffd54f 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -49,6 +49,7 @@ DOC_FUNCS_FNCS=\
 	fcdircache.fncs \
 	fcfile.fncs \
 	fcfontset.fncs \
+	fcformat.fncs \
 	fcfreetype.fncs \
 	fcinit.fncs \
 	fclangset.fncs \
@@ -70,6 +71,7 @@ DOC_FUNCS_SGML=\
 	fcdircache.sgml \
 	fcfile.sgml \
 	fcfontset.sgml \
+	fcformat.sgml \
 	fcfreetype.sgml \
 	fcinit.sgml \
 	fclangset.sgml \
diff --git a/doc/fcformat.fncs b/doc/fcformat.fncs
new file mode 100644
index 0000000..b092449
--- /dev/null
+++ b/doc/fcformat.fncs
@@ -0,0 +1,301 @@
+/*
+ * fontconfig/doc/fcformat.fncs
+ *
+ * Copyright Â© 2008 Behdad Esfahbod
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+@RET@		FcChar8 *
+@FUNC@		FcPatternFormat
+@TYPE1@		FcPattern *			@ARG1@		pat
+@TYPE2@		const FcChar8 *			@ARG2@		format
+@PURPOSE@	Format a pattern into a string according to a format specifier
+@DESC@
+
+Converts given pattern <parameter>pat</parameter> into text described by
+the format specifier <parameter>format</parameter>.
+The return value refers to newly allocated memory which should be freed by the
+caller using free(), or NULL if <parameter>format</parameter> is invalid.
+
+</para><para>
+
+The format is loosely modelled after printf-style format string.
+The format string is composed of zero or more  directives: ordinary
+characters (not "%"), which are copied unchanged to the output stream;
+and tags which are interpreted to construct text from the pattern in a
+variety of ways (explained below).
+Special characters can be escaped
+using backslash.  C-string style special characters like \n and \r are
+also supported (this is useful when the format string is not a C string
+literal).
+It is advisable to always escape curly braces that
+are meant to be copied to the output as ordinary characters.
+
+</para><para>
+
+Each tags is introduced by the character "%",
+followed by an optional minimum field width,
+followed by tag contents in curly braces ({}).
+If the minimum field width value is provided the tag
+will be expanded and the result padded to achieve the minimum width.
+If the minimum field width is positive, the padding will right-align
+the text.  Negative field width will left-align.
+The rest of this section describes various supported tag contents
+and their expansion.
+
+</para><para>
+
+A <firstterm>simple</firstterm> tag
+is one where the content is an identifier.  When simple
+tags are expanded, the named identifier will be looked up in
+<parameter>pattern</parameter> and the resulting list of values returned,
+joined together using comma.  For example, to print the family name and style the
+pattern, use the format "%{family} %{style}\n".  To extend the family column
+to forty characters use "%-40{family}%{style}\n".
+
+</para><para>
+
+Simple tags expand to list of all values for an element.  To only choose
+one of the values, one can index using the syntax "%{elt[idx]}".  For example,
+to get the first family name only, use "%{family[0]}".
+
+</para><para>
+
+If a simple tag ends with "=" and the element is found in the pattern, the
+name of the element followed by "=" will be output before the list of values.
+For example, "%{weight=}" may expand to the string "weight=80".  Or to the empty
+string if <parameter>pattern</parameter> does not have weight set.
+
+</para><para>
+
+If a simple tag starts with ":" and the element is found in the pattern, ":"
+will be printed first.  For example, combining this with the =, the format
+"%{:weight=}" may expand to ":weight=80" or to the empty string
+if <parameter>pattern</parameter> does not have weight set.
+
+</para><para>
+
+If a simple tag contains the string ":-", the rest of the the tag contents
+will be used as a default string.  The default string is output if the element
+is not found in the pattern.  For example, the format
+"%{:weight=:-123}" may expand to ":weight=80" or to the string
+":weight=123" if <parameter>pattern</parameter> does not have weight set.
+
+</para><para>
+
+A <firstterm>count</firstterm> tag
+is one that starts with the character "#" followed by an element
+name, and expands to the number of values for the element in the pattern.
+For example, "%{#family}" expands to the number of family names
+<parameter>pattern</parameter> has set, which may be zero.
+
+</para><para>
+
+A <firstterm>sub-expression</firstterm> tag
+is one that expands a sub-expression.  The tag contents
+are the sub-expression to expand placed inside another set of curly braces.
+Sub-expression tags are useful for aligning an entire sub-expression, or to
+apply converters (explained later) on an entire sub-expression.
+For example, the format "%40{{%{family} %{style}}}" expands the sub-expression
+to construct the family name followed by the style, then takes the entire
+string and pads it on the left to be at least forty characters.
+
+</para><para>
+
+A <firstterm>filter-out</firstterm> tag
+is one starting with the character "-" followed by a
+comma-separated list of element names, followed by a sub-expression enclosed
+in curly braces.  The sub-expression will be expanded but with a pattern that
+has the listed elements removed from it.
+For example, the format "%{-size,pixelsize{sub-expr}}" will expand "sub-expr"
+with <parameter>pattern</parameter> sans the size and pixelsize elements.
+
+</para><para>
+
+A <firstterm>filter-in</firstterm> tag
+is one starting with the character "+" followed by a
+comma-separated list of element names, followed by a sub-expression enclosed
+in curly braces.  The sub-expression will be expanded but with a pattern that
+only has the listed elements from the surrounding pattern.
+For example, the format "%{+family,familylang{sub-expr}}" will expand "sub-expr"
+with a sub-pattern consisting only the family and family lang elements of
+<parameter>pattern</parameter>.
+
+</para><para>
+
+A <firstterm>conditional</firstterm> tag
+is one starting with the character "?" followed by a
+comma-separated list of element conditions, followed by two sub-expression
+enclosed in curly braces.  An element condition can be an element name,
+in which case it tests whether the element is defined in pattern, or
+the character "!" followed by an element name, in which case the test
+is negated.  The conditional passes if all the element conditions pass.
+The tag expands the first sub-expression if the conditional passes, and
+expands the second sub-expression otherwise.
+For example, the format "%{?size,dpi,!pixelsize{pass}{fail}}" will expand
+to "pass" if <parameter>pattern</parameter> has size and dpi elements but
+no pixelsize element, and to "fail" otherwise.
+
+</para><para>
+
+An <firstterm>enumerate</firstterm> tag
+is one starting with the string "[]" followed by a
+comma-separated list of element names, followed by a sub-expression enclosed
+in curly braces.  The list of values for the named elements are walked in
+parallel and the sub-expression expanded each time with a pattern just having
+a single value for those elements, starting from the first value and
+continuing as long as any of those elements has a value.
+For example, the format "%{[]family,familylang{%{family} (%{familylang})\n}}"
+will expand the pattern "%{family} (%{familylang})\n" with a pattern
+having only the first value of the family and familylang elemtns, then expands
+it with the second values, then the third, etc.
+
+</para><para>
+
+As a special case, if an enumerate tag has only one element, and that element
+has only one value in the pattern, and that value is of type FcLangSet, the
+individual languages in the language set are enumerated.
+
+</para><para>
+
+A <firstterm>builtin</firstterm> tag
+is one starting with the character "=" followed by a builtin
+name.  The following builtins are defined:
+
+<variablelist>
+
+<varlistentry><term>
+unparse
+</term><listitem><para>
+Expands to the result of calling FcNameUnparse() on the pattern.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+fcmatch
+</term><listitem><para>
+Expands to the output of the default output format of the fc-match
+command on the pattern, without the final newline.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+fclist
+</term><listitem><para>
+Expands to the output of the default output format of the fc-list
+command on the pattern, without the final newline.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+pkgkit
+</term><listitem><para>
+Expands to the list of PackageKit font() tags for the pattern.
+Currently this includes tags for each family name, and each language
+from the pattern, enumerated and sanitized into a set of tags terminated
+by newline.  Package management systems can use these tags to tag their
+packages accordingly.
+</para></listitem></varlistentry>
+
+</variablelist>
+
+For example, the format "%{+family,style{%{=unparse}}}\n" will expand
+to an unparsed name containing only the family and style element values
+from <parameter>pattern</parameter>.
+
+</para><para>
+
+The contents of any tag can be followed by a set of zero or more
+<firstterm>converter</firstterm>s.  A converter is specified by the
+character "|" followed by the converter name and arguments.  The
+following converters are defined:
+
+<variablelist>
+
+<varlistentry><term>
+basename
+</term><listitem><para>
+Replaces text with the results of calling FcStrBasename() on it.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+dirname
+</term><listitem><para>
+Replaces text with the results of calling FcStrDirname() on it.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+downcase
+</term><listitem><para>
+Replaces text with the results of calling FcStrDowncase() on it.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+shescape
+</term><listitem><para>
+Escapes text for one level of shell expansion.
+(Escapes single-quotes, also encloses text in single-quotes.)
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+cescape
+</term><listitem><para>
+Escapes text such that it can be used as part of a C string literal.
+(Escapes backslash and double-quotes.)
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+xmlescape
+</term><listitem><para>
+Escapes text such that it can be used in XML and HTML.
+(Escapes less-than, greater-than, and ampersand.)
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+delete(<parameter>chars</parameter>)
+</term><listitem><para>
+Deletes all occurrences of each of the characters in <parameter>chars</parameter>
+from the text.
+FIXME: This converter is not UTF-8 aware yet.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+escape(<parameter>chars</parameter>)
+</term><listitem><para>
+Escapes all occurrences of each of the characters in <parameter>chars</parameter>
+by prepending it by the first character in <parameter>chars</parameter>.
+FIXME: This converter is not UTF-8 aware yet.
+</para></listitem></varlistentry>
+
+<varlistentry><term>
+translate(<parameter>from</parameter>,<parameter>to</parameter>)
+</term><listitem><para>
+Translates all occurrences of each of the characters in <parameter>from</parameter>
+by replacing them with their corresponding character in <parameter>to</parameter>.
+If <parameter>to</parameter> has fewer characters than
+<parameter>from</parameter>, it will be extended by repeating its last
+character.
+FIXME: This converter is not UTF-8 aware yet.
+</para></listitem></varlistentry>
+
+</variablelist>
+
+For example, the format "%{family|downcase|delete( )}\n" will expand
+to the values of the family element in <parameter>pattern</parameter>,
+lower-cased and with spaces removed.
+
+@@
diff --git a/doc/fcpattern.fncs b/doc/fcpattern.fncs
index 8948e88..e2c2953 100644
--- a/doc/fcpattern.fncs
+++ b/doc/fcpattern.fncs
@@ -395,23 +395,3 @@ Converts the given pattern into the standard text format described above.
 The return value is not static, but instead refers to newly allocated memory
 which should be freed by the caller using free().
 @@
-
-@RET@		FcChar8 *
-@FUNC@		FcPatternFormat
-@TYPE1@		FcPattern *			@ARG1@		pat
-@TYPE2@		const FcChar8 *			@ARG2@		format
-@PURPOSE@	Format a pattern into a string according to a format specifier
-@DESC@
-
-Converts the given pattern into text format described by the format specifier.
-The format specifier is similar to a C style printf string, which the
-printf(2) man page provides a good introduction to. However, as fontconfig
-already knows the type of data that is being printed, you must omit the type
-specifier. In its place put the element name you wish to print enclosed in
-curly braces ({}). For example, to print the family name and style the
-pattern, use the format "%{family} %{style}\n".
-There can be an option width specifier after the percent sign and before
-the opening brace.  The width modifier acts similar to those in printf.
-The return value refers to newly allocated memory which should be freed by the
-caller using free(), or NULL if <parameter>format</parameter> is invalid.
-@@
diff --git a/doc/fontconfig-devel.sgml b/doc/fontconfig-devel.sgml
index bce8d68..891251f 100644
--- a/doc/fontconfig-devel.sgml
+++ b/doc/fontconfig-devel.sgml
@@ -8,6 +8,7 @@
 <!ENTITY fcdircache SYSTEM "fcdircache.sgml">
 <!ENTITY fcfile SYSTEM "fcfile.sgml">
 <!ENTITY fcfontset SYSTEM "fcfontset.sgml">
+<!ENTITY fcformat SYSTEM "fcformat.sgml">
 <!ENTITY fcfreetype SYSTEM "fcfreetype.sgml">
 <!ENTITY fcinit SYSTEM "fcinit.sgml">
 <!ENTITY fclangset SYSTEM "fclangset.sgml">
@@ -437,6 +438,7 @@ An FcPattern is an opaque type that holds both patterns to match against the
 available fonts, as well as the information about each font.
     </para>
     &fcpattern;
+    &fcformat;
   </sect2>
   <sect2><title>FcFontSet</title>
     <para>
diff --git a/src/fcformat.c b/src/fcformat.c
index 5a05238..bf94017 100644
--- a/src/fcformat.c
+++ b/src/fcformat.c
@@ -28,44 +28,46 @@
 #include <stdarg.h>
 
 
-/* XXX Document the language.
+/* The language is documented in doc/fcformat.fncs
+ * These are the features implemented:
  *
- * These are mostly the features implemented but not documented:
+ * simple	%{elt}
+ * width	%width{elt}
+ * index	%{elt[idx]}
+ * name=	%{elt=}
+ * :name=	%{:elt}
+ * default	%{elt:-word}
+ * count	%{#elt}
+ * subexpr	%{{expr}}
+ * filter-out	%{-elt1,elt2,elt3{expr}}
+ * filter-in	%{+elt1,elt2,elt3{expr}}
+ * conditional	%{?elt1,elt2,!elt3{}{}}
+ * enumerate	%{[]elt1,elt2{expr}}
+ * langset	langset enumeration using the same syntax
+ * builtin	%{=blt}
+ * convert	%{elt|conv1|conv2|conv3}
  *
- * width   %[[-]width]{tag}
- * index   %{tag[ids]}
- * name=   %{tag=|decorator}
- * :name=  %{:tag=|decorator}
- * subexpr %{{expr}|decorator1|decorator2}
- * delete  %{-charset,lang{expr}|decorator}
- * filter  %{+family,familylang{expr}|decorator}
- * cond    %{?tag1,tag2,!tag3{}{}}
- * decorat %{tag|decorator1|decorator2}
- * default %{parameter:-word}
- * array   %{[]family,familylang{expr}|decorator}
- * langset enumeration using the same syntax as arrays
- *
- * filters:
- * basename        FcStrBasename
- * dirname         FcStrDirname
- * downcase        FcStrDowncase
+ * converters:
+ * basename	FcStrBasename
+ * dirname	FcStrDirname
+ * downcase	FcStrDowncase
  * shescape
  * cescape
  * xmlescape
- * delete          delete chars
- * escape          escape chars
- * translate       translate chars
+ * delete	delete chars
+ * escape	escape chars
+ * translate	translate chars
  *
  * builtins:
- * unparse
- * fcmatch
- * fclist
- * pkgkit
- */
-
-/*
+ * unparse	FcNameUnparse
+ * fcmatch	fc-match default
+ * fclist	fc-list default
+ * pkgkit	PackageKit package tag format
+ *
+ *
  * Some ideas for future syntax extensions:
  *
+ * - verbose builtin that is like FcPatternPrint
  * - allow indexing subexprs using '%{[idx]elt1,elt2{subexpr}}'
  * - conditional/filtering/deletion on binding (using '(w)'/'(s)'/'(=)' notation)
  */
@@ -418,9 +420,9 @@ maybe_skip_subexpr (FcFormatContext *c)
 }
 
 static FcBool
-interpret_filter (FcFormatContext *c,
-		  FcPattern       *pat,
-		  FcStrBuf        *buf)
+interpret_filter_in (FcFormatContext *c,
+		     FcPattern       *pat,
+		     FcStrBuf        *buf)
 {
     FcObjectSet  *os;
     FcPattern    *subpat;
@@ -455,9 +457,9 @@ interpret_filter (FcFormatContext *c,
 }
 
 static FcBool
-interpret_delete (FcFormatContext *c,
-		  FcPattern       *pat,
-		  FcStrBuf        *buf)
+interpret_filter_out (FcFormatContext *c,
+		      FcPattern       *pat,
+		      FcStrBuf        *buf)
 {
     FcPattern    *subpat;
 
@@ -567,9 +569,9 @@ interpret_count (FcFormatContext *c,
 }
 
 static FcBool
-interpret_array (FcFormatContext *c,
-		 FcPattern       *pat,
-		 FcStrBuf        *buf)
+interpret_enumerate (FcFormatContext *c,
+		     FcPattern       *pat,
+		     FcStrBuf        *buf)
 {
     FcObjectSet   *os;
     FcPattern     *subpat;
@@ -1115,14 +1117,14 @@ interpret_percent (FcFormatContext *c,
     start = buf->len;
 
     switch (*c->format) {
-    case '=': ret = interpret_builtin (c, pat, buf); break;
-    case '{': ret = interpret_subexpr (c, pat, buf); break;
-    case '+': ret = interpret_filter  (c, pat, buf); break;
-    case '-': ret = interpret_delete  (c, pat, buf); break;
-    case '?': ret = interpret_cond    (c, pat, buf); break;
-    case '#': ret = interpret_count   (c, pat, buf); break;
-    case '[': ret = interpret_array   (c, pat, buf); break;
-    default:  ret = interpret_simple  (c, pat, buf); break;
+    case '=': ret = interpret_builtin    (c, pat, buf); break;
+    case '{': ret = interpret_subexpr    (c, pat, buf); break;
+    case '+': ret = interpret_filter_in  (c, pat, buf); break;
+    case '-': ret = interpret_filter_out (c, pat, buf); break;
+    case '?': ret = interpret_cond       (c, pat, buf); break;
+    case '#': ret = interpret_count      (c, pat, buf); break;
+    case '[': ret = interpret_enumerate  (c, pat, buf); break;
+    default:  ret = interpret_simple     (c, pat, buf); break;
     }
 
     return ret &&
-- 
2.47.3