From ffd6668b469508177c4baf7745ae42aee5b00322 Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod <behdad@behdad.org>
Date: Wed, 18 Mar 2009 19:28:52 -0400
Subject: [PATCH] [fc-lang] Make LangSet representation in the cache files
 stable

Fontconfig assigns an index number to each language it knows about.
The index is used to select a bit in the FcLangSet language map.  The
bitmap is stored in the cache.

Previously fc-lang simply sorted the list of languages and assigned
them indices starting from zero.  The net effect was that whenever new
orth files were added, all the FcLangSet info in the cache files would
become invalid.  This was causing weird bugs like this one:

  https://bugzilla.redhat.com/show_bug.cgi?id=490888

With this commit we fix the index assigned to each language.  The index
is based on the order in which the orth files are passed to fc-lang.  As
a result all orth files are explicitly listed in Makefile.am now, and
new additions should be made to the end of the list.  The list is made
to reflect the sorted list of orthographies from the 2.6.0 release,
followed by new additions since.

This fixes the stability problem.  Needless to say, recreating caches
is necessary before any new orthography is recognized in existing fonts,
but at least the existing caches are still valid and don't cause bugs
like the above.
---
 configure.in        |   7 --
 fc-lang/Makefile.am | 248 +++++++++++++++++++++++++++++++++++++++++++-
 fc-lang/fc-lang.c   |  49 ++++++---
 src/fclang.c        |   4 +-
 4 files changed, 282 insertions(+), 26 deletions(-)

diff --git a/configure.in b/configure.in
index f2c94c2..9bd78e4 100644
--- a/configure.in
+++ b/configure.in
@@ -487,13 +487,6 @@ CONFDIR=${confdir}
 AC_DEFINE_UNQUOTED(CONFDIR, "$CONFDIR",[Font configuration directory])
 AC_SUBST(CONFDIR)
 
-#
-# Find out what language orthographies are included
-#
-
-ORTH_FILES=`cd ${srcdir}/fc-lang && echo *.orth`
-AC_SUBST(ORTH_FILES)
-
 #
 # Let people not build/install docs if they don't have docbook
 #
diff --git a/fc-lang/Makefile.am b/fc-lang/Makefile.am
index eaf76d2..f248e0e 100644
--- a/fc-lang/Makefile.am
+++ b/fc-lang/Makefile.am
@@ -36,8 +36,6 @@ noinst_PROGRAMS=fc-lang
 
 noinst_MANS=fc-lang.man
 
-ORTH=@ORTH_FILES@
-
 EXTRA_DIST=$(TMPL) $(ORTH)
 
 $(TARG):$(ORTH) fc-lang${EXEEXT} $(STMPL)
@@ -53,3 +51,249 @@ $(ALIAS_FILES):
 	touch $(ALIAS_FILES)
 
 CLEANFILES = $(TARG) $(ALIAS_FILES)
+
+# NOTE:
+#
+# The order of the orth files here is extremely important (part of the cache
+# format) and should not be modified.  New orth files should be added at the
+# end.  No files should be removed either.
+#
+ORTH = \
+	aa.orth \
+	ab.orth \
+	af.orth \
+	am.orth \
+	ar.orth \
+	as.orth \
+	ast.orth \
+	av.orth \
+	ay.orth \
+	az_az.orth \
+	az_ir.orth \
+	ba.orth \
+	bm.orth \
+	be.orth \
+	bg.orth \
+	bh.orth \
+	bho.orth \
+	bi.orth \
+	bin.orth \
+	bn.orth \
+	bo.orth \
+	br.orth \
+	bs.orth \
+	bua.orth \
+	ca.orth \
+	ce.orth \
+	ch.orth \
+	chm.orth \
+	chr.orth \
+	co.orth \
+	cs.orth \
+	cu.orth \
+	cv.orth \
+	cy.orth \
+	da.orth \
+	de.orth \
+	dz.orth \
+	el.orth \
+	en.orth \
+	eo.orth \
+	es.orth \
+	et.orth \
+	eu.orth \
+	fa.orth \
+	fi.orth \
+	fj.orth \
+	fo.orth \
+	fr.orth \
+	ff.orth \
+	fur.orth \
+	fy.orth \
+	ga.orth \
+	gd.orth \
+	gez.orth \
+	gl.orth \
+	gn.orth \
+	gu.orth \
+	gv.orth \
+	ha.orth \
+	haw.orth \
+	he.orth \
+	hi.orth \
+	ho.orth \
+	hr.orth \
+	hu.orth \
+	hy.orth \
+	ia.orth \
+	ig.orth \
+	id.orth \
+	ie.orth \
+	ik.orth \
+	io.orth \
+	is.orth \
+	it.orth \
+	iu.orth \
+	ja.orth \
+	ka.orth \
+	kaa.orth \
+	ki.orth \
+	kk.orth \
+	kl.orth \
+	km.orth \
+	kn.orth \
+	ko.orth \
+	kok.orth \
+	ks.orth \
+	ku_am.orth \
+	ku_ir.orth \
+	kum.orth \
+	kv.orth \
+	kw.orth \
+	ky.orth \
+	la.orth \
+	lb.orth \
+	lez.orth \
+	ln.orth \
+	lo.orth \
+	lt.orth \
+	lv.orth \
+	mg.orth \
+	mh.orth \
+	mi.orth \
+	mk.orth \
+	ml.orth \
+	mn_cn.orth \
+	mo.orth \
+	mr.orth \
+	mt.orth \
+	my.orth \
+	nb.orth \
+	nds.orth \
+	ne.orth \
+	nl.orth \
+	nn.orth \
+	no.orth \
+	nr.orth \
+	nso.orth \
+	ny.orth \
+	oc.orth \
+	om.orth \
+	or.orth \
+	os.orth \
+	pa_in.orth \
+	pl.orth \
+	ps_af.orth \
+	ps_pk.orth \
+	pt.orth \
+	rm.orth \
+	ro.orth \
+	ru.orth \
+	sa.orth \
+	sah.orth \
+	sco.orth \
+	se.orth \
+	sel.orth \
+	sh.orth \
+	shs.orth \
+	si.orth \
+	sk.orth \
+	sl.orth \
+	sm.orth \
+	sma.orth \
+	smj.orth \
+	smn.orth \
+	sms.orth \
+	so.orth \
+	sq.orth \
+	sr.orth \
+	ss.orth \
+	st.orth \
+	sv.orth \
+	sw.orth \
+	syr.orth \
+	ta.orth \
+	te.orth \
+	tg.orth \
+	th.orth \
+	ti_er.orth \
+	ti_et.orth \
+	tig.orth \
+	tk.orth \
+	tl.orth \
+	tn.orth \
+	to.orth \
+	tr.orth \
+	ts.orth \
+	tt.orth \
+	tw.orth \
+	tyv.orth \
+	ug.orth \
+	uk.orth \
+	ur.orth \
+	uz.orth \
+	ve.orth \
+	vi.orth \
+	vo.orth \
+	vot.orth \
+	wa.orth \
+	wen.orth \
+	wo.orth \
+	xh.orth \
+	yap.orth \
+	yi.orth \
+	yo.orth \
+	zh_cn.orth \
+	zh_hk.orth \
+	zh_mo.orth \
+	zh_sg.orth \
+	zh_tw.orth \
+	zu.orth \
+	ak.orth \
+	an.orth \
+	ber_dz.orth \
+	ber_ma.orth \
+	byn.orth \
+	crh.orth \
+	csb.orth \
+	dv.orth \
+	ee.orth \
+	fat.orth \
+	fil.orth \
+	hne.orth \
+	hsb.orth \
+	ht.orth \
+	hz.orth \
+	ii.orth \
+	jv.orth \
+	kab.orth \
+	kj.orth \
+	kr.orth \
+	ku_iq.orth \
+	ku_tr.orth \
+	kwm.orth \
+	lg.orth \
+	li.orth \
+	mai.orth \
+	mn_mn.orth \
+	ms.orth \
+	na.orth \
+	ng.orth \
+	nv.orth \
+	ota.orth \
+	pa_pk.orth \
+	pap_an.orth \
+	pap_aw.orth \
+	qu.orth \
+	rn.orth \
+	rw.orth \
+	sc.orth \
+	sd.orth \
+	sg.orth \
+	sid.orth \
+	sn.orth \
+	su.orth \
+	ty.orth \
+	wal.orth \
+	za.orth
+#	^-------------- Add new orth files here
diff --git a/fc-lang/fc-lang.c b/fc-lang/fc-lang.c
index b0f1690..a00aabc 100644
--- a/fc-lang/fc-lang.c
+++ b/fc-lang/fc-lang.c
@@ -129,7 +129,7 @@ static const FcCharSet *
 scan (FILE *f, char *file, FcCharSetFreezer *freezer)
 {
     FcCharSet	    *c = 0;
-    const FcCharSet *n;
+    FcCharSet	    *n;
     int		    start, end, ucs4;
     char	    line[1024];
     int		    lineno = 0;
@@ -224,22 +224,27 @@ get_lang (char *name)
     return lang;
 }
 
+typedef struct _Entry {
+    int id;
+    char *file;
+} Entry;
+
 static int compare (const void *a, const void *b)
 {
-    const FcChar8    *const *as = a, *const *bs = b;
-    return FcStrCmpIgnoreCase (*as, *bs);
+    const Entry *as = a, *bs = b;	/* qsort passes pointers to Entry elements */
+    return FcStrCmpIgnoreCase ((const FcChar8 *) as->file, (const FcChar8 *) bs->file);
 }
 
 #define MAX_LANG	    1024
 #define MAX_LANG_SET_MAP    ((MAX_LANG + 31) / 32)
 
-#define BitSet(map, id)   ((map)[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
-#define BitGet(map, id)   ((map)[(id)>>5] >> ((id) & 0x1f)) & 1)
+#define BitSet(map, i)   ((map)[(entries[(i)].id)>>5] |= ((FcChar32) 1 << ((entries[(i)].id) & 0x1f)))	/* i indexes entries[]; the bit used is entries[i].id */
+#define BitGet(map, i)   (((map)[(entries[(i)].id)>>5] >> ((entries[(i)].id) & 0x1f)) & 1)
 
 int
 main (int argc, char **argv)
 {
-    static char		*files[MAX_LANG];
+    static Entry	entries[MAX_LANG];
     static const FcCharSet	*sets[MAX_LANG];
     static int		duplicate[MAX_LANG];
     static int		country[MAX_LANG];
@@ -276,18 +281,20 @@ main (int argc, char **argv)
 	}
 	if (i == MAX_LANG)
 	    fatal (argv[0], 0, "Too many languages");
-	files[i++] = argv[argi++];
+	entries[i].id = i;
+	entries[i].file = argv[argi++];
+	i++;
     }
-    files[i] = 0;
-    qsort (files, i, sizeof (char *), compare);
+    entries[i].file = 0;
+    qsort (entries, i, sizeof (Entry), compare);
     i = 0;
-    while (files[i])
+    while (entries[i].file)
     {
-	f = scanopen (files[i]);
+	f = scanopen (entries[i].file);
 	if (!f)
-	    fatal (files[i], 0, strerror (errno));
-	sets[i] = scan (f, files[i], freezer);
-	names[i] = get_name (files[i]);
+	    fatal (entries[i].file, 0, strerror (errno));
+	sets[i] = scan (f, entries[i].file, freezer);
+	names[i] = get_name (entries[i].file);
 	langs[i] = get_lang(names[i]);
 	if (strchr (langs[i], '-'))
 	    country[ncountry++] = i;
@@ -362,6 +369,7 @@ main (int argc, char **argv)
     printf ("#define NUM(s,n)    (NUM0 + n * sizeof (FcChar16) - SET(s))\n");
     printf ("#define LEAF(o,l)   (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))\n");
     printf ("#define fcLangCharSets (fcLangData.langCharSets)\n");
+    printf ("#define fcLangCharSetIndices (fcLangData.langIndices)\n");
     printf ("\n");
     
     printf ("static const struct {\n"
@@ -369,8 +377,10 @@ main (int argc, char **argv)
 	    "    FcCharLeaf     leaves[%d];\n"
 	    "    intptr_t       leaf_offsets[%d];\n"
 	    "    FcChar16       numbers[%d];\n"
+	    "    FcChar%s       langIndices[%d];\n"
 	    "} fcLangData = {\n",
-	    nsets, tl, tn, tn);
+	    nsets, tl, tn, tn,
+	    nsets < 256 ? "8 " : "16", nsets);
 	
     /*
      * Dump sets
@@ -457,6 +467,13 @@ main (int argc, char **argv)
 	if (n % 8 != 0)
 	    printf ("\n");
     }
+    printf ("},\n");
+
+    printf ("{\n");
+    for (i = 0; sets[i]; i++)
+    {
+	printf ("    %d, /* %s */\n", entries[i].id, names[i]);
+    }
     printf ("}\n");
     
     printf ("};\n\n");
@@ -527,7 +544,9 @@ main (int argc, char **argv)
     /*
      * Dump sets start/finish for the fastpath
      */
+    printf ("\n");
     printf ("static const FcLangCharSetRange  fcLangCharSetRanges[] = {\n");
+	printf ("\n");
     for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++)
     {
 	printf ("    { %d, %d }, /* %c */\n",
diff --git a/src/fclang.c b/src/fclang.c
index 90426a0..fe4674c 100644
--- a/src/fclang.c
+++ b/src/fclang.c
@@ -42,8 +42,8 @@ struct _FcLangSet {
     FcStrSet	*extra;
 };
 
-#define FcLangSetBitSet(ls, id)	((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
-#define FcLangSetBitGet(ls, id) (((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)
+#define FcLangSetBitSet(ls, id)	((ls)->map[(fcLangCharSetIndices[id])>>5] |= ((FcChar32) 1 << ((fcLangCharSetIndices[id]) & 0x1f)))
+#define FcLangSetBitGet(ls, id) (((ls)->map[(fcLangCharSetIndices[id])>>5] >> ((fcLangCharSetIndices[id]) & 0x1f)) & 1)
 
 FcLangSet *
 FcFreeTypeLangSet (const FcCharSet  *charset, 
-- 
2.39.2