4 * Copyright © 2004 Keith Packard
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
/*
 * Mapping classes that an input record can carry.  caseFoldClassMap
 * pairs each enumerator with its one-letter tag, noted alongside.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,	/* "C" */
    CaseFoldFull,	/* "F" */
    CaseFoldSimple,	/* "S" */
    CaseFoldTurkic	/* "T" */
} CaseFoldClass;
33 typedef struct _caseFoldClassMap
{
38 static const CaseFoldClassMap caseFoldClassMap
[] = {
39 { "C", CaseFoldCommon
},
40 { "F", CaseFoldFull
},
41 { "S", CaseFoldSimple
},
42 { "T", CaseFoldTurkic
},
46 typedef struct _caseFoldRaw
{
50 FcChar32 lower
[MAX_OUT
];
54 panic (const char *reason
)
56 fprintf (stderr
, "fc-case: panic %s\n", reason
);
61 static FcCaseFold
*folds
;
68 folds
= realloc (folds
, (nfolds
+ 1) * sizeof (FcCaseFold
));
70 folds
= malloc (sizeof (FcCaseFold
));
72 panic ("out of memory");
73 return &folds
[nfolds
++];
77 ucs4_to_utf8 (FcChar32 ucs4
,
78 FcChar8 dest
[FC_UTF8_MAX_LEN
])
83 if (ucs4
< 0x80) { *d
++= ucs4
; bits
= -6; }
84 else if (ucs4
< 0x800) { *d
++= ((ucs4
>> 6) & 0x1F) | 0xC0; bits
= 0; }
85 else if (ucs4
< 0x10000) { *d
++= ((ucs4
>> 12) & 0x0F) | 0xE0; bits
= 6; }
86 else if (ucs4
< 0x200000) { *d
++= ((ucs4
>> 18) & 0x07) | 0xF0; bits
= 12; }
87 else if (ucs4
< 0x4000000) { *d
++= ((ucs4
>> 24) & 0x03) | 0xF8; bits
= 18; }
88 else if (ucs4
< 0x80000000) { *d
++= ((ucs4
>> 30) & 0x01) | 0xFC; bits
= 24; }
91 for ( ; bits
>= 0; bits
-= 6) {
92 *d
++= ((ucs4
>> bits
) & 0x3F) | 0x80;
98 utf8_size (FcChar32 ucs4
)
100 FcChar8 utf8
[FC_UTF8_MAX_LEN
];
101 return ucs4_to_utf8 (ucs4
, utf8
);
104 static FcChar8
*foldChars
;
107 FcChar32 minFoldChar
;
108 FcChar32 maxFoldChar
;
113 FcChar8 utf8
[FC_UTF8_MAX_LEN
];
117 len
= ucs4_to_utf8 (c
, utf8
);
119 foldChars
= realloc (foldChars
, (nfoldChars
+ len
) * sizeof (FcChar8
));
121 foldChars
= malloc (sizeof (FcChar8
) * len
);
123 panic ("out of memory");
124 for (i
= 0; i
< len
; i
++)
125 foldChars
[nfoldChars
+ i
] = utf8
[i
];
130 foldExtends (FcCaseFold
*fold
, CaseFoldRaw
*raw
)
132 switch (fold
->method
) {
133 case FC_CASE_FOLD_RANGE
:
134 if ((short) (raw
->lower
[0] - raw
->upper
) != fold
->offset
)
136 if (raw
->upper
!= fold
->upper
+ fold
->count
)
139 case FC_CASE_FOLD_EVEN_ODD
:
140 if ((short) (raw
->lower
[0] - raw
->upper
) != 1)
142 if (raw
->upper
!= fold
->upper
+ fold
->count
+ 1)
145 case FC_CASE_FOLD_FULL
:
152 case_fold_method_name (FcChar16 method
)
155 case FC_CASE_FOLD_RANGE
: return "FC_CASE_FOLD_RANGE,";
156 case FC_CASE_FOLD_EVEN_ODD
: return "FC_CASE_FOLD_EVEN_ODD,";
157 case FC_CASE_FOLD_FULL
: return "FC_CASE_FOLD_FULL,";
158 default: return "unknown";
167 printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds
);
168 printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars
);
169 printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars
);
170 printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand
);
171 printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar
);
172 printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar
);
178 printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
179 for (i
= 0; i
< nfolds
; i
++)
181 printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
182 folds
[i
].upper
, case_fold_method_name (folds
[i
].method
),
183 folds
[i
].count
, folds
[i
].offset
);
188 * Dump out "other" values
191 printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
192 for (i
= 0; i
< nfoldChars
; i
++)
194 printf ("0x%02x", foldChars
[i
]);
195 if (i
!= nfoldChars
- 1)
197 if ((i
& 0xf) == 0xf)
207 * Read the standard Unicode CaseFolding.txt file
212 parseRaw (char *line
, CaseFoldRaw
*raw
)
217 if (!isxdigit (line
[0]))
220 * Get upper case value
222 tok
= strtok (line
, SEP
);
223 if (!tok
|| tok
[0] == '#')
225 raw
->upper
= strtol (tok
, &end
, 16);
231 tok
= strtok (NULL
, SEP
);
232 if (!tok
|| tok
[0] == '#')
234 for (i
= 0; caseFoldClassMap
[i
].name
; i
++)
235 if (!strcmp (tok
, caseFoldClassMap
[i
].name
))
237 raw
->class = caseFoldClassMap
[i
].class;
240 if (!caseFoldClassMap
[i
].name
)
244 * Get list of result characters
246 for (i
= 0; i
< MAX_OUT
; i
++)
248 tok
= strtok (NULL
, SEP
);
249 if (!tok
|| tok
[0] == '#')
251 raw
->lower
[i
] = strtol (tok
, &end
, 16);
262 caseFoldReadRaw (FILE *in
, CaseFoldRaw
*raw
)
268 if (!fgets (line
, sizeof (line
) - 1, in
))
270 if (parseRaw (line
, raw
))
276 main (int argc
, char **argv
)
278 FcCaseFold
*fold
= 0;
286 panic ("usage: fc-case CaseFolding.txt");
287 caseFile
= fopen (argv
[1], "r");
289 panic ("can't open case folding file");
291 while (caseFoldReadRaw (caseFile
, &raw
))
294 minFoldChar
= raw
.upper
;
295 maxFoldChar
= raw
.upper
;
301 if (fold
&& foldExtends (fold
, &raw
))
302 fold
->count
= raw
.upper
- fold
->upper
+ 1;
306 fold
->upper
= raw
.upper
;
307 fold
->offset
= raw
.lower
[0] - raw
.upper
;
308 if (fold
->offset
== 1)
309 fold
->method
= FC_CASE_FOLD_EVEN_ODD
;
311 fold
->method
= FC_CASE_FOLD_RANGE
;
314 expand
= utf8_size (raw
.lower
[0]) - utf8_size(raw
.upper
);
319 fold
->upper
= raw
.upper
;
320 fold
->method
= FC_CASE_FOLD_FULL
;
321 fold
->offset
= nfoldChars
;
322 for (i
= 0; i
< raw
.nout
; i
++)
323 addChar (raw
.lower
[i
]);
324 fold
->count
= nfoldChars
- fold
->offset
;
325 if (fold
->count
> maxFoldChars
)
326 maxFoldChars
= fold
->count
;
327 expand
= fold
->count
- utf8_size (raw
.upper
);
329 if (expand
> maxExpand
)
339 * Scan the input until the marker is found
342 while (fgets (line
, sizeof (line
), stdin
))
344 if (!strncmp (line
, "@@@", 3))
346 fputs (line
, stdout
);
355 * And flush out the rest of the input file
358 while (fgets (line
, sizeof (line
), stdin
))
359 fputs (line
, stdout
);
362 exit (ferror (stdout
));