4 * Copyright © 2004 Keith Packard
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of Keith Packard not be used in
11 * advertising or publicity pertaining to distribution of the software without
12 * specific, written prior permission. Keith Packard makes no
13 * representations about the suitability of this software for any purpose. It
14 * is provided "as is" without express or implied warranty.
16 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
18 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
19 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
20 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
21 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
22 * PERFORMANCE OF THIS SOFTWARE.
31 /* stub definitions for declarations from fcint.h.. */
32 int * _fcBankId
= 0, * _fcBankIdx
= 0;
35 FcCacheBankToIndexMTF (int bank
)
39 /* end stub definitions */
41 typedef enum _caseFoldClass
{ CaseFoldCommon
, CaseFoldFull
, CaseFoldSimple
, CaseFoldTurkic
} CaseFoldClass
;
43 typedef struct _caseFoldClassMap
{
48 static const CaseFoldClassMap caseFoldClassMap
[] = {
49 { "C", CaseFoldCommon
},
50 { "F", CaseFoldFull
},
51 { "S", CaseFoldSimple
},
52 { "T", CaseFoldTurkic
},
56 typedef struct _caseFoldRaw
{
60 FcChar32 lower
[MAX_OUT
];
64 panic (const char *reason
)
66 fprintf (stderr
, "fc-case: panic %s\n", reason
);
71 static FcCaseFold
*folds
;
78 folds
= realloc (folds
, (nfolds
+ 1) * sizeof (FcCaseFold
));
80 folds
= malloc (sizeof (FcCaseFold
));
82 panic ("out of memory");
83 return &folds
[nfolds
++];
87 ucs4_to_utf8 (FcChar32 ucs4
,
88 FcChar8 dest
[FC_UTF8_MAX_LEN
])
93 if (ucs4
< 0x80) { *d
++= ucs4
; bits
= -6; }
94 else if (ucs4
< 0x800) { *d
++= ((ucs4
>> 6) & 0x1F) | 0xC0; bits
= 0; }
95 else if (ucs4
< 0x10000) { *d
++= ((ucs4
>> 12) & 0x0F) | 0xE0; bits
= 6; }
96 else if (ucs4
< 0x200000) { *d
++= ((ucs4
>> 18) & 0x07) | 0xF0; bits
= 12; }
97 else if (ucs4
< 0x4000000) { *d
++= ((ucs4
>> 24) & 0x03) | 0xF8; bits
= 18; }
98 else if (ucs4
< 0x80000000) { *d
++= ((ucs4
>> 30) & 0x01) | 0xFC; bits
= 24; }
101 for ( ; bits
>= 0; bits
-= 6) {
102 *d
++= ((ucs4
>> bits
) & 0x3F) | 0x80;
108 utf8_size (FcChar32 ucs4
)
110 FcChar8 utf8
[FC_UTF8_MAX_LEN
];
111 return ucs4_to_utf8 (ucs4
, utf8
);
114 static FcChar8
*foldChars
;
117 FcChar32 minFoldChar
;
118 FcChar32 maxFoldChar
;
123 FcChar8 utf8
[FC_UTF8_MAX_LEN
];
127 len
= ucs4_to_utf8 (c
, utf8
);
129 foldChars
= realloc (foldChars
, (nfoldChars
+ len
) * sizeof (FcChar8
));
131 foldChars
= malloc (sizeof (FcChar8
) * len
);
133 panic ("out of memory");
134 for (i
= 0; i
< len
; i
++)
135 foldChars
[nfoldChars
+ i
] = utf8
[i
];
140 foldExtends (FcCaseFold
*fold
, CaseFoldRaw
*raw
)
142 switch (fold
->method
) {
143 case FC_CASE_FOLD_RANGE
:
144 if ((short) (raw
->lower
[0] - raw
->upper
) != fold
->offset
)
146 if (raw
->upper
!= fold
->upper
+ fold
->count
)
149 case FC_CASE_FOLD_EVEN_ODD
:
150 if ((short) (raw
->lower
[0] - raw
->upper
) != 1)
152 if (raw
->upper
!= fold
->upper
+ fold
->count
+ 1)
155 case FC_CASE_FOLD_FULL
:
162 case_fold_method_name (FcChar16 method
)
165 case FC_CASE_FOLD_RANGE
: return "FC_CASE_FOLD_RANGE,";
166 case FC_CASE_FOLD_EVEN_ODD
: return "FC_CASE_FOLD_EVEN_ODD,";
167 case FC_CASE_FOLD_FULL
: return "FC_CASE_FOLD_FULL,";
168 default: return "unknown";
177 printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds
);
178 printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars
);
179 printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars
);
180 printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand
);
181 printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar
);
182 printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar
);
188 printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
189 for (i
= 0; i
< nfolds
; i
++)
191 printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
192 folds
[i
].upper
, case_fold_method_name (folds
[i
].method
),
193 folds
[i
].count
, folds
[i
].offset
);
198 * Dump out "other" values
201 printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
202 for (i
= 0; i
< nfoldChars
; i
++)
204 printf ("0x%02x", foldChars
[i
]);
205 if (i
!= nfoldChars
- 1)
207 if ((i
& 0xf) == 0xf)
217 * Read the standard Unicode CaseFolding.txt file
222 parseRaw (char *line
, CaseFoldRaw
*raw
)
227 if (!isxdigit (line
[0]))
230 * Get upper case value
232 tok
= strtok (line
, SEP
);
233 if (!tok
|| tok
[0] == '#')
235 raw
->upper
= strtol (tok
, &end
, 16);
241 tok
= strtok (NULL
, SEP
);
242 if (!tok
|| tok
[0] == '#')
244 for (i
= 0; caseFoldClassMap
[i
].name
; i
++)
245 if (!strcmp (tok
, caseFoldClassMap
[i
].name
))
247 raw
->class = caseFoldClassMap
[i
].class;
250 if (!caseFoldClassMap
[i
].name
)
254 * Get list of result characters
256 for (i
= 0; i
< MAX_OUT
; i
++)
258 tok
= strtok (NULL
, SEP
);
259 if (!tok
|| tok
[0] == '#')
261 raw
->lower
[i
] = strtol (tok
, &end
, 16);
272 caseFoldReadRaw (FILE *in
, CaseFoldRaw
*raw
)
278 if (!fgets (line
, sizeof (line
) - 1, in
))
280 if (parseRaw (line
, raw
))
286 main (int argc
, char **argv
)
288 FcCaseFold
*fold
= 0;
296 panic ("usage: fc-case CaseFolding.txt");
297 caseFile
= fopen (argv
[1], "r");
299 panic ("can't open case folding file");
301 while (caseFoldReadRaw (caseFile
, &raw
))
304 minFoldChar
= raw
.upper
;
305 maxFoldChar
= raw
.upper
;
311 if (fold
&& foldExtends (fold
, &raw
))
312 fold
->count
= raw
.upper
- fold
->upper
+ 1;
316 fold
->upper
= raw
.upper
;
317 fold
->offset
= raw
.lower
[0] - raw
.upper
;
318 if (fold
->offset
== 1)
319 fold
->method
= FC_CASE_FOLD_EVEN_ODD
;
321 fold
->method
= FC_CASE_FOLD_RANGE
;
324 expand
= utf8_size (raw
.lower
[0]) - utf8_size(raw
.upper
);
329 fold
->upper
= raw
.upper
;
330 fold
->method
= FC_CASE_FOLD_FULL
;
331 fold
->offset
= nfoldChars
;
332 for (i
= 0; i
< raw
.nout
; i
++)
333 addChar (raw
.lower
[i
]);
334 fold
->count
= nfoldChars
- fold
->offset
;
335 if (fold
->count
> maxFoldChars
)
336 maxFoldChars
= fold
->count
;
337 expand
= fold
->count
- utf8_size (raw
.upper
);
339 if (expand
> maxExpand
)
349 * Scan the input until the marker is found
352 while (fgets (line
, sizeof (line
), stdin
))
354 if (!strncmp (line
, "@@@", 3))
356 fputs (line
, stdout
);
365 * And flush out the rest of the input file
368 while (fgets (line
, sizeof (line
), stdin
))
369 fputs (line
, stdout
);
372 exit (ferror (stdout
));