From fc990b2e86008967f499fe0df88de8e074a3670e Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 25 Oct 2007 14:20:06 -0700 Subject: [PATCH] Update CaseFolding.txt to Unicode 5.1.0 --- fc-case/CaseFolding.txt | 184 ++++++++++++++++++++++++++++++++++++++-- src/fcstr.c | 2 +- 2 files changed, 178 insertions(+), 8 deletions(-) diff --git a/fc-case/CaseFolding.txt b/fc-case/CaseFolding.txt index 65f78ff..f33cdda 100644 --- a/fc-case/CaseFolding.txt +++ b/fc-case/CaseFolding.txt @@ -1,10 +1,11 @@ -# CaseFolding-4.0.1.txt -# Date: 2004-03-02, 02:41:24 GMT [MD] +# CaseFolding-5.1.0.txt +# Date: 2007-04-26, 20:59:40 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2004 Unicode, Inc. +# Copyright (c) 1991-2007 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UCD.html +# # Case Folding Properties # # This file is a supplement to the UnicodeData file. @@ -16,14 +17,15 @@ # The data supports both implementations that require simple case foldings # (where string lengths don't change), and implementations that allow full case folding # (where string lengths may grow). Note that where they can be supported, the -# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. # # All code points not listed in this file map to themselves. # # NOTE: case folding does not preserve normalization formats! # -# For information on case folding, see -# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/ +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard, Version 5.0. # # ================================================================================ # Format @@ -50,7 +52,7 @@ # behavior. (The default option is to exclude them.) # # ================================================================= - +# @missing 0000..10FFFF; 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C @@ -271,6 +273,19 @@ 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS @@ -331,6 +346,9 @@ 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE @@ -423,6 +441,7 @@ 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA 04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL @@ -449,7 +468,11 @@ 04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS 04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE @@ -458,6 +481,8 @@ 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM @@ -497,6 +522,44 @@ 0555; C; 0585; # ARMENIAN CAPITAL LETTER OH 0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH 0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -802,6 +865,7 @@ 2126; C; 03C9; # OHM SIGN 212A; C; 006B; # KELVIN SIGN 212B; C; 00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F 2160; C; 2170; # ROMAN NUMERAL ONE 2161; C; 2171; # ROMAN NUMERAL TWO 2162; C; 2172; # ROMAN NUMERAL THREE @@ -818,6 +882,7 @@ 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C @@ -844,6 +909,111 @@ 24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X 24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y 24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL diff --git a/src/fcstr.c b/src/fcstr.c index 6068596..1e7b466 100644 --- a/src/fcstr.c +++ b/src/fcstr.c @@ -367,7 +367,7 @@ FcStrIsAtIgnoreCase (const FcChar8 *s1, const FcChar8 *s2) } /* - * Does s1 contain an instance of s2 (ignoring case)? + * Does s1 contain an instance of s2 (ignoring blanks and case)? */ const FcChar8 * -- 2.39.5