From: David Lawrence Ramsey Date: Tue, 21 Jun 2005 03:26:58 +0000 (+0000) Subject: add a multibyte case-insensitive version of strcoll(), and make X-Git-Tag: v1.3.8~60 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=d0c4d378b40102e1f43e049dbbab60295a50147b;p=nano.git add a multibyte case-insensitive version of strcoll(), and make diralphasort() use it so that filenames are sorted properly according to the current locale git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2743 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- diff --git a/ChangeLog b/ChangeLog index dc8c24bf..66407c96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -60,10 +60,13 @@ CVS code - wrappers to take wint_t instead of wchar_t to match the functions they wrap; rename some functions for consistency; add functions to detect blank characters in a string, for use - in rcfile option parsing; and don't count matches between - valid and invalid multibyte sequences anymore, as it causes - problems when doing a replace. New functions - is_valid_mbstring(), has_blank_chars(), and + in rcfile option parsing; add functions to case-insensitively + compare strings while taking the current locale into account, + for use in sorting filename lists; and don't count matches + between valid and invalid multibyte sequences anymore, as it + causes problems when doing a replace. New functions + nstrcasecoll(), nstrncasecoll(), mbstrcasecoll(), + mbstrncasecoll(), is_valid_mbstring(), has_blank_chars(), and has_blank_mbchars(); changes to is_alnum_mbchar(), is_blank_char() (renamed nisblank()), is_blank_mbchar(), is_blank_wchar() (renamed niswblank()), is_cntrl_wchar(), @@ -154,8 +157,9 @@ CVS code - long instead of an unsigned int. (DLR) - Declare the size_t i only in the loop where it's used. (DLR) diralphasort() - - Use mbstrcasecmp() instead of strcasecmp(), so that UTF-8 - filenames are sorted properly. 
(DLR) + - Use mbstrcasecoll() instead of strcasecmp(), so that UTF-8 + filenames are sorted properly according to the current + locale. (DLR) cwd_tab_completion(), browser_init() - Rename variable next to nextdir to avoid confusion. (DLR) input_tab() @@ -354,7 +358,8 @@ CVS code - Weinehall) - Don't refer to the built-in file browser as crappy anymore. (DLR) - - Check for iswpunct() and mbstowcs(). (DLR) + - Check for iswpunct(), mbstowcs(), strcasecoll(), and + strncasecoll(). (DLR) - Change the behavior of --enable-extra to only define NANO_EXTRA, instead of defining both it and ENABLE_MULTIBUFFER. (DLR) diff --git a/configure.ac b/configure.ac index 9c99de43..bf1890e7 100644 --- a/configure.ac +++ b/configure.ac @@ -395,7 +395,7 @@ int main(void) dnl Checks for functions. -AC_CHECK_FUNCS(getdelim getline isblank snprintf strcasecmp strcasestr strncasecmp strnlen vsnprintf) +AC_CHECK_FUNCS(getdelim getline isblank snprintf strcasecmp strcasecoll strcasestr strncasecmp strncasecoll strnlen vsnprintf) if test x$enable_utf8 != xno; then AC_CHECK_FUNCS(iswalnum iswblank iswpunct iswspace mblen mbstowcs mbtowc wctomb wcwidth) diff --git a/src/chars.c b/src/chars.c index f0fc67d0..92c2e5dd 100644 --- a/src/chars.c +++ b/src/chars.c @@ -512,6 +512,119 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) return strncasecmp(s1, s2, n); } +#if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER) +#ifndef HAVE_STRCASECOLL +/* This function is equivalent to a case-insensitive strcoll(). */ +int nstrcasecoll(const char *s1, const char *s2) +{ + return strncasecoll(s1, s2, (size_t)-1); +} +#endif + +#ifndef HAVE_STRNCASECOLL +/* This function is equivalent to a case-insensitive strcoll() for the + * first n characters of s1 and s2. 
*/ +int nstrncasecoll(const char *s1, const char *s2, size_t n) +{ + int retval = 0; + char t1[2] = {'\0', '\0'}, t2[2] = {'\0', '\0'}; + + assert(s1 != NULL && s2 != NULL); + + for (; n > 0 && *s1 != '\0' && *s2 != '\0'; n--, s1++, s2++) { + t1[0] = tolower(*s1); + t2[0] = tolower(*s2); + + if ((retval = strcoll(t1, t2)) != 0) + break; + } + + if (n > 0) + return retval; + else + return 0; +} +#endif + +/* This function is equivalent to a case-insensitive strcoll() for + * multibyte strings. */ +int mbstrcasecoll(const char *s1, const char *s2) +{ + return mbstrncasecoll(s1, s2, (size_t)-1); +} + +/* This function is equivalent to a case-insensitive strcoll() for the + * first n characters of multibyte strings. */ +int mbstrncasecoll(const char *s1, const char *s2, size_t n) +{ +#ifdef NANO_WIDE + if (ISSET(USE_UTF8)) { + int retval = 0; + char *s1_mb = charalloc(MB_CUR_MAX + 1); + char *s2_mb = charalloc(MB_CUR_MAX + 1); + wchar_t ws1, ws2; + + assert(s1 != NULL && s2 != NULL); + + while (n > 0 && *s1 != '\0' && *s2 != '\0') { + bool bad_s1_mb = FALSE, bad_s2_mb = FALSE; + int s1_mb_len, s2_mb_len; + + s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL); + + if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) { + mbtowc(NULL, NULL, 0); + s1_mb[0] = *s1; + s1_mb[1] = '\0'; + bad_s1_mb = TRUE; + } else { + s1_mb_len = wctomb(s1_mb, towlower(ws1)); + + if (s1_mb_len <= 0) { + wctomb(NULL, 0); + s1_mb_len = 0; + } + + s1_mb[s1_mb_len] = '\0'; + } + + s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL); + + if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) { + mbtowc(NULL, NULL, 0); + s2_mb[0] = *s2; + s2_mb[1] = '\0'; + bad_s2_mb = TRUE; + } else { + s2_mb_len = wctomb(s2_mb, towlower(ws2)); + + if (s2_mb_len <= 0) { + wctomb(NULL, 0); + s2_mb_len = 0; + } + + s2_mb[s2_mb_len] = '\0'; + } + + if (n == 0 || bad_s1_mb != bad_s2_mb || + (retval = strcoll(s1_mb, s2_mb)) != 0) + break; + + s1 += s1_mb_len; + s2 += s2_mb_len; + n--; + } + + free(s1_mb); + free(s2_mb); + + return retval; + } else 
+#endif + return strncasecoll(s1, s2, n); +} +#endif /* !DISABLE_TABCOMP || !DISABLE_BROWSER */ + #ifndef HAVE_STRCASESTR /* This function is equivalent to strcasestr(). It was adapted from * mutt's mutt_stristr() function. */ diff --git a/src/files.c b/src/files.c index fddedc7d..7d32f071 100644 --- a/src/files.c +++ b/src/files.c @@ -1994,7 +1994,8 @@ char *real_dir_from_tilde(const char *buf) #if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER) /* Our sort routine for file listings. Sort alphabetically and - * case-insensitively, and sort directories before filenames. */ + * case-insensitively (taking the locale into account), and sort + * directories before filenames. */ int diralphasort(const void *va, const void *vb) { struct stat fileinfo; @@ -2008,7 +2009,7 @@ int diralphasort(const void *va, const void *vb) if (!aisdir && bisdir) return 1; - return mbstrcasecmp(a, b); + return mbstrcasecoll(a, b); } /* Free the memory allocated for array, which should contain len diff --git a/src/nano.h b/src/nano.h index df92597c..7ba24bf2 100644 --- a/src/nano.h +++ b/src/nano.h @@ -120,6 +120,12 @@ #ifndef HAVE_STRNCASECMP #define strncasecmp nstrncasecmp #endif +#ifndef HAVE_STRCASECOLL +#define strcasecoll nstrcasecoll +#endif +#ifndef HAVE_STRNCASECOLL +#define strncasecoll nstrncasecoll +#endif #ifndef HAVE_STRCASESTR #define strcasestr nstrcasestr #endif diff --git a/src/proto.h b/src/proto.h index 6465bf59..4ceccb76 100644 --- a/src/proto.h +++ b/src/proto.h @@ -195,6 +195,16 @@ int mbstrcasecmp(const char *s1, const char *s2); int nstrncasecmp(const char *s1, const char *s2, size_t n); #endif int mbstrncasecmp(const char *s1, const char *s2, size_t n); +#if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER) +#ifndef HAVE_STRCASECOLL +int nstrcasecoll(const char *s1, const char *s2); +#endif +#ifndef HAVE_STRNCASECOLL +int nstrncasecoll(const char *s1, const char *s2, size_t n); +#endif +int mbstrcasecoll(const char *s1, const char *s2); +int 
mbstrncasecoll(const char *s1, const char *s2, size_t n); +#endif #ifndef HAVE_STRCASESTR const char *nstrcasestr(const char *haystack, const char *needle); #endif