add a multibyte case-insensitive version of strcoll(), and make diralphasort() use it

author David Lawrence Ramsey <pooka109@gmail.com>

Tue, 21 Jun 2005 03:26:58 +0000 (03:26 +0000)

committer David Lawrence Ramsey <pooka109@gmail.com>

Tue, 21 Jun 2005 03:26:58 +0000 (03:26 +0000)
author David Lawrence Ramsey <pooka109@gmail.com>
Tue, 21 Jun 2005 03:26:58 +0000 (03:26 +0000)
committer David Lawrence Ramsey <pooka109@gmail.com>
Tue, 21 Jun 2005 03:26:58 +0000 (03:26 +0000)
diff --git a/ChangeLog b/ChangeLog

index dc8c24bf16668e011cfb1eabde7a3deda7b4c516..66407c9695d9f04435aaa4ab28cd778272ad20f6 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -60,10 +60,13 @@ CVS code -
           wrappers to take wint_t instead of wchar_t to match the
           functions they wrap; rename some functions for consistency;
           add functions to detect blank characters in a string, for use
-         in rcfile option parsing; and don't count matches between
-         valid and invalid multibyte sequences anymore, as it causes
-         problems when doing a replace.  New functions
-         is_valid_mbstring(), has_blank_chars(), and
+         in rcfile option parsing; add functions to case-insensitively
+         compare strings while taking the current locale into account,
+         for use in sorting filename lists; and don't count matches
+         between valid and invalid multibyte sequences anymore, as it
+         causes problems when doing a replace.  New functions
+         nstrcasecoll(), nstrncasecoll(), mbstrcasecoll(),
+         mbstrncasecoll(), is_valid_mbstring(), has_blank_chars(), and
           has_blank_mbchars(); changes to is_alnum_mbchar(),
           is_blank_char() (renamed nisblank()), is_blank_mbchar(),
           is_blank_wchar() (renamed niswblank()), is_cntrl_wchar(),
@@ -154,8 +157,9 @@ CVS code -
           long instead of an unsigned int. (DLR)
         - Declare the size_t i only in the loop where it's used. (DLR)
    diralphasort()
-       - Use mbstrcasecmp() instead of strcasecmp(), so that UTF-8
-         filenames are sorted properly. (DLR)
+       - Use mbstrcasecoll() instead of strcasecmp(), so that UTF-8
+         filenames are sorted properly according to the current
+         locale. (DLR)
    cwd_tab_completion(), browser_init()
         - Rename variable next to nextdir to avoid confusion. (DLR)
    input_tab()
@@ -354,7 +358,8 @@ CVS code -
           Weinehall)
         - Don't refer to the built-in file browser as crappy anymore.
           (DLR)
-       - Check for iswpunct() and mbstowcs(). (DLR)
+       - Check for iswpunct(), mbstowcs(), strcasecoll(), and
+         strncasecoll(). (DLR)
         - Change the behavior of --enable-extra to only define
           NANO_EXTRA, instead of defining both it and
           ENABLE_MULTIBUFFER. (DLR)
diff --git a/configure.ac b/configure.ac

index 9c99de437d69d55cbb27507c44c25681994ddbf4..bf1890e7f8cb30211dad4684e096149d81131310 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -395,7 +395,7 @@ int main(void)
  
  dnl Checks for functions.
  
-AC_CHECK_FUNCS(getdelim getline isblank snprintf strcasecmp strcasestr strncasecmp strnlen vsnprintf)
+AC_CHECK_FUNCS(getdelim getline isblank snprintf strcasecmp strcasecoll strcasestr strncasecmp strncasecoll strnlen vsnprintf)
  
  if test x$enable_utf8 != xno; then
      AC_CHECK_FUNCS(iswalnum iswblank iswpunct iswspace mblen mbstowcs mbtowc wctomb wcwidth)
diff --git a/src/chars.c b/src/chars.c

index f0fc67d062e41d003916a7c69a1dc6e4f5d944b0..92c2e5dde729d02d2f9517fbc1a5d43d43879512 100644 (file)
--- a/src/chars.c
+++ b/src/chars.c
@@ -512,6 +512,119 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
         return strncasecmp(s1, s2, n);
  }
  
+#if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER)
+#ifndef HAVE_STRCASECOLL
+/* This function is equivalent to a case-insensitive strcoll(). */
+int nstrcasecoll(const char *s1, const char *s2)
+{
+    return strncasecoll(s1, s2, (size_t)-1);
+}
+#endif
+
+#ifndef HAVE_STRNCASECOLL
+/* This function is equivalent to a case-insensitive strcoll() for the
+ * first n characters of s1 and s2. */
+int nstrncasecoll(const char *s1, const char *s2, size_t n)
+{
+    int retval = 0;
+    char t1[2] = {'\0', '\0'}, t2[2] = {'\0', '\0'};
+
+    assert(s1 != NULL && s2 != NULL);
+
+    for (; n > 0 && *s1 != '\0' && *s2 != '\0'; n--, s1++, s2++) {
+       t1[0] = tolower(*s1);
+       t2[0] = tolower(*s2);
+
+       if ((retval = strcoll(t1, t2)) != 0)
+           break;
+    }
+
+    if (n > 0)
+       return retval;
+    else
+       return 0;
+}
+#endif
+
+/* This function is equivalent to a case-insensitive strcoll() for
+ * multibyte strings. */
+int mbstrcasecoll(const char *s1, const char *s2)
+{
+    return mbstrncasecoll(s1, s2, (size_t)-1);
+}
+
+/* This function is equivalent to a case-insensitive strcoll() for the
+ * first n characters of multibyte strings. */
+int mbstrncasecoll(const char *s1, const char *s2, size_t n)
+{
+#ifdef NANO_WIDE
+    if (ISSET(USE_UTF8)) {
+       int retval = 0;
+       char *s1_mb = charalloc(MB_CUR_MAX + 1);
+       char *s2_mb = charalloc(MB_CUR_MAX + 1);
+       wchar_t ws1, ws2;
+
+       assert(s1 != NULL && s2 != NULL);
+
+       while (n > 0 && *s1 != '\0' && *s2 != '\0') {
+           bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
+           int s1_mb_len, s2_mb_len;
+
+           s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
+
+           if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
+               mbtowc(NULL, NULL, 0);
+               s1_mb[0] = *s1;
+               s1_mb[1] = '\0';
+               bad_s1_mb = TRUE;
+           } else {
+               s1_mb_len = wctomb(s1_mb, towlower(ws1));
+
+               if (s1_mb_len <= 0) {
+                   wctomb(NULL, 0);
+                   s1_mb_len = 0;
+               }
+
+               s1_mb[s1_mb_len] = '\0';
+           }
+
+           s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
+
+           if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
+               mbtowc(NULL, NULL, 0);
+               s2_mb[0] = *s2;
+               s2_mb[1] = '\0';
+               bad_s2_mb = TRUE;
+           } else {
+               s2_mb_len = wctomb(s2_mb, towlower(ws2));
+
+               if (s2_mb_len <= 0) {
+                   wctomb(NULL, 0);
+                   s2_mb_len = 0;
+               }
+
+               s2_mb[s2_mb_len] = '\0';
+           }
+
+           if (n == 0 || bad_s1_mb != bad_s2_mb ||
+               (retval = strcoll(s1_mb, s2_mb)) != 0)
+               break;
+
+           s1 += s1_mb_len;
+           s2 += s2_mb_len;
+           n--;
+       }
+
+       free(s1_mb);
+       free(s2_mb);
+
+       return retval;
+    } else
+#endif
+       return strncasecoll(s1, s2, n);
+}
+#endif /* !DISABLE_TABCOMP || !DISABLE_BROWSER */
+
  #ifndef HAVE_STRCASESTR
  /* This function is equivalent to strcasestr().  It was adapted from
   * mutt's mutt_stristr() function. */
diff --git a/src/files.c b/src/files.c

index fddedc7d406098793f5fea146fc4e563702bfc02..7d32f0712d4034391d14a7ca404ff0bcebbda012 100644 (file)
--- a/src/files.c
+++ b/src/files.c
@@ -1994,7 +1994,8 @@ char *real_dir_from_tilde(const char *buf)
  
  #if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER)
  /* Our sort routine for file listings.  Sort alphabetically and
- * case-insensitively, and sort directories before filenames. */
+ * case-insensitively (taking the locale into account), and sort
+ * directories before filenames. */
  int diralphasort(const void *va, const void *vb)
  {
      struct stat fileinfo;
@@ -2008,7 +2009,7 @@ int diralphasort(const void *va, const void *vb)
      if (!aisdir && bisdir)
         return 1;
  
-    return mbstrcasecmp(a, b);
+    return mbstrcasecoll(a, b);
  }
  
  /* Free the memory allocated for array, which should contain len
diff --git a/src/nano.h b/src/nano.h

index df92597c5fdf3f194645d39517ddc0fdb41a3816..7ba24bf2486e643cd5d894128d9e9bf33db774eb 100644 (file)
--- a/src/nano.h
+++ b/src/nano.h
@@ -120,6 +120,12 @@
  #ifndef HAVE_STRNCASECMP
  #define strncasecmp nstrncasecmp
  #endif
+#ifndef HAVE_STRCASECOLL
+#define strcasecoll nstrcasecoll
+#endif
+#ifndef HAVE_STRNCASECOLL
+#define strncasecoll nstrncasecoll
+#endif
  #ifndef HAVE_STRCASESTR
  #define strcasestr nstrcasestr
  #endif
diff --git a/src/proto.h b/src/proto.h

index 6465bf592343f9fe52c8d312c7b63c1ac194652a..4ceccb76a94d345f27946d33373605c40611db35 100644 (file)
--- a/src/proto.h
+++ b/src/proto.h
@@ -195,6 +195,16 @@ int mbstrcasecmp(const char *s1, const char *s2);
  int nstrncasecmp(const char *s1, const char *s2, size_t n);
  #endif
  int mbstrncasecmp(const char *s1, const char *s2, size_t n);
+#if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER)
+#ifndef HAVE_STRCASECOLL
+int nstrcasecoll(const char *s1, const char *s2);
+#endif
+#ifndef HAVE_STRNCASECOLL
+int nstrncasecoll(const char *s1, const char *s2, size_t n);
+#endif
+int mbstrcasecoll(const char *s1, const char *s2);
+int mbstrncasecoll(const char *s1, const char *s2, size_t n);
+#endif
  #ifndef HAVE_STRCASESTR
  const char *nstrcasestr(const char *haystack, const char *needle);
  #endif
author	David Lawrence Ramsey <pooka109@gmail.com>
	Tue, 21 Jun 2005 03:26:58 +0000 (03:26 +0000)
committer	David Lawrence Ramsey <pooka109@gmail.com>
	Tue, 21 Jun 2005 03:26:58 +0000 (03:26 +0000)
ChangeLog		patch \| blob \| history
configure.ac		patch \| blob \| history
src/chars.c		patch \| blob \| history
src/files.c		patch \| blob \| history
src/nano.h		patch \| blob \| history
src/proto.h		patch \| blob \| history