properly handle cases where a search string and a match to it are not the same length in bytes

author David Lawrence Ramsey <pooka109@gmail.com>

Mon, 18 Jul 2005 05:17:59 +0000 (05:17 +0000)

committer David Lawrence Ramsey <pooka109@gmail.com>

Mon, 18 Jul 2005 05:17:59 +0000 (05:17 +0000)
author David Lawrence Ramsey <pooka109@gmail.com>
Mon, 18 Jul 2005 05:17:59 +0000 (05:17 +0000)
committer David Lawrence Ramsey <pooka109@gmail.com>
Mon, 18 Jul 2005 05:17:59 +0000 (05:17 +0000)
diff --git a/ChangeLog b/ChangeLog

index 09775a4ffddeb8d977c65649fc703a74cf4d9e42..35f07f739dd757417f8dfe69f9807cfd2210e39f 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -70,6 +70,11 @@ CVS code -
           do_cursorpos(). (DLR)
         - Change the NANO_WIDE #define to ENABLE_UTF8, as the latter is
           clearer. (DLR)
+       - Properly handle cases where a search string and a match to it
+         are not the same length in bytes, i.e., when the latter
+         contains invalid multibyte characters interpreted as normal
+         characters.  Changes to mbstrncasecmp(), mbstrcasestr(),
+         mbrevstrcasestr(), findnextstr(), and do_replace_loop(). (DLR)
  - files.c:
    open_file()
         - Assert that filename isn't NULL, and don't do anything special
diff --git a/src/chars.c b/src/chars.c

index 0d3a9ddf3077fd0eee2480cec2773b2a8ec24455..2acbe14628f1ebe730d6fab6733851372d012b43 100644 (file)
--- a/src/chars.c
+++ b/src/chars.c
@@ -475,7 +475,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
         assert(s1 != NULL && s2 != NULL);
  
         while (n > 0 && *s1 != '\0' && *s2 != '\0') {
-           bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
             int s1_mb_len, s2_mb_len;
  
             s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
@@ -483,7 +482,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
             if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
                 mbtowc(NULL, NULL, 0);
                 ws1 = (unsigned char)*s1_mb;
-               bad_s1_mb = TRUE;
             }
  
             s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
@@ -491,11 +489,9 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
             if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
                 mbtowc(NULL, NULL, 0);
                 ws2 = (unsigned char)*s2_mb;
-               bad_s2_mb = TRUE;
             }
  
-           if (n == 0 || bad_s1_mb != bad_s2_mb ||
-               towlower(ws1) != towlower(ws2))
+           if (n == 0 || towlower(ws1) != towlower(ws2))
                 break;
  
             s1 += s1_mb_len;
@@ -550,14 +546,11 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
             int r_mb_len, q_mb_len;
  
             while (*q != '\0') {
-               bool bad_r_mb = FALSE, bad_q_mb = FALSE;
-
                 r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
  
                 if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
                     mbtowc(NULL, NULL, 0);
                     wr = (unsigned char)*r;
-                   bad_r_mb = TRUE;
                 }
  
                 q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
@@ -565,11 +558,9 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
                 if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
                     mbtowc(NULL, NULL, 0);
                     wq = (unsigned char)*q;
-                   bad_q_mb = TRUE;
                 }
  
-               if (bad_r_mb != bad_q_mb ||
-                       towlower(wr) != towlower(wq))
+               if (towlower(wr) != towlower(wq))
                     break;
  
                 r += r_mb_len;
@@ -656,14 +647,11 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
             int r_mb_len, q_mb_len;
  
             while (*q != '\0') {
-               bool bad_r_mb = FALSE, bad_q_mb = FALSE;
-
                 r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
  
                 if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
                     mbtowc(NULL, NULL, 0);
                     wr = (unsigned char)*r;
-                   bad_r_mb = TRUE;
                 }
  
                 q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
@@ -671,11 +659,9 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
                 if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
                     mbtowc(NULL, NULL, 0);
                     wq = (unsigned char)*q;
-                   bad_q_mb = TRUE;
                 }
  
-               if (bad_r_mb != bad_q_mb ||
-                       towlower(wr) != towlower(wq))
+               if (towlower(wr) != towlower(wq))
                     break;
  
                 r += r_mb_len;
diff --git a/src/search.c b/src/search.c

index 0fc9e8cf43a37c6470731d114feaa5b322f4a6ac..f9c9472062acf2fd0cfb25647d5fbca3b7c7289d 100644 (file)
--- a/src/search.c
+++ b/src/search.c
@@ -321,12 +321,21 @@ bool findnextstr(bool can_display_wrap, bool wholeword, bool
                 /* Is this potential match a whole word? */
  
             /* Set found_len to the length of the potential match. */
-           found_len =
  #ifdef HAVE_REGEX_H
-               ISSET(USE_REGEXP) ?
-               regmatches[0].rm_eo - regmatches[0].rm_so :
+           if (ISSET(USE_REGEXP))
+               found_len = regmatches[0].rm_eo - regmatches[0].rm_so;
+           else
  #endif
-               strlen(needle);
+           {
+               size_t needle_len = mbstrlen(needle);
+
+               /* Get found's length in single-byte characters. */
+               found_len = 0;
+
+               for (; needle_len > 0; needle_len--)
+                   found_len += parse_mbchar(found + found_len, NULL,
+                       NULL, NULL);
+           }
  
             /* If we're searching for whole words, see if this potential
              * match is a whole word. */
@@ -784,13 +793,20 @@ ssize_t do_replace_loop(const char *needle, const filestruct
  #endif
  
         if (i > 0 || replaceall) {      /* Yes, replace it!!!! */
-           char *copy;
+           char *match, *copy;
             size_t length_change;
  
             if (i == 2)
                 replaceall = TRUE;
  
-           copy = replace_line(needle);
+           /* Get the match's length in single-byte characters. */
+           match = mallocstrncpy(NULL, openfile->current->data +
+               openfile->current_x, match_len + 1);
+           match[match_len] = '\0';
+
+           copy = replace_line(match);
+
+           free(match);
  
             length_change = strlen(copy) -
                 strlen(openfile->current->data);
author	David Lawrence Ramsey <pooka109@gmail.com>
	Mon, 18 Jul 2005 05:17:59 +0000 (05:17 +0000)
committer	David Lawrence Ramsey <pooka109@gmail.com>
	Mon, 18 Jul 2005 05:17:59 +0000 (05:17 +0000)
ChangeLog		patch | blob | history
src/chars.c		patch | blob | history
src/search.c		patch | blob | history