Reverting my own patch that claimed that UTF8 is a stateless encoding.

author     Benno Schulenberg <bensberg@justemail.net>, Fri, 4 Sep 2015 19:34:55 +0000 (19:34 +0000)
committer  Benno Schulenberg <bensberg@justemail.net>, Fri, 4 Sep 2015 19:34:55 +0000 (19:34 +0000)

diff --git a/ChangeLog b/ChangeLog
index 42ad7b007bc4e54ab065fc8b77da6c16e5ed5cf5..763098644f9dbd5943d5518010c39210fcd50e42 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-09-04  Benno Schulenberg  <bensberg@justemail.net>
+       * src/chars.c: Reverting r5354 from August 12.  This fixes Savannah
+       bug #45874.  Apparently there is /some/ state somewhere after all.
+
  2015-08-29  Benno Schulenberg  <bensberg@justemail.net>
         * doc/syntax/autoconf.nanor: New file; syntax colouring for Autoconf.
         * src/files.c: Rewrap some lines and tweak two comments.
diff --git a/src/chars.c b/src/chars.c
index 0b51b66a02776172b9723e9176f66df997dc6276..2f8de139e295efe9a44904e851c18f99bdf27def 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -91,6 +91,16 @@ bool is_byte(int c)
      return ((unsigned int)c == (unsigned char)c);
  }
  
+void mbtowc_reset(void)
+{
+    IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
+}
+
+void wctomb_reset(void)
+{
+    IGNORE_CALL_RESULT(wctomb(NULL, 0));
+}
+
  /* This function is equivalent to isalnum() for multibyte characters. */
  bool is_alnum_mbchar(const char *c)
  {
@@ -100,8 +110,10 @@ bool is_alnum_mbchar(const char *c)
      if (use_utf8) {
         wchar_t wc;
  
-       if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
+       if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+           mbtowc_reset();
             wc = bad_wchar;
+       }
  
         return iswalnum(wc);
      } else
@@ -118,8 +130,10 @@ bool is_blank_mbchar(const char *c)
      if (use_utf8) {
         wchar_t wc;
  
-       if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
+       if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+           mbtowc_reset();
             wc = bad_wchar;
+       }
  
         return iswblank(wc);
      } else
@@ -163,8 +177,10 @@ bool is_cntrl_mbchar(const char *c)
      if (use_utf8) {
         wchar_t wc;
  
-       if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
+       if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+           mbtowc_reset();
             wc = bad_wchar;
+       }
  
         return is_cntrl_wchar(wc);
      } else
@@ -181,8 +197,10 @@ bool is_punct_mbchar(const char *c)
      if (use_utf8) {
         wchar_t wc;
  
-       if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
+       if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+           mbtowc_reset();
             wc = bad_wchar;
+       }
  
         return iswpunct(wc);
      } else
@@ -246,13 +264,16 @@ char *control_mbrep(const char *c, char *crep, int *crep_len)
         wchar_t wc;
  
         if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+           mbtowc_reset();
             *crep_len = bad_mbchar_len;
             strncpy(crep, bad_mbchar, *crep_len);
         } else {
             *crep_len = wctomb(crep, control_wrep(wc));
  
-           if (*crep_len < 0)
+           if (*crep_len < 0) {
+               wctomb_reset();
                 *crep_len = 0;
+           }
         }
      } else {
  #endif
@@ -278,13 +299,16 @@ char *mbrep(const char *c, char *crep, int *crep_len)
  
         /* Reject invalid Unicode characters. */
         if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) {
+           mbtowc_reset();
             *crep_len = bad_mbchar_len;
             strncpy(crep, bad_mbchar, *crep_len);
         } else {
             *crep_len = wctomb(crep, wc);
  
-           if (*crep_len < 0)
+           if (*crep_len < 0) {
+               wctomb_reset();
                 *crep_len = 0;
+           }
         }
      } else {
  #endif
@@ -307,13 +331,17 @@ int mbwidth(const char *c)
         wchar_t wc;
         int width;
  
-       if (mbtowc(&wc, c, MB_CUR_MAX) < 0)
+       if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+           mbtowc_reset();
             wc = bad_wchar;
+       }
  
         width = wcwidth(wc);
  
-       if (width == -1)
-           width = wcwidth(bad_wchar);
+       if (width == -1) {
+           wc = bad_wchar;
+           width = wcwidth(wc);
+       }
  
         return width;
      } else
@@ -348,8 +376,10 @@ char *make_mbchar(long chr, int *chr_mb_len)
         *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);
  
         /* Reject invalid Unicode characters. */
-       if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr))
+       if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
+           wctomb_reset();
             *chr_mb_len = 0;
+       }
      } else {
  #endif
         *chr_mb_len = 1;
@@ -538,6 +568,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
             s1_mb_len = parse_mbchar(s1, s1_mb, NULL);
  
             if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) {
+               mbtowc_reset();
                 ws1 = (unsigned char)*s1_mb;
                 bad_s1_mb = TRUE;
             }
@@ -545,6 +576,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
             s2_mb_len = parse_mbchar(s2, s2_mb, NULL);
  
             if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) {
+               mbtowc_reset();
                 ws2 = (unsigned char)*s2_mb;
                 bad_s2_mb = TRUE;
             }
@@ -773,6 +805,7 @@ char *mbstrchr(const char *s, const char *c)
         wchar_t ws, wc;
  
         if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+           mbtowc_reset();
             wc = (unsigned char)*c;
             bad_c_mb = TRUE;
         }
@@ -781,6 +814,7 @@ char *mbstrchr(const char *s, const char *c)
             int s_mb_len = parse_mbchar(s, s_mb, NULL);
  
             if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
+               mbtowc_reset();
                 ws = (unsigned char)*s;
                 bad_s_mb = TRUE;
             }
author	Benno Schulenberg <bensberg@justemail.net>
	Fri, 4 Sep 2015 19:34:55 +0000 (19:34 +0000)
committer	Benno Schulenberg <bensberg@justemail.net>
	Fri, 4 Sep 2015 19:34:55 +0000 (19:34 +0000)
ChangeLog		patch | blob | history
src/chars.c		patch | blob | history