From: Benno Schulenberg Date: Fri, 4 Sep 2015 19:34:55 +0000 (+0000) Subject: Reverting my own patch that claimed that UTF8 is a stateless encoding. X-Git-Tag: v2.5.0~93 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=9205c2886578e7df76b38ccf700df8fa4f16f815;p=nano.git Reverting my own patch that claimed that UTF8 is a stateless encoding. Apparently there is /some/ state somewhere after all. Don't have time now to figure out where exactly. git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@5369 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- diff --git a/ChangeLog b/ChangeLog index 42ad7b00..76309864 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2015-09-04 Benno Schulenberg + * src/chars.c: Reverting r5354 from August 12. This fixes Savannah + bug #45874. Apparently there is /some/ state somewhere after all. + 2015-08-29 Benno Schulenberg * doc/syntax/autoconf.nanor: New file; syntax colouring for Autoconf. * src/files.c: Rewrap some lines and tweak two comments. diff --git a/src/chars.c b/src/chars.c index 0b51b66a..2f8de139 100644 --- a/src/chars.c +++ b/src/chars.c @@ -91,6 +91,16 @@ bool is_byte(int c) return ((unsigned int)c == (unsigned char)c); } +void mbtowc_reset(void) +{ + IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0)); +} + +void wctomb_reset(void) +{ + IGNORE_CALL_RESULT(wctomb(NULL, 0)); +} + /* This function is equivalent to isalnum() for multibyte characters. */ bool is_alnum_mbchar(const char *c) { @@ -100,8 +110,10 @@ bool is_alnum_mbchar(const char *c) if (use_utf8) { wchar_t wc; - if (mbtowc(&wc, c, MB_CUR_MAX) < 0) + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); wc = bad_wchar; + } return iswalnum(wc); } else @@ -118,8 +130,10 @@ bool is_blank_mbchar(const char *c) if (use_utf8) { wchar_t wc; - if (mbtowc(&wc, c, MB_CUR_MAX) < 0) + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); wc = bad_wchar; + } return iswblank(wc); } else @@ -163,8 +177,10 @@ bool is_cntrl_mbchar(const char *c) if (use_utf8) { wchar_t wc; - if (mbtowc(&wc, c, MB_CUR_MAX) < 0) + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); wc = bad_wchar; + } return is_cntrl_wchar(wc); } else @@ -181,8 +197,10 @@ bool is_punct_mbchar(const char *c) if (use_utf8) { wchar_t wc; - if (mbtowc(&wc, c, MB_CUR_MAX) < 0) + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); wc = bad_wchar; + } return iswpunct(wc); } else @@ -246,13 +264,16 @@ char *control_mbrep(const char *c, char *crep, int *crep_len) wchar_t wc; if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); *crep_len = bad_mbchar_len; strncpy(crep, bad_mbchar, *crep_len); } else { *crep_len = wctomb(crep, control_wrep(wc)); - if (*crep_len < 0) + if (*crep_len < 0) { + wctomb_reset(); *crep_len = 0; + } } } else { #endif @@ -278,13 +299,16 @@ char *mbrep(const char *c, char *crep, int *crep_len) /* Reject invalid Unicode characters. */ if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) { + mbtowc_reset(); *crep_len = bad_mbchar_len; strncpy(crep, bad_mbchar, *crep_len); } else { *crep_len = wctomb(crep, wc); - if (*crep_len < 0) + if (*crep_len < 0) { + wctomb_reset(); *crep_len = 0; + } } } else { #endif @@ -307,13 +331,17 @@ int mbwidth(const char *c) wchar_t wc; int width; - if (mbtowc(&wc, c, MB_CUR_MAX) < 0) + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); wc = bad_wchar; + } width = wcwidth(wc); - if (width == -1) - width = wcwidth(bad_wchar); + if (width == -1) { + wc = bad_wchar; + width = wcwidth(wc); + } return width; } else @@ -348,8 +376,10 @@ char *make_mbchar(long chr, int *chr_mb_len) *chr_mb_len = wctomb(chr_mb, (wchar_t)chr); /* Reject invalid Unicode characters. */ - if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) + if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) { + wctomb_reset(); *chr_mb_len = 0; + } } else { #endif *chr_mb_len = 1; @@ -538,6 +568,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) s1_mb_len = parse_mbchar(s1, s1_mb, NULL); if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) { + mbtowc_reset(); ws1 = (unsigned char)*s1_mb; bad_s1_mb = TRUE; } @@ -545,6 +576,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) s2_mb_len = parse_mbchar(s2, s2_mb, NULL); if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) { + mbtowc_reset(); ws2 = (unsigned char)*s2_mb; bad_s2_mb = TRUE; } @@ -773,6 +805,7 @@ char *mbstrchr(const char *s, const char *c) wchar_t ws, wc; if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); wc = (unsigned char)*c; bad_c_mb = TRUE; } @@ -781,6 +814,7 @@ char *mbstrchr(const char *s, const char *c) int s_mb_len = parse_mbchar(s, s_mb, NULL); if (mbtowc(&ws, s_mb, s_mb_len) < 0) { + mbtowc_reset(); ws = (unsigned char)*s; bad_s_mb = TRUE; }