out of the prompt, and that magichistory is properly updated
when we change it and then move up. New function
history_reset(); changes to nanogetstr(). (DLR)
+ - Various character-handling cleanups. If we get an invalid
+ multibyte sequence, treat it as Unicode FFFD (Replacement
+ Character), unless we're determining if it's a control
+ character or searching for a match to it. Also, remove
+ unneeded variables and checks when parsing multibyte
+ sequences. Changes to is_alnum_mbchar(), is_blank_mbchar(),
+ is_cntrl_mbchar(), is_punct_mbchar(), control_mbrep(),
+ mbwidth(), make_mbchar(), parse_mbchar(), mbstrncasecmp(),
+ mbstrcasestr(), mbrevstrcasestr(), mbstrchr(), and
+ display_string(). (DLR)
- chars.c:
mbstrchr()
- Don't count matches between valid and invalid multibyte
HAVE_SNPRINTF. (DLR)
- Remove TOP from the topmidnone enum, and rename it centernone.
(DLR)
+ proto.h:
+ - Add declarations for bad_mbchar and bad_mbchar_len, so that we
+ can use them in display_string() as well as chars.c. (DLR)
- rcfile.c:
nregcomp()
- Return TRUE when the compilation succeeds and FALSE otherwise,
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif
+
+static const wchar_t bad_wchar = 0xFFFD;
+ /* If we get an invalid multibyte sequence, we treat it as
+ * Unicode FFFD (Replacement Character), unless we're
+ * determining if it's a control character or searching for a
+ * match to it. */
+const char *bad_mbchar = "\xEF\xBF\xBD";
+const int bad_mbchar_len = 3;
#endif
#ifndef HAVE_ISBLANK
#ifdef ENABLE_UTF8
if (ISSET(USE_UTF8)) {
wchar_t wc;
- int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
- if (c_mb_len <= 0) {
+ if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc(NULL, NULL, 0);
- wc = (unsigned char)*c;
+ wc = bad_wchar;
}
return iswalnum(wc);
#ifdef ENABLE_UTF8
if (ISSET(USE_UTF8)) {
wchar_t wc;
- int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
- if (c_mb_len <= 0) {
+ if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc(NULL, NULL, 0);
- wc = (unsigned char)*c;
+ wc = bad_wchar;
}
return iswblank(wc);
#ifdef ENABLE_UTF8
if (ISSET(USE_UTF8)) {
wchar_t wc;
- int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
- if (c_mb_len <= 0) {
+ if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc(NULL, NULL, 0);
wc = (unsigned char)*c;
}
wchar_t wc;
int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
- if (c_mb_len <= 0) {
+ if (c_mb_len < 0) {
mbtowc(NULL, NULL, 0);
- wc = (unsigned char)*c;
+ wc = bad_wchar;
}
return iswpunct(wc);
if (ISSET(USE_UTF8)) {
wchar_t wc;
- if (mbtowc(&wc, c, MB_CUR_MAX) <= 0) {
+ if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc(NULL, NULL, 0);
- wc = (unsigned char)*c;
- }
-
- *crep_len = wctomb(crep, control_wrep(wc));
-
- if (*crep_len <= 0) {
- wctomb(NULL, 0);
- *crep_len = 0;
+ crep = (char *)bad_mbchar;
+ *crep_len = bad_mbchar_len;
+ } else {
+ *crep_len = wctomb(crep, control_wrep(wc));
+
+ if (*crep_len < 0) {
+ wctomb(NULL, 0);
+ *crep_len = 0;
+ }
}
} else {
#endif
#ifdef ENABLE_UTF8
if (ISSET(USE_UTF8)) {
wchar_t wc;
- int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX), width;
+ int width;
- if (c_mb_len <= 0) {
+ if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc(NULL, NULL, 0);
- wc = (unsigned char)*c;
+ wc = bad_wchar;
}
width = wcwidth(wc);
chr_mb = charalloc(MB_CUR_MAX);
*chr_mb_len = wctomb(chr_mb, chr);
- if (*chr_mb_len <= 0) {
+ if (*chr_mb_len < 0) {
wctomb(NULL, 0);
*chr_mb_len = 0;
}
/* Get the number of bytes in the multibyte character. */
buf_mb_len = mblen(buf, MB_CUR_MAX);
- /* If buf contains a null byte or an invalid multibyte
- * character, set bad_chr to TRUE (if it contains the latter)
- * and interpret buf's first byte. */
- if (buf_mb_len <= 0) {
+ /* If buf contains an invalid multibyte character, set bad_chr
+ * to TRUE (if it isn't NULL) and interpret buf's first byte. If
+ * buf contains a null byte, treat it as a one-byte character. */
+ if (buf_mb_len < 0) {
mblen(NULL, 0);
- if (buf_mb_len < 0 && bad_chr != NULL)
+ if (bad_chr != NULL)
*bad_chr = TRUE;
buf_mb_len = 1;
- }
+ } else if (buf_mb_len == 0)
+ buf_mb_len++;
/* Save the multibyte character in chr. */
if (chr != NULL) {
s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
- if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
+ if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
ws1 = (unsigned char)*s1_mb;
bad_s1_mb = TRUE;
s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
- if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
+ if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
ws2 = (unsigned char)*s2_mb;
bad_s2_mb = TRUE;
r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
- if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
+ if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
wr = (unsigned char)*r;
bad_r_mb = TRUE;
q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
- if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
+ if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
wq = (unsigned char)*q;
bad_q_mb = TRUE;
r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
- if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
+ if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
wr = (unsigned char)*r;
bad_r_mb = TRUE;
q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
- if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
+ if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
wq = (unsigned char)*q;
bad_q_mb = TRUE;
wchar_t ws, wc;
int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
- if (c_mb_len <= 0) {
+ if (c_mb_len < 0) {
mbtowc(NULL, NULL, 0);
wc = (unsigned char)*c;
bad_c_mb = TRUE;
while (*s != '\0') {
int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);
- if (mbtowc(&ws, s_mb, s_mb_len) <= 0) {
+ if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
ws = (unsigned char)*s;
bad_s_mb = TRUE;
start_col++;
}
/* If buf contains a control character, interpret it. If buf
- * contains an invalid multibyte control character, interpret
- * it as though it's a normal control character.*/
+ * contains an invalid multibyte control character, display it
+ * as such. */
} else if (is_cntrl_mbchar(buf_mb)) {
char *ctrl_buf_mb = charalloc(mb_cur_max());
int ctrl_buf_mb_len, i;
#ifdef ENABLE_UTF8
/* If buf contains an invalid multibyte non-control
- * character, interpret it as though it's a normal
- * non-control character. */
+ * character, display it as Unicode FFFD (Replacement
+ * Character). */
if (ISSET(USE_UTF8) && bad_char) {
- char *bad_buf_mb;
- int bad_buf_mb_len;
+ for (i = 0; i < bad_mbchar_len; i++)
+ converted[index++] = bad_mbchar[i];
- bad_buf_mb = make_mbchar((unsigned char)*buf_mb,
- &bad_buf_mb_len);
-
- for (i = 0; i < bad_buf_mb_len; i++)
- converted[index++] = bad_buf_mb[i];
-
- start_col += mbwidth(bad_buf_mb);
-
- free(bad_buf_mb);
+ start_col += mbwidth(bad_mbchar);
} else {
#endif
for (i = 0; i < buf_mb_len; i++)