From 96452cb60cc55d35ccf60e25fc21040dec07d396 Mon Sep 17 00:00:00 2001 From: David Lawrence Ramsey Date: Tue, 26 Jul 2005 06:13:45 +0000 Subject: [PATCH] handle invalid multibyte characters more efficiently git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2941 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- ChangeLog | 11 +++++--- src/chars.c | 73 +++++++++++++++++++++++++++++++++++----------------- src/files.c | 7 +++-- src/move.c | 13 +++++----- src/nano.c | 6 ++--- src/proto.h | 9 ++----- src/rcfile.c | 5 ++-- src/text.c | 28 ++++++++++---------- src/utils.c | 4 +-- src/winio.c | 63 +++++++++++++++++---------------------------- 10 files changed, 112 insertions(+), 107 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3134d69b..0400aa5b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -118,6 +118,11 @@ CVS code - - color.c: - Remove unneeded string.h and fcntl.h includes. (DLR) - chars.c: + mbrep() + - New function, the equivalent of control_mbrep() for non-control + characters. (DLR) + parse_mbchar() + - Remove now-unneeded bad_chr parameter. (DLR) mbstrchr() - Don't count matches between valid and invalid multibyte sequences anymore, for consistency. (DLR) @@ -200,9 +205,6 @@ CVS code - (DLR) - Move stdlib.h, dirent.h, regex.h, and assert.h includes here, as every source file needs them. (DLR) - proto.h: - - Add declarations for bad_mbchar and bad_mbchar_len, so that we - can use them in display_string() as well as chars.c. (DLR) - rcfile.c: nregcomp() - Return TRUE when the compilation succeeds and FALSE otherwise, @@ -237,6 +239,9 @@ CVS code - the number of lines and characters in the file or selection, as wc does. (DLR) - winio.c: + display_string() + - Instead of using parse_mbchar()'s bad_chr parameter, use + mbrep() to get the representation of a bad character. (DLR) edit_redraw(), edit_refresh() - Clean up and simplify. 
(DLR) edit_update() diff --git a/src/chars.c b/src/chars.c index c62d7b42..dbcd721a 100644 --- a/src/chars.c +++ b/src/chars.c @@ -41,8 +41,8 @@ static const wchar_t bad_wchar = 0xFFFD; * Unicode FFFD (Replacement Character), unless we're * determining if it's a control character or searching for a * match to it. */ -const char *bad_mbchar = "\xEF\xBF\xBD"; -const int bad_mbchar_len = 3; +static const char *bad_mbchar = "\xEF\xBF\xBD"; +static const int bad_mbchar_len = 3; #endif #ifndef HAVE_ISBLANK @@ -241,6 +241,39 @@ char *control_mbrep(const char *c, char *crep, int *crep_len) return crep; } +/* c is a multibyte non-control character. Copy its representation (or + * bad_mbchar, if c is invalid) into crep, set *crep_len, and return crep. */ +char *mbrep(const char *c, char *crep, int *crep_len) +{ + assert(c != NULL && crep != NULL && crep_len != NULL); + +#ifdef ENABLE_UTF8 + if (ISSET(USE_UTF8)) { + wchar_t wc; + + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc(NULL, NULL, 0); + *crep_len = bad_mbchar_len; + strncpy(crep, bad_mbchar, *crep_len); + } else { + *crep_len = wctomb(crep, wc); + + if (*crep_len < 0) { + wctomb(NULL, 0); + *crep_len = 0; + } + } + } else { +#endif + *crep_len = 1; + *crep = *c; +#ifdef ENABLE_UTF8 + } +#endif + + return crep; +} + /* This function is equivalent to wcwidth() for multibyte characters. */ int mbwidth(const char *c) { @@ -310,19 +343,14 @@ char *make_mbchar(int chr, int *chr_mb_len) /* Parse a multibyte character from buf. Return the number of bytes * used. If chr isn't NULL, store the multibyte character in it. If - * bad_chr isn't NULL, set it to TRUE if we have a bad multibyte - * character. If col isn't NULL, store the new display width in it. If - * *str is '\t', we expect col to have the current display width. */ -int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t - *col) + * col isn't NULL, store the new display width in it. If *buf is '\t', + * we expect col to have the current display width. 
*/ +int parse_mbchar(const char *buf, char *chr, size_t *col) { int buf_mb_len; assert(buf != NULL); - if (bad_chr != NULL) - *bad_chr = FALSE; - #ifdef ENABLE_UTF8 if (ISSET(USE_UTF8)) { /* Get the number of bytes in the multibyte character. */ @@ -332,8 +360,6 @@ int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t * to TRUE and interpret buf's first byte. */ if (buf_mb_len < 0) { mblen(NULL, 0); - if (bad_chr != NULL) - *bad_chr = TRUE; buf_mb_len = 1; } else if (buf_mb_len == 0) buf_mb_len++; @@ -415,8 +441,7 @@ size_t move_mbleft(const char *buf, size_t pos) /* There is no library function to move backward one multibyte * character. Here is the naive, O(pos) way to do it. */ while (TRUE) { - int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL, - NULL); + int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL); if (pos_prev <= (size_t)buf_mb_len) break; @@ -431,7 +456,7 @@ size_t move_mbleft(const char *buf, size_t pos) * after the one at pos. */ size_t move_mbright(const char *buf, size_t pos) { - return pos + parse_mbchar(buf + pos, NULL, NULL, NULL); + return pos + parse_mbchar(buf + pos, NULL, NULL); } #ifndef HAVE_STRCASECMP @@ -482,7 +507,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) bool bad_s1_mb = FALSE, bad_s2_mb = FALSE; int s1_mb_len, s2_mb_len; - s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL); + s1_mb_len = parse_mbchar(s1, s1_mb, NULL); if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) { mbtowc(NULL, NULL, 0); @@ -490,7 +515,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) bad_s1_mb = TRUE; } - s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL); + s2_mb_len = parse_mbchar(s2, s2_mb, NULL); if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) { mbtowc(NULL, NULL, 0); @@ -556,7 +581,7 @@ const char *mbstrcasestr(const char *haystack, const char *needle) while (*q != '\0') { bool bad_r_mb = FALSE, bad_q_mb = FALSE; - r_mb_len = parse_mbchar(r, r_mb, NULL, NULL); + r_mb_len = parse_mbchar(r, r_mb, NULL); if 
(mbtowc(&wr, r_mb, r_mb_len) < 0) { mbtowc(NULL, NULL, 0); @@ -564,7 +589,7 @@ const char *mbstrcasestr(const char *haystack, const char *needle) bad_r_mb = TRUE; } - q_mb_len = parse_mbchar(q, q_mb, NULL, NULL); + q_mb_len = parse_mbchar(q, q_mb, NULL); if (mbtowc(&wq, q_mb, q_mb_len) < 0) { mbtowc(NULL, NULL, 0); @@ -662,7 +687,7 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle, while (*q != '\0') { bool bad_r_mb = FALSE, bad_q_mb = FALSE; - r_mb_len = parse_mbchar(r, r_mb, NULL, NULL); + r_mb_len = parse_mbchar(r, r_mb, NULL); if (mbtowc(&wr, r_mb, r_mb_len) < 0) { mbtowc(NULL, NULL, 0); @@ -670,7 +695,7 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle, bad_r_mb = TRUE; } - q_mb_len = parse_mbchar(q, q_mb, NULL, NULL); + q_mb_len = parse_mbchar(q, q_mb, NULL); if (mbtowc(&wq, q_mb, q_mb_len) < 0) { mbtowc(NULL, NULL, 0); @@ -740,7 +765,7 @@ size_t mbstrnlen(const char *s, size_t maxlen) int s_mb_len; while (*s != '\0') { - s_mb_len = parse_mbchar(s, NULL, NULL, NULL); + s_mb_len = parse_mbchar(s, NULL, NULL); if (maxlen == 0) break; @@ -777,7 +802,7 @@ char *mbstrchr(const char *s, char *c) } while (*s != '\0') { - int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL); + int s_mb_len = parse_mbchar(s, s_mb, NULL); if (mbtowc(&ws, s_mb, s_mb_len) < 0) { mbtowc(NULL, NULL, 0); @@ -832,7 +857,7 @@ bool has_blank_mbchars(const char *s) while (*s != '\0') { int chr_mb_len; - chr_mb_len = parse_mbchar(s, chr_mb, NULL, NULL); + chr_mb_len = parse_mbchar(s, chr_mb, NULL); if (is_blank_mbchar(chr_mb)) { retval = TRUE; diff --git a/src/files.c b/src/files.c index 949b121c..c8a49e67 100644 --- a/src/files.c +++ b/src/files.c @@ -2033,9 +2033,9 @@ char *input_tab(char *buf, size_t *place, bool *lastwastab, bool *list) /* Get the number of single-byte characters that all the * matches have in common. 
*/ match1_mb_len = parse_mbchar(matches[0] + common_len, - match1_mb, NULL, NULL); + match1_mb, NULL); match2_mb_len = parse_mbchar(matches[match] + - common_len, match2_mb, NULL, NULL); + common_len, match2_mb, NULL); match1_mb[match1_mb_len] = '\0'; match2_mb[match2_mb_len] = '\0'; if (strcmp(match1_mb, match2_mb) != 0) @@ -2045,8 +2045,7 @@ char *input_tab(char *buf, size_t *place, bool *lastwastab, bool *list) if (match < num_matches || matches[0][common_len] == '\0') break; - common_len += parse_mbchar(buf + common_len, NULL, NULL, - NULL); + common_len += parse_mbchar(buf + common_len, NULL, NULL); } free(match1_mb); diff --git a/src/move.c b/src/move.c index 21a8175d..69b6b6cb 100644 --- a/src/move.c +++ b/src/move.c @@ -227,7 +227,7 @@ bool do_next_word(bool allow_punct, bool allow_update) * the current word. */ while (!end_line) { char_mb_len = parse_mbchar(openfile->current->data + - openfile->current_x, char_mb, NULL, NULL); + openfile->current_x, char_mb, NULL); /* If we've found it, stop moving forward through the current * line. */ @@ -254,7 +254,7 @@ bool do_next_word(bool allow_punct, bool allow_update) openfile->current = openfile->current->next) { while (!end_line) { char_mb_len = parse_mbchar(openfile->current->data + - openfile->current_x, char_mb, NULL, NULL); + openfile->current_x, char_mb, NULL); /* If we've found it, stop moving forward through the * current line. */ @@ -322,7 +322,7 @@ bool do_prev_word(bool allow_punct, bool allow_update) * of the current word. */ while (!begin_line) { char_mb_len = parse_mbchar(openfile->current->data + - openfile->current_x, char_mb, NULL, NULL); + openfile->current_x, char_mb, NULL); /* If we've found it, stop moving backward through the current * line. 
*/ @@ -352,7 +352,7 @@ bool do_prev_word(bool allow_punct, bool allow_update) openfile->current = openfile->current->prev) { while (!begin_line) { char_mb_len = parse_mbchar(openfile->current->data + - openfile->current_x, char_mb, NULL, NULL); + openfile->current_x, char_mb, NULL); /* If we've found it, stop moving backward through the * current line. */ @@ -392,9 +392,8 @@ bool do_prev_word(bool allow_punct, bool allow_update) openfile->current_x); while (!begin_line) { - char_mb_len = - parse_mbchar(openfile->current->data + - openfile->current_x, char_mb, NULL, NULL); + char_mb_len = parse_mbchar(openfile->current->data + + openfile->current_x, char_mb, NULL); /* If we've found it, stop moving backward through the * current line. */ diff --git a/src/nano.c b/src/nano.c index 60cb2edb..1968c051 100644 --- a/src/nano.c +++ b/src/nano.c @@ -1778,10 +1778,8 @@ void do_output(char *output, size_t output_len, bool allow_cntrls) } } - /* Interpret the next multibyte character. If it's an invalid - * multibyte character, interpret it as though it's a byte - * character. */ - char_buf_len = parse_mbchar(output + i, char_buf, NULL, NULL); + /* Interpret the next multibyte character. */ + char_buf_len = parse_mbchar(output + i, char_buf, NULL); i += char_buf_len; diff --git a/src/proto.h b/src/proto.h index 91351345..75a37fd5 100644 --- a/src/proto.h +++ b/src/proto.h @@ -132,11 +132,6 @@ extern bool curses_ended; extern char *homedir; -#ifdef ENABLE_UTF8 -extern const char *bad_mbchar; -extern const int bad_mbchar_len; -#endif - /* The functions we want available. */ /* Public functions in chars.c. 
*/ @@ -161,11 +156,11 @@ char control_rep(char c); wchar_t control_wrep(wchar_t c); #endif char *control_mbrep(const char *c, char *crep, int *crep_len); +char *mbrep(const char *c, char *crep, int *crep_len); int mbwidth(const char *c); int mb_cur_max(void); char *make_mbchar(int chr, int *chr_mb_len); -int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t - *col); +int parse_mbchar(const char *buf, char *chr, size_t *col); size_t move_mbleft(const char *buf, size_t pos); size_t move_mbright(const char *buf, size_t pos); #ifndef HAVE_STRCASECMP diff --git a/src/rcfile.c b/src/rcfile.c index 96e6f080..929ef79f 100644 --- a/src/rcfile.c +++ b/src/rcfile.c @@ -625,11 +625,10 @@ void parse_rcfile(FILE *rcstream) } else { whitespace_len[0] = parse_mbchar(whitespace, NULL, - NULL, NULL); + NULL); whitespace_len[1] = parse_mbchar(whitespace + - whitespace_len[0], NULL, - NULL, NULL); + whitespace_len[0], NULL, NULL); } } else #endif diff --git a/src/text.c b/src/text.c index 603a1404..64af010f 100644 --- a/src/text.c +++ b/src/text.c @@ -76,7 +76,7 @@ void do_delete(void) if (openfile->current->data[openfile->current_x] != '\0') { int char_buf_len = parse_mbchar(openfile->current->data + - openfile->current_x, NULL, NULL, NULL); + openfile->current_x, NULL, NULL); size_t line_len = strlen(openfile->current->data + openfile->current_x); @@ -576,7 +576,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool newline) while (*line != '\0' && goal >= 0) { size_t pos = 0; - line_len = parse_mbchar(line, NULL, NULL, &pos); + line_len = parse_mbchar(line, NULL, &pos); if (is_blank_mbchar(line) || (newline && *line == '\n')) { blank_loc = cur_loc; @@ -599,7 +599,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool newline) bool found_blank = FALSE; while (*line != '\0') { - line_len = parse_mbchar(line, NULL, NULL, NULL); + line_len = parse_mbchar(line, NULL, NULL); if (is_blank_mbchar(line) || (newline && *line == '\n')) { if (!found_blank) @@ -617,12 
+617,12 @@ ssize_t break_line(const char *line, ssize_t goal, bool newline) /* Move to the last blank after blank_loc, if there is one. */ line -= cur_loc; line += blank_loc; - line_len = parse_mbchar(line, NULL, NULL, NULL); + line_len = parse_mbchar(line, NULL, NULL); line += line_len; while (*line != '\0' && (is_blank_mbchar(line) || (newline && *line == '\n'))) { - line_len = parse_mbchar(line, NULL, NULL, NULL); + line_len = parse_mbchar(line, NULL, NULL); line += line_len; blank_loc += line_len; @@ -646,7 +646,7 @@ size_t indent_length(const char *line) blank_mb = charalloc(mb_cur_max()); while (*line != '\0') { - blank_mb_len = parse_mbchar(line, blank_mb, NULL, NULL); + blank_mb_len = parse_mbchar(line, blank_mb, NULL); if (!is_blank_mbchar(blank_mb)) break; @@ -697,14 +697,14 @@ void justify_format(filestruct *paragraph, size_t skip) /* If this character is blank, make sure that it's a space with * no blanks after it. */ if (is_blank_mbchar(end)) { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); *new_end = ' '; new_end++; end += end_len; while (*end != '\0' && is_blank_mbchar(end)) { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); end += end_len; shift += end_len; @@ -722,7 +722,7 @@ void justify_format(filestruct *paragraph, size_t skip) * more than two blanks after it, and make sure that the blanks * are spaces. 
*/ } else if (mbstrchr(punct, end) != NULL) { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); while (end_len > 0) { *new_end = *end; @@ -732,7 +732,7 @@ void justify_format(filestruct *paragraph, size_t skip) } if (*end != '\0' && mbstrchr(brackets, end) != NULL) { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); while (end_len > 0) { *new_end = *end; @@ -743,7 +743,7 @@ void justify_format(filestruct *paragraph, size_t skip) } if (*end != '\0' && is_blank_mbchar(end)) { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); *new_end = ' '; new_end++; @@ -751,7 +751,7 @@ void justify_format(filestruct *paragraph, size_t skip) } if (*end != '\0' && is_blank_mbchar(end)) { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); *new_end = ' '; new_end++; @@ -759,7 +759,7 @@ void justify_format(filestruct *paragraph, size_t skip) } while (*end != '\0' && is_blank_mbchar(end)) { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); end += end_len; shift += end_len; @@ -775,7 +775,7 @@ void justify_format(filestruct *paragraph, size_t skip) /* If this character is neither blank nor punctuation, leave it * alone. 
*/ } else { - end_len = parse_mbchar(end, NULL, NULL, NULL); + end_len = parse_mbchar(end, NULL, NULL); while (end_len > 0) { *new_end = *end; diff --git a/src/utils.c b/src/utils.c index ca8106b9..fa4cdf16 100644 --- a/src/utils.c +++ b/src/utils.c @@ -247,8 +247,8 @@ bool is_whole_word(size_t pos, const char *buf, const char *word) assert(buf != NULL && pos <= strlen(buf) && word != NULL); - parse_mbchar(buf + move_mbleft(buf, pos), p, NULL, NULL); - parse_mbchar(buf + word_end, r, NULL, NULL); + parse_mbchar(buf + move_mbleft(buf, pos), p, NULL); + parse_mbchar(buf + word_end, r, NULL); /* If we're at the beginning of the line or the character before the * word isn't a non-punctuation "word" character, and if we're at diff --git a/src/winio.c b/src/winio.c index 8132f48d..9d6b47e7 100644 --- a/src/winio.c +++ b/src/winio.c @@ -1862,10 +1862,8 @@ void do_statusbar_output(char *output, size_t output_len, bool } } - /* Interpret the next multibyte character. If it's an invalid - * multibyte character, interpret it as though it's a byte - * character. */ - char_buf_len = parse_mbchar(output + i, char_buf, NULL, NULL); + /* Interpret the next multibyte character. */ + char_buf_len = parse_mbchar(output + i, char_buf, NULL); i += char_buf_len; @@ -1935,7 +1933,7 @@ void do_statusbar_delete(void) { if (answer[statusbar_x] != '\0') { int char_buf_len = parse_mbchar(answer + statusbar_x, NULL, - NULL, NULL); + NULL); size_t line_len = strlen(answer + statusbar_x); assert(statusbar_x < strlen(answer)); @@ -1982,8 +1980,7 @@ bool do_statusbar_next_word(bool allow_punct) /* Move forward until we find the character after the last letter of * the current word. */ while (!end_line) { - char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL, - NULL); + char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL); /* If we've found it, stop moving forward through the current * line. 
*/ @@ -2007,8 +2004,7 @@ bool do_statusbar_next_word(bool allow_punct) statusbar_x += char_mb_len; while (!end_line) { - char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL, - NULL); + char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL); /* If we've found it, stop moving forward through the current * line. */ @@ -2043,8 +2039,7 @@ bool do_statusbar_prev_word(bool allow_punct) /* Move backward until we find the character before the first letter * of the current word. */ while (!begin_line) { - char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL, - NULL); + char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL); /* If we've found it, stop moving backward through the current * line. */ @@ -2069,8 +2064,7 @@ bool do_statusbar_prev_word(bool allow_punct) statusbar_x = move_mbleft(answer, statusbar_x); while (!begin_line) { - char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL, - NULL); + char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL); /* If we've found it, stop moving backward through the current * line. */ @@ -2093,7 +2087,7 @@ bool do_statusbar_prev_word(bool allow_punct) while (!begin_line) { char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, - NULL, NULL); + NULL); /* If we've found it, stop moving backward through the * current line. */ @@ -2164,7 +2158,7 @@ size_t actual_x(const char *str, size_t xplus) assert(str != NULL); while (*str != '\0') { - int str_len = parse_mbchar(str, NULL, NULL, &length); + int str_len = parse_mbchar(str, NULL, &length); if (length > xplus) break; @@ -2189,7 +2183,7 @@ size_t strnlenpt(const char *str, size_t size) assert(str != NULL); while (*str != '\0') { - int str_len = parse_mbchar(str, NULL, NULL, &length); + int str_len = parse_mbchar(str, NULL, &length); str += str_len; @@ -2281,8 +2275,6 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool /* The string we return. */ size_t index; /* Current position in converted. 
*/ - bool bad_char; - /* Whether we have an invalid multibyte character. */ char *buf_mb = charalloc(mb_cur_max()); int buf_mb_len; @@ -2311,8 +2303,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool buf[start_index] != '\t')) { /* We don't display all of buf[start_index] since it starts to * the left of the screen. */ - buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL, - NULL); + buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL); if (is_cntrl_mbchar(buf_mb)) { if (column < start_col) { @@ -2343,8 +2334,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool } while (index < alloc_len - 1 && buf[start_index] != '\0') { - buf_mb_len = parse_mbchar(buf + start_index, buf_mb, &bad_char, - NULL); + buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL); /* If buf contains a tab character, interpret it. */ if (*buf_mb == '\t') { @@ -2394,27 +2384,22 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool #endif converted[index++] = ' '; start_col++; - /* If buf contains a non-control character, interpret it. */ + /* If buf contains a non-control character, interpret it. If + * buf contains an invalid multibyte non-control character, + * display it as such. */ } else { - int i; + char *nctrl_buf_mb = charalloc(mb_cur_max()); + int nctrl_buf_mb_len, i; -#ifdef ENABLE_UTF8 - /* If buf contains an invalid multibyte non-control - * character, display it as such. 
*/ - if (ISSET(USE_UTF8) && bad_char) { - for (i = 0; i < bad_mbchar_len; i++) - converted[index++] = bad_mbchar[i]; + nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb, + &nctrl_buf_mb_len); - start_col += mbwidth(bad_mbchar); - } else { -#endif - for (i = 0; i < buf_mb_len; i++) - converted[index++] = buf[start_index + i]; + for (i = 0; i < nctrl_buf_mb_len; i++) + converted[index++] = nctrl_buf_mb[i]; - start_col += mbwidth(buf_mb); -#ifdef ENABLE_UTF8 - } -#endif + start_col += mbwidth(nctrl_buf_mb); + + free(nctrl_buf_mb); } start_index += buf_mb_len; -- 2.39.5