From 455a918071c659458f1f34b7a219dcbe0a9c2aeb Mon Sep 17 00:00:00 2001 From: Benno Schulenberg Date: Sat, 18 Apr 2015 20:07:31 +0000 Subject: [PATCH] Making sure an invalid starting byte of a multibyte sequence is properly terminated, to prevent the displaying of ghost characters. git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@5206 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- ChangeLog | 8 ++++++++ src/winio.c | 14 ++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index bdc85d08..1ef7b176 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,14 @@ 2015-04-18 Benno Schulenberg * src/global.c, src/nano.c, doc/man/nanorc.5, doc/texinfo/nano.texi: Make the descriptions of the multibuffer feature more accurate. + * src/winio.c (display_string): Make sure an invalid starting byte + of a multibyte sequence is properly terminated, so that it doesn't + pick up lingering bytes of any previous content. This prevents the + displaying of ghosts -- characters that aren't really there -- when a + file contains valid ánd invalid UTF-8 sequences. For an example see: + https://lists.gnu.org/archive/html/nano-devel/2015-04/msg00052.html. + Also make two comments more accurate: an invalid multibyte sequence + will never be categorized as a control character or anything else. 2015-04-18 Mark Oteiza * doc/syntax/{python,ruby,sh,tex}.nanorc: Add a linter definition. diff --git a/src/winio.c b/src/winio.c index 9d6587a4..917f7d6e 100644 --- a/src/winio.c +++ b/src/winio.c @@ -2004,9 +2004,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool converted[index++] = ' '; start_col++; } - /* If buf contains a control character, interpret it. If buf - * contains an invalid multibyte control character, display it - * as such. */ + /* If buf contains a control character, interpret it. */ } else if (is_cntrl_mbchar(buf_mb)) { char *ctrl_buf_mb = charalloc(mb_cur_max()); int ctrl_buf_mb_len, i; @@ -2036,13 +2034,17 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool #endif converted[index++] = ' '; start_col++; - /* If buf contains a non-control character, interpret it. If - * buf contains an invalid multibyte non-control character, - * display it as such. */ + /* If buf contains a non-control character, interpret it. If buf + * contains an invalid multibyte sequence, display it as such. */ } else { char *nctrl_buf_mb = charalloc(mb_cur_max()); int nctrl_buf_mb_len, i; + /* Make sure an invalid sequence-starter byte is properly + * terminated, so that it doesn't pick up lingering bytes + * of any previous content. */ + null_at(&buf_mb, buf_mb_len); + nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb, &nctrl_buf_mb_len); -- 2.39.5