From 78ea5e4a8ab9ce22ca2a39bb2f602c2c18525939 Mon Sep 17 00:00:00 2001
From: David Lawrence Ramsey <pooka109@gmail.com>
Date: Sun, 12 Dec 2004 19:04:56 +0000
Subject: [PATCH] improvements to wide/multibyte character input and output,
 using wide curses functions where applicable

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2182 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
---
 ChangeLog    |  22 ++++++----
 configure.ac |  31 +++++++++-----
 src/nano.c   |  24 ++++-------
 src/nano.h   |   4 --
 src/winio.c  | 116 +++++++++++++++++++++++----------------------------
 5 files changed, 97 insertions(+), 100 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 621b19d6..92e4ea69 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -35,20 +35,21 @@ CVS code -
 	  encodings such as KOI8-R can type properly again. (DLR, found
 	  by Arthur Ivanov)
 	- Massively overhaul the input and output routines to support
-	  buffered input and output, the first steps toward wide
-	  character input and output, and double-Escape/verbatim input
-	  of double-byte Unicode characters instead of single-byte ASCII
-	  characters.  New functions do_input(), do_mouse(),
-	  do_output(), is_byte_char(), get_buffer(), get_buffer_len(),
+	  buffered input and output, the first steps toward
+	  wide/multibyte character input and output, and
+	  double-Escape/verbatim input of double-byte Unicode characters
+	  instead of single-byte ASCII characters.  New functions
+	  is_byte_char(), get_buffer(), get_buffer_len(),
 	  buffer_to_keys(), unget_input(), get_input(), parse_kbinput(),
 	  and parse_verbatim_kbinput(); new macro charcpy(); changes to
 	  do_char() (renamed to do_output()), get_edit_input() (renamed
 	  to do_input() and moved to nano.c), get_edit_mouse() (renamed
 	  do_mouse() and moved to nano.c), do_verbatim_input(),
 	  do_tab(), main(), and get_ascii_kbinput() (renamed to
-	  get_word_kbinput()). (DLR; buffered input/output based on
-	  ideas from mutt 1.4.2.1; double-Escape input of Unicode
-	  characters suggested by Michael Piefel)
+	  get_word_kbinput()).  The wide version of ncurses is required
+	  in order for output to work properly. (DLR; buffered
+	  input/output based on ideas from mutt 1.4.2.1; double-Escape
+	  input of Unicode characters suggested by Michael Piefel)
 - cut.c:
   do_cut_text()
 	- If keep_cutbuffer is FALSE, only blow away the text in the
@@ -132,6 +133,11 @@ CVS code -
 	- Add a debug message. (DLR)
 - configure.ac:
 	- Remove specific references to control key shortcuts. (DLR)
+	- Check for the wide version of ncurses, without which multibyte
+	  strings don't seem to be displayed properly. (DLR; check for
+	  addwstr() in curses to determine whether it has wide character
+	  support inspired by mutt 1.4.2.1i's checking for waddnwstr()
+	  for the same reason)
 - doc/nanorc.sample:
 	- Add return to the "c-file" regexes. (DLR)
 
diff --git a/configure.ac b/configure.ac
index e94695e6..65c73250 100644
--- a/configure.ac
+++ b/configure.ac
@@ -312,28 +312,32 @@ AC_CHECK_FUNCS(getopt_long)
 dnl Checks for libraries.
 if eval "test x$CURSES_LIB_NAME = x"
 then
-    AC_CHECK_HEADERS(curses.h ncurses.h)
-    AC_CHECK_LIB(ncurses, initscr, [CURSES_LIB="-lncurses" CURSES_LIB_NAME=ncurses])
+    AC_CHECK_HEADERS(ncurses.h)
+    AC_CHECK_LIB(ncursesw, addwstr, [CURSES_LIB="-lncursesw" CURSES_LIB_NAME=ncursesw CURSES_LIB_WIDE="yes"])
+    if eval "test x$CURSES_LIB_NAME = x"
+    then
+	 AC_CHECK_LIB(ncurses, initscr, [CURSES_LIB="-lncurses" CURSES_LIB_NAME=ncurses])
+    fi
 fi
 
 if eval "test x$CURSES_LIB_NAME = x"
 then
-    AC_CHECK_LIB(curses, initscr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses])
-fi
-
-if eval "test x$CURSES_LIB_NAME = x"
-then
-    AC_CHECK_LIB(termcap, tgetent, [CURSES_LIB="-ltermcap" CURSES_LIB_NAME=termcap])
+    AC_CHECK_HEADERS(curses.h)
+    AC_CHECK_LIB(curses, addwstr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses CURSES_LIB_WIDE="yes"])
+    if eval "test x$CURSES_LIB_NAME = x"
+    then
+	 AC_CHECK_LIB(curses, initscr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses])
+    fi
 fi
 
 if eval "test x$CURSES_LIB_NAME = x"
 then
     AC_MSG_WARN([
-*** No termcap lib available, consider getting the official ncurses
+*** No curses lib available.  Consider getting the official ncurses
 *** distribution from ftp://ftp.gnu.org/pub/gnu/ncurses if you get
 *** errors compiling nano.])
 else
-    AC_MSG_RESULT([Using $CURSES_LIB_NAME as the termcap library])
+    AC_MSG_RESULT([Using $CURSES_LIB_NAME as the curses library])
 fi
 
 AC_CHECK_LIB([$CURSES_LIB_NAME], use_default_colors, AC_DEFINE(HAVE_USE_DEFAULT_COLORS, 1, [Define this if your curses library has the use_default_colors command.]))
@@ -353,6 +357,13 @@ then
 	LDFLAGS="$LDFLAGS $GLIB_LIBS"
 fi
 
+if test "x$CURSES_LIB_WIDE" = "xyes"
+then
+	AC_DEFINE(NANO_WIDE, 1, [Define this if your system has wide character support.])
+else
+	AC_MSG_WARN([No wide character support found.  nano will not be able to support UTF-8.])
+fi
+
 AC_CONFIG_FILES([
 Makefile
 doc/Makefile
diff --git a/src/nano.c b/src/nano.c
index abdd4252..e1cf6ad7 100644
--- a/src/nano.c
+++ b/src/nano.c
@@ -3567,26 +3567,22 @@ void do_output(int *kbinput, size_t kbinput_len)
 	/* Do we have to call edit_refresh(), or can we get away with
 	 * update_line()? */
 
-    char key[
 #ifdef NANO_WIDE
-	MB_LEN_MAX
+    char *key =
+	charalloc(MB_CUR_MAX)
 #else
-	1
+	charalloc(1)
 #endif
-	];		/* The current multibyte character we have. */
-    int key_len;	/* The length of the current multibyte
-			 * character. */
+	;
 
     assert(current != NULL && current->data != NULL);
 
     /* Turn off constant cursor position display. */
     UNSET(CONSTUPDATE);
 
-#ifdef NANO_WIDE
-    wctomb(NULL, 0);
-#endif
-
     for (i = 0; i < kbinput_len; i++) {
+	int key_len;
+
 	/* Null to newline, if needed. */
 	if (kbinput[i] == '\0')
 	    kbinput[i] = '\n';
@@ -3628,7 +3624,7 @@ void do_output(int *kbinput, size_t kbinput_len)
 	charcpy(&current->data[current_x], key, key_len);
 	current_len += key_len;
 	/* FIXME: Should totsize be the number of single-byte characters
-	 * or the number of multibyte characters?  Assume for former for
+	 * or the number of multibyte characters?  Assume the former for
 	 * now. */
 	totsize += key_len;
 	set_modified();
@@ -3669,15 +3665,13 @@ void do_output(int *kbinput, size_t kbinput_len)
 #endif
     }
 
-#ifdef NANO_WIDE
-    wctomb(NULL, 0);
-#endif
-
     /* Turn constant cursor position display back on if it was on
      * before. */
     if (old_constupdate)
 	SET(CONSTUPDATE);
 
+    free(key);
+
     if (do_refresh)
 	edit_refresh();
     else
diff --git a/src/nano.h b/src/nano.h
index 9a217f26..dcc31411 100644
--- a/src/nano.h
+++ b/src/nano.h
@@ -135,10 +135,6 @@
 
 #define VERMSG "GNU nano " VERSION
 
-/* FIXME: We should be checking for this instead of unconditionally
- * using it. */
-#define NANO_WIDE 1
-
 /* If we aren't using ncurses, turn the mouse support off, as it's
  * ncurses-specific. */
 #ifndef NCURSES_MOUSE_VERSION
diff --git a/src/winio.c b/src/winio.c
index c3b0ec90..18cfd93c 100644
--- a/src/winio.c
+++ b/src/winio.c
@@ -122,7 +122,7 @@ void reset_kbinput(void)
  * default keystroke buffer is empty. */
 void get_buffer(WINDOW *win)
 {
-    int input;
+    int input, input_key_code;
 
     /* If the keystroke buffer isn't empty, get out. */
     if (key_buffer != NULL)
@@ -134,19 +134,36 @@ void get_buffer(WINDOW *win)
 #ifndef NANO_SMALL
     allow_pending_sigwinch(TRUE);
 #endif
-    input = wgetch(win);
+
+#ifdef NANO_WIDE
+    if (!ISSET(NO_UTF8)) {
+	wint_t tmp;
+
+	input_key_code = wget_wch(win, &tmp);
+	input = (int)tmp;
+    } else {
+#endif
+	input = wgetch(win);
+	input_key_code = !is_byte_char(input);
+#ifdef NANO_WIDE
+    }
+#endif
+
 #ifndef NANO_SMALL
     allow_pending_sigwinch(FALSE);
 #endif
 
     /* Increment the length of the keystroke buffer, save the value of
      * the keystroke in key, and set key_code to TRUE if the keystroke
-     * is an extended keypad value and hence shouldn't be treated as a
-     * multibyte character. */
+     * is an extended keypad value or FALSE if it isn't. */
     key_buffer_len++;
     key_buffer = (buffer *)nmalloc(sizeof(buffer));
     key_buffer[0].key = input;
-    key_buffer[0].key_code = !is_byte_char(input);
+    key_buffer[0].key_code =
+#ifdef NANO_WIDE
+	!ISSET(NO_UTF8) ? (input_key_code == KEY_CODE_YES) :
+#endif
+	input_key_code;
 
     /* Read in the remaining characters using non-blocking input. */
     nodelay(win, TRUE);
@@ -155,73 +172,49 @@ void get_buffer(WINDOW *win)
 #ifndef NANO_SMALL
 	allow_pending_sigwinch(TRUE);
 #endif
-	input = wgetch(win);
-#ifndef NANO_SMALL
-	allow_pending_sigwinch(FALSE);
-#endif
 
+#ifdef NANO_WIDE
+	if (!ISSET(NO_UTF8)) {
+	    wint_t tmp;
+
+	    input_key_code = wget_wch(win, &tmp);
+	    input = (int)tmp;
+	} else {
+#endif
+	    input = wgetch(win);
+	    input_key_code = !is_byte_char(input);
+#ifdef NANO_WIDE
+	}
+#endif
 	/* If there aren't any more characters, stop reading. */
-	if (input == ERR)
+	if (
+#ifdef NANO_WIDE
+		(!ISSET(NO_UTF8) && input_key_code == ERR) ||
+#endif
+		input == ERR)
 	    break;
 
 	/* Otherwise, increment the length of the keystroke buffer, save
 	 * the value of the keystroke in key, and set key_code to TRUE
-	 * if the keystroke is an extended keypad value and hence
-	 * shouldn't be treated as a multibyte character. */
+	 * if the keystroke is an extended keypad value or FALSE if it
+	 * isn't. */
 	key_buffer_len++;
 	key_buffer = (buffer *)nrealloc(key_buffer, key_buffer_len *
 		sizeof(buffer));
 	key_buffer[key_buffer_len - 1].key = input;
-	key_buffer[key_buffer_len - 1].key_code = !is_byte_char(input);
+	key_buffer[key_buffer_len - 1].key_code =
+#ifdef NANO_WIDE
+		!ISSET(NO_UTF8) ? (input_key_code == KEY_CODE_YES) :
+#endif
+		input_key_code;
+
+#ifndef NANO_SMALL
+	allow_pending_sigwinch(FALSE);
+#endif
     }
 
     /* Switch back to non-blocking input. */
     nodelay(win, FALSE);
-
-#ifdef NANO_WIDE
-    if (!ISSET(NO_UTF8)) {
-	size_t i;
-	buffer *clean_key_buffer = NULL;
-	size_t clean_key_buffer_len = 0;
-
-	mbtowc(NULL, NULL, 0);
-
-	/* Change all complete and valid multibyte keystrokes to
-	 * their wide character values, discarding the others. */
-	for (i = 0; i < key_buffer_len; i++) {
-	    wchar_t wide_key;
-	    int wide_key_len;
-
-	    if (key_buffer[i].key_code) {
-		mbtowc(NULL, NULL, 0);
-
-		wide_key_len = 1;
-		wide_key = key_buffer[i].key;
-	    } else
-		wide_key_len = mbtowc(&wide_key,
-			(const char *)&key_buffer[i].key, 1);
-
-	    if (wide_key_len != -1) {
-		clean_key_buffer_len++;
-		clean_key_buffer = (buffer *)nrealloc(clean_key_buffer,
-			clean_key_buffer_len * sizeof(buffer));
-
-		clean_key_buffer[clean_key_buffer_len - 1].key =
-			(int)wide_key;
-		clean_key_buffer[clean_key_buffer_len - 1].key_code =
-			key_buffer[i].key_code;
-	    }
-	}
-
-	mbtowc(NULL, NULL, 0);
-
-	/* Replace the default keystroke buffer with the non-(-1)
-	 * keystroke buffer. */
-	key_buffer_len = clean_key_buffer_len;
-	free(key_buffer);
-	key_buffer = clean_key_buffer;
-    }
-#endif
 }
 
 /* Return the length of the default keystroke buffer. */
@@ -258,12 +251,10 @@ void unget_input(buffer *input, size_t input_len)
 #ifdef NANO_WIDE
     if (!ISSET(NO_UTF8)) {
 	size_t i;
-
-	wctomb(NULL, 0);
+	char *key = charalloc(MB_CUR_MAX);
 
 	/* Keep all valid wide keystrokes, discarding the others. */
 	for (i = 0; i < input_len; i++) {
-	    char key[MB_LEN_MAX];
 	    int key_len = input[i].key_code ? 1 :
 		wctomb(key, (wchar_t)input[i].key);
 
@@ -278,8 +269,7 @@ void unget_input(buffer *input, size_t input_len)
 	    }
 	}
 
-	wctomb(NULL, 0);
-
+	free(key);
     } else {
 #endif
 	clean_input = input;
-- 
2.39.5