From 78ea5e4a8ab9ce22ca2a39bb2f602c2c18525939 Mon Sep 17 00:00:00 2001 From: David Lawrence Ramsey Date: Sun, 12 Dec 2004 19:04:56 +0000 Subject: [PATCH] improvements to wide/multibyte character input and output, using wide curses functions where applicable git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2182 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- ChangeLog | 22 ++++++---- configure.ac | 31 +++++++++----- src/nano.c | 24 ++++------- src/nano.h | 4 -- src/winio.c | 116 +++++++++++++++++++++++---------------------------- 5 files changed, 97 insertions(+), 100 deletions(-) diff --git a/ChangeLog b/ChangeLog index 621b19d6..92e4ea69 100644 --- a/ChangeLog +++ b/ChangeLog @@ -35,20 +35,21 @@ CVS code - encodings such as KOI8-R can type properly again. (DLR, found by Arthur Ivanov) - Massively overhaul the input and output routines to support - buffered input and output, the first steps toward wide - character input and output, and double-Escape/verbatim input - of double-byte Unicode characters instead of single-byte ASCII - characters. New functions do_input(), do_mouse(), - do_output(), is_byte_char(), get_buffer(), get_buffer_len(), + buffered input and output, the first steps toward + wide/multibyte character input and output, and + double-Escape/verbatim input of double-byte Unicode characters + instead of single-byte ASCII characters. New functions + is_byte_char(), get_buffer(), get_buffer_len(), buffer_to_keys(), unget_input(), get_input(), parse_kbinput(), and parse_verbatim_kbinput(); new macro charcpy(); changes to do_char() (renamed to do_output()), get_edit_input() (renamed to do_input() and moved to nano.c), get_edit_mouse() (renamed do_mouse() and moved to nano.c), do_verbatim_input(), do_tab(), main(), and get_ascii_kbinput() (renamed to - get_word_kbinput()). (DLR; buffered input/output based on - ideas from mutt 1.4.2.1; double-Escape input of Unicode - characters suggested by Michael Piefel) + get_word_kbinput()). 
The wide version of ncurses is required + in order for output to work properly. (DLR; buffered + input/output based on ideas from mutt 1.4.2.1; double-Escape + input of Unicode characters suggested by Michael Piefel) - cut.c: do_cut_text() - If keep_cutbuffer is FALSE, only blow away the text in the @@ -132,6 +133,11 @@ CVS code - - Add a debug message. (DLR) - configure.ac: - Remove specific references to control key shortcuts. (DLR) + - Check for the wide version of ncurses, without which multibyte + strings don't seem to be displayed properly. (DLR; check for + addwstr() in curses to determine whether it has wide character + support inspired by mutt 1.4.2.1i's checking for waddnwstr() + for the same reason) - doc/nanorc.sample: - Add return to the "c-file" regexes. (DLR) diff --git a/configure.ac b/configure.ac index e94695e6..65c73250 100644 --- a/configure.ac +++ b/configure.ac @@ -312,28 +312,32 @@ AC_CHECK_FUNCS(getopt_long) dnl Checks for libraries. if eval "test x$CURSES_LIB_NAME = x" then - AC_CHECK_HEADERS(curses.h ncurses.h) - AC_CHECK_LIB(ncurses, initscr, [CURSES_LIB="-lncurses" CURSES_LIB_NAME=ncurses]) + AC_CHECK_HEADERS(ncurses.h) + AC_CHECK_LIB(ncursesw, addwstr, [CURSES_LIB="-lncursesw" CURSES_LIB_NAME=ncursesw CURSES_LIB_WIDE="yes"]) + if eval "test x$CURSES_LIB_NAME = x" + then + AC_CHECK_LIB(ncurses, initscr, [CURSES_LIB="-lncurses" CURSES_LIB_NAME=ncurses]) + fi fi if eval "test x$CURSES_LIB_NAME = x" then - AC_CHECK_LIB(curses, initscr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses]) -fi - -if eval "test x$CURSES_LIB_NAME = x" -then - AC_CHECK_LIB(termcap, tgetent, [CURSES_LIB="-ltermcap" CURSES_LIB_NAME=termcap]) + AC_CHECK_HEADERS(curses.h) + AC_CHECK_LIB(curses, addwstr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses CURSES_LIB_WIDE="yes"]) + if eval "test x$CURSES_LIB_NAME = x" + then + AC_CHECK_LIB(curses, initscr, [CURSES_LIB="-lcurses" CURSES_LIB_NAME=curses]) + fi fi if eval "test x$CURSES_LIB_NAME = x" then AC_MSG_WARN([ -*** No 
termcap lib available, consider getting the official ncurses +*** No curses lib available. Consider getting the official ncurses *** distribution from ftp://ftp.gnu.org/pub/gnu/ncurses if you get *** errors compiling nano.]) else - AC_MSG_RESULT([Using $CURSES_LIB_NAME as the termcap library]) + AC_MSG_RESULT([Using $CURSES_LIB_NAME as the curses library]) fi AC_CHECK_LIB([$CURSES_LIB_NAME], use_default_colors, AC_DEFINE(HAVE_USE_DEFAULT_COLORS, 1, [Define this if your curses library has the use_default_colors command.])) @@ -353,6 +357,13 @@ then LDFLAGS="$LDFLAGS $GLIB_LIBS" fi +if test "x$CURSES_LIB_WIDE" == "xyes" +then + AC_DEFINE(NANO_WIDE, 1, [Define this if your system has wide character support.]) +else + AC_MSG_WARN([No wide character support found. nano will not be able to support UTF-8.]) +fi + AC_CONFIG_FILES([ Makefile doc/Makefile diff --git a/src/nano.c b/src/nano.c index abdd4252..e1cf6ad7 100644 --- a/src/nano.c +++ b/src/nano.c @@ -3567,26 +3567,22 @@ void do_output(int *kbinput, size_t kbinput_len) /* Do we have to call edit_refresh(), or can we get away with * update_line()? */ - char key[ #ifdef NANO_WIDE - MB_LEN_MAX + char *key = + charalloc(MB_CUR_MAX) #else - 1 + charalloc(1) #endif - ]; /* The current multibyte character we have. */ - int key_len; /* The length of the current multibyte - * character. */ + ; assert(current != NULL && current->data != NULL); /* Turn off constant cursor position display. */ UNSET(CONSTUPDATE); -#ifdef NANO_WIDE - wctomb(NULL, 0); -#endif - for (i = 0; i < kbinput_len; i++) { + int key_len; + /* Null to newline, if needed. */ if (kbinput[i] == '\0') kbinput[i] = '\n'; @@ -3628,7 +3624,7 @@ void do_output(int *kbinput, size_t kbinput_len) charcpy(&current->data[current_x], key, key_len); current_len += key_len; /* FIXME: Should totsize be the number of single-byte characters - * or the number of multibyte characters? Assume for former for + * or the number of multibyte characters? Assume the former for * now. 
*/ totsize += key_len; set_modified(); @@ -3669,15 +3665,13 @@ void do_output(int *kbinput, size_t kbinput_len) #endif } -#ifdef NANO_WIDE - wctomb(NULL, 0); -#endif - /* Turn constant cursor position display back on if it was on * before. */ if (old_constupdate) SET(CONSTUPDATE); + free(key); + if (do_refresh) edit_refresh(); else diff --git a/src/nano.h b/src/nano.h index 9a217f26..dcc31411 100644 --- a/src/nano.h +++ b/src/nano.h @@ -135,10 +135,6 @@ #define VERMSG "GNU nano " VERSION -/* FIXME: We should be checking for this instead of unconditionally - * using it. */ -#define NANO_WIDE 1 - /* If we aren't using ncurses, turn the mouse support off, as it's * ncurses-specific. */ #ifndef NCURSES_MOUSE_VERSION diff --git a/src/winio.c b/src/winio.c index c3b0ec90..18cfd93c 100644 --- a/src/winio.c +++ b/src/winio.c @@ -122,7 +122,7 @@ void reset_kbinput(void) * default keystroke buffer is empty. */ void get_buffer(WINDOW *win) { - int input; + int input, input_key_code; /* If the keystroke buffer isn't empty, get out. */ if (key_buffer != NULL) @@ -134,19 +134,36 @@ void get_buffer(WINDOW *win) #ifndef NANO_SMALL allow_pending_sigwinch(TRUE); #endif - input = wgetch(win); + +#ifdef NANO_WIDE + if (!ISSET(NO_UTF8)) { + wint_t tmp; + + input_key_code = wget_wch(win, &tmp); + input = (int)tmp; + } else { +#endif + input = wgetch(win); + input_key_code = !is_byte_char(input); +#ifdef NANO_WIDE + } +#endif + #ifndef NANO_SMALL allow_pending_sigwinch(FALSE); #endif /* Increment the length of the keystroke buffer, save the value of * the keystroke in key, and set key_code to TRUE if the keystroke - * is an extended keypad value and hence shouldn't be treated as a - * multibyte character. */ + * is an extended keypad value or FALSE if it isn't. */ key_buffer_len++; key_buffer = (buffer *)nmalloc(sizeof(buffer)); key_buffer[0].key = input; - key_buffer[0].key_code = !is_byte_char(input); + key_buffer[0].key_code = +#ifdef NANO_WIDE + !ISSET(NO_UTF8) ? 
(input_key_code == KEY_CODE_YES) : +#endif + input_key_code; /* Read in the remaining characters using non-blocking input. */ nodelay(win, TRUE); @@ -155,73 +172,49 @@ void get_buffer(WINDOW *win) #ifndef NANO_SMALL allow_pending_sigwinch(TRUE); #endif - input = wgetch(win); -#ifndef NANO_SMALL - allow_pending_sigwinch(FALSE); -#endif +#ifdef NANO_WIDE + if (!ISSET(NO_UTF8)) { + wint_t tmp; + + input_key_code = wget_wch(win, &tmp); + input = (int)tmp; + } else { +#endif + input = wgetch(win); + input_key_code = !is_byte_char(input); +#ifdef NANO_WIDE + } +#endif /* If there aren't any more characters, stop reading. */ - if (input == ERR) + if ( +#ifdef NANO_WIDE + (!ISSET(NO_UTF8) && input_key_code == ERR) || +#endif + input == ERR) break; /* Otherwise, increment the length of the keystroke buffer, save * the value of the keystroke in key, and set key_code to TRUE - * if the keystroke is an extended keypad value and hence - * shouldn't be treated as a multibyte character. */ + * if the keystroke is an extended keypad value or FALSE if it + * isn't. */ key_buffer_len++; key_buffer = (buffer *)nrealloc(key_buffer, key_buffer_len * sizeof(buffer)); key_buffer[key_buffer_len - 1].key = input; - key_buffer[key_buffer_len - 1].key_code = !is_byte_char(input); + key_buffer[key_buffer_len - 1].key_code = +#ifdef NANO_WIDE + !ISSET(NO_UTF8) ? (input_key_code == KEY_CODE_YES) : +#endif + input_key_code; + +#ifndef NANO_SMALL + allow_pending_sigwinch(FALSE); +#endif } /* Switch back to non-blocking input. */ nodelay(win, FALSE); - -#ifdef NANO_WIDE - if (!ISSET(NO_UTF8)) { - size_t i; - buffer *clean_key_buffer = NULL; - size_t clean_key_buffer_len = 0; - - mbtowc(NULL, NULL, 0); - - /* Change all complete and valid multibyte keystrokes to - * their wide character values, discarding the others. 
*/ - for (i = 0; i < key_buffer_len; i++) { - wchar_t wide_key; - int wide_key_len; - - if (key_buffer[i].key_code) { - mbtowc(NULL, NULL, 0); - - wide_key_len = 1; - wide_key = key_buffer[i].key; - } else - wide_key_len = mbtowc(&wide_key, - (const char *)&key_buffer[i].key, 1); - - if (wide_key_len != -1) { - clean_key_buffer_len++; - clean_key_buffer = (buffer *)nrealloc(clean_key_buffer, - clean_key_buffer_len * sizeof(buffer)); - - clean_key_buffer[clean_key_buffer_len - 1].key = - (int)wide_key; - clean_key_buffer[clean_key_buffer_len - 1].key_code = - key_buffer[i].key_code; - } - } - - mbtowc(NULL, NULL, 0); - - /* Replace the default keystroke buffer with the non-(-1) - * keystroke buffer. */ - key_buffer_len = clean_key_buffer_len; - free(key_buffer); - key_buffer = clean_key_buffer; - } -#endif } /* Return the length of the default keystroke buffer. */ @@ -258,12 +251,10 @@ void unget_input(buffer *input, size_t input_len) #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) { size_t i; - - wctomb(NULL, 0); + char *key = charalloc(MB_CUR_MAX); /* Keep all valid wide keystrokes, discarding the others. */ for (i = 0; i < input_len; i++) { - char key[MB_LEN_MAX]; int key_len = input[i].key_code ? 1 : wctomb(key, (wchar_t)input[i].key); @@ -278,8 +269,7 @@ void unget_input(buffer *input, size_t input_len) } } - wctomb(NULL, 0); - + free(key); } else { #endif clean_input = input; -- 2.39.5