From fc693210d5044b0735bfed92433a7cfb979f5521 Mon Sep 17 00:00:00 2001 From: David Lawrence Ramsey Date: Thu, 23 Dec 2004 17:43:27 +0000 Subject: [PATCH] more steps toward UTF-8 support: port all the parts of DB's UTF-8 patch that I currently understand to current CVS, with modifications of mine to autodetect UTF-8 support and to display multibyte strings instead of wide strings git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2193 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- ChangeLog | 16 +++ configure.ac | 2 +- src/move.c | 4 +- src/nano.c | 96 ++++++++++---- src/nano.h | 1 + src/proto.h | 13 +- src/search.c | 7 +- src/utils.c | 154 ++++++++++++++++++++++ src/winio.c | 366 ++++++++++++++++++++++++++++++++++++++++----------- 9 files changed, 543 insertions(+), 116 deletions(-) diff --git a/ChangeLog b/ChangeLog index 993ee1f3..7da7f079 100644 --- a/ChangeLog +++ b/ChangeLog @@ -50,6 +50,16 @@ CVS code - in order for output to work properly. (DLR; buffered input/output based on ideas from mutt 1.4.2.1; double-Escape input of Unicode characters suggested by Michael Piefel) + - More steps toward wide character/multibyte character support. + Movement and cursor display in the edit window should now work + properly with files containing multibyte characters, and text + display of such files should work properly some of the time. + New functions control_rep(), parse_char(), move_left(), + move_right(), and display_string_len(); changes to do_left(), + do_right(), do_delete(), breakable(), break_line(), + do_output(), get_buffer(), unget_input(), actual_x(), + strnlenpt(), display_string(), titlebar(), and do_credits(). + (David Benbennick and DLR) - cut.c: do_cut_text() - If keep_cutbuffer is FALSE, only blow away the text in the @@ -92,6 +102,10 @@ CVS code - loop if there are no more paragraphs after the current one and the paragraph search left us on the magicline, so as to avoid a segfault. 
(DLR) + main() + - Try to automatically detect whether UTF-8 support is needed by + setting the NO_UTF8 flag if setlocale() returns a string that + doesn't contain "UTF-8". (DLR) - winio.c: titlebar() - Rename some variables for consistency, make space an int @@ -135,6 +149,8 @@ CVS code - - Remove specific references to control key shortcuts. (DLR) - Check for the wide version of ncurses, without which multibyte strings don't seem to be displayed properly. (DLR) + - Check for stddef.h and wchar.h, for those systems that need + one of the two for the wcwidth() prototype. (DLR) - doc/nanorc.sample: - Add return to the "c-file" regexes. (DLR) diff --git a/configure.ac b/configure.ac index c37f5ca9..d9db2881 100644 --- a/configure.ac +++ b/configure.ac @@ -40,7 +40,7 @@ AM_GNU_GETTEXT([external], [need-ngettext]) dnl Checks for header files. AC_HEADER_STDC -AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termio.h termios.h unistd.h) +AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h stddef.h termio.h termios.h unistd.h wchar.h) AC_CHECK_HEADER(regex.h, AC_MSG_CHECKING([for broken regexec]) AC_TRY_RUN([ diff --git a/src/move.c b/src/move.c index e3fa3717..148ce28d 100644 --- a/src/move.c +++ b/src/move.c @@ -252,7 +252,7 @@ void do_left(int allow_update) { size_t pww_save = placewewant; if (current_x > 0) - current_x--; + current_x = move_left(current->data, current_x); else if (current != fileage) { do_up(); current_x = strlen(current->data); @@ -274,7 +274,7 @@ void do_right(int allow_update) assert(current_x <= strlen(current->data)); if (current->data[current_x] != '\0') - current_x++; + current_x = move_right(current->data, current_x); else if (current->next != NULL) { do_down(); current_x = 0; diff --git a/src/nano.c b/src/nano.c index 8f885927..817774ea 100644 --- a/src/nano.c +++ b/src/nano.c @@ -1185,18 +1185,25 @@ void do_delete(void) placewewant = xplustabs(); if (current->data[current_x] != '\0') { - size_t linelen = 
strlen(current->data + current_x); + int char_len = parse_char(current->data + current_x, NULL, + NULL +#ifdef NANO_WIDE + , NULL +#endif + ); + size_t line_len = strlen(current->data + current_x); assert(current_x < strlen(current->data)); /* Let's get dangerous. */ - charmove(&current->data[current_x], &current->data[current_x + 1], - linelen); + charmove(&current->data[current_x], + &current->data[current_x + char_len], + line_len - char_len + 1); - null_at(&current->data, linelen + current_x - 1); + null_at(&current->data, current_x + line_len - char_len); #ifndef NANO_SMALL if (current_x < mark_beginx && mark_beginbuf == current) - mark_beginx--; + mark_beginx -= char_len; #endif } else if (current != filebot && (current->next != filebot || current->data[0] == '\0')) { @@ -1211,8 +1218,8 @@ void do_delete(void) if (current->data[current_x] == '\0') do_refresh = TRUE; - current->data = charealloc(current->data, current_x + - strlen(foo->data) + 1); + current->data = charealloc(current->data, + current_x + strlen(foo->data) + 1); strcpy(current->data + current_x, foo->data); #ifndef NANO_SMALL if (mark_beginbuf == current->next) { @@ -1227,13 +1234,13 @@ void do_delete(void) delete_node(foo); renumber(current); totlines--; + totsize--; #ifndef DISABLE_WRAPPING wrap_reset(); #endif } else return; - totsize--; set_modified(); #ifdef ENABLE_COLOR @@ -2494,15 +2501,21 @@ filestruct *backup_lines(filestruct *first_line, size_t par_len, size_t /* Is it possible to break line at or before goal? */ bool breakable(const char *line, ssize_t goal) { - for (; *line != '\0' && goal >= 0; line++) { + while (*line != '\0' && goal >= 0) { + size_t pos = 0; + if (isblank(*line)) return TRUE; - if (is_cntrl_char(*line)) - goal -= 2; - else - goal -= 1; + line += parse_char(line, NULL, &pos +#ifdef NANO_WIDE + , NULL +#endif + ); + + goal -= pos; } + /* If goal is not negative, the whole line (one word) was short * enough. 
*/ return goal >= 0; @@ -2522,32 +2535,49 @@ ssize_t break_line(const char *line, ssize_t goal, bool force) /* Current index in line. */ assert(line != NULL); - for (; *line != '\0' && goal >= 0; line++, cur_loc++) { + + while (*line != '\0' && goal >= 0) { + size_t pos = 0; + int line_len; + if (*line == ' ') space_loc = cur_loc; + assert(*line != '\t'); - if (is_cntrl_char(*line)) - goal -= 2; - else - goal--; + line_len = parse_char(line, NULL, &pos +#ifdef NANO_WIDE + , NULL +#endif + ); + + goal -= pos; + line += line_len; + cur_loc += line_len; } + if (goal >= 0) /* In fact, the whole line displays shorter than goal. */ return cur_loc; + if (space_loc == -1) { /* No space found short enough. */ - if (force) - for (; *line != '\0'; line++, cur_loc++) - if (*line == ' ' && *(line + 1) != ' ' && *(line + 1) != '\0') + if (force) { + for (; *line != '\0'; line++, cur_loc++) { + if (*line == ' ' && *(line + 1) != ' ' && + *(line + 1) != '\0') return cur_loc; - return -1; + } + return -1; + } } + /* Perhaps the character after space_loc is a space. But because * of justify_format(), there can be only two adjacent. */ if (*(line - cur_loc + space_loc + 1) == ' ' || *(line - cur_loc + space_loc + 1) == '\0') space_loc++; + return space_loc; } @@ -3639,13 +3669,7 @@ void do_output(int *kbinput, size_t kbinput_len) mark_beginx += key_len; #endif - { - /* FIXME: The movement functions should take multibyte - * characters into account. */ - int j; - for (j = 0; j < key_len; j++) - do_right(FALSE); - } + do_right(FALSE); #ifndef DISABLE_WRAPPING /* If we're wrapping text, we need to call edit_refresh(). */ @@ -3759,7 +3783,21 @@ int main(int argc, char **argv) }; #endif +#ifdef NANO_WIDE + { + /* If the locale set doesn't exist, or it exists but doesn't + * include the string "UTF-8", we shouldn't use UTF-8 + * support. 
*/ + char *locale = setlocale(LC_ALL, ""); + + if (locale == NULL || (locale != NULL && + strstr(locale, "UTF-8") == NULL)) + SET(NO_UTF8); + } +#else setlocale(LC_ALL, ""); +#endif + #ifdef ENABLE_NLS bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); diff --git a/src/nano.h b/src/nano.h index dcc31411..482dd703 100644 --- a/src/nano.h +++ b/src/nano.h @@ -83,6 +83,7 @@ #define N_(string) gettext_noop(string) /* Mark a string that will be sent to gettext later. */ +#include #include #include #include "config.h" diff --git a/src/proto.h b/src/proto.h index e4709915..3ef3aee2 100644 --- a/src/proto.h +++ b/src/proto.h @@ -475,7 +475,15 @@ int is_blank_char(int c); int is_cntrl_char(int c); bool is_byte_char(int c); int num_of_digits(int n); +unsigned char control_rep(unsigned char c); bool parse_num(const char *str, ssize_t *val); +int parse_char(const char *str, int *chr, size_t *col +#ifdef NANO_WIDE + , bool *bad_char +#endif + ); +size_t move_left(const char *str, size_t pos); +size_t move_right(const char *str, size_t pos); void align(char **strp); void null_at(char **data, size_t index); void unsunder(char *str, size_t true_len); @@ -570,7 +578,10 @@ void blank_edit(void); void blank_statusbar(void); void check_statusblank(void); void blank_bottombars(void); -char *display_string(const char *buf, size_t start_col, size_t len); +size_t display_string_len(const char *buf, size_t start_col, size_t + end_col); +char *display_string(const char *buf, size_t start_col, size_t len, bool + dollars); void nanoget_repaint(const char *buf, const char *inputbuf, size_t x); int nanogetstr(bool allow_tabs, const char *buf, const char *def, #ifndef NANO_SMALL diff --git a/src/search.c b/src/search.c index 29ad8127..82111fdd 100644 --- a/src/search.c +++ b/src/search.c @@ -83,7 +83,7 @@ void not_found_msg(const char *str) assert(str != NULL); - disp = display_string(str, 0, (COLS / 2) + 1); + disp = display_string(str, 0, (COLS / 2) + 1, FALSE); numchars = strnlen(disp, 
COLS / 2); statusbar(_("\"%.*s%s\" not found"), numchars, disp, @@ -150,7 +150,7 @@ int search_init(bool replacing, bool use_answer) #endif if (last_search[0] != '\0') { - char *disp = display_string(last_search, 0, COLS / 3); + char *disp = display_string(last_search, 0, COLS / 3, FALSE); buf = charalloc(COLS / 3 + 7); /* We use COLS / 3 here because we need to see more on the @@ -748,7 +748,8 @@ ssize_t do_replace_loop(const char *needle, const filestruct size_t xpt = xplustabs(); exp_word = display_string(current->data, xpt, - strnlenpt(current->data, match_len + current_x) - xpt); + strnlenpt(current->data, match_len + current_x) - xpt, + FALSE); curs_set(0); do_replace_highlight(TRUE, exp_word); diff --git a/src/utils.c b/src/utils.c index 950cb9e0..75c8765a 100644 --- a/src/utils.c +++ b/src/utils.c @@ -33,6 +33,10 @@ #include "proto.h" #include "nano.h" +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif + #ifdef HAVE_REGEX_H #ifdef BROKEN_REGEXEC #undef regexec @@ -92,6 +96,19 @@ int num_of_digits(int n) return i; } +/* c is a control character. It displays as ^@, ^?, or ^[ch] where ch + * is c + 64. We return that character. */ +unsigned char control_rep(unsigned char c) +{ + /* Treat newlines embedded in a line as encoded nulls. */ + if (c == '\n') + return '@'; + else if (c == NANO_CONTROL_8) + return '?'; + else + return c + 64; +} + /* Read a ssize_t from str, and store it in *val (if val is not NULL). * On error, we return FALSE and don't change *val. Otherwise, we * return TRUE. */ @@ -113,6 +130,143 @@ bool parse_num(const char *str, ssize_t *val) return TRUE; } +/* Parse a multi-byte character from str. Return the number of bytes + * used. If chr isn't NULL, store the wide character in it. If col + * isn't NULL, store the new display width in it. If *str is '\t', we + * expect col to have the current display width. If bad_char isn't + * NULL, set it to TRUE if we have a null byte or a bad multibyte + * character. 
*/ +int parse_char(const char *str, int *chr, size_t *col +#ifdef NANO_WIDE + , bool *bad_char +#endif + ) +{ + int wide_str, wide_str_len; + + assert(str != NULL); + +#ifdef NANO_WIDE + if (bad_char != NULL) + *bad_char = FALSE; + + if (!ISSET(NO_UTF8)) { + wchar_t tmp; + + /* Get the wide character equivalent of the multibyte + * character. */ + wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX); + wide_str = (int)tmp; + + /* If str contains a null byte or an invalid multibyte + * character, interpret str's first byte as a single-byte + * sequence and set bad_char to TRUE. */ + if (wide_str_len <= 0) { + wide_str_len = 1; + wide_str = (unsigned char)*str; + if (bad_char != NULL) + *bad_char = TRUE; + } + + /* Save the wide character in chr. */ + if (chr != NULL) + *chr = wide_str; + + /* Save the column width of the wide character in col. */ + if (col != NULL) { + /* If we have a tab, get its width in columns using the + * current value of col. */ + if (wide_str == '\t') + *col += tabsize - *col % tabsize; + /* If we have a control character, get its width using one + * column for the "^" that will be displayed in front of it, + * and the width in columns of its visible equivalent as + * returned by control_rep(). */ + else if (is_cntrl_char(wide_str)) { + char *ctrl_wide_str = charalloc(MB_CUR_MAX); + + (*col)++; + wide_str = control_rep((unsigned char)wide_str); + + if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1) + *col += wcwidth(wide_str); + + free(ctrl_wide_str); + /* If we have a normal character, get its width in columns + * normally. */ + } else + *col += wcwidth(wide_str); + } + } else { +#endif + /* Interpret str's first character as a single-byte sequence. */ + wide_str_len = 1; + wide_str = (unsigned char)*str; + + /* Save the single-byte sequence in chr as though it's a wide + * character. */ + if (chr != NULL) + *chr = wide_str; + + if (col != NULL) { + /* If we have a tab, get its width in columns using the + * current value of col. 
*/ + if (wide_str == '\t') + *col += tabsize - *col % tabsize; + /* If we have a control character, it's two columns wide: + * one column for the "^" that will be displayed in front of + * it, and one column for its visible equivalent as returned + * by control_rep(). */ + else if (is_cntrl_char(wide_str)) + *col += 2; + /* If we have a normal character, it's one column wide. */ + else + (*col)++; + } +#ifdef NANO_WIDE + } +#endif + + return wide_str_len; +} + +/* Return the index in str of the beginning of the character before the + * one at pos. */ +size_t move_left(const char *str, size_t pos) +{ + size_t pos_prev = pos; + + assert(str != NULL && pos <= strlen(str)); + + /* There is no library function to move backward one multibyte + * character. Here is the naive, O(pos) way to do it. */ + while (TRUE) { + int str_len = parse_char(str + pos - pos_prev, NULL, NULL +#ifdef NANO_WIDE + , NULL +#endif + ); + + if (pos_prev <= str_len) + break; + + pos_prev -= str_len; + } + + return pos - pos_prev; +} + +/* Return the index in str of the beginning of the character after the + * one at pos. */ +size_t move_right(const char *str, size_t pos) +{ + return pos + parse_char(str + pos, NULL, NULL +#ifdef NANO_WIDE + , NULL +#endif + ); +} + /* Fix the memory allocation for a string. 
*/ void align(char **strp) { diff --git a/src/winio.c b/src/winio.c index 427ff318..ee013fdc 100644 --- a/src/winio.c +++ b/src/winio.c @@ -32,6 +32,10 @@ #include "proto.h" #include "nano.h" +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif + static buffer *key_buffer = NULL; /* The default keystroke buffer, * containing all the keystrokes we have @@ -1625,38 +1629,50 @@ size_t actual_x(const char *str, size_t xplus) assert(str != NULL); - for (; length < xplus && *str != '\0'; i++, str++) { - if (*str == '\t') - length += tabsize - (length % tabsize); - else if (is_cntrl_char(*str)) - length += 2; - else - length++; - } - assert(length == strnlenpt(str - i, i)); - assert(i <= strlen(str - i)); + while (*str != '\0') { + int str_len = parse_char(str, NULL, &length +#ifdef NANO_WIDE + , NULL +#endif + ); - if (length > xplus) - i--; + if (length > xplus) + break; + + i += str_len; + str += str_len; + } return i; } /* A strlen() with tabs factored in, similar to xplustabs(). How many - * columns wide are the first size characters of buf? */ -size_t strnlenpt(const char *buf, size_t size) + * columns wide are the first size characters of str? */ +size_t strnlenpt(const char *str, size_t size) { size_t length = 0; + /* The screen display width to str[i]. */ - assert(buf != NULL); - for (; *buf != '\0' && size != 0; size--, buf++) { - if (*buf == '\t') - length += tabsize - (length % tabsize); - else if (is_cntrl_char(*buf)) - length += 2; - else - length++; + if (size == 0) + return 0; + + assert(str != NULL); + + while (*str != '\0') { + int str_len = parse_char(str, NULL, &length +#ifdef NANO_WIDE + , NULL +#endif + ); + + str += str_len; + + if (size <= str_len) + break; + + size -= str_len; } + return length; } @@ -1704,19 +1720,101 @@ void blank_bottombars(void) } } +/* buf is a multibyte string to be displayed. We need to expand tabs + * and control characters. How many bytes do we need to display buf + * properly, not counting the null terminator? 
start_col is the column + * of *buf (usually 0). We display to (end_col - 1). */ +size_t display_string_len(const char *buf, size_t start_col, size_t + end_col) +{ + size_t retval = 0; + + assert(buf != NULL); + + /* Throughout the loop, we maintain the fact that *buf displays at + * column start_col. */ + while (start_col <= end_col && *buf != '\0') { + int wide_buf; + /* The current wide character. */ + int wide_buf_len; + /* How many bytes wide is this character? */ + size_t old_col = start_col; + bool bad_char; + + wide_buf_len = parse_char(buf, &wide_buf, &start_col +#ifdef NANO_WIDE + , &bad_char +#endif + ); + +#ifdef NANO_WIDE + /* If buf contains a null byte or an invalid multibyte + * character, interpret its first byte as though it's a wide + * character. */ + if (!ISSET(NO_UTF8) && bad_char) { + char *bad_wide_buf = charalloc(MB_CUR_MAX); + int bad_wide_buf_len; + + /* If we have a control character, add one byte to account + * for the "^" that will be displayed in front of it, and + * translate the character to its visible equivalent as + * returned by control_rep(). */ + if (is_cntrl_char(wide_buf)) { + retval++; + wide_buf = control_rep((unsigned char)wide_buf); + } + + /* Translate the wide character to its multibyte + * equivalent. */ + bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf); + + if (bad_wide_buf_len != -1) + retval += bad_wide_buf_len; + + free(bad_wide_buf); + } else +#endif + /* If we have a tab, get its width in bytes using the current + * value of col. */ + if (wide_buf == '\t') + retval += start_col - old_col; + /* If we have a control character, add one byte to account for + * the "^" that will be displayed in front of it, and translate + * the byte to its visible equivalent as returned by + * control_rep(). 
*/ + else if (is_cntrl_char(wide_buf)) { + char ctrl_wide_buf = control_rep((unsigned char)wide_buf); + + retval += parse_char(&ctrl_wide_buf, NULL, NULL +#ifdef NANO_WIDE + , NULL +#endif + ) + 1; + + /* If we have a normal character, add its width in bytes + * normally. */ + } else + retval += wide_buf_len; + buf += wide_buf_len; + } + + return retval; +} + /* Convert buf into a string that can be displayed on screen. The * caller wants to display buf starting with column start_col, and * extending for at most len columns. start_col is zero-based. len is * one-based, so len == 0 means you get "" returned. The returned - * string is dynamically allocated, and should be freed. */ -char *display_string(const char *buf, size_t start_col, size_t len) + * string is dynamically allocated, and should be freed. If dollars is + * TRUE, the caller might put "$" at the beginning or end of the line if + * it's too long. */ +char *display_string(const char *buf, size_t start_col, size_t len, bool + dollars) { size_t start_index; /* Index in buf of first character shown in return value. */ size_t column; /* Screen column start_index corresponds to. */ - size_t end_index; - /* Index in buf of last character shown in return value. */ size_t alloc_len; /* The length of memory allocated for converted. */ char *converted; @@ -1724,54 +1822,155 @@ char *display_string(const char *buf, size_t start_col, size_t len) size_t index; /* Current position in converted. */ + /* If dollars is TRUE, make room for the "$" at the end of the + * line. Also make sure that we don't try to display only part of a + * multicolumn character there. 
*/ + if (dollars && len > 0 && strlenpt(buf) > start_col + len) + len--; + if (len == 0) return mallocstrcpy(NULL, ""); start_index = actual_x(buf, start_col); column = strnlenpt(buf, start_index); + assert(column <= start_col); - end_index = actual_x(buf, start_col + len - 1); - alloc_len = strnlenpt(buf, end_index + 1) - column; - if (len > alloc_len + column - start_col) - len = alloc_len + column - start_col; + + alloc_len = display_string_len(buf + start_index, start_col, + column + len) + 2; converted = charalloc(alloc_len + 1); - buf += start_index; index = 0; - for (; index < alloc_len; buf++) { - if (*buf == '\t') { + if (column > start_col || (dollars && column > 0 && + buf[start_index] != '\t')) { + int wide_buf, wide_buf_len; + + /* We don't display all of buf[start_index] since it starts to + * the left of the screen. */ + wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL +#ifdef NANO_WIDE + , NULL +#endif + ); + + if (is_cntrl_char(wide_buf)) { + if (column > start_col) { + char *ctrl_wide_buf = charalloc(MB_CUR_MAX); + int ctrl_wide_buf_len, i; + + wide_buf = control_rep((unsigned char)wide_buf); + ctrl_wide_buf_len = wctomb(ctrl_wide_buf, + (wchar_t)wide_buf); + + for (i = 0; i < ctrl_wide_buf_len; i++) + converted[index++] = ctrl_wide_buf[i]; + + free(ctrl_wide_buf); + start_index += wide_buf_len; + } + } else if (wcwidth(wide_buf) > 1) { + /* If dollars is TRUE, make room for the "$" at the + * beginning of the line. Also make sure that we don't try + * to display only part of a multicolumn character there. 
*/ + converted[0] = ' '; + index = 1; + if (dollars && column == start_col) { + converted[1] = ' '; + index = 2; + } + start_index += wide_buf_len; + } + } + + while (index < alloc_len && buf[start_index] != '\0') { + int wide_buf, wide_buf_len; + bool bad_char; + + wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL +#ifdef NANO_WIDE + , &bad_char +#endif + ); + +#ifdef NANO_WIDE + if (!ISSET(NO_UTF8) && bad_char) { + char *bad_wide_buf = charalloc(MB_CUR_MAX); + int bad_wide_buf_len, i; + + if (is_cntrl_char(wide_buf)) { + converted[index++] = '^'; + start_col++; + wide_buf = control_rep((unsigned char)wide_buf); + } + + bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf); + + for (i = 0; i < bad_wide_buf_len; i++) + converted[index++] = bad_wide_buf[i]; + + free(bad_wide_buf); + + start_col += wcwidth((wchar_t)wide_buf); + } else +#endif + if (wide_buf == '\t') { converted[index++] = #if !defined(NANO_SMALL) && defined(ENABLE_NANORC) ISSET(WHITESPACE_DISPLAY) ? whitespace[0] : #endif ' '; - while ((column + index) % tabsize) + start_col++; + while ((column + index) % tabsize) { converted[index++] = ' '; - } else if (is_cntrl_char(*buf)) { + start_col++; + } + } else if (is_cntrl_char(wide_buf)) { + char *ctrl_wide_buf = charalloc(MB_CUR_MAX); + int ctrl_wide_buf_len, i; + converted[index++] = '^'; - if (*buf == '\n') - /* Treat newlines embedded in a line as encoded nulls; - * the line in question should be run through unsunder() - * before reaching here. 
*/ - converted[index++] = '@'; - else if (*buf == NANO_CONTROL_8) - converted[index++] = '?'; - else - converted[index++] = *buf + 64; - } else if (*buf == ' ') + start_col++; + wide_buf = control_rep((unsigned char)wide_buf); + + ctrl_wide_buf_len = wctomb(ctrl_wide_buf, + (wchar_t)wide_buf); + + for (i = 0; i < ctrl_wide_buf_len; i++) + converted[index++] = ctrl_wide_buf[i]; + + free(ctrl_wide_buf); + + start_col += wcwidth((wchar_t)wide_buf); + } else if (wide_buf == ' ') { converted[index++] = #if !defined(NANO_SMALL) && defined(ENABLE_NANORC) ISSET(WHITESPACE_DISPLAY) ? whitespace[1] : #endif ' '; - else - converted[index++] = *buf; + start_col++; + } else { + int i; + + for (i = 0; i < wide_buf_len; i++) + converted[index++] = buf[start_index + i]; + +#ifdef NANO_WIDE + if (!ISSET(NO_UTF8)) + start_col += wcwidth((wchar_t)wide_buf); + else +#endif + start_col++; + } + + start_index += wide_buf_len; } - assert(len <= alloc_len + column - start_col); - charmove(converted, converted + start_col - column, len); - null_at(&converted, len); - return charealloc(converted, len + 1); + /* Make sure that converted is at most len columns wide. */ + converted[index] = '\0'; + index = actual_x(converted, len); + null_at(&converted, index); + + return converted; } /* Repaint the statusbar when getting a character in nanogetstr(). buf @@ -1796,10 +1995,12 @@ void nanoget_repaint(const char *buf, const char *inputbuf, size_t x) waddch(bottomwin, x_real < wid ? ' ' : '$'); if (COLS > 2) { size_t page_start = x_real - x_real % wid; - char *expanded = display_string(inputbuf, page_start, wid); + char *expanded = display_string(inputbuf, page_start, wid, + FALSE); assert(wid > 0); assert(strlen(expanded) <= wid); + waddstr(bottomwin, expanded); free(expanded); wmove(bottomwin, 0, COLS - wid + x_real - page_start); @@ -2249,21 +2450,19 @@ void titlebar(const char *path) { int space; /* The space we have available for display. 
*/ - size_t verlen = strlen(VERMSG) + 1; - /* The length of the version message. */ + size_t verlen = strlenpt(VERMSG) + 1; + /* The length of the version message in columns. */ const char *prefix; /* "File:", "Dir:", or "New Buffer". Goes before filename. */ size_t prefixlen; - /* strlen(prefix) + 1. */ + /* The length of the prefix in columns, plus one. */ const char *state; /* "Modified", "View", or spaces the length of "Modified". * Tells the state of this buffer. */ size_t statelen = 0; - /* strlen(state) + 1. */ + /* The length of the state in columns, plus one. */ char *exppath = NULL; /* The file name, expanded for display. */ - size_t exppathlen = 0; - /* strlen(exppath) + 1. */ bool newfie = FALSE; /* Do we say "New Buffer"? */ bool dots = FALSE; @@ -2299,10 +2498,10 @@ void titlebar(const char *path) state = _("View"); else { if (space > 0) - statelen = strnlen(_("Modified"), space - 1) + 1; + statelen = strnlenpt(_("Modified"), space - 1) + 1; state = &hblank[COLS - statelen]; } - statelen = strnlen(state, COLS); + statelen = strnlenpt(state, COLS); /* We need a space before state. */ if ((ISSET(MODIFIED) || ISSET(VIEW_MODE)) && statelen < COLS) statelen++; @@ -2322,7 +2521,7 @@ void titlebar(const char *path) } else prefix = _("File:"); assert(statelen < space); - prefixlen = strnlen(prefix, space - statelen); + prefixlen = strnlenpt(prefix, space - statelen); /* If newfie is FALSE, we need a space after prefix. 
*/ if (!newfie && prefixlen + statelen < space) prefixlen++; @@ -2337,36 +2536,40 @@ void titlebar(const char *path) if (!newfie) { size_t lenpt = strlenpt(path), start_col; - if (lenpt > space) - start_col = actual_x(path, lenpt - space); - else - start_col = 0; - exppath = display_string(path, start_col, space); dots = (lenpt > space); - exppathlen = strlen(exppath); + + if (dots) { + start_col = lenpt - space + 3; + space -= 3; + } else + start_col = 0; + + exppath = display_string(path, start_col, space, FALSE); } if (!dots) { + size_t exppathlen = newfie ? 0 : strlenpt(exppath); + /* The length of the expanded filename. */ + /* There is room for the whole filename, so we center it. */ waddnstr(topwin, hblank, (space - exppathlen) / 3); waddnstr(topwin, prefix, prefixlen); if (!newfie) { - assert(strlen(prefix) + 1 == prefixlen); + assert(strlenpt(prefix) + 1 == prefixlen); + waddch(topwin, ' '); waddstr(topwin, exppath); } } else { /* We will say something like "File: ...ename". */ waddnstr(topwin, prefix, prefixlen); - if (space == 0 || newfie) + if (space <= -3 || newfie) goto the_end; waddch(topwin, ' '); - waddnstr(topwin, "...", space); - if (space <= 3) + waddnstr(topwin, "...", space + 3); + if (space <= 0) goto the_end; - space -= 3; - assert(exppathlen == space + 3); - waddnstr(topwin, exppath + 3, space); + waddstr(topwin, exppath); } the_end: @@ -2414,17 +2617,17 @@ void statusbar(const char *msg, ...) 
blank_statusbar(); if (COLS >= 4) { - char *bar; - char *foo; + char *bar, *foo; size_t start_x = 0, foo_len; #if !defined(NANO_SMALL) && defined(ENABLE_NANORC) bool old_whitespace = ISSET(WHITESPACE_DISPLAY); + UNSET(WHITESPACE_DISPLAY); #endif bar = charalloc(COLS - 3); vsnprintf(bar, COLS - 3, msg, ap); va_end(ap); - foo = display_string(bar, 0, COLS - 4); + foo = display_string(bar, 0, COLS - 4, FALSE); #if !defined(NANO_SMALL) && defined(ENABLE_NANORC) if (old_whitespace) SET(WHITESPACE_DISPLAY); @@ -2923,7 +3126,7 @@ void update_line(const filestruct *fileptr, size_t index) /* Expand the line, replacing tabs with spaces, and control * characters with their displayed forms. */ - converted = display_string(fileptr->data, page_start, COLS); + converted = display_string(fileptr->data, page_start, COLS, TRUE); /* Paint the line. */ edit_add(fileptr, converted, line, page_start); @@ -3569,7 +3772,10 @@ void do_credits(void) "David Benbennick", "Ken Tyler", "Sven Guckes", - "Florian König", +#ifdef NANO_WIDE + !ISSET(NO_UTF8) ? "Florian K\xC3\xB6nig" : +#endif + "Florian König", "Pauli Virtanen", "Daniele Medri", "Clement Laforet", @@ -3644,7 +3850,7 @@ void do_credits(void) what = _(xlcredits[xlpos]); xlpos++; } - start_x = COLS / 2 - strlen(what) / 2 - 1; + start_x = COLS / 2 - strlenpt(what) / 2 - 1; mvwaddstr(edit, editwinrows - 1 - editwinrows % 2, start_x, what); } -- 2.39.5