more steps toward UTF-8 support: port all the parts of DB's UTF-8 patch

author David Lawrence Ramsey <pooka109@gmail.com>

Thu, 23 Dec 2004 17:43:27 +0000 (17:43 +0000)

committer David Lawrence Ramsey <pooka109@gmail.com>

Thu, 23 Dec 2004 17:43:27 +0000 (17:43 +0000)
author David Lawrence Ramsey <pooka109@gmail.com>
Thu, 23 Dec 2004 17:43:27 +0000 (17:43 +0000)
committer David Lawrence Ramsey <pooka109@gmail.com>
Thu, 23 Dec 2004 17:43:27 +0000 (17:43 +0000)
diff --git a/ChangeLog b/ChangeLog

index 993ee1f3389adb8dfcf7223154e8e3cc62bdd402..7da7f079dff886a5a0b36865ba16ddfd9f315b17 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -50,6 +50,16 @@ CVS code -
           in order for output to work properly. (DLR; buffered
           input/output based on ideas from mutt 1.4.2.1; double-Escape
           input of Unicode characters suggested by Michael Piefel)
+       - More steps toward wide character/multibyte character support.
+         Movement and cursor display in the edit window should now work
+         properly with files containing multibyte characters, and text
+         display of such files should work properly some of the time.
+         New functions control_rep(), parse_char(), move_left(),
+         move_right(), and display_string_len(); changes to do_left(),
+         do_right(), do_delete(), breakable(), break_line(),
+         do_output(), get_buffer(), unget_input(), actual_x(),
+         strnlenpt(), display_string(), titlebar(), and do_credits().
+         (David Benbennick and DLR)
  - cut.c:
    do_cut_text()
         - If keep_cutbuffer is FALSE, only blow away the text in the
@@ -92,6 +102,10 @@ CVS code -
           loop if there are no more paragraphs after the current one and
           the paragraph search left us on the magicline, so as to avoid
           a segfault. (DLR)
+  main()
+       - Try to automatically detect whether UTF-8 support is needed by
+         setting the NO_UTF8 flag if setlocale() returns a string that
+         doesn't contain "UTF-8". (DLR)
  - winio.c:
    titlebar()
         - Rename some variables for consistency, make space an int
@@ -135,6 +149,8 @@ CVS code -
         - Remove specific references to control key shortcuts. (DLR)
         - Check for the wide version of ncurses, without which multibyte
           strings don't seem to be displayed properly. (DLR)
+       - Check for stddef.h and wchar.h, for those systems that need
+         one of the two for the wcwidth() prototype. (DLR)
  - doc/nanorc.sample:
         - Add return to the "c-file" regexes. (DLR)
  
diff --git a/configure.ac b/configure.ac

index c37f5ca9d7a3794c6d58306a532518cd562412aa..d9db28814895161d57a3015deaf564c0f8c578cc 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -40,7 +40,7 @@ AM_GNU_GETTEXT([external], [need-ngettext])
  
  dnl Checks for header files.
  AC_HEADER_STDC
-AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h termio.h termios.h unistd.h)
+AC_CHECK_HEADERS(fcntl.h getopt.h libintl.h limits.h regex.h stddef.h termio.h termios.h unistd.h wchar.h)
  AC_CHECK_HEADER(regex.h,
      AC_MSG_CHECKING([for broken regexec])
      AC_TRY_RUN([
diff --git a/src/move.c b/src/move.c

index e3fa371733b678423e84de6011336ce30efb5043..148ce28dc50d0b368acfc09210b5fe0385af7dd1 100644 (file)
--- a/src/move.c
+++ b/src/move.c
@@ -252,7 +252,7 @@ void do_left(int allow_update)
  {
      size_t pww_save = placewewant;
      if (current_x > 0)
-       current_x--;
+       current_x = move_left(current->data, current_x);
      else if (current != fileage) {
         do_up();
         current_x = strlen(current->data);
@@ -274,7 +274,7 @@ void do_right(int allow_update)
      assert(current_x <= strlen(current->data));
  
      if (current->data[current_x] != '\0')
-       current_x++;
+       current_x = move_right(current->data, current_x);
      else if (current->next != NULL) {
         do_down();
         current_x = 0;
diff --git a/src/nano.c b/src/nano.c

index 8f885927f0fd2313590971c78e60ef55cfddf810..817774ea1890f5cb7491b3a9d153a0b4958a254b 100644 (file)
--- a/src/nano.c
+++ b/src/nano.c
@@ -1185,18 +1185,25 @@ void do_delete(void)
      placewewant = xplustabs();
  
      if (current->data[current_x] != '\0') {
-       size_t linelen = strlen(current->data + current_x);
+       int char_len = parse_char(current->data + current_x, NULL,
+               NULL
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               );
+       size_t line_len = strlen(current->data + current_x);
  
         assert(current_x < strlen(current->data));
  
         /* Let's get dangerous. */
-       charmove(&current->data[current_x], &current->data[current_x + 1],
-               linelen);
+       charmove(&current->data[current_x],
+               &current->data[current_x + char_len],
+               line_len - char_len + 1);
  
-       null_at(&current->data, linelen + current_x - 1);
+       null_at(&current->data, current_x + line_len - char_len);
  #ifndef NANO_SMALL
         if (current_x < mark_beginx && mark_beginbuf == current)
-           mark_beginx--;
+           mark_beginx -= char_len;
  #endif
      } else if (current != filebot && (current->next != filebot ||
         current->data[0] == '\0')) {
@@ -1211,8 +1218,8 @@ void do_delete(void)
         if (current->data[current_x] == '\0')
             do_refresh = TRUE;
  
-       current->data = charealloc(current->data, current_x +
-               strlen(foo->data) + 1);
+       current->data = charealloc(current->data,
+               current_x + strlen(foo->data) + 1);
         strcpy(current->data + current_x, foo->data);
  #ifndef NANO_SMALL
         if (mark_beginbuf == current->next) {
@@ -1227,13 +1234,13 @@ void do_delete(void)
         delete_node(foo);
         renumber(current);
         totlines--;
+       totsize--;
  #ifndef DISABLE_WRAPPING
         wrap_reset();
  #endif
      } else
         return;
  
-    totsize--;
      set_modified();
  
  #ifdef ENABLE_COLOR
@@ -2494,15 +2501,21 @@ filestruct *backup_lines(filestruct *first_line, size_t par_len, size_t
  /* Is it possible to break line at or before goal? */
  bool breakable(const char *line, ssize_t goal)
  {
-    for (; *line != '\0' && goal >= 0; line++) {
+    while (*line != '\0' && goal >= 0) {
+       size_t pos = 0;
+
         if (isblank(*line))
             return TRUE;
  
-       if (is_cntrl_char(*line))
-           goal -= 2;
-       else
-           goal -= 1;
+       line += parse_char(line, NULL, &pos
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               );
+
+       goal -= pos;
      }
+
      /* If goal is not negative, the whole line (one word) was short
       * enough. */
      return goal >= 0;
@@ -2522,32 +2535,49 @@ ssize_t break_line(const char *line, ssize_t goal, bool force)
         /* Current index in line. */
  
      assert(line != NULL);
-    for (; *line != '\0' && goal >= 0; line++, cur_loc++) {
+
+    while (*line != '\0' && goal >= 0) {
+       size_t pos = 0;
+       int line_len;
+
         if (*line == ' ')
             space_loc = cur_loc;
+
         assert(*line != '\t');
  
-       if (is_cntrl_char(*line))
-           goal -= 2;
-       else
-           goal--;
+       line_len = parse_char(line, NULL, &pos
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               );
+
+       goal -= pos;
+       line += line_len;
+       cur_loc += line_len;
      }
+
      if (goal >= 0)
         /* In fact, the whole line displays shorter than goal. */
         return cur_loc;
+
      if (space_loc == -1) {
         /* No space found short enough. */
-       if (force)
-           for (; *line != '\0'; line++, cur_loc++)
-               if (*line == ' ' && *(line + 1) != ' ' && *(line + 1) != '\0')
+       if (force) {
+           for (; *line != '\0'; line++, cur_loc++) {
+               if (*line == ' ' && *(line + 1) != ' ' &&
+                       *(line + 1) != '\0')
                     return cur_loc;
-       return -1;
+           }
+           return -1;
+       }
      }
+
      /* Perhaps the character after space_loc is a space.  But because
       * of justify_format(), there can be only two adjacent. */
      if (*(line - cur_loc + space_loc + 1) == ' ' ||
         *(line - cur_loc + space_loc + 1) == '\0')
         space_loc++;
+
      return space_loc;
  }
  
@@ -3639,13 +3669,7 @@ void do_output(int *kbinput, size_t kbinput_len)
             mark_beginx += key_len;
  #endif
  
-       {
-           /* FIXME: The movement functions should take multibyte
-            * characters into account. */
-           int j;
-           for (j = 0; j < key_len; j++)
-               do_right(FALSE);
-       }
+       do_right(FALSE);
  
  #ifndef DISABLE_WRAPPING
         /* If we're wrapping text, we need to call edit_refresh(). */
@@ -3759,7 +3783,21 @@ int main(int argc, char **argv)
      };
  #endif
  
+#ifdef NANO_WIDE
+    {
+       /* If the locale set doesn't exist, or it exists but doesn't
+        * include the string "UTF-8", we shouldn't use UTF-8
+        * support. */
+       char *locale = setlocale(LC_ALL, "");
+
+       if (locale == NULL || (locale != NULL &&
+               strstr(locale, "UTF-8") == NULL))
+           SET(NO_UTF8);
+    }
+#else
      setlocale(LC_ALL, "");
+#endif
+
  #ifdef ENABLE_NLS
      bindtextdomain(PACKAGE, LOCALEDIR);
      textdomain(PACKAGE);
diff --git a/src/nano.h b/src/nano.h

index dcc31411d9af73285ab990a2e826f2045f04eb7f..482dd703c9d7de8c028ecc41cdb3ca2ad75bb5af 100644 (file)
--- a/src/nano.h
+++ b/src/nano.h
@@ -83,6 +83,7 @@
  #define N_(string) gettext_noop(string)
         /* Mark a string that will be sent to gettext later. */
  
+#include <stddef.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include "config.h"
diff --git a/src/proto.h b/src/proto.h

index e47099154fc4a5b3c24bc5c2e04cf0f85166260f..3ef3aee24c0bbe9214e99dc3d7014a1e7b721795 100644 (file)
--- a/src/proto.h
+++ b/src/proto.h
@@ -475,7 +475,15 @@ int is_blank_char(int c);
  int is_cntrl_char(int c);
  bool is_byte_char(int c);
  int num_of_digits(int n);
+unsigned char control_rep(unsigned char c);
  bool parse_num(const char *str, ssize_t *val);
+int parse_char(const char *str, int *chr, size_t *col
+#ifdef NANO_WIDE
+       , bool *bad_char
+#endif
+       );
+size_t move_left(const char *str, size_t pos);
+size_t move_right(const char *str, size_t pos);
  void align(char **strp);
  void null_at(char **data, size_t index);
  void unsunder(char *str, size_t true_len);
@@ -570,7 +578,10 @@ void blank_edit(void);
  void blank_statusbar(void);
  void check_statusblank(void);
  void blank_bottombars(void);
-char *display_string(const char *buf, size_t start_col, size_t len);
+size_t display_string_len(const char *buf, size_t start_col, size_t
+       end_col);
+char *display_string(const char *buf, size_t start_col, size_t len, bool
+       dollars);
  void nanoget_repaint(const char *buf, const char *inputbuf, size_t x);
  int nanogetstr(bool allow_tabs, const char *buf, const char *def,
  #ifndef NANO_SMALL
diff --git a/src/search.c b/src/search.c

index 29ad8127c33a4d364cabfcf08b76fdb4eba49329..82111fdd90b6e91ad10051585d14d113e3b5e028 100644 (file)
--- a/src/search.c
+++ b/src/search.c
@@ -83,7 +83,7 @@ void not_found_msg(const char *str)
   
      assert(str != NULL);
  
-    disp = display_string(str, 0, (COLS / 2) + 1);
+    disp = display_string(str, 0, (COLS / 2) + 1, FALSE);
      numchars = strnlen(disp, COLS / 2);
  
      statusbar(_("\"%.*s%s\" not found"), numchars, disp,
@@ -150,7 +150,7 @@ int search_init(bool replacing, bool use_answer)
  #endif
  
      if (last_search[0] != '\0') {
-       char *disp = display_string(last_search, 0, COLS / 3);
+       char *disp = display_string(last_search, 0, COLS / 3, FALSE);
  
         buf = charalloc(COLS / 3 + 7);
         /* We use COLS / 3 here because we need to see more on the
@@ -748,7 +748,8 @@ ssize_t do_replace_loop(const char *needle, const filestruct
             size_t xpt = xplustabs();
  
             exp_word = display_string(current->data, xpt,
-               strnlenpt(current->data, match_len + current_x) - xpt);
+               strnlenpt(current->data, match_len + current_x) - xpt,
+               FALSE);
  
             curs_set(0);
             do_replace_highlight(TRUE, exp_word);
diff --git a/src/utils.c b/src/utils.c

index 950cb9e036542fcf38a85d0a5ccd5ec2c76585cb..75c8765a82a7067f920ddec2d42c8d38077420de 100644 (file)
--- a/src/utils.c
+++ b/src/utils.c
@@ -33,6 +33,10 @@
  #include "proto.h"
  #include "nano.h"
  
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
  #ifdef HAVE_REGEX_H
  #ifdef BROKEN_REGEXEC
  #undef regexec
@@ -92,6 +96,19 @@ int num_of_digits(int n)
      return i;
  }
  
+/* c is a control character, displayed as "^" followed by a visible
+ * character.  Return that visible character ('@', '?', or c + 64). */
+unsigned char control_rep(unsigned char c)
+{
+    /* Treat newlines embedded in a line as encoded nulls ("^@"). */
+    if (c == '\n')
+       return '@';
+    else if (c == NANO_CONTROL_8)
+       return '?';
+    else
+       return c + 64;
+}
+
  /* Read a ssize_t from str, and store it in *val (if val is not NULL).
   * On error, we return FALSE and don't change *val.  Otherwise, we
   * return TRUE. */
@@ -113,6 +130,143 @@ bool parse_num(const char *str, ssize_t *val)
      return TRUE;
  }
  
+/* Parse a multibyte character from str.  Return the number of bytes
+ * used.  If chr isn't NULL, store the wide character in it.  If col
+ * isn't NULL, add the character's display width to *col, which must
+ * hold the current display width on entry (a tab advances it to the
+ * next tab stop).  If bad_char (NANO_WIDE only) isn't NULL, set it to
+ * TRUE if we have a null byte or a bad multibyte character. */
+int parse_char(const char *str, int *chr, size_t *col
+#ifdef NANO_WIDE
+       , bool *bad_char
+#endif
+       )
+{
+    int wide_str, wide_str_len;
+
+    assert(str != NULL);
+
+#ifdef NANO_WIDE
+    if (bad_char != NULL)
+       *bad_char = FALSE;
+
+    if (!ISSET(NO_UTF8)) {
+       wchar_t tmp;
+
+       /* Get the wide character equivalent of the multibyte character.
+        * NOTE(review): tmp may be unset here if mbtowc() fails. */
+       wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX);
+       wide_str = (int)tmp;
+
+       /* If mbtowc() returned 0 (null byte) or -1 (invalid multibyte
+        * sequence), interpret str's first byte as a single-byte
+        * sequence and set bad_char to TRUE. */
+       if (wide_str_len <= 0) {
+           wide_str_len = 1;
+           wide_str = (unsigned char)*str;
+           if (bad_char != NULL)
+               *bad_char = TRUE;
+       }
+
+       /* Save the wide character in chr. */
+       if (chr != NULL)
+           *chr = wide_str;
+
+       /* Add the column width of the wide character to col. */
+       if (col != NULL) {
+           /* If we have a tab, get its width in columns using the
+            * current value of col. */
+           if (wide_str == '\t')
+               *col += tabsize - *col % tabsize;
+           /* If we have a control character, get its width using one
+            * column for the "^" that will be displayed in front of it,
+            * and the width in columns of its visible equivalent as
+            * returned by control_rep(). */
+           else if (is_cntrl_char(wide_str)) {
+               char *ctrl_wide_str = charalloc(MB_CUR_MAX);
+
+               (*col)++;
+               wide_str = control_rep((unsigned char)wide_str);
+
+               if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1)
+                   *col += wcwidth(wide_str);
+
+               free(ctrl_wide_str);
+           /* Otherwise, add the width that wcwidth() reports.
+            * NOTE(review): a wcwidth() result of -1 isn't handled. */
+           } else
+               *col += wcwidth(wide_str);
+       }
+    } else {
+#endif
+       /* Interpret str's first character as a single-byte sequence. */
+       wide_str_len = 1;
+       wide_str = (unsigned char)*str;
+
+       /* Save the single-byte sequence in chr as though it's a wide
+        * character. */
+       if (chr != NULL)
+           *chr = wide_str;
+
+       if (col != NULL) {
+           /* If we have a tab, get its width in columns using the
+            * current value of col. */
+           if (wide_str == '\t')
+               *col += tabsize - *col % tabsize;
+           /* If we have a control character, it's two columns wide:
+            * one column for the "^" that will be displayed in front of
+            * it, and one column for its visible equivalent as returned
+            * by control_rep(). */
+           else if (is_cntrl_char(wide_str))
+               *col += 2;
+           /* If we have a normal character, it's one column wide. */
+           else
+               (*col)++;
+       }
+#ifdef NANO_WIDE
+    }
+#endif
+
+    return wide_str_len;
+}
+
+/* Return the index in str of the beginning of the character before the
+ * one at pos (0 if pos is 0), scanning forward from the start of str. */
+size_t move_left(const char *str, size_t pos)
+{
+    size_t pos_prev = pos;
+
+    assert(str != NULL && pos <= strlen(str));
+
+    /* No library call steps back one multibyte character, so walk
+     * forward from str[0] (O(pos)); pos_prev is the distance to pos. */
+    while (TRUE) {
+       int str_len = parse_char(str + pos - pos_prev, NULL, NULL
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               );
+
+       if (pos_prev <= str_len)
+           break;
+
+       pos_prev -= str_len;
+    }
+
+    return pos - pos_prev;
+}
+
+/* Return the index in str of the beginning of the character after the
+ * one at pos.  The result is always past pos, even if str[pos] is '\0'. */
+size_t move_right(const char *str, size_t pos)
+{
+    return pos + parse_char(str + pos, NULL, NULL
+#ifdef NANO_WIDE
+       , NULL
+#endif
+       );
+}
+
  /* Fix the memory allocation for a string. */
  void align(char **strp)
  {
diff --git a/src/winio.c b/src/winio.c

index 427ff3187784a9257d96dd9329e1f74e958d0121..ee013fdca79f035252f9b64cf6804d357f0213b2 100644 (file)
--- a/src/winio.c
+++ b/src/winio.c
@@ -32,6 +32,10 @@
  #include "proto.h"
  #include "nano.h"
  
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
  static buffer *key_buffer = NULL;
                                 /* The default keystroke buffer,
                                  * containing all the keystrokes we have
@@ -1625,38 +1629,50 @@ size_t actual_x(const char *str, size_t xplus)
  
      assert(str != NULL);
  
-    for (; length < xplus && *str != '\0'; i++, str++) {
-       if (*str == '\t')
-           length += tabsize - (length % tabsize);
-       else if (is_cntrl_char(*str))
-           length += 2;
-       else
-           length++;
-    }
-    assert(length == strnlenpt(str - i, i));
-    assert(i <= strlen(str - i));
+    while (*str != '\0') {
+       int str_len = parse_char(str, NULL, &length
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               );
  
-    if (length > xplus)
-       i--;
+       if (length > xplus)
+           break;
+
+       i += str_len;
+       str += str_len;
+    }
  
      return i;
  }
  
  /* A strlen() with tabs factored in, similar to xplustabs().  How many
- * columns wide are the first size characters of buf? */
-size_t strnlenpt(const char *buf, size_t size)
+ * columns wide are the first size characters of str? */
+size_t strnlenpt(const char *str, size_t size)
  {
      size_t length = 0;
+       /* The screen display width to str[i]. */
  
-    assert(buf != NULL);
-    for (; *buf != '\0' && size != 0; size--, buf++) {
-       if (*buf == '\t')
-           length += tabsize - (length % tabsize);
-       else if (is_cntrl_char(*buf))
-           length += 2;
-       else
-           length++;
+    if (size == 0)
+       return 0;
+
+    assert(str != NULL);
+
+    while (*str != '\0') {
+       int str_len = parse_char(str, NULL, &length
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               );
+
+       str += str_len;
+
+       if (size <= str_len)
+           break;
+
+       size -= str_len;
      }
+
      return length;
  }
  
@@ -1704,19 +1720,101 @@ void blank_bottombars(void)
      }
  }
  
+/* buf is a multibyte string to be displayed, with tabs and control
+ * characters expanded.  Return how many bytes that display needs, not
+ * counting the null terminator.  start_col is the column of *buf; every
+ * character starting at or before column end_col is counted. */
+size_t display_string_len(const char *buf, size_t start_col, size_t
+       end_col)
+{
+    size_t retval = 0;
+
+    assert(buf != NULL);
+
+    /* Throughout the loop, we maintain the fact that *buf displays at
+     * column start_col. */
+    while (start_col <= end_col && *buf != '\0') {
+       int wide_buf;
+           /* The current wide character. */
+       int wide_buf_len;
+           /* How many bytes wide is this character? */
+       size_t old_col = start_col;
+       bool bad_char;
+
+       wide_buf_len = parse_char(buf, &wide_buf, &start_col
+#ifdef NANO_WIDE
+               , &bad_char
+#endif
+               );
+
+#ifdef NANO_WIDE
+       /* If buf contains a null byte or an invalid multibyte
+        * character, interpret its first byte as though it's a wide
+        * character. */
+       if (!ISSET(NO_UTF8) && bad_char) {
+           char *bad_wide_buf = charalloc(MB_CUR_MAX);
+           int bad_wide_buf_len;
+
+           /* If we have a control character, add one byte to account
+            * for the "^" that will be displayed in front of it, and
+            * translate the character to its visible equivalent as
+            * returned by control_rep(). */
+           if (is_cntrl_char(wide_buf)) {
+               retval++;
+               wide_buf = control_rep((unsigned char)wide_buf);
+           }
+
+           /* Translate the wide character to its multibyte
+            * equivalent. */
+           bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
+
+           if (bad_wide_buf_len != -1)
+               retval += bad_wide_buf_len;
+
+           free(bad_wide_buf);
+       } else
+#endif
+       /* If we have a tab, it expands to one byte per column, so add
+        * the number of columns that parse_char() advanced. */
+       if (wide_buf == '\t')
+           retval += start_col - old_col;
+       /* If we have a control character, add one byte for the "^" shown
+        * in front of it plus the byte length of its control_rep()
+        * equivalent.  NOTE(review): ctrl_wide_buf is a lone char with no
+        * null terminator, yet parse_char() may hand it to mbtowc(). */
+       else if (is_cntrl_char(wide_buf)) {
+           char ctrl_wide_buf = control_rep((unsigned char)wide_buf);
+
+           retval += parse_char(&ctrl_wide_buf, NULL, NULL
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               ) + 1;
+
+       /* If we have a normal character, add its width in bytes
+        * normally. */
+       } else
+           retval += wide_buf_len;
+       buf += wide_buf_len;
+    }
+
+    return retval;
+}
+
  /* Convert buf into a string that can be displayed on screen.  The
   * caller wants to display buf starting with column start_col, and
   * extending for at most len columns.  start_col is zero-based.  len is
   * one-based, so len == 0 means you get "" returned.  The returned
- * string is dynamically allocated, and should be freed. */
-char *display_string(const char *buf, size_t start_col, size_t len)
+ * string is dynamically allocated, and should be freed.  If dollars is
+ * TRUE, the caller might put "$" at the beginning or end of the line if
+ * it's too long. */
+char *display_string(const char *buf, size_t start_col, size_t len, bool
+       dollars)
  {
      size_t start_index;
         /* Index in buf of first character shown in return value. */
      size_t column;
         /* Screen column start_index corresponds to. */
-    size_t end_index;
-       /* Index in buf of last character shown in return value. */
      size_t alloc_len;
         /* The length of memory allocated for converted. */
      char *converted;
@@ -1724,54 +1822,155 @@ char *display_string(const char *buf, size_t start_col, size_t len)
      size_t index;
         /* Current position in converted. */
  
+    /* If dollars is TRUE, make room for the "$" at the end of the
+     * line.  Also make sure that we don't try to display only part of a
+     * multicolumn character there. */
+    if (dollars && len > 0 && strlenpt(buf) > start_col + len)
+       len--;
+
      if (len == 0)
         return mallocstrcpy(NULL, "");
  
      start_index = actual_x(buf, start_col);
      column = strnlenpt(buf, start_index);
+
      assert(column <= start_col);
-    end_index = actual_x(buf, start_col + len - 1);
-    alloc_len = strnlenpt(buf, end_index + 1) - column;
-    if (len > alloc_len + column - start_col)
-       len = alloc_len + column - start_col;
+
+    alloc_len = display_string_len(buf + start_index, start_col,
+       column + len) + 2;
      converted = charalloc(alloc_len + 1);
-    buf += start_index;
      index = 0;
  
-    for (; index < alloc_len; buf++) {
-       if (*buf == '\t') {
+    if (column > start_col || (dollars && column > 0 &&
+               buf[start_index] != '\t')) {
+       int wide_buf, wide_buf_len;
+
+       /* We don't display all of buf[start_index] since it starts to
+        * the left of the screen. */
+       wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
+#ifdef NANO_WIDE
+               , NULL
+#endif
+               );
+
+       if (is_cntrl_char(wide_buf)) {
+           if (column > start_col) {
+               char *ctrl_wide_buf = charalloc(MB_CUR_MAX);
+               int ctrl_wide_buf_len, i;
+
+               wide_buf = control_rep((unsigned char)wide_buf);
+               ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
+                       (wchar_t)wide_buf);
+
+               for (i = 0; i < ctrl_wide_buf_len; i++)
+                   converted[index++] = ctrl_wide_buf[i];
+
+               free(ctrl_wide_buf);
+               start_index += wide_buf_len;
+           }
+       } else if (wcwidth(wide_buf) > 1) {
+           /* If dollars is TRUE, make room for the "$" at the
+            * beginning of the line.  Also make sure that we don't try
+            * to display only part of a multicolumn character there. */
+           converted[0] = ' ';
+           index = 1;
+           if (dollars && column == start_col) {
+               converted[1] = ' ';
+               index = 2;
+           }
+           start_index += wide_buf_len;
+       }
+    }
+
+    while (index < alloc_len && buf[start_index] != '\0') {
+       int wide_buf, wide_buf_len;
+       bool bad_char;
+
+       wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL
+#ifdef NANO_WIDE
+               , &bad_char
+#endif
+               );
+
+#ifdef NANO_WIDE
+       if (!ISSET(NO_UTF8) && bad_char) {
+           char *bad_wide_buf = charalloc(MB_CUR_MAX);
+           int bad_wide_buf_len, i;
+
+           if (is_cntrl_char(wide_buf)) {
+               converted[index++] = '^';
+               start_col++;
+               wide_buf = control_rep((unsigned char)wide_buf);
+           }
+
+           bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf);
+
+           for (i = 0; i < bad_wide_buf_len; i++)
+               converted[index++] = bad_wide_buf[i];
+
+           free(bad_wide_buf);
+
+           start_col += wcwidth((wchar_t)wide_buf);
+       } else
+#endif
+       if (wide_buf == '\t') {
             converted[index++] =
  #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
                 ISSET(WHITESPACE_DISPLAY) ? whitespace[0] :
  #endif
                 ' '; 
-           while ((column + index) % tabsize)
+           start_col++;
+           while ((column + index) % tabsize) {
                 converted[index++] = ' ';
-       } else if (is_cntrl_char(*buf)) {
+               start_col++;
+           }
+       } else if (is_cntrl_char(wide_buf)) {
+           char *ctrl_wide_buf = charalloc(MB_CUR_MAX);
+           int ctrl_wide_buf_len, i;
+
             converted[index++] = '^';
-           if (*buf == '\n')
-               /* Treat newlines embedded in a line as encoded nulls;
-                * the line in question should be run through unsunder()
-                * before reaching here. */
-               converted[index++] = '@';
-           else if (*buf == NANO_CONTROL_8)
-               converted[index++] = '?';
-           else
-               converted[index++] = *buf + 64;
-       } else if (*buf == ' ')
+           start_col++;
+           wide_buf = control_rep((unsigned char)wide_buf);
+
+           ctrl_wide_buf_len = wctomb(ctrl_wide_buf,
+               (wchar_t)wide_buf);
+
+           for (i = 0; i < ctrl_wide_buf_len; i++)
+               converted[index++] = ctrl_wide_buf[i];
+
+           free(ctrl_wide_buf);
+
+           start_col += wcwidth((wchar_t)wide_buf);
+       } else if (wide_buf == ' ') {
             converted[index++] =
  #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
                 ISSET(WHITESPACE_DISPLAY) ? whitespace[1] :
  #endif
                 ' ';
-       else
-           converted[index++] = *buf;
+           start_col++;
+       } else {
+           int i;
+
+           for (i = 0; i < wide_buf_len; i++)
+               converted[index++] = buf[start_index + i];
+
+#ifdef NANO_WIDE
+           if (!ISSET(NO_UTF8))
+               start_col += wcwidth((wchar_t)wide_buf);
+           else
+#endif
+               start_col++;
+       }
+
+       start_index += wide_buf_len;
      }
-    assert(len <= alloc_len + column - start_col);
-    charmove(converted, converted + start_col - column, len);
-    null_at(&converted, len);
  
-    return charealloc(converted, len + 1);
+    /* Make sure that converted is at most len columns wide. */
+    converted[index] = '\0';
+    index = actual_x(converted, len);
+    null_at(&converted, index);
+
+    return converted;
  }
  
  /* Repaint the statusbar when getting a character in nanogetstr().  buf
@@ -1796,10 +1995,12 @@ void nanoget_repaint(const char *buf, const char *inputbuf, size_t x)
         waddch(bottomwin, x_real < wid ? ' ' : '$');
      if (COLS > 2) {
         size_t page_start = x_real - x_real % wid;
-       char *expanded = display_string(inputbuf, page_start, wid);
+       char *expanded = display_string(inputbuf, page_start, wid,
+               FALSE);
  
         assert(wid > 0);
         assert(strlen(expanded) <= wid);
+
         waddstr(bottomwin, expanded);
         free(expanded);
         wmove(bottomwin, 0, COLS - wid + x_real - page_start);
@@ -2249,21 +2450,19 @@ void titlebar(const char *path)
  {
      int space;
         /* The space we have available for display. */
-    size_t verlen = strlen(VERMSG) + 1;
-       /* The length of the version message. */
+    size_t verlen = strlenpt(VERMSG) + 1;
+       /* The length of the version message in columns. */
      const char *prefix;
         /* "File:", "Dir:", or "New Buffer".  Goes before filename. */
      size_t prefixlen;
-       /* strlen(prefix) + 1. */
+       /* The length of the prefix in columns, plus one. */
      const char *state;
         /* "Modified", "View", or spaces the length of "Modified".
          * Tells the state of this buffer. */
      size_t statelen = 0;
-       /* strlen(state) + 1. */
+       /* The length of the state in columns, plus one. */
      char *exppath = NULL;
         /* The file name, expanded for display. */
-    size_t exppathlen = 0;
-       /* strlen(exppath) + 1. */
      bool newfie = FALSE;
         /* Do we say "New Buffer"? */
      bool dots = FALSE;
@@ -2299,10 +2498,10 @@ void titlebar(const char *path)
         state = _("View");
      else {
         if (space > 0)
-           statelen = strnlen(_("Modified"), space - 1) + 1;
+           statelen = strnlenpt(_("Modified"), space - 1) + 1;
         state = &hblank[COLS - statelen];
      }
-    statelen = strnlen(state, COLS);
+    statelen = strnlenpt(state, COLS);
      /* We need a space before state. */
      if ((ISSET(MODIFIED) || ISSET(VIEW_MODE)) && statelen < COLS)
         statelen++;
@@ -2322,7 +2521,7 @@ void titlebar(const char *path)
      } else
         prefix = _("File:");
      assert(statelen < space);
-    prefixlen = strnlen(prefix, space - statelen);
+    prefixlen = strnlenpt(prefix, space - statelen);
      /* If newfie is FALSE, we need a space after prefix. */
      if (!newfie && prefixlen + statelen < space)
         prefixlen++;
@@ -2337,36 +2536,40 @@ void titlebar(const char *path)
      if (!newfie) {
         size_t lenpt = strlenpt(path), start_col;
  
-       if (lenpt > space)
-           start_col = actual_x(path, lenpt - space);
-       else
-           start_col = 0;
-       exppath = display_string(path, start_col, space);
         dots = (lenpt > space);
-       exppathlen = strlen(exppath);
+
+       if (dots) {
+           start_col = lenpt - space + 3;
+           space -= 3;
+       } else
+           start_col = 0;
+
+       exppath = display_string(path, start_col, space, FALSE);
      }
  
      if (!dots) {
+       size_t exppathlen = newfie ? 0 : strlenpt(exppath);
+           /* The length of the expanded filename. */
+
         /* There is room for the whole filename, so we center it. */
         waddnstr(topwin, hblank, (space - exppathlen) / 3);
         waddnstr(topwin, prefix, prefixlen);
         if (!newfie) {
-           assert(strlen(prefix) + 1 == prefixlen);
+           assert(strlenpt(prefix) + 1 == prefixlen);
+
             waddch(topwin, ' ');
             waddstr(topwin, exppath);
         }
      } else {
         /* We will say something like "File: ...ename". */
         waddnstr(topwin, prefix, prefixlen);
-       if (space == 0 || newfie)
+       if (space <= -3 || newfie)
             goto the_end;
         waddch(topwin, ' ');
-       waddnstr(topwin, "...", space);
-       if (space <= 3)
+       waddnstr(topwin, "...", space + 3);
+       if (space <= 0)
             goto the_end;
-       space -= 3;
-       assert(exppathlen == space + 3);
-       waddnstr(topwin, exppath + 3, space);
+       waddstr(topwin, exppath);
      }
  
    the_end:
@@ -2414,17 +2617,17 @@ void statusbar(const char *msg, ...)
      blank_statusbar();
  
      if (COLS >= 4) {
-       char *bar;
-       char *foo;
+       char *bar, *foo;
         size_t start_x = 0, foo_len;
  #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
         bool old_whitespace = ISSET(WHITESPACE_DISPLAY);
+
         UNSET(WHITESPACE_DISPLAY);
  #endif
         bar = charalloc(COLS - 3);
         vsnprintf(bar, COLS - 3, msg, ap);
         va_end(ap);
-       foo = display_string(bar, 0, COLS - 4);
+       foo = display_string(bar, 0, COLS - 4, FALSE);
  #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
         if (old_whitespace)
             SET(WHITESPACE_DISPLAY);
@@ -2923,7 +3126,7 @@ void update_line(const filestruct *fileptr, size_t index)
  
      /* Expand the line, replacing tabs with spaces, and control
       * characters with their displayed forms. */
-    converted = display_string(fileptr->data, page_start, COLS);
+    converted = display_string(fileptr->data, page_start, COLS, TRUE);
  
      /* Paint the line. */
      edit_add(fileptr, converted, line, page_start);
@@ -3569,7 +3772,10 @@ void do_credits(void)
         "David Benbennick",
         "Ken Tyler",
         "Sven Guckes",
-       "Florian König",
+#ifdef NANO_WIDE
+       !ISSET(NO_UTF8) ? "Florian K\xC3\xB6nig" :
+#endif
+               "Florian König",
         "Pauli Virtanen",
         "Daniele Medri",
         "Clement Laforet",
@@ -3644,7 +3850,7 @@ void do_credits(void)
                 what = _(xlcredits[xlpos]);
                 xlpos++;
             }
-           start_x = COLS / 2 - strlen(what) / 2 - 1;
+           start_x = COLS / 2 - strlenpt(what) / 2 - 1;
             mvwaddstr(edit, editwinrows - 1 - editwinrows % 2, start_x,
                 what);
         }
author	David Lawrence Ramsey <pooka109@gmail.com>
	Thu, 23 Dec 2004 17:43:27 +0000 (17:43 +0000)
committer	David Lawrence Ramsey <pooka109@gmail.com>
	Thu, 23 Dec 2004 17:43:27 +0000 (17:43 +0000)
ChangeLog		patch | blob | history
configure.ac		patch | blob | history
src/move.c		patch | blob | history
src/nano.c		patch | blob | history
src/nano.h		patch | blob | history
src/proto.h		patch | blob | history
src/search.c		patch | blob | history
src/utils.c		patch | blob | history
src/winio.c		patch | blob | history