From: David Lawrence Ramsey Date: Fri, 7 Jan 2005 22:39:43 +0000 (+0000) Subject: miscellaneous cleanups for the multibyte parsing functions X-Git-Tag: v1.3.6~160 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=d96851f53456bb22809f7be93860ca39d4315c54;p=nano.git miscellaneous cleanups for the multibyte parsing functions git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2242 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- diff --git a/src/nano.c b/src/nano.c index b9ba1cd9..5027dd38 100644 --- a/src/nano.c +++ b/src/nano.c @@ -1178,12 +1178,11 @@ void do_delete(void) placewewant = xplustabs(); if (current->data[current_x] != '\0') { - int char_len = parse_char(current->data + current_x, NULL, - NULL + int char_len = parse_char(current->data + current_x, NULL #ifdef NANO_WIDE , NULL #endif - ); + , NULL); size_t line_len = strlen(current->data + current_x); assert(current_x < strlen(current->data)); @@ -2501,11 +2500,11 @@ bool breakable(const char *line, ssize_t goal) if (isblank(*line)) return TRUE; - line += parse_char(line, NULL, &pos + line += parse_char(line, NULL #ifdef NANO_WIDE , NULL #endif - ); + , &pos); goal -= pos; } @@ -2539,11 +2538,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force) assert(*line != '\t'); - line_len = parse_char(line, NULL, &pos + line_len = parse_char(line, NULL #ifdef NANO_WIDE , NULL #endif - ); + , &pos); goal -= pos; line += line_len; diff --git a/src/proto.h b/src/proto.h index 6801d0a6..7bcb6732 100644 --- a/src/proto.h +++ b/src/proto.h @@ -478,13 +478,13 @@ bool is_byte_char(int c); int num_of_digits(int n); unsigned char control_rep(unsigned char c); bool parse_num(const char *str, ssize_t *val); -int parse_char(const char *str, int *chr, size_t *col +int parse_char(const char *buf, int *chr #ifdef NANO_WIDE - , bool *bad_char + , bool *bad_chr #endif - ); -size_t move_left(const char *str, size_t pos); -size_t move_right(const char *str, size_t pos); + , size_t *col); +size_t move_left(const char 
*buf, size_t pos); +size_t move_right(const char *buf, size_t pos); void align(char **strp); void null_at(char **data, size_t index); void unsunder(char *str, size_t true_len); diff --git a/src/utils.c b/src/utils.c index 4b3c918b..dc95b95f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -128,66 +128,66 @@ bool parse_num(const char *str, ssize_t *val) return TRUE; } -/* Parse a multi-byte character from str. Return the number of bytes - * used. If chr isn't NULL, store the wide character in it. If col - * isn't NULL, store the new display width in it. If *str is '\t', we - * expect col to have the current display width. If bad_char isn't - * NULL, set it to TRUE if we have a null byte or a bad multibyte - * character. */ -int parse_char(const char *str, int *chr, size_t *col +/* Parse a multibyte character from buf. Return the number of bytes + * used. If chr isn't NULL, store the wide character in it. If + * bad_chr isn't NULL, set it to TRUE if we have a null byte or a bad + * multibyte character. If col isn't NULL, store the new display width + * in it. If *buf is '\t', we expect col to have the current display + * width. */ +int parse_char(const char *buf, int *chr #ifdef NANO_WIDE - , bool *bad_char + , bool *bad_chr #endif - ) + , size_t *col) { - int wide_str, wide_str_len; + int wide_buf, mb_buf_len; - assert(str != NULL); + assert(buf != NULL); #ifdef NANO_WIDE - if (bad_char != NULL) - *bad_char = FALSE; + if (bad_chr != NULL) + *bad_chr = FALSE; if (!ISSET(NO_UTF8)) { wchar_t tmp; /* Get the wide character equivalent of the multibyte * character. */ - wide_str_len = mbtowc(&tmp, str, MB_CUR_MAX); - wide_str = (int)tmp; - - /* If str contains a null byte or an invalid multibyte - * character, interpret str's first byte as a single-byte - * sequence and set bad_char to TRUE. 
*/ - if (wide_str_len <= 0) { - wide_str_len = 1; - wide_str = (unsigned char)*str; - if (bad_char != NULL) - *bad_char = TRUE; + mb_buf_len = mbtowc(&tmp, buf, MB_CUR_MAX); + wide_buf = (int)tmp; + + /* If buf contains a null byte or an invalid multibyte + * character, interpret buf's first byte as a single-byte + * sequence and set bad_chr to TRUE. */ + if (mb_buf_len <= 0) { + mb_buf_len = 1; + wide_buf = (unsigned char)*buf; + if (bad_chr != NULL) + *bad_chr = TRUE; } /* Save the wide character in chr. */ if (chr != NULL) - *chr = wide_str; + *chr = wide_buf; /* Save the column width of the wide character in col. */ if (col != NULL) { /* If we have a tab, get its width in columns using the * current value of col. */ - if (wide_str == '\t') + if (wide_buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, get its width using one * column for the "^" that will be displayed in front of it, * and the width in columns of its visible equivalent as * returned by control_rep(). */ - else if (is_cntrl_char(wide_str)) { - char *ctrl_wide_str = charalloc(MB_CUR_MAX); + else if (is_cntrl_char(wide_buf)) { + char *ctrl_mb_buf = charalloc(MB_CUR_MAX); (*col)++; - wide_str = control_rep((unsigned char)wide_str); + wide_buf = control_rep((unsigned char)wide_buf); - if (wctomb(ctrl_wide_str, (wchar_t)wide_str) != -1) { - int width = wcwidth(wide_str); + if (wctomb(ctrl_mb_buf, (wchar_t)wide_buf) != -1) { + int width = wcwidth((wchar_t)wide_buf); if (width != -1) *col += width; @@ -195,11 +195,11 @@ int parse_char(const char *str, int *chr, size_t *col else (*col)++; - free(ctrl_wide_str); + free(ctrl_mb_buf); /* If we have a normal character, get its width in columns * normally. */ } else { - int width = wcwidth(wide_str); + int width = wcwidth((wchar_t)wide_buf); if (width != -1) *col += width; @@ -207,25 +207,25 @@ int parse_char(const char *str, int *chr, size_t *col } } else { #endif - /* Interpret str's first character as a single-byte sequence. 
*/ - wide_str_len = 1; - wide_str = (unsigned char)*str; + /* Interpret buf's first character as a single-byte sequence. */ + mb_buf_len = 1; + wide_buf = (unsigned char)*buf; /* Save the single-byte sequence in chr as though it's a wide * character. */ if (chr != NULL) - *chr = wide_str; + *chr = wide_buf; if (col != NULL) { /* If we have a tab, get its width in columns using the * current value of col. */ - if (wide_str == '\t') + if (wide_buf == '\t') *col += tabsize - *col % tabsize; /* If we have a control character, it's two columns wide: * one column for the "^" that will be displayed in front of * it, and one column for its visible equivalent as returned * by control_rep(). */ - else if (is_cntrl_char(wide_str)) + else if (is_cntrl_char(wide_buf)) *col += 2; /* If we have a normal character, it's one column wide. */ else @@ -235,44 +235,44 @@ int parse_char(const char *str, int *chr, size_t *col } #endif - return wide_str_len; + return mb_buf_len; } -/* Return the index in str of the beginning of the character before the +/* Return the index in buf of the beginning of the character before the * one at pos. */ -size_t move_left(const char *str, size_t pos) +size_t move_left(const char *buf, size_t pos) { size_t pos_prev = pos; - assert(str != NULL && pos <= strlen(str)); + assert(str != NULL && pos <= strlen(buf)); /* There is no library function to move backward one multibyte * character. Here is the naive, O(pos) way to do it. */ while (TRUE) { - int str_len = parse_char(str + pos - pos_prev, NULL, NULL + int mb_buf_len = parse_char(buf + pos - pos_prev, NULL #ifdef NANO_WIDE , NULL #endif - ); + , NULL); - if (pos_prev <= str_len) + if (pos_prev <= mb_buf_len) break; - pos_prev -= str_len; + pos_prev -= mb_buf_len; } return pos - pos_prev; } -/* Return the index in str of the beginning of the character after the +/* Return the index in buf of the beginning of the character after the * one at pos. 
*/ -size_t move_right(const char *str, size_t pos) +size_t move_right(const char *buf, size_t pos) { - return pos + parse_char(str + pos, NULL, NULL + return pos + parse_char(buf + pos, NULL #ifdef NANO_WIDE , NULL #endif - ); + , NULL); } /* Fix the memory allocation for a string. */ diff --git a/src/winio.c b/src/winio.c index 36868c98..f4e55dbd 100644 --- a/src/winio.c +++ b/src/winio.c @@ -1936,11 +1936,11 @@ void do_statusbar_backspace(void) void do_statusbar_delete(void) { if (statusbar_x < statusbar_xend) { - int char_len = parse_char(answer + statusbar_x, NULL, NULL + int char_len = parse_char(answer + statusbar_x, NULL #ifdef NANO_WIDE , NULL #endif - ); + , NULL); charmove(answer + statusbar_x, answer + statusbar_x + char_len, statusbar_xend - statusbar_x - char_len + 1); @@ -2056,11 +2056,11 @@ size_t actual_x(const char *str, size_t xplus) assert(str != NULL); while (*str != '\0') { - int str_len = parse_char(str, NULL, &length + int str_len = parse_char(str, NULL #ifdef NANO_WIDE , NULL #endif - ); + , &length); if (length > xplus) break; @@ -2085,11 +2085,11 @@ size_t strnlenpt(const char *str, size_t size) assert(str != NULL); while (*str != '\0') { - int str_len = parse_char(str, NULL, &length + int str_len = parse_char(str, NULL #ifdef NANO_WIDE , NULL #endif - ); + , &length); str += str_len; @@ -2160,25 +2160,25 @@ size_t display_string_len(const char *buf, size_t start_col, size_t /* Throughout the loop, we maintain the fact that *buf displays at * column start_col. 
*/ while (start_col <= end_col && *buf != '\0') { - int wide_buf, wide_buf_len; - size_t old_col = start_col; + int wide_buf, mb_buf_len; #ifdef NANO_WIDE bool bad_char; #endif + size_t old_col = start_col; - wide_buf_len = parse_char(buf, &wide_buf, &start_col + mb_buf_len = parse_char(buf, &wide_buf #ifdef NANO_WIDE , &bad_char #endif - ); + , &start_col); #ifdef NANO_WIDE /* If buf contains a null byte or an invalid multibyte * character, interpret that character as though it's a wide * character. */ if (!ISSET(NO_UTF8) && bad_char) { - char *bad_wide_buf = charalloc(MB_CUR_MAX); - int bad_wide_buf_len; + char *bad_mb_buf = charalloc(MB_CUR_MAX); + int bad_mb_buf_len; /* If we have a control character, add one byte to account * for the "^" that will be displayed in front of it, and @@ -2191,12 +2191,12 @@ size_t display_string_len(const char *buf, size_t start_col, size_t /* Translate the wide character to its multibyte * equivalent. */ - bad_wide_buf_len = wctomb(bad_wide_buf, (wchar_t)wide_buf); + bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf); - if (bad_wide_buf_len != -1) - retval += bad_wide_buf_len; + if (bad_mb_buf_len != -1) + retval += bad_mb_buf_len; - free(bad_wide_buf); + free(bad_mb_buf); } else { #endif /* If we have a tab, get its width in bytes using the @@ -2208,23 +2208,22 @@ size_t display_string_len(const char *buf, size_t start_col, size_t * then add the number of bytes for its visible equivalent * as returned by control_rep(). */ else if (is_cntrl_char(wide_buf)) { - char ctrl_wide_buf = - control_rep((unsigned char)wide_buf); + char ctrl_mb_buf = control_rep((unsigned char)wide_buf); retval++; - retval += parse_char(&ctrl_wide_buf, NULL, NULL + retval += parse_char(&ctrl_mb_buf, NULL #ifdef NANO_WIDE , NULL #endif - ); + , NULL); /* If we have a normal character, add its width in bytes * normally. 
*/ } else - retval += wide_buf_len; + retval += mb_buf_len; #ifdef NANO_WIDE } - buf += wide_buf_len; + buf += mb_buf_len; #endif } @@ -2279,43 +2278,43 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool if (column < start_col || (dollars && column > 0 && buf[start_index] != '\t')) { - int wide_buf, wide_buf_len; + int wide_buf, mb_buf_len; /* We don't display all of buf[start_index] since it starts to * the left of the screen. */ - wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL + mb_buf_len = parse_char(buf + start_index, &wide_buf #ifdef NANO_WIDE , NULL #endif - ); + , NULL); if (is_cntrl_char(wide_buf)) { if (column < start_col) { - char *ctrl_wide_buf = + char *ctrl_mb_buf = #ifdef NANO_WIDE !ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) : #endif charalloc(1); - int ctrl_wide_buf_len, i; + int ctrl_mb_buf_len, i; wide_buf = control_rep((unsigned char)wide_buf); #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) - ctrl_wide_buf_len = wctomb(ctrl_wide_buf, + ctrl_mb_buf_len = wctomb(ctrl_mb_buf, (wchar_t)wide_buf); else { #endif - ctrl_wide_buf_len = 1; - ctrl_wide_buf[0] = (unsigned char)wide_buf; + ctrl_mb_buf_len = 1; + ctrl_mb_buf[0] = (unsigned char)wide_buf; #ifdef NANO_WIDE } #endif - for (i = 0; i < ctrl_wide_buf_len; i++) - converted[index++] = ctrl_wide_buf[i]; + for (i = 0; i < ctrl_mb_buf_len; i++) + converted[index++] = ctrl_mb_buf[i]; - free(ctrl_wide_buf); + free(ctrl_mb_buf); #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) { @@ -2327,7 +2326,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool #endif start_col++; - start_index += wide_buf_len; + start_index += mb_buf_len; } } #ifdef NANO_WIDE @@ -2335,22 +2334,22 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool converted[index++] = ' '; start_col++; - start_index += wide_buf_len; + start_index += mb_buf_len; } #endif } while (index < alloc_len - 1 && buf[start_index] != '\0') { - int wide_buf, wide_buf_len; + int wide_buf, mb_buf_len; 
#ifdef NANO_WIDE bool bad_char; #endif - wide_buf_len = parse_char(buf + start_index, &wide_buf, NULL + mb_buf_len = parse_char(buf + start_index, &wide_buf #ifdef NANO_WIDE , &bad_char #endif - ); + , NULL); if (wide_buf == '\t') { converted[index++] = @@ -2367,12 +2366,12 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool * contains an invalid multibyte control character, interpret * that character as though it's a normal control character. */ } else if (is_cntrl_char(wide_buf)) { - char *ctrl_wide_buf = + char *ctrl_mb_buf = #ifdef NANO_WIDE !ISSET(NO_UTF8) ? charalloc(MB_CUR_MAX) : #endif charalloc(1); - int ctrl_wide_buf_len, i; + int ctrl_mb_buf_len, i; converted[index++] = '^'; start_col++; @@ -2380,20 +2379,20 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) - ctrl_wide_buf_len = wctomb(ctrl_wide_buf, + ctrl_mb_buf_len = wctomb(ctrl_mb_buf, (wchar_t)wide_buf); else { #endif - ctrl_wide_buf_len = 1; - ctrl_wide_buf[0] = (unsigned char)wide_buf; + ctrl_mb_buf_len = 1; + ctrl_mb_buf[0] = (unsigned char)wide_buf; #ifdef NANO_WIDE } #endif - for (i = 0; i < ctrl_wide_buf_len; i++) - converted[index++] = ctrl_wide_buf[i]; + for (i = 0; i < ctrl_mb_buf_len; i++) + converted[index++] = ctrl_mb_buf[i]; - free(ctrl_wide_buf); + free(ctrl_mb_buf); #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) { @@ -2419,19 +2418,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool * character, interpret that character as though it's a * normal non-control character. 
*/ if (!ISSET(NO_UTF8) && bad_char) { - char *bad_wide_buf = charalloc(MB_CUR_MAX); - int bad_wide_buf_len; + char *bad_mb_buf = charalloc(MB_CUR_MAX); + int bad_mb_buf_len; - bad_wide_buf_len = wctomb(bad_wide_buf, - (wchar_t)wide_buf); + bad_mb_buf_len = wctomb(bad_mb_buf, (wchar_t)wide_buf); - for (i = 0; i < bad_wide_buf_len; i++) - converted[index++] = bad_wide_buf[i]; + for (i = 0; i < bad_mb_buf_len; i++) + converted[index++] = bad_mb_buf[i]; - free(bad_wide_buf); + free(bad_mb_buf); } else { #endif - for (i = 0; i < wide_buf_len; i++) + for (i = 0; i < mb_buf_len; i++) converted[index++] = buf[start_index + i]; #ifdef NANO_WIDE } @@ -2446,7 +2444,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool start_col++; } - start_index += wide_buf_len; + start_index += mb_buf_len; } if (index < alloc_len - 1)