From: David Lawrence Ramsey Date: Tue, 15 Mar 2005 05:44:03 +0000 (+0000) Subject: make the rest of the justify code support multibyte characters X-Git-Tag: v1.3.6~32 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=38156d4491cd2134874e6cdc8eed10e4bb584a35;p=nano.git make the rest of the justify code support multibyte characters git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2371 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- diff --git a/ChangeLog b/ChangeLog index 62054409..109edc6f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -175,12 +175,12 @@ CVS code - paragraph-searching utility functions when possible instead of duplicating code. Also overhaul the justify code to make it leave the right number of spaces at the ends of the lines of a - paragraph, to make it (partially) support multibyte - characters, and to make it simpler. Also, don't remove a - space after a duplicate character in punct anymore, as it - doesn't really make us more compatible with Pico. New - functions do_para_begin_void() and do_para_end_void(); changes - to justify_format(), do_para_begin(), inpar(), do_para_end(), + paragraph, to make it support multibyte characters, and to + make it simpler. Also, don't remove a space after a duplicate + character in punct anymore, as it doesn't really make us more + compatible with Pico. New functions mbstrchr(), + do_para_begin_void(), and do_para_end_void(); changes to + justify_format(), do_para_begin(), inpar(), do_para_end(), break_line(), do_para_search() (renamed find_paragraph()), and do_justify(); removal of breakable(). (DLR) - Still more steps toward full wide/multibyte character support. diff --git a/src/chars.c b/src/chars.c index ae807a7b..79c66b89 100644 --- a/src/chars.c +++ b/src/chars.c @@ -811,3 +811,48 @@ size_t mbstrnlen(const char *s, size_t maxlen) nstrnlen(s, maxlen); #endif } + +#ifndef DISABLE_JUSTIFY +/* This function is equivalent to strchr() for multibyte strings. 
It returns a pointer to the first occurrence in s of the character that c points to, or NULL if s does not contain it. */ +char *mbstrchr(const char *s, char *c) +{ + assert(s != NULL && c != NULL); + +#ifdef NANO_WIDE + if (!ISSET(NO_UTF8)) { + char *s_mb = charalloc(MB_CUR_MAX); + const char *q = s; + wchar_t ws, wc; + int s_mb_len, c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); + + if (c_mb_len <= 0) { + mbtowc(NULL, NULL, 0); + wc = (unsigned char)*c; + } + + while (*s != '\0') { + s_mb_len = parse_mbchar(s, s_mb, NULL, NULL); + + if (mbtowc(&ws, s_mb, s_mb_len) <= 0) { + mbtowc(NULL, NULL, 0); + ws = (unsigned char)*s; + } + + if (ws == wc) + break; + + s += s_mb_len; + q += s_mb_len; + } + + free(s_mb); + + if (*s == '\0') /* Reached the end without a match; ws is unset if s was empty. */ + q = NULL; + + return (char *)q; + } else +#endif + return strchr(s, *c); +} +#endif diff --git a/src/nano.c b/src/nano.c index cdf6685e..83bf41e1 100644 --- a/src/nano.c +++ b/src/nano.c @@ -2382,68 +2382,95 @@ void justify_format(filestruct *paragraph, size_t skip) new_end = new_paragraph_data + skip; while (*end != '\0') { + int end_len; + /* If this character is blank, make sure that it's a space with * no blanks after it. */ - if (is_blank_char(*end)) { + if (is_blank_mbchar(end)) { + end_len = parse_mbchar(end, NULL, NULL, NULL); + *new_end = ' '; new_end++; - end++; + end += end_len; - while (*end != '\0' && is_blank_char(*end)) { - end++; - shift++; + while (*end != '\0' && is_blank_mbchar(end)) { + end_len = parse_mbchar(end, NULL, NULL, NULL); + + end += end_len; + shift += end_len; #ifndef NANO_SMALL /* Keep track of the change in the current line. */ if (mark_beginbuf == paragraph && mark_beginx >= end - paragraph->data) - mark_shift++; + mark_shift += end_len; #endif } /* If this character is punctuation optionally followed by a * bracket and then followed by blanks, make sure there are no * more than two blanks after it, and make sure that the blanks * are spaces. 
*/ - } else if (strchr(punct, *end) != NULL) { - *new_end = *end; - new_end++; - end++; + } else if (mbstrchr(punct, end) != NULL) { + end_len = parse_mbchar(end, NULL, NULL, NULL); - if (*end != '\0' && strchr(brackets, *end) != NULL) { + while (end_len > 0) { *new_end = *end; new_end++; end++; + end_len--; + } + + if (*end != '\0' && mbstrchr(brackets, end) != NULL) { + end_len = parse_mbchar(end, NULL, NULL, NULL); + + while (end_len > 0) { + *new_end = *end; + new_end++; + end++; + end_len--; + } } - if (*end != '\0' && is_blank_char(*end)) { + if (*end != '\0' && is_blank_mbchar(end)) { + end_len = parse_mbchar(end, NULL, NULL, NULL); + *new_end = ' '; new_end++; - end++; + end += end_len; } - if (*end != '\0' && is_blank_char(*end)) { + if (*end != '\0' && is_blank_mbchar(end)) { + end_len = parse_mbchar(end, NULL, NULL, NULL); + *new_end = ' '; new_end++; - end++; + end += end_len; } - while (*end != '\0' && is_blank_char(*end)) { - end++; - shift++; + while (*end != '\0' && is_blank_mbchar(end)) { + end_len = parse_mbchar(end, NULL, NULL, NULL); + + end += end_len; + shift += end_len; #ifndef NANO_SMALL - /* Keep track of the change in the current line. */ - if (mark_beginbuf == paragraph && - mark_beginx >= end - paragraph->data) - mark_shift++; + /* Keep track of the change in the current line. */ + if (mark_beginbuf == paragraph && + mark_beginx >= end - paragraph->data) + mark_shift += end_len; #endif } /* If this character is neither blank nor punctuation, leave it * alone. */ } else { - *new_end = *end; - new_end++; - end++; + end_len = parse_mbchar(end, NULL, NULL, NULL); + + while (end_len > 0) { + *new_end = *end; + new_end++; + end++; + end_len--; + } } } @@ -2743,11 +2770,11 @@ ssize_t break_line(const char *line, ssize_t goal, bool force) * found with short enough display width. */ ssize_t cur_loc = 0; /* Current index in line. 
*/ + int line_len; assert(line != NULL); while (*line != '\0' && goal >= 0) { - int line_len; size_t pos = 0; line_len = parse_mbchar(line, NULL, NULL, &pos); @@ -2770,7 +2797,7 @@ ssize_t break_line(const char *line, ssize_t goal, bool force) bool found_blank = FALSE; while (*line != '\0') { - int line_len = parse_mbchar(line, NULL, NULL, NULL); + line_len = parse_mbchar(line, NULL, NULL, NULL); if (is_blank_mbchar(line)) { if (!found_blank) @@ -2786,11 +2813,18 @@ ssize_t break_line(const char *line, ssize_t goal, bool force) } } - /* Perhaps the character after blank_loc is a blank. But because - * of justify_format(), there can be only two adjacent. */ - if (*(line - cur_loc + blank_loc + 1) == ' ' || - *(line - cur_loc + blank_loc + 1) == '\0') - blank_loc++; + /* Move to the last blank after blank_loc, if there is one. */ + line -= cur_loc; + line += blank_loc; + line_len = parse_mbchar(line, NULL, NULL, NULL); + line += line_len; + + while (*line != '\0' && is_blank_mbchar(line)) { + line_len = parse_mbchar(line, NULL, NULL, NULL); + + line += line_len; + blank_loc += line_len; + } return blank_loc; } diff --git a/src/proto.h b/src/proto.h index b6841634..01b9a9a2 100644 --- a/src/proto.h +++ b/src/proto.h @@ -207,6 +207,9 @@ size_t mbstrlen(const char *s); size_t nstrnlen(const char *s, size_t maxlen); #endif size_t mbstrnlen(const char *s, size_t maxlen); +#ifndef DISABLE_JUSTIFY +char *mbstrchr(const char *s, char *c); +#endif /* Public functions in color.c. */ #ifdef ENABLE_COLOR