From: David Lawrence Ramsey Date: Fri, 6 Jan 2006 07:10:30 +0000 (+0000) Subject: rework the bracket searching code to handle multibyte bracket characters X-Git-Tag: v1.3.11~68 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=a248863b43e8a705f535975aa16009fc7de4ee33;p=nano.git rework the bracket searching code to handle multibyte bracket characters git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@3258 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- diff --git a/ChangeLog b/ChangeLog index 3bf584ba..f9e48836 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,11 @@ CVS code - atom in the default regex string for quotestr, according to ASCII. Changes to main(), nano.1, nanorc.5, and nanorc.sample. (DLR) + - Rework the bracket searching code to handle multibyte bracket + characters. New functions mbstrpbrk() and mbrevstrpbrk(); + changes to find_statusbar_bracket_match(), + do_statusbar_find_bracket(), find_bracket_match(), and + do_find_bracket(). (DLR) - chars.c: mbstrchr() - Make parameter c const. (DLR) diff --git a/src/chars.c b/src/chars.c index 05041bc4..340b6827 100644 --- a/src/chars.c +++ b/src/chars.c @@ -813,27 +813,7 @@ size_t mbstrnlen(const char *s, size_t maxlen) return strnlen(s, maxlen); } -#ifndef NANO_TINY -/* This function is equivalent to strpbrk(), except in that it scans the - * string in reverse, starting at rev_start. */ -char *revstrpbrk(const char *s, const char *accept, const char - *rev_start) -{ - assert(s != NULL && accept != NULL && rev_start != NULL); - - for (; rev_start >= s; rev_start--) { - const char *q = (*rev_start == '\0') ? NULL : strchr(accept, - *rev_start); - - if (q != NULL) - return (char *)rev_start; - } - - return NULL; -} -#endif /* !NANO_TINY */ - -#ifndef DISABLE_JUSTIFY +#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY) /* This function is equivalent to strchr() for multibyte strings. */ char *mbstrchr(const char *s, const char *c) { @@ -879,8 +859,75 @@ char *mbstrchr(const char *s, const char *c) #endif return strchr(s, *c); } +#endif /* !NANO_TINY || !DISABLE_JUSTIFY */ -#ifdef ENABLE_NANORC +#ifndef NANO_TINY +/* This function is equivalent to strpbrk() for multibyte strings. */ +char *mbstrpbrk(const char *s, const char *accept) +{ + assert(s != NULL && accept != NULL); + +#ifdef ENABLE_UTF8 + if (ISSET(USE_UTF8)) { + while (*s != '\0') { + if (mbstrchr(accept, s) != NULL) + return (char *)s; + + s += move_mbright(s, 0); + } + + return NULL; + } else +#endif + return strpbrk(s, accept); +} + +/* This function is equivalent to strpbrk(), except in that it scans the + * string in reverse, starting at rev_start. */ +char *revstrpbrk(const char *s, const char *accept, const char + *rev_start) +{ + assert(s != NULL && accept != NULL && rev_start != NULL); + + for (; rev_start >= s; rev_start--) { + const char *q = (*rev_start == '\0') ? NULL : strchr(accept, + *rev_start); + + if (q != NULL) + return (char *)rev_start; + } + + return NULL; +} + +/* This function is equivalent to strpbrk() for multibyte strings, + * except in that it scans the string in reverse, starting at + * rev_start. */ +char *mbrevstrpbrk(const char *s, const char *accept, const char + *rev_start) +{ + assert(s != NULL && accept != NULL && rev_start != NULL); + +#ifdef ENABLE_UTF8 + if (ISSET(USE_UTF8)) { + while (rev_start >= s) { + const char *q = (*rev_start == '\0') ? NULL : + mbstrchr(accept, rev_start); + + if (q != NULL) + return (char *)rev_start; + + rev_start = s + move_mbleft(s, rev_start - s); + } + + return NULL; + } else +#endif + return revstrpbrk(s, accept, rev_start); +} +#endif /* !NANO_TINY */ + +#if !defined(DISABLE_JUSTIFY) && defined(ENABLE_NANORC) /* Return TRUE if the string s contains one or more blank characters, * and FALSE otherwise. */ bool has_blank_chars(const char *s) @@ -926,8 +973,7 @@ bool has_blank_mbchars(const char *s) #endif return has_blank_chars(s); } -#endif /* ENABLE_NANORC */ -#endif /* !DISABLE_JUSTIFY */ +#endif /* !DISABLE_JUSTIFY && ENABLE_NANORC */ #ifdef ENABLE_UTF8 /* Return TRUE if wc is valid Unicode, and FALSE otherwise. */ diff --git a/src/prompt.c b/src/prompt.c index 86f31f5c..cfb1d2a1 100644 --- a/src/prompt.c +++ b/src/prompt.c @@ -3,7 +3,7 @@ * prompt.c * * * * Copyright (C) 1999-2004 Chris Allegretta * - * Copyright (C) 2005 David Lawrence Ramsey * + * Copyright (C) 2005-2006 David Lawrence Ramsey * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2, or (at your option) * @@ -672,7 +672,7 @@ bool find_statusbar_bracket_match(bool reverse, const char { const char *rev_start = NULL, *found = NULL; - assert(strlen(bracket_set) == 2); + assert(mbstrlen(bracket_set) == 2); /* rev_start might end up 1 character before the start or after the * end of the line. This won't be a problem because we'll skip over @@ -687,8 +687,8 @@ bool find_statusbar_bracket_match(bool reverse, const char * is found. */ found = ((rev_start > answer && *(rev_start - 1) == '\0') || rev_start < answer) ? NULL : (reverse ? - revstrpbrk(answer, bracket_set, rev_start) : - strpbrk(rev_start, bracket_set)); + mbrevstrpbrk(answer, bracket_set, rev_start) : + mbstrpbrk(rev_start, bracket_set)); /* We've found a potential match. */ if (found != NULL) @@ -711,48 +711,81 @@ bool find_statusbar_bracket_match(bool reverse, const char void do_statusbar_find_bracket(void) { size_t statusbar_x_save, pww_save; - const char *bracket_list = "()<>[]{}"; + const char *bracket_list = "(<[{)>]}"; /* The list of brackets we can find matches to. */ - const char *pos; + const char *ch; /* The location in bracket_list of the bracket at the current * cursor position. */ - char ch; - /* The bracket at the current cursor position. */ - char wanted_ch; - /* The bracket complementing the bracket at the current cursor - * position. */ - char bracket_set[3]; + int ch_len; + /* The length of ch in bytes. */ + const char *wanted_ch; + /* The location in bracket_list of the bracket complementing the + * bracket at the current cursor position. */ + int wanted_ch_len; + /* The length of wanted_ch in bytes. */ + char *bracket_set; /* The pair of characters in ch and wanted_ch. */ + size_t bracket_halflist; + /* The number of characters in one half of bracket_list. */ size_t count = 1; /* The initial bracket count. */ bool reverse; /* The direction we search. */ + char *found_ch; + /* The character we find. */ - assert(strlen(bracket_list) % 2 == 0); + assert(mbstrlen(bracket_list) % 2 == 0); - ch = answer[statusbar_x]; + ch = answer + statusbar_x; - if (ch == '\0' || (pos = strchr(bracket_list, ch)) == NULL) + if (ch == '\0' || (ch = mbstrchr(bracket_list, ch)) == NULL) return; /* Save where we are. */ statusbar_x_save = statusbar_x; pww_save = statusbar_pww; - /* If we're on an opening bracket, we want to search forwards for a - * closing bracket, and if we're on a closing bracket, we want to - * search backwards for an opening bracket. */ - reverse = ((pos - bracket_list) % 2 != 0); - wanted_ch = reverse ? *(pos - 1) : *(pos + 1); - bracket_set[0] = ch; - bracket_set[1] = wanted_ch; - bracket_set[2] = '\0'; + /* If we're on an opening bracket, which must be in the first half + * of bracket_list, we want to search forwards for a closing + * bracket. If we're on a closing bracket, which must be in the + * second half of bracket_list, we want to search backwards for an + * opening bracket. */ + bracket_halflist = mbstrlen(bracket_list) / 2; + reverse = ((ch - bracket_list) > bracket_halflist); + + /* If we're on an opening bracket, set wanted_ch to the character + * that's bracket_halflist characters after ch. If we're on a + * closing bracket, set wanted_ch to the character that's + * bracket_halflist characters before ch. */ + wanted_ch = ch; + + while (bracket_halflist > 0) { + if (reverse) + wanted_ch = bracket_list + move_mbleft(bracket_list, + wanted_ch - bracket_list); + else + wanted_ch += move_mbright(wanted_ch, 0); + + bracket_halflist--; + } + + ch_len = parse_mbchar(ch, NULL, NULL); + wanted_ch_len = parse_mbchar(wanted_ch, NULL, NULL); + + /* Fill bracket_set in with the values of ch and wanted_ch. */ + bracket_set = charalloc((mb_cur_max() * 2) + 1); + strncpy(bracket_set, ch, ch_len); + strncpy(bracket_set + ch_len, wanted_ch, wanted_ch_len); + null_at(&bracket_set, ch_len + wanted_ch_len); + + found_ch = charalloc(mb_cur_max() + 1); while (TRUE) { if (find_statusbar_bracket_match(reverse, bracket_set)) { /* If we found an identical bracket, increment count. If we * found a complementary bracket, decrement it. */ - count += (answer[statusbar_x] == ch) ? 1 : -1; + parse_mbchar(answer + statusbar_x, found_ch, NULL); + count += (strncmp(found_ch, ch, ch_len) == 0) ? 1 : -1; /* If count is zero, we've found a matching bracket. Update * the statusbar prompt and get out. */ @@ -769,6 +802,10 @@ void do_statusbar_find_bracket(void) break; } } + + /* Clean up. */ + free(bracket_set); + free(found_ch); } #endif /* !NANO_TINY */ diff --git a/src/proto.h b/src/proto.h index f0ab86d0..29f76b2e 100644 --- a/src/proto.h +++ b/src/proto.h @@ -196,17 +196,20 @@ size_t mbstrlen(const char *s); size_t nstrnlen(const char *s, size_t maxlen); #endif size_t mbstrnlen(const char *s, size_t maxlen); +#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY) +char *mbstrchr(const char *s, const char *c); +#endif #ifndef NANO_TINY +char *mbstrpbrk(const char *s, const char *accept); char *revstrpbrk(const char *s, const char *accept, const char *rev_start); +char *mbrevstrpbrk(const char *s, const char *accept, const char + *rev_start); #endif -#ifndef DISABLE_JUSTIFY -char *mbstrchr(const char *s, const char *c); -#ifdef ENABLE_NANORC +#if !defined(DISABLE_JUSTIFY) && defined(ENABLE_NANORC) bool has_blank_chars(const char *s); bool has_blank_mbchars(const char *s); #endif -#endif #ifdef ENABLE_UTF8 bool is_valid_unicode(wchar_t wc); #endif diff --git a/src/search.c b/src/search.c index 7e46b7d5..b80a4e89 100644 --- a/src/search.c +++ b/src/search.c @@ -3,7 +3,7 @@ * search.c * * * * Copyright (C) 1999-2004 Chris Allegretta * - * Copyright (C) 2005 David Lawrence Ramsey * + * Copyright (C) 2005-2006 David Lawrence Ramsey * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2, or (at your option) * @@ -276,9 +276,9 @@ bool findnextstr( filestruct *fileptr = openfile->current; const char *rev_start = NULL, *found = NULL; size_t found_len; - /* The length of the match we found. */ + /* The length of the match we find. */ size_t current_x_find = 0; - /* The location in the current line of the match we found. */ + /* The location in the current line of the match we find. */ ssize_t current_y_find = openfile->current_y; /* rev_start might end up 1 character before the start or after the @@ -1076,7 +1076,7 @@ bool find_bracket_match(bool reverse, const char *bracket_set) const char *rev_start = NULL, *found = NULL; ssize_t current_y_find = openfile->current_y; - assert(strlen(bracket_set) == 2); + assert(mbstrlen(bracket_set) == 2); /* rev_start might end up 1 character before the start or after the * end of the line. This won't be a problem because we'll skip over @@ -1091,8 +1091,8 @@ bool find_bracket_match(bool reverse, const char *bracket_set) while (TRUE) { found = ((rev_start > fileptr->data && *(rev_start - 1) == '\0') || rev_start < fileptr->data) ? NULL : (reverse ? - revstrpbrk(fileptr->data, bracket_set, rev_start) : - strpbrk(rev_start, bracket_set)); + mbrevstrpbrk(fileptr->data, bracket_set, rev_start) : + mbstrpbrk(rev_start, bracket_set)); /* We've found a potential match. */ if (found != NULL) @@ -1130,28 +1130,34 @@ void do_find_bracket(void) { filestruct *current_save; size_t current_x_save, pww_save; - const char *bracket_list = "()<>[]{}"; + const char *bracket_list = "(<[{)>]}"; /* The list of brackets we can find matches to. */ - const char *pos; + const char *ch; /* The location in bracket_list of the bracket at the current * cursor position. */ - char ch; - /* The bracket at the current cursor position. */ - char wanted_ch; - /* The bracket complementing the bracket at the current cursor - * position. */ - char bracket_set[3]; + int ch_len; + /* The length of ch in bytes. */ + const char *wanted_ch; + /* The location in bracket_list of the bracket complementing the + * bracket at the current cursor position. */ + int wanted_ch_len; + /* The length of wanted_ch in bytes. */ + char *bracket_set; /* The pair of characters in ch and wanted_ch. */ + size_t bracket_halflist; + /* The number of characters in one half of bracket_list. */ size_t count = 1; /* The initial bracket count. */ bool reverse; /* The direction we search. */ + char *found_ch; + /* The character we find. */ - assert(strlen(bracket_list) % 2 == 0); + assert(mbstrlen(bracket_list) % 2 == 0); - ch = openfile->current->data[openfile->current_x]; + ch = openfile->current->data + openfile->current_x; - if (ch == '\0' || (pos = strchr(bracket_list, ch)) == NULL) { + if (ch == '\0' || (ch = mbstrchr(bracket_list, ch)) == NULL) { statusbar(_("Not a bracket")); return; } @@ -1161,21 +1167,48 @@ void do_find_bracket(void) current_x_save = openfile->current_x; pww_save = openfile->placewewant; - /* If we're on an opening bracket, we want to search forwards for a - * closing bracket, and if we're on a closing bracket, we want to - * search backwards for an opening bracket. */ - reverse = ((pos - bracket_list) % 2 != 0); - wanted_ch = reverse ? *(pos - 1) : *(pos + 1); - bracket_set[0] = ch; - bracket_set[1] = wanted_ch; - bracket_set[2] = '\0'; + /* If we're on an opening bracket, which must be in the first half + * of bracket_list, we want to search forwards for a closing + * bracket. If we're on a closing bracket, which must be in the + * second half of bracket_list, we want to search backwards for an + * opening bracket. */ + bracket_halflist = mbstrlen(bracket_list) / 2; + reverse = ((ch - bracket_list) > bracket_halflist); + + /* If we're on an opening bracket, set wanted_ch to the character + * that's bracket_halflist characters after ch. If we're on a + * closing bracket, set wanted_ch to the character that's + * bracket_halflist characters before ch. */ + wanted_ch = ch; + + while (bracket_halflist > 0) { + if (reverse) + wanted_ch = bracket_list + move_mbleft(bracket_list, + wanted_ch - bracket_list); + else + wanted_ch += move_mbright(wanted_ch, 0); + + bracket_halflist--; + } + + ch_len = parse_mbchar(ch, NULL, NULL); + wanted_ch_len = parse_mbchar(wanted_ch, NULL, NULL); + + /* Fill bracket_set in with the values of ch and wanted_ch. */ + bracket_set = charalloc((mb_cur_max() * 2) + 1); + strncpy(bracket_set, ch, ch_len); + strncpy(bracket_set + ch_len, wanted_ch, wanted_ch_len); + null_at(&bracket_set, ch_len + wanted_ch_len); + + found_ch = charalloc(mb_cur_max() + 1); while (TRUE) { if (find_bracket_match(reverse, bracket_set)) { /* If we found an identical bracket, increment count. If we * found a complementary bracket, decrement it. */ - count += (openfile->current->data[openfile->current_x] == - ch) ? 1 : -1; + parse_mbchar(openfile->current->data + openfile->current_x, + found_ch, NULL); + count += (strncmp(found_ch, ch, ch_len) == 0) ? 1 : -1; /* If count is zero, we've found a matching bracket. Update * the screen and get out. */ @@ -1193,6 +1226,10 @@ void do_find_bracket(void) break; } } + + /* Clean up. */ + free(bracket_set); + free(found_ch); } #ifdef ENABLE_NANORC