rework the bracket searching code to handle multibyte bracket characters

author David Lawrence Ramsey <pooka109@gmail.com>

Fri, 6 Jan 2006 07:10:30 +0000 (07:10 +0000)

committer David Lawrence Ramsey <pooka109@gmail.com>

Fri, 6 Jan 2006 07:10:30 +0000 (07:10 +0000)
author David Lawrence Ramsey <pooka109@gmail.com>
Fri, 6 Jan 2006 07:10:30 +0000 (07:10 +0000)
committer David Lawrence Ramsey <pooka109@gmail.com>
Fri, 6 Jan 2006 07:10:30 +0000 (07:10 +0000)
diff --git a/ChangeLog b/ChangeLog

index 3bf584ba43df63fbac35f70bd29574fca0404d92..f9e488369760b8915c59dfea63b9513fffd554e8 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -7,6 +7,11 @@ CVS code -
           atom in the default regex string for quotestr, according to
           ASCII.  Changes to main(), nano.1, nanorc.5, and
           nanorc.sample. (DLR)
+       - Rework the bracket searching code to handle multibyte bracket
+         characters.  New functions mbstrpbrk() and mbrevstrpbrk();
+         changes to find_statusbar_bracket_match(),
+         do_statusbar_find_bracket(), find_bracket_match(), and
+         do_find_bracket(). (DLR)
  - chars.c:
    mbstrchr()
         - Make parameter c const. (DLR)
diff --git a/src/chars.c b/src/chars.c

index 05041bc4a4d5a7d4e1d892c1d71f1763ec5eb39b..340b682798ce11b26e3c63bcf832f66d2671fb2b 100644 (file)
--- a/src/chars.c
+++ b/src/chars.c
@@ -813,27 +813,7 @@ size_t mbstrnlen(const char *s, size_t maxlen)
         return strnlen(s, maxlen);
  }
  
-#ifndef NANO_TINY
-/* This function is equivalent to strpbrk(), except in that it scans the
- * string in reverse, starting at rev_start. */
-char *revstrpbrk(const char *s, const char *accept, const char
-       *rev_start)
-{
-    assert(s != NULL && accept != NULL && rev_start != NULL);
-
-    for (; rev_start >= s; rev_start--) {
-       const char *q = (*rev_start == '\0') ? NULL : strchr(accept,
-               *rev_start);
-
-       if (q != NULL)
-           return (char *)rev_start;
-    }
-
-    return NULL;
-}
-#endif /* !NANO_TINY */
-
-#ifndef DISABLE_JUSTIFY
+#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
  /* This function is equivalent to strchr() for multibyte strings. */
  char *mbstrchr(const char *s, const char *c)
  {
@@ -879,8 +859,75 @@ char *mbstrchr(const char *s, const char *c)
  #endif
         return strchr(s, *c);
  }
+#endif /* !NANO_TINY || !DISABLE_JUSTIFY */
  
-#ifdef ENABLE_NANORC
+#ifndef NANO_TINY
+/* This function is equivalent to strpbrk() for multibyte strings:  it
+ * returns a pointer to the first character of s that also occurs in
+ * accept, or NULL if there is no such character.  In UTF-8 mode, s is
+ * walked with move_mbright() and each position is looked up in accept
+ * with mbstrchr(); otherwise, the work is left to strpbrk(). */
+char *mbstrpbrk(const char *s, const char *accept)
+{
+    assert(s != NULL && accept != NULL);
+
+#ifdef ENABLE_UTF8
+    if (ISSET(USE_UTF8)) {
+       while (*s != '\0') {
+           if (mbstrchr(accept, s) != NULL)
+               return (char *)s;
+
+           s += move_mbright(s, 0);
+       }
+
+       return NULL;
+    } else
+#endif
+       return strpbrk(s, accept);
+}
+
+/* This function is equivalent to strpbrk(), except in that it scans the
+ * string in reverse, starting at rev_start and stopping at s.  It
+ * returns a pointer to the first byte it meets that is in accept, or
+ * NULL if there is none. */
+char *revstrpbrk(const char *s, const char *accept, const char
+       *rev_start)
+{
+    assert(s != NULL && accept != NULL && rev_start != NULL);
+
+    for (; rev_start >= s; rev_start--) {
+       /* strchr() treats the terminating NUL of accept as part of the
+        * string, so a NUL byte in s must never be looked up. */
+       const char *q = (*rev_start == '\0') ? NULL : strchr(accept,
+               *rev_start);
+
+       if (q != NULL)
+           return (char *)rev_start;
+    }
+
+    return NULL;
+}
+
+/* This function is equivalent to strpbrk() for multibyte strings,
+ * except in that it scans the string in reverse, starting at
+ * rev_start.  It returns NULL if nothing in accept is found. */
+char *mbrevstrpbrk(const char *s, const char *accept, const char
+       *rev_start)
+{
+    assert(s != NULL && accept != NULL && rev_start != NULL);
+
+#ifdef ENABLE_UTF8
+    if (ISSET(USE_UTF8)) {
+       while (rev_start >= s) {
+           if (*rev_start != '\0' &&
+               mbstrchr(accept, rev_start) != NULL)
+               return (char *)rev_start;
+           /* Stop at the head of s; we can't step left past it. */
+           if (rev_start == s)
+               break;
+
+           rev_start = s + move_mbleft(s, rev_start - s);
+       }
+
+       return NULL;
+    } else
+#endif
+       return revstrpbrk(s, accept, rev_start);
+}
+#endif /* !NANO_TINY */
+
+#if !defined(DISABLE_JUSTIFY) && defined(ENABLE_NANORC)
  /* Return TRUE if the string s contains one or more blank characters,
   * and FALSE otherwise. */
  bool has_blank_chars(const char *s)
@@ -926,8 +973,7 @@ bool has_blank_mbchars(const char *s)
  #endif
         return has_blank_chars(s);
  }
-#endif /* ENABLE_NANORC */
-#endif /* !DISABLE_JUSTIFY */
+#endif /* !DISABLE_JUSTIFY && ENABLE_NANORC */
  
  #ifdef ENABLE_UTF8
  /* Return TRUE if wc is valid Unicode, and FALSE otherwise. */
diff --git a/src/prompt.c b/src/prompt.c

index 86f31f5c0cc91cef384e2b40ee6f48c8da804583..cfb1d2a1b80aaed3e2e4b887a7ce1acd9c0f7c19 100644 (file)
--- a/src/prompt.c
+++ b/src/prompt.c
@@ -3,7 +3,7 @@
   *   prompt.c                                                             *
   *                                                                        *
   *   Copyright (C) 1999-2004 Chris Allegretta                             *
- *   Copyright (C) 2005 David Lawrence Ramsey                             *
+ *   Copyright (C) 2005-2006 David Lawrence Ramsey                        *
   *   This program is free software; you can redistribute it and/or modify *
   *   it under the terms of the GNU General Public License as published by *
   *   the Free Software Foundation; either version 2, or (at your option)  *
@@ -672,7 +672,7 @@ bool find_statusbar_bracket_match(bool reverse, const char
  {
      const char *rev_start = NULL, *found = NULL;
  
-    assert(strlen(bracket_set) == 2);
+    assert(mbstrlen(bracket_set) == 2);
  
      /* rev_start might end up 1 character before the start or after the
       * end of the line.  This won't be a problem because we'll skip over
@@ -687,8 +687,8 @@ bool find_statusbar_bracket_match(bool reverse, const char
          * is found. */
         found = ((rev_start > answer && *(rev_start - 1) == '\0') ||
                 rev_start < answer) ? NULL : (reverse ?
-               revstrpbrk(answer, bracket_set, rev_start) :
-               strpbrk(rev_start, bracket_set));
+               mbrevstrpbrk(answer, bracket_set, rev_start) :
+               mbstrpbrk(rev_start, bracket_set));
  
         /* We've found a potential match. */
         if (found != NULL)
@@ -711,48 +711,81 @@ bool find_statusbar_bracket_match(bool reverse, const char
  void do_statusbar_find_bracket(void)
  {
      size_t statusbar_x_save, pww_save;
-    const char *bracket_list = "()<>[]{}";
+    const char *bracket_list = "(<[{)>]}";
         /* The list of brackets we can find matches to. */
-    const char *pos;
+    const char *ch;
         /* The location in bracket_list of the bracket at the current
          * cursor position. */
-    char ch;
-       /* The bracket at the current cursor position. */
-    char wanted_ch;
-       /* The bracket complementing the bracket at the current cursor
-        * position. */
-    char bracket_set[3];
+    int ch_len;
+       /* The length of ch in bytes. */
+    const char *wanted_ch;
+       /* The location in bracket_list of the bracket complementing the
+        * bracket at the current cursor position. */
+    int wanted_ch_len;
+       /* The length of wanted_ch in bytes. */
+    char *bracket_set;
         /* The pair of characters in ch and wanted_ch. */
+    size_t bracket_halflist;
+       /* The number of characters in one half of bracket_list. */
      size_t count = 1;
         /* The initial bracket count. */
      bool reverse;
         /* The direction we search. */
+    char *found_ch;
+       /* The character we find. */
  
-    assert(strlen(bracket_list) % 2 == 0);
+    assert(mbstrlen(bracket_list) % 2 == 0);
  
-    ch = answer[statusbar_x];
+    ch = answer + statusbar_x;
  
-    if (ch == '\0' || (pos = strchr(bracket_list, ch)) == NULL)
+    if (ch == '\0' || (ch = mbstrchr(bracket_list, ch)) == NULL)
         return;
  
      /* Save where we are. */
      statusbar_x_save = statusbar_x;
      pww_save = statusbar_pww;
  
-    /* If we're on an opening bracket, we want to search forwards for a
-     * closing bracket, and if we're on a closing bracket, we want to
-     * search backwards for an opening bracket. */
-    reverse = ((pos - bracket_list) % 2 != 0);
-    wanted_ch = reverse ? *(pos - 1) : *(pos + 1);
-    bracket_set[0] = ch;
-    bracket_set[1] = wanted_ch;
-    bracket_set[2] = '\0';
+    /* If we're on an opening bracket, which must be in the first half
+     * of bracket_list, we want to search forwards for a closing
+     * bracket.  If we're on a closing bracket, which must be in the
+     * second half of bracket_list, we want to search backwards for an
+     * opening bracket. */
+    bracket_halflist = mbstrlen(bracket_list) / 2;
+    reverse = ((ch - bracket_list) > bracket_halflist);
+
+    /* If we're on an opening bracket, set wanted_ch to the character
+     * that's bracket_halflist characters after ch.  If we're on a
+     * closing bracket, set wanted_ch to the character that's
+     * bracket_halflist characters before ch. */
+    wanted_ch = ch;
+
+    while (bracket_halflist > 0) {
+       if (reverse)
+           wanted_ch = bracket_list + move_mbleft(bracket_list,
+               wanted_ch - bracket_list);
+       else
+           wanted_ch += move_mbright(wanted_ch, 0);
+
+       bracket_halflist--;
+    }
+
+    ch_len = parse_mbchar(ch, NULL, NULL);
+    wanted_ch_len = parse_mbchar(wanted_ch, NULL, NULL);
+
+    /* Fill bracket_set in with the values of ch and wanted_ch. */
+    bracket_set = charalloc((mb_cur_max() * 2) + 1);
+    strncpy(bracket_set, ch, ch_len);
+    strncpy(bracket_set + ch_len, wanted_ch, wanted_ch_len);
+    null_at(&bracket_set, ch_len + wanted_ch_len);
+
+    found_ch = charalloc(mb_cur_max() + 1);
  
      while (TRUE) {
         if (find_statusbar_bracket_match(reverse, bracket_set)) {
             /* If we found an identical bracket, increment count.  If we
              * found a complementary bracket, decrement it. */
-           count += (answer[statusbar_x] == ch) ? 1 : -1;
+           parse_mbchar(answer + statusbar_x, found_ch, NULL);
+           count += (strncmp(found_ch, ch, ch_len) == 0) ? 1 : -1;
  
             /* If count is zero, we've found a matching bracket.  Update
              * the statusbar prompt and get out. */
@@ -769,6 +802,10 @@ void do_statusbar_find_bracket(void)
             break;
         }
      }
+
+    /* Clean up. */
+    free(bracket_set);
+    free(found_ch);
  }
  #endif /* !NANO_TINY */
  
diff --git a/src/proto.h b/src/proto.h

index f0ab86d0d731c3182cedacc2e57dc1c5d0c229f9..29f76b2e4c0048ed77b18d030ff644c0719f4334 100644 (file)
--- a/src/proto.h
+++ b/src/proto.h
@@ -196,17 +196,20 @@ size_t mbstrlen(const char *s);
  size_t nstrnlen(const char *s, size_t maxlen);
  #endif
  size_t mbstrnlen(const char *s, size_t maxlen);
+#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
+char *mbstrchr(const char *s, const char *c);
+#endif
  #ifndef NANO_TINY
+char *mbstrpbrk(const char *s, const char *accept);
  char *revstrpbrk(const char *s, const char *accept, const char
         *rev_start);
+char *mbrevstrpbrk(const char *s, const char *accept, const char
+       *rev_start);
  #endif
-#ifndef DISABLE_JUSTIFY
-char *mbstrchr(const char *s, const char *c);
-#ifdef ENABLE_NANORC
+#if !defined(DISABLE_JUSTIFY) && defined(ENABLE_NANORC)
  bool has_blank_chars(const char *s);
  bool has_blank_mbchars(const char *s);
  #endif
-#endif
  #ifdef ENABLE_UTF8
  bool is_valid_unicode(wchar_t wc);
  #endif
diff --git a/src/search.c b/src/search.c

index 7e46b7d53e55e26a5f587793d7f7a9855e7c8d94..b80a4e89c006d27e7e0a1faa7417095a8f96a2ed 100644 (file)
--- a/src/search.c
+++ b/src/search.c
@@ -3,7 +3,7 @@
   *   search.c                                                             *
   *                                                                        *
   *   Copyright (C) 1999-2004 Chris Allegretta                             *
- *   Copyright (C) 2005 David Lawrence Ramsey                             *
+ *   Copyright (C) 2005-2006 David Lawrence Ramsey                        *
   *   This program is free software; you can redistribute it and/or modify *
   *   it under the terms of the GNU General Public License as published by *
   *   the Free Software Foundation; either version 2, or (at your option)  *
@@ -276,9 +276,9 @@ bool findnextstr(
      filestruct *fileptr = openfile->current;
      const char *rev_start = NULL, *found = NULL;
      size_t found_len;
-       /* The length of the match we found. */
+       /* The length of the match we find. */
      size_t current_x_find = 0;
-       /* The location in the current line of the match we found. */
+       /* The location in the current line of the match we find. */
      ssize_t current_y_find = openfile->current_y;
  
      /* rev_start might end up 1 character before the start or after the
@@ -1076,7 +1076,7 @@ bool find_bracket_match(bool reverse, const char *bracket_set)
      const char *rev_start = NULL, *found = NULL;
      ssize_t current_y_find = openfile->current_y;
  
-    assert(strlen(bracket_set) == 2);
+    assert(mbstrlen(bracket_set) == 2);
  
      /* rev_start might end up 1 character before the start or after the
       * end of the line.  This won't be a problem because we'll skip over
@@ -1091,8 +1091,8 @@ bool find_bracket_match(bool reverse, const char *bracket_set)
      while (TRUE) {
         found = ((rev_start > fileptr->data && *(rev_start - 1) ==
                 '\0') || rev_start < fileptr->data) ? NULL : (reverse ?
-               revstrpbrk(fileptr->data, bracket_set, rev_start) :
-               strpbrk(rev_start, bracket_set));
+               mbrevstrpbrk(fileptr->data, bracket_set, rev_start) :
+               mbstrpbrk(rev_start, bracket_set));
  
         /* We've found a potential match. */
         if (found != NULL)
@@ -1130,28 +1130,34 @@ void do_find_bracket(void)
  {
      filestruct *current_save;
      size_t current_x_save, pww_save;
-    const char *bracket_list = "()<>[]{}";
+    const char *bracket_list = "(<[{)>]}";
         /* The list of brackets we can find matches to. */
-    const char *pos;
+    const char *ch;
         /* The location in bracket_list of the bracket at the current
          * cursor position. */
-    char ch;
-       /* The bracket at the current cursor position. */
-    char wanted_ch;
-       /* The bracket complementing the bracket at the current cursor
-        * position. */
-    char bracket_set[3];
+    int ch_len;
+       /* The length of ch in bytes. */
+    const char *wanted_ch;
+       /* The location in bracket_list of the bracket complementing the
+        * bracket at the current cursor position. */
+    int wanted_ch_len;
+       /* The length of wanted_ch in bytes. */
+    char *bracket_set;
         /* The pair of characters in ch and wanted_ch. */
+    size_t bracket_halflist;
+       /* The number of characters in one half of bracket_list. */
      size_t count = 1;
         /* The initial bracket count. */
      bool reverse;
         /* The direction we search. */
+    char *found_ch;
+       /* The character we find. */
  
-    assert(strlen(bracket_list) % 2 == 0);
+    assert(mbstrlen(bracket_list) % 2 == 0);
  
-    ch = openfile->current->data[openfile->current_x];
+    ch = openfile->current->data + openfile->current_x;
  
-    if (ch == '\0' || (pos = strchr(bracket_list, ch)) == NULL) {
+    if (*ch == '\0' || (ch = mbstrchr(bracket_list, ch)) == NULL) {
         statusbar(_("Not a bracket"));
         return;
      }
@@ -1161,21 +1167,48 @@ void do_find_bracket(void)
      current_x_save = openfile->current_x;
      pww_save = openfile->placewewant;
  
-    /* If we're on an opening bracket, we want to search forwards for a
-     * closing bracket, and if we're on a closing bracket, we want to
-     * search backwards for an opening bracket. */
-    reverse = ((pos - bracket_list) % 2 != 0);
-    wanted_ch = reverse ? *(pos - 1) : *(pos + 1);
-    bracket_set[0] = ch;
-    bracket_set[1] = wanted_ch;
-    bracket_set[2] = '\0';
+    /* If we're on an opening bracket, which must be in the first half
+     * of bracket_list, we want to search forwards for a closing
+     * bracket.  If we're on a closing bracket, which must be in the
+     * second half of bracket_list (i.e. at or past its midpoint), we
+     * want to search backwards for an opening bracket. */
+    bracket_halflist = mbstrlen(bracket_list) / 2;
+    reverse = ((ch - bracket_list) >= bracket_halflist);
+
+    /* If we're on an opening bracket, set wanted_ch to the character
+     * that's bracket_halflist characters after ch.  If we're on a
+     * closing bracket, set wanted_ch to the character that's
+     * bracket_halflist characters before ch. */
+    wanted_ch = ch;
+
+    while (bracket_halflist > 0) {
+       if (reverse)
+           wanted_ch = bracket_list + move_mbleft(bracket_list,
+               wanted_ch - bracket_list);
+       else
+           wanted_ch += move_mbright(wanted_ch, 0);
+
+       bracket_halflist--;
+    }
+
+    ch_len = parse_mbchar(ch, NULL, NULL);
+    wanted_ch_len = parse_mbchar(wanted_ch, NULL, NULL);
+
+    /* Fill bracket_set in with the values of ch and wanted_ch. */
+    bracket_set = charalloc((mb_cur_max() * 2) + 1);
+    strncpy(bracket_set, ch, ch_len);
+    strncpy(bracket_set + ch_len, wanted_ch, wanted_ch_len);
+    null_at(&bracket_set, ch_len + wanted_ch_len);
+
+    found_ch = charalloc(mb_cur_max() + 1);
  
      while (TRUE) {
         if (find_bracket_match(reverse, bracket_set)) {
             /* If we found an identical bracket, increment count.  If we
              * found a complementary bracket, decrement it. */
-           count += (openfile->current->data[openfile->current_x] ==
-               ch) ? 1 : -1;
+           parse_mbchar(openfile->current->data + openfile->current_x,
+               found_ch, NULL);
+           count += (strncmp(found_ch, ch, ch_len) == 0) ? 1 : -1;
  
             /* If count is zero, we've found a matching bracket.  Update
              * the screen and get out. */
@@ -1193,6 +1226,10 @@ void do_find_bracket(void)
             break;
         }
      }
+
+    /* Clean up. */
+    free(bracket_set);
+    free(found_ch);
  }
  
  #ifdef ENABLE_NANORC
author	David Lawrence Ramsey <pooka109@gmail.com>
	Fri, 6 Jan 2006 07:10:30 +0000 (07:10 +0000)
committer	David Lawrence Ramsey <pooka109@gmail.com>
	Fri, 6 Jan 2006 07:10:30 +0000 (07:10 +0000)
ChangeLog		patch \| blob \| history
src/chars.c		patch \| blob \| history
src/prompt.c		patch \| blob \| history
src/proto.h		patch \| blob \| history
src/search.c		patch \| blob \| history