From: David Lawrence Ramsey Date: Mon, 13 Jun 2005 02:40:04 +0000 (+0000) Subject: detect words more accurately by taking punctuation into account, and X-Git-Tag: v1.3.8~163 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=67287088dcdfa50a22f05a87063ab9c9a87bf701;p=nano.git detect words more accurately by taking punctuation into account, and convert all word-detecting functions to use the same wrapper function for ease of maintenance git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2640 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- diff --git a/ChangeLog b/ChangeLog index 1ce72e28..afe72d5b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -65,6 +65,12 @@ CVS code - this is disabled when NANO_SMALL is defined. New functions do_word_count() and do_next_word_void(); changes to shortcut_init() and do_next_word(). (DLR) + - Detect words more accurately by taking punctuation into + account, and convert all word-detecting functions to use the + same wrapper function for ease of maintenance. New functions + is_punct_mbchar() and is_word_mbchar(); changes to + do_next_word(), do_prev_word(), is_whole_word(), + do_statusbar_next_word(), and do_statusbar_prev_word(). (DLR) - chars.c: make_mbstring() - Change erroneous ENABLE_EXTRA #ifdef to NANO_EXTRA to fix a @@ -233,6 +239,7 @@ CVS code - Weinehall) - Don't refer to the built-in file browser as crappy anymore. (DLR) + - Check for iswpunct(). (DLR) - doc/faq.html: - Update the question about the FAQ to mention the current maintainer. (DLR) diff --git a/configure.ac b/configure.ac index d10581eb..ccbc9c36 100644 --- a/configure.ac +++ b/configure.ac @@ -399,7 +399,7 @@ dnl Checks for functions. AC_CHECK_FUNCS(snprintf vsnprintf isblank strcasecmp strncasecmp strcasestr strnlen getline getdelim) if test x$enable_utf8 != xno; then - AC_CHECK_FUNCS(iswalnum mblen mbtowc wctomb wcwidth iswspace iswblank) + AC_CHECK_FUNCS(iswalnum iswblank iswpunct iswspace mblen mbtowc wctomb wcwidth) fi if test x$ac_cv_func_snprintf = xno || test x$ac_cv_func_vsnprintf = xno; then @@ -472,12 +472,13 @@ fi if test x$enable_utf8 != xno && \ test x$CURSES_LIB_WIDE = xyes && \ test x$ac_cv_func_iswalnum = xyes && \ + test x$ac_cv_func_iswpunct = xyes && \ + (test x$ac_cv_func_iswblank = xyes || test x$ac_cv_func_iswspace = xyes) && \ test x$ac_cv_func_mblen = xyes && \ test x$ac_cv_func_mbtowc = xyes && \ test x$ac_cv_func_wctomb = xyes && \ - test x$ac_cv_func_wcwidth = xyes && \ - (test x$ac_cv_func_iswspace = xyes || test x$ac_cv_func_iswblank = xyes); then - AC_DEFINE(NANO_WIDE, 1, [Define this if your system has sufficient wide character support (a wide curses library, iswalnum(), iswspace() or iswblank(), mblen(), mbtowc(), wctomb(), and wcwidth()).]) + test x$ac_cv_func_wcwidth = xyes; then + AC_DEFINE(NANO_WIDE, 1, [Define this if your system has sufficient wide character support (a wide curses library, iswalnum(), iswpunct(), iswblank() or iswspace(), mblen(), mbtowc(), wctomb(), and wcwidth()).]) else if test x$enable_utf8 = xyes; then AC_MSG_ERROR([ diff --git a/src/chars.c b/src/chars.c index 880b034f..57e1a71c 100644 --- a/src/chars.c +++ b/src/chars.c @@ -146,6 +146,35 @@ bool is_cntrl_mbchar(const char *c) return is_cntrl_char((unsigned char)*c); } +/* This function is equivalent to ispunct() for multibyte characters. */ +bool is_punct_mbchar(const char *c) +{ + assert(c != NULL); + +#ifdef NANO_WIDE + if (!ISSET(NO_UTF8)) { + wchar_t wc; + int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX); + + if (c_mb_len <= 0) { + mbtowc(NULL, NULL, 0); + wc = (unsigned char)*c; + } + + return iswpunct(wc); + } else +#endif + return ispunct((unsigned char)*c); +} + +/* This function returns TRUE for a multibyte character found in a word + * (currently only an alphanumeric or punctuation character) and FALSE + * otherwise. */ +bool is_word_mbchar(const char *c) +{ + return is_alnum_mbchar(c) || is_punct_mbchar(c); +} + /* c is a control character. It displays as ^@, ^?, or ^[ch], where ch * is c + 64. We return that character. */ char control_rep(char c) diff --git a/src/nano.c b/src/nano.c index e49a24df..fb3f523f 100644 --- a/src/nano.c +++ b/src/nano.c @@ -1468,7 +1468,7 @@ bool do_next_word(bool allow_update) /* If we've found it, stop moving forward through the current * line. */ - if (!is_alnum_mbchar(char_mb)) + if (!is_word_mbchar(char_mb)) break; /* If we haven't found it, then we've started on a word, so set * started_on_word to TRUE. */ @@ -1489,7 +1489,7 @@ bool do_next_word(bool allow_update) /* If we've found it, stop moving forward through the * current line. */ - if (is_alnum_mbchar(char_mb)) + if (is_word_mbchar(char_mb)) break; current_x += char_mb_len; @@ -1546,7 +1546,7 @@ void do_prev_word(void) /* If we've found it, stop moving backward through the current * line. */ - if (!is_alnum_mbchar(char_mb)) + if (!is_word_mbchar(char_mb)) break; if (current_x == 0) @@ -1569,7 +1569,7 @@ void do_prev_word(void) /* If we've found it, stop moving backward through the * current line. */ - if (is_alnum_mbchar(char_mb)) + if (is_word_mbchar(char_mb)) break; if (current_x == 0) @@ -1608,7 +1608,7 @@ void do_prev_word(void) /* If we've found it, stop moving backward through the * current line. */ - if (!is_alnum_mbchar(char_mb)) + if (!is_word_mbchar(char_mb)) break; if (current_x == 0) diff --git a/src/proto.h b/src/proto.h index f180a8c1..7ce99ec4 100644 --- a/src/proto.h +++ b/src/proto.h @@ -173,6 +173,8 @@ bool is_cntrl_char(int c); bool is_cntrl_wchar(wint_t wc); #endif bool is_cntrl_mbchar(const char *c); +bool is_punct_mbchar(const char *c); +bool is_word_mbchar(const char *c); char control_rep(char c); #ifdef NANO_WIDE wchar_t control_wrep(wchar_t c); diff --git a/src/search.c b/src/search.c index 45ec4f6f..e739e997 100644 --- a/src/search.c +++ b/src/search.c @@ -273,11 +273,11 @@ bool is_whole_word(size_t pos, const char *buf, const char *word) parse_mbchar(buf + word_end, r, NULL, NULL); /* If we're at the beginning of the line or the character before the - * word isn't an alphanumeric character, and if we're at the end of - * the line or the character after the word isn't an alphanumeric - * character, we have a whole word. */ - retval = (pos == 0 || !is_alnum_mbchar(p)) && - (word_end == strlen(buf) || !is_alnum_mbchar(r)); + * word isn't a "word" character, and if we're at the end of the + * line or the character after the word isn't a "word" character, we + * have a whole word. */ + retval = (pos == 0 || !is_word_mbchar(p)) && + (word_end == strlen(buf) || !is_word_mbchar(r)); free(p); free(r); diff --git a/src/winio.c b/src/winio.c index 85307bad..8f2d690b 100644 --- a/src/winio.c +++ b/src/winio.c @@ -1917,7 +1917,7 @@ void do_statusbar_next_word(void) /* If we've found it, stop moving forward through the current * line. */ - if (!is_alnum_mbchar(char_mb)) + if (!is_word_mbchar(char_mb)) break; statusbar_x += char_mb_len; @@ -1933,7 +1933,7 @@ void do_statusbar_next_word(void) /* If we've found it, stop moving forward through the current * line. */ - if (is_alnum_mbchar(char_mb)) + if (is_word_mbchar(char_mb)) break; statusbar_x += char_mb_len; @@ -1960,7 +1960,7 @@ void do_statusbar_prev_word(void) /* If we've found it, stop moving backward through the current * line. */ - if (!is_alnum_mbchar(char_mb)) + if (!is_word_mbchar(char_mb)) break; if (statusbar_x == 0) @@ -1982,7 +1982,7 @@ void do_statusbar_prev_word(void) /* If we've found it, stop moving backward through the current * line. */ - if (is_alnum_mbchar(char_mb)) + if (is_word_mbchar(char_mb)) break; if (statusbar_x == 0) @@ -2005,7 +2005,7 @@ void do_statusbar_prev_word(void) /* If we've found it, stop moving backward through the * current line. */ - if (!is_alnum_mbchar(char_mb)) + if (!is_word_mbchar(char_mb)) break; if (statusbar_x == 0)