this is disabled when NANO_SMALL is defined. New functions
do_word_count() and do_next_word_void(); changes to
shortcut_init() and do_next_word(). (DLR)
+ - Detect words more accurately by taking punctuation into
+ account, and convert all word-detecting functions to use the
+ same wrapper function for ease of maintenance. New functions
+ is_punct_mbchar() and is_word_mbchar(); changes to
+ do_next_word(), do_prev_word(), is_whole_word(),
+ do_statusbar_next_word(), and do_statusbar_prev_word(). (DLR)
- chars.c:
make_mbstring()
- Change erroneous ENABLE_EXTRA #ifdef to NANO_EXTRA to fix a
Weinehall)
- Don't refer to the built-in file browser as crappy anymore.
(DLR)
+ - Check for iswpunct(). (DLR)
- doc/faq.html:
- Update the question about the FAQ to mention the current
maintainer. (DLR)
AC_CHECK_FUNCS(snprintf vsnprintf isblank strcasecmp strncasecmp strcasestr strnlen getline getdelim)
if test x$enable_utf8 != xno; then
- AC_CHECK_FUNCS(iswalnum mblen mbtowc wctomb wcwidth iswspace iswblank)
+ AC_CHECK_FUNCS(iswalnum iswblank iswpunct iswspace mblen mbtowc wctomb wcwidth)
fi
if test x$ac_cv_func_snprintf = xno || test x$ac_cv_func_vsnprintf = xno; then
if test x$enable_utf8 != xno && \
test x$CURSES_LIB_WIDE = xyes && \
test x$ac_cv_func_iswalnum = xyes && \
+ test x$ac_cv_func_iswpunct = xyes && \
+ (test x$ac_cv_func_iswblank = xyes || test x$ac_cv_func_iswspace = xyes) && \
test x$ac_cv_func_mblen = xyes && \
test x$ac_cv_func_mbtowc = xyes && \
test x$ac_cv_func_wctomb = xyes && \
- test x$ac_cv_func_wcwidth = xyes && \
- (test x$ac_cv_func_iswspace = xyes || test x$ac_cv_func_iswblank = xyes); then
- AC_DEFINE(NANO_WIDE, 1, [Define this if your system has sufficient wide character support (a wide curses library, iswalnum(), iswspace() or iswblank(), mblen(), mbtowc(), wctomb(), and wcwidth()).])
+ test x$ac_cv_func_wcwidth = xyes; then
+ AC_DEFINE(NANO_WIDE, 1, [Define this if your system has sufficient wide character support (a wide curses library, iswalnum(), iswpunct(), iswblank() or iswspace(), mblen(), mbtowc(), wctomb(), and wcwidth()).])
else
if test x$enable_utf8 = xyes; then
AC_MSG_ERROR([
return is_cntrl_char((unsigned char)*c);
}
+/* This function is equivalent to ispunct() for multibyte characters.
+ * c must not be NULL.  When NANO_WIDE is defined and the NO_UTF8 flag
+ * is not set, the character starting at c is converted with mbtowc()
+ * (reading at most MB_CUR_MAX bytes) and tested with iswpunct(); if
+ * the conversion does not yield a positive length, the value of the
+ * first byte of c is passed to iswpunct() instead.  In all other
+ * cases, only the first byte of c is tested, with ispunct(). */
+bool is_punct_mbchar(const char *c)
+{
+ assert(c != NULL);
+
+#ifdef NANO_WIDE
+ if (!ISSET(NO_UTF8)) {
+ wchar_t wc;
+ int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
+
+ if (c_mb_len <= 0) { /* Invalid sequence or null character. */
+ mbtowc(NULL, NULL, 0); /* Reset mbtowc()'s conversion state. */
+ wc = (unsigned char)*c; /* Fall back to the raw first byte. */
+ }
+
+ return iswpunct(wc);
+ } else
+#endif
+ return ispunct((unsigned char)*c);
+}
+
+/* This function returns TRUE for a multibyte character found in a word
+ * (currently only an alphanumeric or punctuation character) and FALSE
+ * otherwise.  It is the logical OR of is_alnum_mbchar() and
+ * is_punct_mbchar() applied to c; the punctuation test is only made
+ * when the alphanumeric test fails.
+ *
+ * NOTE(review): because punctuation counts as a "word" character here,
+ * callers that look for word boundaries with this function will not
+ * see a boundary between a word and adjacent punctuation (e.g. "foo"
+ * followed by "."), which also affects whole-word matching -- confirm
+ * that this is the intended behavior. */
+bool is_word_mbchar(const char *c)
+{
+ return is_alnum_mbchar(c) || is_punct_mbchar(c);
+}
+
/* c is a control character. It displays as ^@, ^?, or ^[ch], where ch
* is c + 64. We return that character. */
char control_rep(char c)
/* If we've found it, stop moving forward through the current
* line. */
- if (!is_alnum_mbchar(char_mb))
+ if (!is_word_mbchar(char_mb))
break;
/* If we haven't found it, then we've started on a word, so set
* started_on_word to TRUE. */
/* If we've found it, stop moving forward through the
* current line. */
- if (is_alnum_mbchar(char_mb))
+ if (is_word_mbchar(char_mb))
break;
current_x += char_mb_len;
/* If we've found it, stop moving backward through the current
* line. */
- if (!is_alnum_mbchar(char_mb))
+ if (!is_word_mbchar(char_mb))
break;
if (current_x == 0)
/* If we've found it, stop moving backward through the
* current line. */
- if (is_alnum_mbchar(char_mb))
+ if (is_word_mbchar(char_mb))
break;
if (current_x == 0)
/* If we've found it, stop moving backward through the
* current line. */
- if (!is_alnum_mbchar(char_mb))
+ if (!is_word_mbchar(char_mb))
break;
if (current_x == 0)
bool is_cntrl_wchar(wint_t wc);
#endif
bool is_cntrl_mbchar(const char *c);
+bool is_punct_mbchar(const char *c);
+bool is_word_mbchar(const char *c);
char control_rep(char c);
#ifdef NANO_WIDE
wchar_t control_wrep(wchar_t c);
parse_mbchar(buf + word_end, r, NULL, NULL);
/* If we're at the beginning of the line or the character before the
- * word isn't an alphanumeric character, and if we're at the end of
- * the line or the character after the word isn't an alphanumeric
- * character, we have a whole word. */
- retval = (pos == 0 || !is_alnum_mbchar(p)) &&
- (word_end == strlen(buf) || !is_alnum_mbchar(r));
+ * word isn't a "word" character, and if we're at the end of the
+ * line or the character after the word isn't a "word" character, we
+ * have a whole word. */
+ retval = (pos == 0 || !is_word_mbchar(p)) &&
+ (word_end == strlen(buf) || !is_word_mbchar(r));
free(p);
free(r);
/* If we've found it, stop moving forward through the current
* line. */
- if (!is_alnum_mbchar(char_mb))
+ if (!is_word_mbchar(char_mb))
break;
statusbar_x += char_mb_len;
/* If we've found it, stop moving forward through the current
* line. */
- if (is_alnum_mbchar(char_mb))
+ if (is_word_mbchar(char_mb))
break;
statusbar_x += char_mb_len;
/* If we've found it, stop moving backward through the current
* line. */
- if (!is_alnum_mbchar(char_mb))
+ if (!is_word_mbchar(char_mb))
break;
if (statusbar_x == 0)
/* If we've found it, stop moving backward through the current
* line. */
- if (is_alnum_mbchar(char_mb))
+ if (is_word_mbchar(char_mb))
break;
if (statusbar_x == 0)
/* If we've found it, stop moving backward through the
* current line. */
- if (!is_alnum_mbchar(char_mb))
+ if (!is_word_mbchar(char_mb))
break;
if (statusbar_x == 0)