From 42abfe057e7880fb0fb15538b562d3d482b15b7d Mon Sep 17 00:00:00 2001 From: David Lawrence Ramsey Date: Sat, 22 Jan 2005 18:24:16 +0000 Subject: [PATCH] add multibyte equivalent of strcasestr(), plus some miscellaneous cleanups git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2293 35c25a1d-7b9e-4130-9fde-d3aeb78583b8 --- ChangeLog | 12 ++--- src/chars.c | 126 +++++++++++++++++++++++++++++++++++----------------- src/nano.h | 8 +--- src/proto.h | 1 + src/utils.c | 2 +- 5 files changed, 96 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7c64b4a3..eaaf9b1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -97,8 +97,8 @@ CVS code - is_alnum_mbchar(), is_alnum_wchar(), is_blank_mbchar(), is_blank_wchar(), is_cntrl_mbchar(), is_cntrl_wchar(), control_mbrep(), control_wrep(), mbwidth(), mb_cur_max(), - make_mbchar(), mbstrnlen(), mbstrcasecmp(), and - mbstrncasecmp(); changes to is_byte() (moved to chars.c), + make_mbchar(), mbstrnlen(), mbstrcasecmp(), mbstrncasecmp(), + and mbstrcasestr(); changes to is_byte() (moved to chars.c), is_blank_char() (moved to chars.c), is_cntrl_char() (moved to chars.c), nstricmp() (renamed nstrcasecmp() and moved to chars.c), nstrnicmp() (renamed nstrncasecmp() and moved to @@ -110,8 +110,8 @@ CVS code - moved to chars.c), move_right() (renamed move_mbright() and moved to chars.c), do_home(), do_verbatim_input(), do_delete(), do_tab(), do_next_word(), do_prev_word(), - do_input(), do_output(), get_buffer(), unget_input(), - unget_kbinput(), get_input(), parse_kbinput(), + do_input(), do_output(), strstrwrapper(), get_buffer(), + unget_input(), unget_kbinput(), get_input(), parse_kbinput(), unparse_kbinput(), parse_verbatim_kbinput(), do_statusbar_input(), do_statusbar_home(), do_statusbar_verbatim_kbinput(), do_statusbar_output(), and @@ -137,7 +137,9 @@ CVS code - - Use void instead of RETSIGTYPE, as signal handlers are supposed to return void anyway. 
Also, the value of RETSIGTYPE is sometimes misdetected as int, leading to compilation - warnings or errors. (David Benbennick) + warnings or errors. Changes to cancel_fork(), + handle_hipterm(), do_suspend(), and do_cont(). (David + Benbennick) - cut.c: do_cut_text() - If keep_cutbuffer is FALSE, only blow away the text in the diff --git a/src/chars.c b/src/chars.c index f1dc5034..a68f8fad 100644 --- a/src/chars.c +++ b/src/chars.c @@ -30,13 +30,14 @@ #include "proto.h" #include "nano.h" -#if defined(HAVE_WCHAR_H) && defined(NANO_WIDE) +#ifdef NANO_WIDE +#ifdef HAVE_WCHAR_H #include <wchar.h> #endif - -#if defined(HAVE_WCTYPE_H) && defined(NANO_WIDE) +#ifdef HAVE_WCTYPE_H #include <wctype.h> #endif +#endif /* Return TRUE if the value of c is in byte range, and FALSE * otherwise. */ @@ -262,12 +263,11 @@ int mbwidth(const char *c) /* Return the maximum width in bytes of a multibyte character. */ int mb_cur_max(void) { + return #ifdef NANO_WIDE - if (!ISSET(NO_UTF8)) - return MB_CUR_MAX; - else + !ISSET(NO_UTF8) ? MB_CUR_MAX : #endif - return 1; + 1; } /* Convert the value in chr to a multibyte character with the same @@ -348,7 +348,11 @@ int parse_mbchar(const char *buf, char *chr * and the width in columns of its visible equivalent as * returned by control_rep(). */ else if (is_cntrl_mbchar(buf)) { - char *ctrl_buf_mb = charalloc(mb_cur_max()); + char *ctrl_buf_mb = +#ifdef NANO_WIDE + !ISSET(NO_UTF8) ? 
charalloc(MB_CUR_MAX) : +#endif + charalloc(1); int ctrl_buf_mb_len; (*col)++; @@ -452,19 +456,16 @@ int mbstrcasecmp(const char *s1, const char *s2) { #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) { - char *s1_mb = charalloc(mb_cur_max()); - char *s2_mb = charalloc(mb_cur_max()); - int s1_mb_len, s2_mb_len; + char *s1_mb = charalloc(MB_CUR_MAX); + char *s2_mb = charalloc(MB_CUR_MAX); wchar_t ws1, ws2; assert(s1 != NULL && s2 != NULL); while (*s1 != '\0' && *s2 != '\0') { - s1_mb_len = parse_mbchar(s1, s1_mb -#ifdef NANO_WIDE - , NULL -#endif - , NULL); + int s1_mb_len, s2_mb_len; + + s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL); if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) { mbtowc(NULL, NULL, 0); @@ -472,11 +473,7 @@ int mbstrcasecmp(const char *s1, const char *s2) } - s2_mb_len = parse_mbchar(s2, s2_mb -#ifdef NANO_WIDE - , NULL -#endif - , NULL); + s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL); if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) { mbtowc(NULL, NULL, 0); @@ -529,30 +526,23 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n) { #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) { - char *s1_mb = charalloc(mb_cur_max()); - char *s2_mb = charalloc(mb_cur_max()); - int s1_mb_len, s2_mb_len; + char *s1_mb = charalloc(MB_CUR_MAX); + char *s2_mb = charalloc(MB_CUR_MAX); wchar_t ws1, ws2; assert(s1 != NULL && s2 != NULL); while (n > 0 && *s1 != '\0' && *s2 != '\0') { - s1_mb_len = parse_mbchar(s1, s1_mb -#ifdef NANO_WIDE - , NULL -#endif - , NULL); + int s1_mb_len, s2_mb_len; + + s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL); if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) { mbtowc(NULL, NULL, 0); ws1 = (unsigned char)*s1_mb; } - s2_mb_len = parse_mbchar(s2, s2_mb -#ifdef NANO_WIDE - , NULL -#endif - , NULL); + s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL); if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) { mbtowc(NULL, NULL, 0); @@ -602,6 +592,66 @@ const char *nstrcasestr(const char *haystack, const char *needle) } #endif +/* This function is equivalent to strcasestr() for multibyte 
strings. */ +const char *mbstrcasestr(const char *haystack, const char *needle) +{ +#ifdef NANO_WIDE + if (!ISSET(NO_UTF8)) { + char *p_mb = charalloc(MB_CUR_MAX); + char *q_mb = charalloc(MB_CUR_MAX); + wchar_t wp, wq; + bool found_needle = FALSE; + + assert(haystack != NULL && needle != NULL); + + while (*haystack != '\0') { + const char *p = haystack, *q = needle; + int p_mb_len, q_mb_len; + + while (*q != '\0') { + p_mb_len = parse_mbchar(p, p_mb, NULL, NULL); + + if (mbtowc(&wp, p_mb, p_mb_len) <= 0) { + mbtowc(NULL, NULL, 0); + wp = (unsigned char)*p; + } + + q_mb_len = parse_mbchar(q, q_mb, NULL, NULL); + + if (mbtowc(&wq, q_mb, q_mb_len) <= 0) { + mbtowc(NULL, NULL, 0); + wq = (unsigned char)*q; + } + + if (towlower(wp) != towlower(wq)) + break; + + p += p_mb_len; + q += q_mb_len; + } + + if (*q == '\0') { + found_needle = TRUE; + break; + } + + haystack += parse_mbchar(haystack, NULL, NULL, NULL); + } + + free(p_mb); + free(q_mb); + + return found_needle ? haystack : NULL; + } else +#endif + return +#ifdef HAVE_STRCASESTR + strcasestr(haystack, needle); +#else + nstrcasestr(haystack, needle); +#endif +} + #ifndef NANO_SMALL /* This function is equivalent to strstr(), except in that it scans the * string in reverse. */ @@ -667,15 +717,11 @@ size_t mbstrnlen(const char *s, size_t maxlen) #ifdef NANO_WIDE if (!ISSET(NO_UTF8)) { size_t n = 0; - char *s_mb = charalloc(mb_cur_max()); + char *s_mb = charalloc(MB_CUR_MAX); int s_mb_len; while (*s != '\0') { - s_mb_len = parse_mbchar(s + n, s_mb -#ifdef NANO_WIDE - , NULL -#endif - , NULL); + s_mb_len = parse_mbchar(s + n, s_mb, NULL, NULL); if (maxlen == 0) break; diff --git a/src/nano.h b/src/nano.h index b8814d5a..345839a6 100644 --- a/src/nano.h +++ b/src/nano.h @@ -100,16 +100,10 @@ #endif #endif -/* If no strcasestr(), getdelim(), or getline(), use the versions we - * have. */ -#ifndef HAVE_STRCASESTR -#define strcasestr nstrcasestr -#endif - +/* If no getdelim() or getline(), use the versions we have. 
*/ #ifndef HAVE_GETDELIM #define getdelim ngetdelim #endif - #ifndef HAVE_GETLINE #define getline ngetline #endif diff --git a/src/proto.h b/src/proto.h index 0b1581e9..dd078580 100644 --- a/src/proto.h +++ b/src/proto.h @@ -193,6 +193,7 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n); #ifndef HAVE_STRCASESTR const char *nstrcasestr(const char *haystack, const char *needle); #endif +const char *mbstrcasestr(const char *haystack, const char *needle); #ifndef NANO_SMALL const char *revstrstr(const char *haystack, const char *needle, const char *rev_start); diff --git a/src/utils.c b/src/utils.c index 9f7d2005..cbd67820 100644 --- a/src/utils.c +++ b/src/utils.c @@ -245,7 +245,7 @@ const char *strstrwrapper(const char *haystack, const char *needle, else if (ISSET(REVERSE_SEARCH)) return revstrcasestr(haystack, needle, start); #endif - return strcasestr(start, needle); + return mbstrcasestr(start, needle); } /* This is a wrapper for the perror() function. The wrapper takes care -- 2.39.5