- color.c:
- Remove unneeded string.h and fcntl.h includes. (DLR)
- chars.c:
+ mbrep()
+ - New function, the equivalent of control_mbrep() for non-control
+ characters. (DLR)
+ parse_mbchar()
+ - Remove now-unneeded bad_chr parameter. (DLR)
mbstrchr()
- Don't count matches between valid and invalid multibyte
sequences anymore, for consistency. (DLR)
(DLR)
- Move stdlib.h, dirent.h, regex.h, and assert.h includes here,
as every source file needs them. (DLR)
- proto.h:
- - Add declarations for bad_mbchar and bad_mbchar_len, so that we
- can use them in display_string() as well as chars.c. (DLR)
- rcfile.c:
nregcomp()
- Return TRUE when the compilation succeeds and FALSE otherwise,
the number of lines and characters in the file or selection,
as wc does. (DLR)
- winio.c:
+ display_string()
+ - Instead of using parse_mbchar()'s bad_chr parameter, use
+ mbrep() to get the representation of a bad character. (DLR)
edit_redraw(), edit_refresh()
- Clean up and simplify. (DLR)
edit_update()
* Unicode FFFD (Replacement Character), unless we're
* determining if it's a control character or searching for a
* match to it. */
-const char *bad_mbchar = "\xEF\xBF\xBD";
-const int bad_mbchar_len = 3;
+static const char *bad_mbchar = "\xEF\xBF\xBD";
+static const int bad_mbchar_len = 3;
#endif
#ifndef HAVE_ISBLANK
return crep;
}
+/* Store in crep the representation of the multibyte non-control
+ * character c, and store that representation's length in bytes in
+ * *crep_len.  In UTF-8 mode, a c that cannot be decoded is represented
+ * by bad_mbchar; outside UTF-8 mode, the representation is simply c's
+ * first byte.  The bytes are always written into the caller's buffer,
+ * so the return value is always crep itself, and a caller that
+ * allocated crep may safely free the returned pointer.  crep is not
+ * null-terminated by this function. */
+char *mbrep(const char *c, char *crep, int *crep_len)
+{
+    assert(c != NULL && crep != NULL && crep_len != NULL);
+
+#ifdef ENABLE_UTF8
+    if (ISSET(USE_UTF8)) {
+        wchar_t wc;
+
+        if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
+            int i;
+
+            /* Put the conversion state back to its initial state after
+             * the failed conversion. */
+            mbtowc(NULL, NULL, 0);
+
+            /* Copy the replacement into crep instead of pointing crep
+             * at bad_mbchar: returning the static string would make a
+             * caller that frees the result free memory it doesn't own
+             * and leak its own buffer.
+             * NOTE(review): this assumes crep has room for at least
+             * bad_mbchar_len bytes; the caller in display_string()
+             * allocates mb_cur_max() bytes -- confirm that this is
+             * never smaller than bad_mbchar_len in UTF-8 mode. */
+            for (i = 0; i < bad_mbchar_len; i++) {
+                crep[i] = bad_mbchar[i];
+            }
+            *crep_len = bad_mbchar_len;
+        } else {
+            *crep_len = wctomb(crep, wc);
+
+            /* If the character can't be converted back, reset the
+             * conversion state and report an empty representation. */
+            if (*crep_len < 0) {
+                wctomb(NULL, 0);
+                *crep_len = 0;
+            }
+        }
+    } else {
+#endif
+        *crep_len = 1;
+        *crep = *c;
+#ifdef ENABLE_UTF8
+    }
+#endif
+
+    return crep;
+}
+
+
/* This function is equivalent to wcwidth() for multibyte characters. */
int mbwidth(const char *c)
{
/* Parse a multibyte character from buf. Return the number of bytes
* used. If chr isn't NULL, store the multibyte character in it. If
- * bad_chr isn't NULL, set it to TRUE if we have a bad multibyte
- * character. If col isn't NULL, store the new display width in it. If
- * *str is '\t', we expect col to have the current display width. */
-int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t
- *col)
+ * col isn't NULL, store the new display width in it. If *buf is '\t',
+ * we expect col to have the current display width. */
+int parse_mbchar(const char *buf, char *chr, size_t *col)
{
int buf_mb_len;
assert(buf != NULL);
- if (bad_chr != NULL)
- *bad_chr = FALSE;
-
#ifdef ENABLE_UTF8
if (ISSET(USE_UTF8)) {
/* Get the number of bytes in the multibyte character. */
* to TRUE and interpret buf's first byte. */
if (buf_mb_len < 0) {
mblen(NULL, 0);
- if (bad_chr != NULL)
- *bad_chr = TRUE;
buf_mb_len = 1;
} else if (buf_mb_len == 0)
buf_mb_len++;
/* There is no library function to move backward one multibyte
* character. Here is the naive, O(pos) way to do it. */
while (TRUE) {
- int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL,
- NULL);
+ int buf_mb_len = parse_mbchar(buf + pos - pos_prev, NULL, NULL);
if (pos_prev <= (size_t)buf_mb_len)
break;
* after the one at pos. */
size_t move_mbright(const char *buf, size_t pos)
{
- return pos + parse_mbchar(buf + pos, NULL, NULL, NULL);
+ return pos + parse_mbchar(buf + pos, NULL, NULL);
}
#ifndef HAVE_STRCASECMP
bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
int s1_mb_len, s2_mb_len;
- s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
+ s1_mb_len = parse_mbchar(s1, s1_mb, NULL);
if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
bad_s1_mb = TRUE;
}
- s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
+ s2_mb_len = parse_mbchar(s2, s2_mb, NULL);
if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
while (*q != '\0') {
bool bad_r_mb = FALSE, bad_q_mb = FALSE;
- r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
+ r_mb_len = parse_mbchar(r, r_mb, NULL);
if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
bad_r_mb = TRUE;
}
- q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
+ q_mb_len = parse_mbchar(q, q_mb, NULL);
if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
while (*q != '\0') {
bool bad_r_mb = FALSE, bad_q_mb = FALSE;
- r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
+ r_mb_len = parse_mbchar(r, r_mb, NULL);
if (mbtowc(&wr, r_mb, r_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
bad_r_mb = TRUE;
}
- q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
+ q_mb_len = parse_mbchar(q, q_mb, NULL);
if (mbtowc(&wq, q_mb, q_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
int s_mb_len;
while (*s != '\0') {
- s_mb_len = parse_mbchar(s, NULL, NULL, NULL);
+ s_mb_len = parse_mbchar(s, NULL, NULL);
if (maxlen == 0)
break;
}
while (*s != '\0') {
- int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);
+ int s_mb_len = parse_mbchar(s, s_mb, NULL);
if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
mbtowc(NULL, NULL, 0);
while (*s != '\0') {
int chr_mb_len;
- chr_mb_len = parse_mbchar(s, chr_mb, NULL, NULL);
+ chr_mb_len = parse_mbchar(s, chr_mb, NULL);
if (is_blank_mbchar(chr_mb)) {
retval = TRUE;
/* Get the number of single-byte characters that all the
* matches have in common. */
match1_mb_len = parse_mbchar(matches[0] + common_len,
- match1_mb, NULL, NULL);
+ match1_mb, NULL);
match2_mb_len = parse_mbchar(matches[match] +
- common_len, match2_mb, NULL, NULL);
+ common_len, match2_mb, NULL);
match1_mb[match1_mb_len] = '\0';
match2_mb[match2_mb_len] = '\0';
if (strcmp(match1_mb, match2_mb) != 0)
if (match < num_matches || matches[0][common_len] == '\0')
break;
- common_len += parse_mbchar(buf + common_len, NULL, NULL,
- NULL);
+ common_len += parse_mbchar(buf + common_len, NULL, NULL);
}
free(match1_mb);
* the current word. */
while (!end_line) {
char_mb_len = parse_mbchar(openfile->current->data +
- openfile->current_x, char_mb, NULL, NULL);
+ openfile->current_x, char_mb, NULL);
/* If we've found it, stop moving forward through the current
* line. */
openfile->current = openfile->current->next) {
while (!end_line) {
char_mb_len = parse_mbchar(openfile->current->data +
- openfile->current_x, char_mb, NULL, NULL);
+ openfile->current_x, char_mb, NULL);
/* If we've found it, stop moving forward through the
* current line. */
* of the current word. */
while (!begin_line) {
char_mb_len = parse_mbchar(openfile->current->data +
- openfile->current_x, char_mb, NULL, NULL);
+ openfile->current_x, char_mb, NULL);
/* If we've found it, stop moving backward through the current
* line. */
openfile->current = openfile->current->prev) {
while (!begin_line) {
char_mb_len = parse_mbchar(openfile->current->data +
- openfile->current_x, char_mb, NULL, NULL);
+ openfile->current_x, char_mb, NULL);
/* If we've found it, stop moving backward through the
* current line. */
openfile->current_x);
while (!begin_line) {
- char_mb_len =
- parse_mbchar(openfile->current->data +
- openfile->current_x, char_mb, NULL, NULL);
+ char_mb_len = parse_mbchar(openfile->current->data +
+ openfile->current_x, char_mb, NULL);
/* If we've found it, stop moving backward through the
* current line. */
}
}
- /* Interpret the next multibyte character. If it's an invalid
- * multibyte character, interpret it as though it's a byte
- * character. */
- char_buf_len = parse_mbchar(output + i, char_buf, NULL, NULL);
+ /* Interpret the next multibyte character. */
+ char_buf_len = parse_mbchar(output + i, char_buf, NULL);
i += char_buf_len;
extern char *homedir;
-#ifdef ENABLE_UTF8
-extern const char *bad_mbchar;
-extern const int bad_mbchar_len;
-#endif
-
/* The functions we want available. */
/* Public functions in chars.c. */
wchar_t control_wrep(wchar_t c);
#endif
char *control_mbrep(const char *c, char *crep, int *crep_len);
+char *mbrep(const char *c, char *crep, int *crep_len);
int mbwidth(const char *c);
int mb_cur_max(void);
char *make_mbchar(int chr, int *chr_mb_len);
-int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t
- *col);
+int parse_mbchar(const char *buf, char *chr, size_t *col);
size_t move_mbleft(const char *buf, size_t pos);
size_t move_mbright(const char *buf, size_t pos);
#ifndef HAVE_STRCASECMP
} else {
whitespace_len[0] =
parse_mbchar(whitespace, NULL,
- NULL, NULL);
+ NULL);
whitespace_len[1] =
parse_mbchar(whitespace +
- whitespace_len[0], NULL,
- NULL, NULL);
+ whitespace_len[0], NULL, NULL);
}
} else
#endif
if (openfile->current->data[openfile->current_x] != '\0') {
int char_buf_len = parse_mbchar(openfile->current->data +
- openfile->current_x, NULL, NULL, NULL);
+ openfile->current_x, NULL, NULL);
size_t line_len = strlen(openfile->current->data +
openfile->current_x);
while (*line != '\0' && goal >= 0) {
size_t pos = 0;
- line_len = parse_mbchar(line, NULL, NULL, &pos);
+ line_len = parse_mbchar(line, NULL, &pos);
if (is_blank_mbchar(line) || (newline && *line == '\n')) {
blank_loc = cur_loc;
bool found_blank = FALSE;
while (*line != '\0') {
- line_len = parse_mbchar(line, NULL, NULL, NULL);
+ line_len = parse_mbchar(line, NULL, NULL);
if (is_blank_mbchar(line) || (newline && *line == '\n')) {
if (!found_blank)
/* Move to the last blank after blank_loc, if there is one. */
line -= cur_loc;
line += blank_loc;
- line_len = parse_mbchar(line, NULL, NULL, NULL);
+ line_len = parse_mbchar(line, NULL, NULL);
line += line_len;
while (*line != '\0' && (is_blank_mbchar(line) ||
(newline && *line == '\n'))) {
- line_len = parse_mbchar(line, NULL, NULL, NULL);
+ line_len = parse_mbchar(line, NULL, NULL);
line += line_len;
blank_loc += line_len;
blank_mb = charalloc(mb_cur_max());
while (*line != '\0') {
- blank_mb_len = parse_mbchar(line, blank_mb, NULL, NULL);
+ blank_mb_len = parse_mbchar(line, blank_mb, NULL);
if (!is_blank_mbchar(blank_mb))
break;
/* If this character is blank, make sure that it's a space with
* no blanks after it. */
if (is_blank_mbchar(end)) {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
*new_end = ' ';
new_end++;
end += end_len;
while (*end != '\0' && is_blank_mbchar(end)) {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
end += end_len;
shift += end_len;
* more than two blanks after it, and make sure that the blanks
* are spaces. */
} else if (mbstrchr(punct, end) != NULL) {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
while (end_len > 0) {
*new_end = *end;
}
if (*end != '\0' && mbstrchr(brackets, end) != NULL) {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
while (end_len > 0) {
*new_end = *end;
}
if (*end != '\0' && is_blank_mbchar(end)) {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
*new_end = ' ';
new_end++;
}
if (*end != '\0' && is_blank_mbchar(end)) {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
*new_end = ' ';
new_end++;
}
while (*end != '\0' && is_blank_mbchar(end)) {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
end += end_len;
shift += end_len;
/* If this character is neither blank nor punctuation, leave it
* alone. */
} else {
- end_len = parse_mbchar(end, NULL, NULL, NULL);
+ end_len = parse_mbchar(end, NULL, NULL);
while (end_len > 0) {
*new_end = *end;
assert(buf != NULL && pos <= strlen(buf) && word != NULL);
- parse_mbchar(buf + move_mbleft(buf, pos), p, NULL, NULL);
- parse_mbchar(buf + word_end, r, NULL, NULL);
+ parse_mbchar(buf + move_mbleft(buf, pos), p, NULL);
+ parse_mbchar(buf + word_end, r, NULL);
/* If we're at the beginning of the line or the character before the
* word isn't a non-punctuation "word" character, and if we're at
}
}
- /* Interpret the next multibyte character. If it's an invalid
- * multibyte character, interpret it as though it's a byte
- * character. */
- char_buf_len = parse_mbchar(output + i, char_buf, NULL, NULL);
+ /* Interpret the next multibyte character. */
+ char_buf_len = parse_mbchar(output + i, char_buf, NULL);
i += char_buf_len;
{
if (answer[statusbar_x] != '\0') {
int char_buf_len = parse_mbchar(answer + statusbar_x, NULL,
- NULL, NULL);
+ NULL);
size_t line_len = strlen(answer + statusbar_x);
assert(statusbar_x < strlen(answer));
/* Move forward until we find the character after the last letter of
* the current word. */
while (!end_line) {
- char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
- NULL);
+ char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);
/* If we've found it, stop moving forward through the current
* line. */
statusbar_x += char_mb_len;
while (!end_line) {
- char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
- NULL);
+ char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);
/* If we've found it, stop moving forward through the current
* line. */
/* Move backward until we find the character before the first letter
* of the current word. */
while (!begin_line) {
- char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
- NULL);
+ char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);
/* If we've found it, stop moving backward through the current
* line. */
statusbar_x = move_mbleft(answer, statusbar_x);
while (!begin_line) {
- char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL,
- NULL);
+ char_mb_len = parse_mbchar(answer + statusbar_x, char_mb, NULL);
/* If we've found it, stop moving backward through the current
* line. */
while (!begin_line) {
char_mb_len = parse_mbchar(answer + statusbar_x, char_mb,
- NULL, NULL);
+ NULL);
/* If we've found it, stop moving backward through the
* current line. */
assert(str != NULL);
while (*str != '\0') {
- int str_len = parse_mbchar(str, NULL, NULL, &length);
+ int str_len = parse_mbchar(str, NULL, &length);
if (length > xplus)
break;
assert(str != NULL);
while (*str != '\0') {
- int str_len = parse_mbchar(str, NULL, NULL, &length);
+ int str_len = parse_mbchar(str, NULL, &length);
str += str_len;
/* The string we return. */
size_t index;
/* Current position in converted. */
- bool bad_char;
- /* Whether we have an invalid multibyte character. */
char *buf_mb = charalloc(mb_cur_max());
int buf_mb_len;
buf[start_index] != '\t')) {
/* We don't display all of buf[start_index] since it starts to
* the left of the screen. */
- buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL,
- NULL);
+ buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);
if (is_cntrl_mbchar(buf_mb)) {
if (column < start_col) {
}
while (index < alloc_len - 1 && buf[start_index] != '\0') {
- buf_mb_len = parse_mbchar(buf + start_index, buf_mb, &bad_char,
- NULL);
+ buf_mb_len = parse_mbchar(buf + start_index, buf_mb, NULL);
/* If buf contains a tab character, interpret it. */
if (*buf_mb == '\t') {
#endif
converted[index++] = ' ';
start_col++;
- /* If buf contains a non-control character, interpret it. */
+ /* If buf contains a non-control character, interpret it. If
+ * buf contains an invalid multibyte non-control character,
+ * display it as such. */
} else {
- int i;
+ char *nctrl_buf_mb = charalloc(mb_cur_max());
+ int nctrl_buf_mb_len, i;
-#ifdef ENABLE_UTF8
- /* If buf contains an invalid multibyte non-control
- * character, display it as such. */
- if (ISSET(USE_UTF8) && bad_char) {
- for (i = 0; i < bad_mbchar_len; i++)
- converted[index++] = bad_mbchar[i];
+ nctrl_buf_mb = mbrep(buf_mb, nctrl_buf_mb,
+ &nctrl_buf_mb_len);
- start_col += mbwidth(bad_mbchar);
- } else {
-#endif
- for (i = 0; i < buf_mb_len; i++)
- converted[index++] = buf[start_index + i];
+ for (i = 0; i < nctrl_buf_mb_len; i++)
+ converted[index++] = nctrl_buf_mb[i];
- start_col += mbwidth(buf_mb);
-#ifdef ENABLE_UTF8
- }
-#endif
+ start_col += mbwidth(nctrl_buf_mb);
+
+ free(nctrl_buf_mb);
}
start_index += buf_mb_len;