tweaks: elide a function that is an amalgam of three others

In addition, the function was used just once and had a weird return value,
and with it gone some more code can be excluded from a non-UTF8 build.

Make use of the fact that any single-byte character always occupies
just one column, and call the costly mbtowc() and wcwidth() only for
characters that actually are multibyte.
This commit is contained in:
Benno Schulenberg 2019-06-10 19:27:42 +02:00
parent c5955d14ce
commit cd09482231
3 changed files with 38 additions and 52 deletions

View File

@ -200,36 +200,6 @@ char control_mbrep(const char *c, bool isdata)
return control_rep(*c);
}
/* Assess how many bytes the given (multibyte) character occupies.
 *
 * Return value:
 *   - the length in bytes of the character when it is valid (always 1
 *     for a single-byte character or when not in UTF-8 mode);
 *   - -1 when the byte sequence is invalid;
 *   - the number of bytes minus 8 when the sequence decodes to an
 *     invalid codepoint, so adding 8 to the result gives back the
 *     number of bytes.  (This is distinguishable from -1 only while
 *     the byte count stays below 7 -- presumably guaranteed by
 *     MAXCHARLEN; verify against its definition.)
 *
 * In the second parameter, return the number of columns that the
 * character occupies.  The width is always set: it is 1 for single-byte
 * characters, for unassigned codepoints, and on both error returns. */
int length_of_char(const char *c, int *width)
{
	/* A single-byte character always occupies one column.  Setting this
	 * first also gives the caller a defined width on every return path,
	 * instead of relying on the caller having preset the variable. */
	*width = 1;

#ifdef ENABLE_UTF8
	/* Only a byte with the high bit set can start a multibyte sequence. */
	if (use_utf8 && (signed char)*c < 0) {
		wchar_t wc;
		int charlen = mbtowc(&wc, c, MAXCHARLEN);

		/* If the sequence is invalid... */
		if (charlen < 0)
			return -1;

		/* If the codepoint is invalid... */
		if (!is_valid_unicode(wc))
			return charlen - 8;

		*width = wcwidth(wc);

		/* If the codepoint is unassigned, assume a width of one. */
		if (*width < 0)
			*width = 1;

		return charlen;
	}
#endif
	return 1;
}
/* This function is equivalent to wcwidth() for multibyte characters. */
int mbwidth(const char *c)
{

View File

@ -210,7 +210,6 @@ bool is_ascii_cntrl_char(int c);
bool is_cntrl_mbchar(const char *c);
bool is_word_mbchar(const char *c, bool allow_punct);
char control_mbrep(const char *c, bool isdata);
int length_of_char(const char *c, int *width);
int mbwidth(const char *c);
char *make_mbchar(long chr, int *chr_mb_len);
int char_length(const char *pointer);

View File

@ -1922,7 +1922,7 @@ char *display_string(const char *buf, size_t column, size_t span,
}
while (*buf != '\0' && (column < beyond || mbwidth(buf) == 0)) {
int charlength, charwidth = 1;
int charlength, charwidth;
if (*buf == ' ') {
/* Show a space as a visible character, or as a space. */
@ -1961,7 +1961,7 @@ char *display_string(const char *buf, size_t column, size_t span,
continue;
}
charlength = length_of_char(buf, &charwidth);
charlength = mblen(buf, MAXCHARLEN);
/* If buf contains a control character, represent it. */
if (is_cntrl_mbchar(buf)) {
@ -1972,29 +1972,46 @@ char *display_string(const char *buf, size_t column, size_t span,
continue;
}
/* If buf contains a valid non-control character, simply copy it. */
if (charlength > 0) {
for (; charlength > 0; charlength--)
converted[index++] = *(buf++);
column += charwidth;
#ifdef USING_OLD_NCURSES
if (charwidth > 1)
seen_wide = TRUE;
#endif
/* A one-byte character is necessarily one column wide. */
if (charlength == 1) {
converted[index++] = *(buf++);
column++;
continue;
}
/* Represent an invalid starter byte with the Replacement Character. */
converted[index++] = '\xEF';
converted[index++] = '\xBF';
converted[index++] = '\xBD';
column++;
buf++;
#ifdef ENABLE_UTF8
/* For a multibyte character, check whether it is valid,
* and determine whether it occupies one or two columns. */
wchar_t wc;
int length = mbtowc(&wc, buf, MAXCHARLEN);
/* For invalid codepoints, skip extra bytes. */
if (charlength < -1)
buf += charlength + 7;
if (charlength != length)
die("Different character lengths");
/* When invalid, represent it with the Replacement Character. */
if (charlength < 0 || !is_valid_unicode(wc)) {
converted[index++] = '\xEF';
converted[index++] = '\xBF';
converted[index++] = '\xBD';
column++;
buf += (charlength > 0 ? charlength : 1);
continue;
}
/* For any valid character, just copy its bytes. */
for (; charlength > 0; charlength--)
converted[index++] = *(buf++);
charwidth = wcwidth(wc);
/* If the codepoint is unassigned, assume a width of one. */
column += (charwidth < 0 ? 1 : charwidth);
#ifdef USING_OLD_NCURSES
if (charwidth > 1)
seen_wide = TRUE;
#endif
#endif /* ENABLE_UTF8 */
}
/* If there is more text than can be shown, make room for the ">". */