chars: speed up determining whether something is a control character

Use knowledge of UTF-8 instead of converting to wide characters first.
This commit is contained in:
Benno Schulenberg 2016-06-29 20:48:04 +02:00
parent 019d7b34ca
commit af53c56ec8
2 changed files with 3 additions and 23 deletions

View File

@ -150,20 +150,9 @@ bool is_ascii_cntrl_char(int c)
* handles high-bit control characters. */
bool is_cntrl_char(int c)
{
/* NOTE(review): two return statements appear back to back here.  As
 * written, only the first can execute; the second is unreachable.
 * Presumably they are the before and after forms of the same line in
 * this change -- confirm which one is meant to remain. */
/* Range form: true when c lies in [-128, -96), in [0, 32), or in
 * [127, 160). */
return (-128 <= c && c < -96) || (0 <= c && c < 32) ||
(127 <= c && c < 160);
/* Bit-mask form: true when both bits selected by the mask 0x60 are
 * clear in c, or when c equals 127. */
return ((c & 0x60) == 0 || c == 127);
}
#ifdef ENABLE_UTF8
/* Report whether the wide character wc is a control character: either
 * one of the codes below 32, or one of the codes from 127 up to (but
 * not including) 160, which covers DEL and the high-bit controls. */
bool is_cntrl_wchar(wchar_t wc)
{
    /* Low control codes: 0 through 31. */
    if (wc >= 0 && wc < 32)
        return true;

    /* DEL (127) and the high-bit control codes 128 through 159. */
    if (wc >= 127 && wc < 160)
        return true;

    return false;
}
#endif
/* This function is equivalent to iscntrl() for multibyte characters,
 * except that it also handles multibyte control characters that have
 * their high bits set. */
@ -173,14 +162,8 @@ bool is_cntrl_mbchar(const char *c)
#ifdef ENABLE_UTF8
if (use_utf8) {
wchar_t wc;
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
mbtowc_reset();
wc = bad_wchar;
}
return is_cntrl_wchar(wc);
return ((c[0] & 0xE0) == 0 || c[0] == 127 ||
((signed char)c[0] == -62 && (signed char)c[1] < -96));
} else
#endif
return is_cntrl_char((unsigned char)*c);

View File

@ -183,9 +183,6 @@ bool is_alnum_mbchar(const char *c);
bool is_blank_mbchar(const char *c);
bool is_ascii_cntrl_char(int c);
bool is_cntrl_char(int c);
#ifdef ENABLE_UTF8
bool is_cntrl_wchar(wchar_t wc);
#endif
bool is_cntrl_mbchar(const char *c);
bool is_punct_mbchar(const char *c);
bool is_word_mbchar(const char *c, bool allow_punct);