diff --git a/ChangeLog b/ChangeLog index 8003babc..31dec6ad 100644 --- a/ChangeLog +++ b/ChangeLog @@ -33,6 +33,10 @@ CVS code - do_replace() - Blank out last_replace properly again just before displaying the "Replace" prompt. (DLR, found by Mike Frysinger) +- winio.c: + display_string() + - Display invalid multibyte sequences as Unicode 0xFFFD + (Replacement Character). (DLR) - configure.ac: - Since we only use vsnprintf() now, remove the tests for snprintf(). (DLR) diff --git a/src/winio.c b/src/winio.c index 5e1d1904..5e9e932a 100644 --- a/src/winio.c +++ b/src/winio.c @@ -2265,10 +2265,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool /* The string we return. */ size_t index; /* Current position in converted. */ + bool bad_char; + /* Whether we have an invalid multibyte character. */ +#ifdef NANO_WIDE + const char *bad_buf_mb = "\xEF\xBF\xBD"; + /* What to display when we have an invalid multibyte + * character: Unicode 0xFFFD (Replacement Character). */ + const int bad_buf_mb_len = 3; + /* The length of bad_buf_mb. */ +#endif char *buf_mb = charalloc(mb_cur_max()); int buf_mb_len; - bool bad_char; /* If dollars is TRUE, make room for the "$" at the end of the * line. */ @@ -2329,6 +2337,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool buf_mb_len = parse_mbchar(buf + start_index, buf_mb, &bad_char, NULL); + /* If buf contains a tab character, interpret it. */ if (*buf_mb == '\t') { #if !defined(NANO_SMALL) && defined(ENABLE_NANORC) if (ISSET(WHITESPACE_DISPLAY)) { @@ -2344,25 +2353,38 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool converted[index++] = ' '; start_col++; } - /* If buf contains a control character, interpret it. If it - * contains an invalid multibyte control character, interpret - * that character as though it's a normal control character. */ + /* If buf contains a control character, interpret it. */ } else if (is_cntrl_mbchar(buf_mb)) { - char *ctrl_buf_mb = charalloc(mb_cur_max()); - int ctrl_buf_mb_len, i; + int i; converted[index++] = '^'; start_col++; - ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, - &ctrl_buf_mb_len); +#ifdef NANO_WIDE + /* If buf contains an invalid multibyte control character, + * display it as such. */ + if (ISSET(USE_UTF8) && bad_char) { + for (i = 0; i < bad_buf_mb_len; i++) + converted[index++] = bad_buf_mb[i]; - for (i = 0; i < ctrl_buf_mb_len; i++) - converted[index++] = ctrl_buf_mb[i]; + start_col += mbwidth(bad_buf_mb); + } else +#endif + { + char *ctrl_buf_mb = charalloc(mb_cur_max()); + int ctrl_buf_mb_len; - start_col += mbwidth(ctrl_buf_mb); + ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb, + &ctrl_buf_mb_len); - free(ctrl_buf_mb); + for (i = 0; i < ctrl_buf_mb_len; i++) + converted[index++] = ctrl_buf_mb[i]; + + start_col += mbwidth(ctrl_buf_mb); + + free(ctrl_buf_mb); + } + /* If buf contains a space character, interpret it. */ } else if (*buf_mb == ' ') { #if !defined(NANO_SMALL) && defined(ENABLE_NANORC) if (ISSET(WHITESPACE_DISPLAY)) { @@ -2375,26 +2397,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool #endif converted[index++] = ' '; start_col++; + /* If buf contains a non-control character, interpret it. */ } else { int i; #ifdef NANO_WIDE /* If buf contains an invalid multibyte non-control - * character, interpret that character as though it's a - * normal non-control character. */ + * character, display it as such. */ if (ISSET(USE_UTF8) && bad_char) { - char *bad_buf_mb; - int bad_buf_mb_len; - - bad_buf_mb = make_mbchar((unsigned char)*buf_mb, - &bad_buf_mb_len); - for (i = 0; i < bad_buf_mb_len; i++) converted[index++] = bad_buf_mb[i]; start_col += mbwidth(bad_buf_mb); - - free(bad_buf_mb); } else { #endif for (i = 0; i < buf_mb_len; i++)