It is ridiculous to truncate files on character conversion errors without
warning and a chance for recovery. This patch sets the handler to
copy the character, clear the error and proceed instead of bailing
out.

To replicate:
	- unset LANG
	- Create a file that has ~1000 lines. Put a single bad character
	  ('\344') in it, around 2/3rds of the way down the file. Save it.
	- export LANG=en_US.UTF-8
	- edit the file. Notice there is no error for input conversion,
	  since nvi reads the file opportunistically.
	- :w Boom, the file is truncated.

Alternatively, you can put that character in the first line of the file
and watch the fireworks. If you would like to restore the previous behavior,
compile with -DERROR_ON_CONVERT.

XXX: Pullup to 6, 5 etc.
This commit is contained in:
christos 2013-01-23 18:51:51 +00:00
parent 674743a5f1
commit ab35868474
1 changed files with 25 additions and 8 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: conv.c,v 1.6 2009/01/18 03:45:50 lukem Exp $ */
/* $NetBSD: conv.c,v 1.7 2013/01/23 18:51:51 christos Exp $ */
/*-
* Copyright (c) 1993, 1994
@ -62,6 +62,21 @@ raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
return 0;
}
/*
 * Conversion-error handlers, invoked as statements at the iconv() and
 * mbrtowc()/wcrtomb() call sites.
 *
 * Default build: do not bail out on a bad character; pass it through
 * unconverted and keep going.
 * With -DERROR_ON_CONVERT: jump to the enclosing function's `err' label.
 *
 * Both variants must be function-like macros with identical parameter
 * lists: call sites always pass arguments, and an object-like
 * `#define X goto err' would expand `X(a, b);' to `goto err(a, b);',
 * which does not compile.
 */
#ifndef ERROR_ON_CONVERT
/*
 * o, i:   output and input cursors (modifiable lvalues); one unit is
 *         copied from *i to *o and both cursors are advanced.
 * ol, il: remaining output/input counts (modifiable lvalues); each is
 *         decremented by one.
 * NOTE(review): no check that ol/il are non-zero before the copy --
 * confirm the call sites guarantee room for one more unit.
 */
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
		*(o)++ = *(i)++;					\
		(ol)--; (il)--;						\
	} while (/*CONSTCOND*/0)
/*
 * n:   result variable of the failed conversion; set to 1 so the caller
 *      advances by one unit.
 * mbs: conversion state object; reset through the project's MEMSET macro
 *      (presumably zeroing one element of mbs's type -- confirm).
 * d:   destination lvalue that receives the raw source unit.
 * s:   source unit that failed to convert.
 */
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
		(d) = (s);						\
		MEMSET(&(mbs), 0, 1);					\
		(n) = 1;						\
	} while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif
#define CONV_BUFFER_SIZE 512
/* fill the buffer with codeset encoding of string pointed to by str
* left has the number of bytes left in str and is adjusted
@ -74,9 +89,9 @@ raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
char *bp = buffer; \
outleft = CONV_BUFFER_SIZE; \
errno = 0; \
if (iconv(id, (const char **)&str, &left, &bp, &outleft) == (size_t)-1 \
/* && errno != E2BIG */) \
goto err; \
if (iconv(id, (const char **)&str, &left, &bp, &outleft) \
== (size_t)-1 /* && errno != E2BIG */) \
HANDLE_ICONV_ERROR(bp, str, outleft, left); \
if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \
error = -left; \
goto err; \
@ -120,7 +135,8 @@ default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
/* NULL character converted */
if (n == (size_t)-2) error = -(len-j);
if (n == (size_t)-1 || n == (size_t)-2) goto err;
if (n == (size_t)-1 || n == (size_t)-2)
HANDLE_MBR_ERROR(n, mbs, (*tostr)[i], src[j]);
if (n == 0) n = 1;
j += n;
if (++i >= *blen) {
@ -243,8 +259,8 @@ default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
} \
errno = 0; \
if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
errno != E2BIG) \
goto err; \
errno != E2BIG) \
HANDLE_ICONV_ERROR(obp, bp, outleft, len); \
offset = cw->blen1 - outleft; \
} \
} while (0)
@ -268,7 +284,8 @@ default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
for (i = 0, j = 0; i < (size_t)len; ++i) {
n = wcrtomb(dst+j, str[i], &mbs);
if (n == (size_t)-1) goto err;
if (n == (size_t)-1)
HANDLE_MBR_ERROR(n, mbs, dst[j], str[i]);
j += n;
if (buflen < j + MB_CUR_MAX) {
if (id != (iconv_t)-1) {