From 8c64f063a1603a4396fc3a3a61188d37f00d48ae Mon Sep 17 00:00:00 2001 From: wiz Date: Tue, 5 Jun 2007 17:48:19 +0000 Subject: [PATCH] Apply SODA Noriyuki's patch for multibyte support. Based on soda-revised2.patch (in private mail) with a change from soda on tech-userlevel. Ok christos@. --- bin/ls/util.c | 87 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 10 deletions(-) diff --git a/bin/ls/util.c b/bin/ls/util.c index dec71d750b2f..9cf586b61f0b 100644 --- a/bin/ls/util.c +++ b/bin/ls/util.c @@ -1,4 +1,4 @@ -/* $NetBSD: util.c,v 1.30 2006/12/14 14:15:26 christos Exp $ */ +/* $NetBSD: util.c,v 1.31 2007/06/05 17:48:19 wiz Exp $ */ /* * Copyright (c) 1989, 1993, 1994 @@ -37,14 +37,13 @@ #if 0 static char sccsid[] = "@(#)util.c 8.5 (Berkeley) 4/28/95"; #else -__RCSID("$NetBSD: util.c,v 1.30 2006/12/14 14:15:26 christos Exp $"); +__RCSID("$NetBSD: util.c,v 1.31 2007/06/05 17:48:19 wiz Exp $"); #endif #endif /* not lint */ #include #include -#include #include #include #include @@ -52,6 +51,8 @@ __RCSID("$NetBSD: util.c,v 1.30 2006/12/14 14:15:26 christos Exp $"); #include #include #include +#include +#include #include "ls.h" #include "extern.h" @@ -84,17 +85,83 @@ safe_print(const char *src) /* NOTREACHED */ } +/* + * The reasons why we don't use putwchar(wc) here are: + * - If wc == L'\0', we need to restore the initial shift state, but + * the C language standard doesn't say that putwchar(L'\0') does. + * - It isn't portable to mix a wide-oriented function (i.e. putwchar) + * with byte-oriented functions (printf et al.) in the same FILE. 
+ */ +static int +printwc(wchar_t wc, mbstate_t *pst) +{ + size_t size; + char buf[MB_LEN_MAX]; + + size = wcrtomb(buf, wc, pst); + if (size == (size_t)-1) /* conversion failed: this shouldn't happen, but check to be sure; nothing is written */ + return 0; + if (wc == L'\0') { + /* Drop the trailing '\0' byte so that only what precedes it in buf is written; the following condition should always be true, but check to be sure */ + if (size > 0 && buf[size - 1] == '\0') + --size; + } + if (size > 0) + fwrite(buf, 1, size, stdout); + return wc == L'\0' ? 0 : wcwidth(wc); /* width is wcwidth(wc), or 0 for the terminating L'\0' */ +} + int printescaped(const char *src) { - unsigned char c; - int n; + int n = 0; /* running total of the values returned by printwc() */ + mbstate_t src_state, stdout_state; + /* The +1 includes the terminating '\0' of src, so that it is passed to mbrtowc() as well. */ + const char *endptr = src + strlen(src) + 1; - for (n = 0; (c = *src) != '\0'; ++src, ++n) - if (isprint(c)) - (void)putchar(c); - else - (void)putchar('?'); + /* + * We have to reset src_state each time in this function, because + * the codeset of the src pathname may not match the current locale. + * Note that if we pass NULL instead of &src_state to mbrtowc(), + * there is no way to reset the state. + */ + memset(&src_state, 0, sizeof(src_state)); + memset(&stdout_state, 0, sizeof(stdout_state)); + while (src < endptr) { + wchar_t wc; + size_t rv, span = endptr - src; + +#if 0 +/* soda says: + * Comment this out, because if there are redundant escape sequences + * which exceed 32 bytes, our current implementation doesn't display + * the pathname correctly with above. + */ + if (span > MB_CUR_MAX) + span = MB_CUR_MAX; +#endif + rv = mbrtowc(&wc, src, span, &src_state); + if (rv == 0) { /* assert(wc == L'\0'); */ + /* The following may output a shift sequence. 
*/ + n += printwc(wc, &stdout_state); + break; + } + if (rv == (size_t)-1) { /* probably errno == EILSEQ */ + n += printwc(L'?', &stdout_state); + /* try to skip 1 byte, because there is no better way */ + src++; + memset(&src_state, 0, sizeof(src_state)); + } else if (rv == (size_t)-2) { + if (span < MB_CUR_MAX) { /* incomplete char */ + n += printwc(L'?', &stdout_state); + break; + } + src += span; /* a redundant shift sequence? */ + } else { + n += printwc(iswprint(wc) ? wc : L'?', &stdout_state); + src += rv; + } + } return n; }