From b5e98a79d9c29a3dfb6d9053f955c56004e09cc2 Mon Sep 17 00:00:00 2001 From: martin Date: Sat, 9 Dec 2023 13:10:16 +0000 Subject: [PATCH] Pull up following revision(s) (requested by riastradh in ticket #1923): lib/libc/gen/vis.c: revision 1.75-1.86 tests/lib/libc/gen/t_vis.c: revision 1.10-1.14 PR 56260: fix out-of-bounds stack read. vis(3): Avoid nonportable MIN in portable code. vis(3) tests: Add xfail test for encoding overflow. From Kyle Evans . PR lib/57573 vis(3) tests: Expand tests and diagnostic outputs on failure. PR lib/57573 vis(3) tests: Test another overflow edge case. Related to PR lib/57573. vis(3): Make maxolen unsigned size_t, not ssize_t. It is initialized once either to *dlen, which is unsigned size_t, or to wcslen(start) * MB_MAX_LEN + 1, and wcslen returns unsigned size_t too. So there appears to have never been any reason for this to be signed. Part of PR lib/57573. vis(3): Make mbslength unsigned. Sprinkle assertions and comments justifying the proposition that it would never go negative if signed. Obviates need to worry about mblength > SSIZE_MAX. Prompted by PR lib/57573. vis(3): Avoid arithmetic overflow before calloc(3). Prompted by PR lib/57573. vis(3): Call wcslen(start) only once. It had better not change between these two times! Prompted by PR lib/57573. vis(3): Avoid potential arithmetic overflow in maxolen. Can't easily prove that this overflow is impossible, so let's add a check. Prompted by PR lib/57573. vis(3): Fix main part of PR lib/57573. From Kyle Evans . vis(3): Fix one more buffer overrun in an edge case. PR lib/57573 vis(3): Sort includes. No functional change intended. Prompted by PR lib/57573. vis(3): Need for SIZE_MAX, per C standard. From Kyle Evans . Followup to PR lib/57573. vis(3): Per KNF, sys/param.h comes before sys/types.h. Which is nice because that's also lexicographic. --- lib/libc/gen/vis.c | 122 +++++++++++++++++++++++++++++++------ tests/lib/libc/gen/t_vis.c | 98 ++++++++++++++++++++++++++++- 2 files changed, 202 insertions(+), 18 deletions(-) diff --git a/lib/libc/gen/vis.c b/lib/libc/gen/vis.c index 5b4334b1b7d6..b203f3707889 100644 --- a/lib/libc/gen/vis.c +++ b/lib/libc/gen/vis.c @@ -1,4 +1,4 @@ -/* $NetBSD: vis.c,v 1.73.4.1 2018/01/16 14:15:50 martin Exp $ */ +/* $NetBSD: vis.c,v 1.73.4.2 2023/12/09 13:10:16 martin Exp $ */ /*- * Copyright (c) 1989, 1993 @@ -57,7 +57,7 @@ #include #if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: vis.c,v 1.73.4.1 2018/01/16 14:15:50 martin Exp $"); +__RCSID("$NetBSD: vis.c,v 1.73.4.2 2023/12/09 13:10:16 martin Exp $"); #endif /* LIBC_SCCS and not lint */ #ifdef __FBSDID __FBSDID("$FreeBSD$"); @@ -65,13 +65,15 @@ __FBSDID("$FreeBSD$"); #endif #include "namespace.h" -#include + #include +#include #include -#include #include +#include #include +#include #include #include @@ -353,12 +355,15 @@ makeextralist(int flags, const char *src) wchar_t *dst, *d; size_t len; const wchar_t *s; + mbstate_t mbstate; len = strlen(src); if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL) return NULL; - if ((flags & VIS_NOLOCALE) || mbstowcs(dst, src, len) == (size_t)-1) { + memset(&mbstate, 0, sizeof(mbstate)); + if ((flags & VIS_NOLOCALE) + || mbsrtowcs(dst, &src, len, &mbstate) == (size_t)-1) { size_t i; for (i = 0; i < len; i++) dst[i] = (wchar_t)(u_char)src[i]; @@ -393,20 +398,23 @@ static int istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, int flags, const char *mbextra, int *cerr_ptr) { + char mbbuf[MB_LEN_MAX]; wchar_t *dst, *src, *pdst, *psrc, *start, *extra; size_t len, olen; uint64_t bmsk, wmsk; wint_t c; visfun_t f; int clen = 0, cerr, error = -1, i, shft; - char *mbdst, *mdst; - ssize_t mbslength, maxolen; + char *mbdst, *mbwrite, *mdst; + size_t mbslength; + size_t maxolen; + mbstate_t mbstate; _DIAGASSERT(mbdstp != NULL); _DIAGASSERT(mbsrc != NULL || mblength == 0); _DIAGASSERT(mbextra != NULL); - mbslength = (ssize_t)mblength; + mbslength = mblength; /* * When inputing a single character, must also read in the * next character for nextc, the look-ahead character. @@ -427,6 +435,14 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, * return to the caller. */ + /* + * Guarantee the arithmetic on input to calloc won't overflow. + */ + if (mbslength > (SIZE_MAX - 1)/16) { + errno = ENOMEM; + return -1; + } + /* Allocate space for the wide char strings */ psrc = pdst = extra = NULL; mdst = NULL; @@ -458,10 +474,18 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, * stop at NULs because we may be processing a block of data * that includes NULs. */ + memset(&mbstate, 0, sizeof(mbstate)); while (mbslength > 0) { /* Convert one multibyte character to wchar_t. */ - if (!cerr) - clen = mbtowc(src, mbsrc, MB_LEN_MAX); + if (!cerr) { + clen = mbrtowc(src, mbsrc, + (mbslength < MB_LEN_MAX + ? mbslength + : MB_LEN_MAX), + &mbstate); + assert(clen < 0 || (size_t)clen <= mbslength); + assert(clen <= MB_LEN_MAX); + } if (cerr || clen < 0) { /* Conversion error, process as a byte instead. */ *src = (wint_t)(u_char)*mbsrc; @@ -475,6 +499,20 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, */ clen = 1; } + /* + * Let n := MIN(mbslength, MB_LEN_MAX). We have: + * + * mbslength >= 1 + * mbrtowc(..., n, &mbstate) <= n, + * by the contract of mbrtowc + * + * clen is either + * (a) mbrtowc(..., n, &mbstate), in which case + * clen <= n <= mbslength; or + * (b) 1, in which case clen = 1 <= mbslength. + */ + assert(clen > 0); + assert((size_t)clen <= mbslength); /* Advance buffer character pointer. */ src++; /* Advance input pointer by number of bytes read. */ @@ -532,11 +570,49 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, * output byte-by-byte here. Else use wctomb(). */ len = wcslen(start); - maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1); + if (dlen) { + maxolen = *dlen; + if (maxolen == 0) { + errno = ENOSPC; + goto out; + } + } else { + if (len > (SIZE_MAX - 1)/MB_LEN_MAX) { + errno = ENOSPC; + goto out; + } + maxolen = len*MB_LEN_MAX + 1; + } olen = 0; + memset(&mbstate, 0, sizeof(mbstate)); for (dst = start; len > 0; len--) { - if (!cerr) - clen = wctomb(mbdst, *dst); + if (!cerr) { + /* + * If we have at least MB_CUR_MAX bytes in the buffer, + * we'll just do the conversion in-place into mbdst. We + * need to be a little more conservative when we get to + * the end of the buffer, as we may not have MB_CUR_MAX + * bytes but we may not need it. + */ + if (maxolen - olen > MB_CUR_MAX) + mbwrite = mbdst; + else + mbwrite = mbbuf; + clen = wcrtomb(mbwrite, *dst, &mbstate); + if (clen > 0 && mbwrite != mbdst) { + /* + * Don't break past our output limit, noting + * that maxolen includes the nul terminator so + * we can't write past maxolen - 1 here. + */ + if (olen + clen >= maxolen) { + errno = ENOSPC; + goto out; + } + + memcpy(mbdst, mbwrite, clen); + } + } if (cerr || clen < 0) { /* * Conversion error, process as a byte(s) instead. @@ -551,16 +627,27 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, shft = i * NBBY; bmsk = (uint64_t)0xffLL << shft; wmsk |= bmsk; - if ((*dst & wmsk) || i == 0) + if ((*dst & wmsk) || i == 0) { + if (olen + clen + 1 >= maxolen) { + errno = ENOSPC; + goto out; + } + mbdst[clen++] = (char)( (uint64_t)(*dst & bmsk) >> shft); + } } cerr = 1; } - /* If this character would exceed our output limit, stop. */ - if (olen + clen > (size_t)maxolen) - break; + + /* + * We'll be dereferencing mbdst[clen] after this to write the + * nul terminator; the above paths should have checked for a + * possible overflow already. + */ + assert(olen + clen < maxolen); + /* Advance output pointer by number of bytes written. */ mbdst += clen; /* Advance buffer character pointer. */ @@ -570,6 +657,7 @@ istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, } /* Terminate the output string. */ + assert(olen < maxolen); *mbdst = '\0'; if (flags & VIS_NOLOCALE) { diff --git a/tests/lib/libc/gen/t_vis.c b/tests/lib/libc/gen/t_vis.c index adb0930a300a..86ffd0587ff2 100644 --- a/tests/lib/libc/gen/t_vis.c +++ b/tests/lib/libc/gen/t_vis.c @@ -1,4 +1,4 @@ -/* $NetBSD: t_vis.c,v 1.9 2017/01/10 15:16:57 christos Exp $ */ +/* $NetBSD: t_vis.c,v 1.9.6.1 2023/12/09 13:10:16 martin Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -116,6 +116,23 @@ ATF_TC_BODY(strvis_empty, tc) ATF_REQUIRE(dst[0] == '\0' && dst[1] == 'a'); } +ATF_TC(strnvis_empty_empty); +ATF_TC_HEAD(strnvis_empty_empty, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Test strnvis(3) with empty source and destination"); +} + +ATF_TC_BODY(strnvis_empty_empty, tc) +{ + char dst[] = "fail"; + int n; + + n = strnvis(dst, 0, "", VIS_SAFE); + ATF_CHECK(memcmp(dst, "fail", sizeof(dst)) == 0); + ATF_CHECK_EQ_MSG(n, -1, "n=%d", n); +} + ATF_TC(strunvis_hex); ATF_TC_HEAD(strunvis_hex, tc) { @@ -175,16 +192,95 @@ ATF_TC_BODY(strvis_locale, tc) } #endif /* VIS_NOLOCALE */ +#define STRVIS_OVERFLOW_MARKER ((char)0xff) /* Arbitrary */ + +#ifdef VIS_NOLOCALE +ATF_TC(strvis_overflow_mb); +ATF_TC_HEAD(strvis_overflow_mb, tc) +{ + atf_tc_set_md_var(tc, "descr", "Test strvis(3) multi-byte overflow"); +} + +ATF_TC_BODY(strvis_overflow_mb, tc) +{ + const char src[] = "\xf0\x9f\xa5\x91"; + /* Extra byte to detect overflow */ + char dst[sizeof(src) + 1]; + unsigned i; + int n; + + setlocale(LC_CTYPE, "en_US.UTF-8"); + + for (i = 0; i < sizeof(dst) - 1; i++) { + memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst)); + n = strnvis(dst, i, src, VIS_SAFE); + ATF_CHECK_EQ_MSG(dst[i], STRVIS_OVERFLOW_MARKER, + "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx]" + " STRVIS_OVERFLOW_MARKER=%02hhx", + i, dst[0], dst[1], dst[2], dst[3], dst[4], + STRVIS_OVERFLOW_MARKER); + ATF_CHECK_EQ_MSG(n, -1, "[%u] n=%d", i, n); + } + + memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst)); + n = strnvis(dst, sizeof(dst) - 1, src, VIS_SAFE); + ATF_CHECK_EQ_MSG(dst[sizeof(dst) - 1], STRVIS_OVERFLOW_MARKER, + "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx]" + " STRVIS_OVERFLOW_MARKER=%02hhx", + i, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], + STRVIS_OVERFLOW_MARKER); + ATF_CHECK_EQ_MSG(n, (int)sizeof(dst) - 2, "n=%d", n); +} +#endif + +ATF_TC(strvis_overflow_c); +ATF_TC_HEAD(strvis_overflow_c, tc) +{ + atf_tc_set_md_var(tc, "descr", "Test strvis(3) C locale overflow"); +} + +ATF_TC_BODY(strvis_overflow_c, tc) +{ + const char src[] = "AAAA"; + /* Extra byte to detect overflow */ + char dst[sizeof(src) + 1]; + unsigned i; + int n; + + for (i = 0; i < sizeof(dst) - 1; i++) { + memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst)); + n = strnvis(dst, i, src, VIS_SAFE | VIS_NOLOCALE); + ATF_CHECK_EQ_MSG(dst[i], STRVIS_OVERFLOW_MARKER, + "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx]" + " STRVIS_OVERFLOW_MARKER=%02hhx", + i, dst[0], dst[1], dst[2], dst[3], dst[4], + STRVIS_OVERFLOW_MARKER); + ATF_CHECK_EQ_MSG(n, -1, "[%u] n=%d", i, n); + } + + memset(dst, STRVIS_OVERFLOW_MARKER, sizeof(dst)); + n = strnvis(dst, sizeof(dst) - 1, src, VIS_SAFE | VIS_NOLOCALE); + ATF_CHECK_EQ_MSG(dst[sizeof(dst) - 1], STRVIS_OVERFLOW_MARKER, + "[%u] dst=[%02hhx %02hhx %02hhx %02hhx %02hhx %02hhx]" + " STRVIS_OVERFLOW_MARKER=%02hhx", + i, dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], + STRVIS_OVERFLOW_MARKER); + ATF_CHECK_EQ_MSG(n, (int)sizeof(dst) - 2, "n=%d", n); +} + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, strvis_basic); ATF_TP_ADD_TC(tp, strvis_null); ATF_TP_ADD_TC(tp, strvis_empty); + ATF_TP_ADD_TC(tp, strnvis_empty_empty); ATF_TP_ADD_TC(tp, strunvis_hex); #ifdef VIS_NOLOCALE ATF_TP_ADD_TC(tp, strvis_locale); + ATF_TP_ADD_TC(tp, strvis_overflow_mb); #endif /* VIS_NOLOCALE */ + ATF_TP_ADD_TC(tp, strvis_overflow_c); return atf_no_error(); }