From 96f7cfd903824cc1e4bcb2e9e6ac318c9592492c Mon Sep 17 00:00:00 2001 From: rin Date: Thu, 28 Dec 2023 03:49:35 +0000 Subject: [PATCH] mklocale: XXX: Neglect TODIGIT at the moment PR lib/57798 TODIGIT was implemented with the assumption that all digit characters can be mapped to numerical values <= 255. This is no longer true for Unicode, and results in, e.g., wrong return values of wcwidth(3) for U+5146 or U+16B60. As a workaround, neglect TODIGIT for now, as done in OpenBSD: https://github.com/OpenBSD/src/commit/4efe9bdeb34 XXX At least netbsd-10 should be fixed, but it requires some tests. --- usr.bin/mklocale/mklocale.1 | 8 +++++--- usr.bin/mklocale/yacc.y | 29 +++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/usr.bin/mklocale/mklocale.1 b/usr.bin/mklocale/mklocale.1 index 35875500e47d..bbc1e62e706e 100644 --- a/usr.bin/mklocale/mklocale.1 +++ b/usr.bin/mklocale/mklocale.1 @@ -1,4 +1,4 @@ -.\" $NetBSD: mklocale.1,v 1.17 2017/07/03 21:34:20 wiz Exp $ +.\" $NetBSD: mklocale.1,v 1.18 2023/12/28 03:49:35 rin Exp $ .\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp .\" .\" Copyright (c) 1993, 1994 @@ -33,7 +33,7 @@ .\" .\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94 .\" -.Dd July 15, 2013 +.Dd December 28, 2023 .Dt MKLOCALE 1 .Os .Sh NAME @@ -210,7 +210,9 @@ is the integer value represented by For example, the ASCII character .Sq 0 would map to the decimal value 0. -Only values up to 255 are allowed. +On +.Nx , +this information is ignored and not put into the binary output file. 
.El .Pp The following keywords may appear multiple times and have the following diff --git a/usr.bin/mklocale/yacc.y b/usr.bin/mklocale/yacc.y index f95cbd5fe8e9..1f07e57c7b4f 100644 --- a/usr.bin/mklocale/yacc.y +++ b/usr.bin/mklocale/yacc.y @@ -1,4 +1,4 @@ -/* $NetBSD: yacc.y,v 1.34 2019/10/13 21:12:32 christos Exp $ */ +/* $NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $ */ %{ /*- @@ -43,7 +43,7 @@ static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93"; static char rcsid[] = "$FreeBSD$"; #else -__RCSID("$NetBSD: yacc.y,v 1.34 2019/10/13 21:12:32 christos Exp $"); +__RCSID("$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $"); #endif #endif /* not lint */ @@ -82,7 +82,9 @@ __nbrune_t charsetmask = (__nbrune_t)0x0000007f; __nbrune_t charsetmask = (__nbrune_t)0xffffffff; void set_map(rune_map *, rune_list *, u_int32_t); +#if 0 void set_digitmap(rune_map *, rune_list *); +#endif void add_map(rune_map *, rune_list *, u_int32_t); __dead void usage(void); @@ -187,8 +189,19 @@ entry : ENCODING STRING { set_map(&maplower, $2, 0); } | MAPUPPER map { set_map(&mapupper, $2, 0); } - | DIGITMAP map - { set_digitmap(&types, $2); } +/* + * XXX PR lib/57798 + * set_digitmap() was implemented with the assumption that + * all digit characters can be mapped to numerical values <= 255. + * This is no longer true for Unicode, and results in, e.g., + * wrong return values of wcwidth(3) for U+5146 or U+16B60. 
+ * + * | DIGITMAP map + * { set_digitmap(&types, $2); } + * + */ + | DIGITMAP mapignore + { } ; list : RUNE @@ -254,6 +267,12 @@ map : LBRK RUNE RUNE RBRK $$->next = $1; } ; + +mapignore : LBRK RUNE RUNE RBRK { } + | map LBRK RUNE RUNE RBRK { } + | LBRK RUNE THRU RUNE ':' RUNE RBRK { } + | map LBRK RUNE THRU RUNE ':' RUNE RBRK { } + ; %% int debug = 0; @@ -382,6 +401,7 @@ set_map(rune_map *map, rune_list *list, u_int32_t flag) } } +#if 0 void set_digitmap(rune_map *map, rune_list *list) { @@ -401,6 +421,7 @@ set_digitmap(rune_map *map, rune_list *list) list = nlist; } } +#endif void add_map(rune_map *map, rune_list *list, u_int32_t flag)