mklocale: XXX: Neglect TODIGIT at the moment

PR lib/57798

TODIGIT was implemented on the assumption that every digit character
maps to a numerical value <= 255.

This is no longer true for Unicode, and results in, e.g., wrong
return values of wcwidth(3) for U+5146 or U+16B60.

As a workaround, ignore TODIGIT for now, as OpenBSD does:
https://github.com/OpenBSD/src/commit/4efe9bdeb34

XXX
At least netbsd-10 should also get this fix, but that requires some testing first.
This commit is contained in:
rin 2023-12-28 03:49:35 +00:00
parent 649e2b1f68
commit 96f7cfd903
2 changed files with 30 additions and 7 deletions

View File

@ -1,4 +1,4 @@
.\" $NetBSD: mklocale.1,v 1.17 2017/07/03 21:34:20 wiz Exp $
.\" $NetBSD: mklocale.1,v 1.18 2023/12/28 03:49:35 rin Exp $
.\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp
.\"
.\" Copyright (c) 1993, 1994
@ -33,7 +33,7 @@
.\"
.\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94
.\"
.Dd July 15, 2013
.Dd December 28, 2023
.Dt MKLOCALE 1
.Os
.Sh NAME
@ -210,7 +210,9 @@ is the integer value represented by
For example, the ASCII character
.Sq 0
would map to the decimal value 0.
Only values up to 255 are allowed.
On
.Nx ,
this information is ignored and not put into the binary output file.
.El
.Pp
The following keywords may appear multiple times and have the following

View File

@ -1,4 +1,4 @@
/* $NetBSD: yacc.y,v 1.34 2019/10/13 21:12:32 christos Exp $ */
/* $NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $ */
%{
/*-
@ -43,7 +43,7 @@
static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93";
static char rcsid[] = "$FreeBSD$";
#else
__RCSID("$NetBSD: yacc.y,v 1.34 2019/10/13 21:12:32 christos Exp $");
__RCSID("$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $");
#endif
#endif /* not lint */
@ -82,7 +82,9 @@ __nbrune_t charsetmask = (__nbrune_t)0x0000007f;
__nbrune_t charsetmask = (__nbrune_t)0xffffffff;
void set_map(rune_map *, rune_list *, u_int32_t);
/* Disabled along with its definition: DIGITMAP is parsed but ignored (PR lib/57798). */
#if 0
void set_digitmap(rune_map *, rune_list *);
#endif
void add_map(rune_map *, rune_list *, u_int32_t);
__dead void usage(void);
@ -187,8 +189,19 @@ entry : ENCODING STRING
{ set_map(&maplower, $2, 0); }
| MAPUPPER map
{ set_map(&mapupper, $2, 0); }
| DIGITMAP map
{ set_digitmap(&types, $2); }
/*
* XXX PR lib/57798
* set_digitmap() was implemented with an assumption that
* all digit characters are mapped to numerical values <= 255.
* This is no longer true for Unicode, and results in, e.g.,
* wrong return values of wcwidth(3) for U+5146 or U+16B60.
*
* | DIGITMAP map
* { set_digitmap(&types, $2); }
*
*/
| DIGITMAP mapignore
{ }
;
list : RUNE
@ -254,6 +267,12 @@ map : LBRK RUNE RUNE RBRK
$$->next = $1;
}
;
/*
 * mapignore: argument syntax for keywords whose contents are parsed
 * but not used (currently DIGITMAP, see PR lib/57798).
 *
 * Accepts "LBRK RUNE RUNE RBRK" and "LBRK RUNE THRU RUNE ':' RUNE RBRK"
 * groups, each optionally preceded by a map; every action here is empty.
 * The leading groups are matched through map rather than mapignore, so
 * any actions attached to map still run for them.
 */
mapignore : LBRK RUNE RUNE RBRK { }
| map LBRK RUNE RUNE RBRK { }
| LBRK RUNE THRU RUNE ':' RUNE RBRK { }
| map LBRK RUNE THRU RUNE ':' RUNE RBRK { }
;
%%
int debug = 0;
@ -382,6 +401,7 @@ set_map(rune_map *map, rune_list *list, u_int32_t flag)
}
}
#if 0
void
set_digitmap(rune_map *map, rune_list *list)
{
@ -401,6 +421,7 @@ set_digitmap(rune_map *map, rune_list *list)
list = nlist;
}
}
#endif
void
add_map(rune_map *map, rune_list *list, u_int32_t flag)