mklocale(1): Add range check for TODIGIT, rather than disabling it
PR lib/57798 The digit value specified by TODIGIT is stored in the lowest 8 bits of _RuneType, see lib/libc/locale/runetype_file.h: https://nxr.netbsd.org/xref/src/lib/libc/locale/runetype_file.h#56 The symptom reported in the PR is due to a missing range check for this value; values of 256 and above were mistakenly treated as other flag bits in _RuneType. For example, U+5146 has the numerical value 1,000,000,000,000 == 0xe8d4a51000, where __BITS(30, 31) == _RUNETYPE_SW3 are turned on. This is why wcwidth(3) returned 3 for this character. This apparently affected not only character width, but also other attributes stored in _RuneType. IIUC, digit value attributes in _RuneType have never been utilized until now, but preserve these if the digit value fits within (0, 256). This should be safer for pulling this up into netbsd-10. Also, these attributes may be useful to implement some I18N features as suggested by uwe@ in the PR. netbsd-[98] is not affected as these use old UTF-8 ctype definitions.
This commit is contained in:
parent
96fc6d0a8c
commit
1246b914bd
|
@ -1,4 +1,4 @@
|
|||
.\" $NetBSD: mklocale.1,v 1.18 2023/12/28 03:49:35 rin Exp $
|
||||
.\" $NetBSD: mklocale.1,v 1.19 2024/01/05 02:38:06 rin Exp $
|
||||
.\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp
|
||||
.\"
|
||||
.\" Copyright (c) 1993, 1994
|
||||
|
@ -33,7 +33,7 @@
|
|||
.\"
|
||||
.\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94
|
||||
.\"
|
||||
.Dd December 28, 2023
|
||||
.Dd January 5, 2024
|
||||
.Dt MKLOCALE 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
|
@ -212,7 +212,9 @@ For example, the ASCII character
|
|||
would map to the decimal value 0.
|
||||
On
|
||||
.Nx ,
|
||||
this information is ignored and not put into the binary output file.
|
||||
this information has never been used until now.
|
||||
Only values up to 255 are allowed, and mapping to 256 and above is
|
||||
silently ignored.
|
||||
.El
|
||||
.Pp
|
||||
The following keywords may appear multiple times and have the following
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $ */
|
||||
/* $NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $ */
|
||||
|
||||
%{
|
||||
/*-
|
||||
|
@ -43,7 +43,7 @@
|
|||
static char sccsid[] = "@(#)yacc.y 8.1 (Berkeley) 6/6/93";
|
||||
static char rcsid[] = "$FreeBSD$";
|
||||
#else
|
||||
__RCSID("$NetBSD: yacc.y,v 1.35 2023/12/28 03:49:35 rin Exp $");
|
||||
__RCSID("$NetBSD: yacc.y,v 1.36 2024/01/05 02:38:06 rin Exp $");
|
||||
#endif
|
||||
#endif /* not lint */
|
||||
|
||||
|
@ -82,9 +82,7 @@ __nbrune_t charsetmask = (__nbrune_t)0x0000007f;
|
|||
__nbrune_t charsetmask = (__nbrune_t)0xffffffff;
|
||||
|
||||
void set_map(rune_map *, rune_list *, u_int32_t);
|
||||
#if 0
|
||||
void set_digitmap(rune_map *, rune_list *);
|
||||
#endif
|
||||
void add_map(rune_map *, rune_list *, u_int32_t);
|
||||
|
||||
__dead void usage(void);
|
||||
|
@ -189,19 +187,8 @@ entry : ENCODING STRING
|
|||
{ set_map(&maplower, $2, 0); }
|
||||
| MAPUPPER map
|
||||
{ set_map(&mapupper, $2, 0); }
|
||||
/*
|
||||
* XXX PR lib/57798
|
||||
* set_digitmap() was implemented with an assumption that
|
||||
* all characters are mapped to numerical values <= 255.
|
||||
* This is no longer true for Unicode, and results in, e.g.,
|
||||
* wrong return values of wcwidth(3) for U+5146 or U+16B60.
|
||||
*
|
||||
* | DIGITMAP map
|
||||
* { set_digitmap(&types, $2); }
|
||||
*
|
||||
*/
|
||||
| DIGITMAP mapignore
|
||||
{ }
|
||||
| DIGITMAP map
|
||||
{ set_digitmap(&types, $2); }
|
||||
;
|
||||
|
||||
list : RUNE
|
||||
|
@ -267,12 +254,6 @@ map : LBRK RUNE RUNE RBRK
|
|||
$$->next = $1;
|
||||
}
|
||||
;
|
||||
|
||||
mapignore : LBRK RUNE RUNE RBRK { }
|
||||
| map LBRK RUNE RUNE RBRK { }
|
||||
| LBRK RUNE THRU RUNE ':' RUNE RBRK { }
|
||||
| map LBRK RUNE THRU RUNE ':' RUNE RBRK { }
|
||||
;
|
||||
%%
|
||||
|
||||
int debug = 0;
|
||||
|
@ -401,7 +382,6 @@ set_map(rune_map *map, rune_list *list, u_int32_t flag)
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
void
|
||||
set_digitmap(rune_map *map, rune_list *list)
|
||||
{
|
||||
|
@ -410,18 +390,24 @@ set_digitmap(rune_map *map, rune_list *list)
|
|||
while (list) {
|
||||
rune_list *nlist = list->next;
|
||||
for (i = list->min; i <= list->max; ++i) {
|
||||
if (list->map + (i - list->min)) {
|
||||
/*
|
||||
* XXX PR lib/57798
|
||||
* Currently, we support mapping up to 255. Attempts to map
|
||||
* 256 (== _RUNETYPE_A) and above are silently ignored.
|
||||
*/
|
||||
_RuneType digit = list->map + (i - list->min);
|
||||
if (digit > 0 && digit <= 0xff) {
|
||||
rune_list *tmp = (rune_list *)xmalloc(sizeof(rune_list));
|
||||
memset(tmp, 0, sizeof(*tmp));
|
||||
tmp->min = i;
|
||||
tmp->max = i;
|
||||
add_map(map, tmp, list->map + (i - list->min));
|
||||
add_map(map, tmp, digit);
|
||||
}
|
||||
}
|
||||
free(list);
|
||||
list = nlist;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
add_map(rune_map *map, rune_list *list, u_int32_t flag)
|
||||
|
|
Loading…
Reference in New Issue