2012-01-20 20:31:29 +04:00
|
|
|
/* $NetBSD: rune.c,v 1.43 2012/01/20 16:31:30 joerg Exp $ */
|
2000-12-21 14:29:47 +03:00
|
|
|
|
|
|
|
/*-
|
2010-06-19 17:26:51 +04:00
|
|
|
* Copyright (c)2010 Citrus Project,
|
2000-12-21 14:29:47 +03:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
#include <sys/endian.h>
|
|
|
|
#include <sys/mman.h>
|
2010-06-01 17:52:07 +04:00
|
|
|
#include <sys/stat.h>
|
2001-01-03 18:23:26 +03:00
|
|
|
#include <assert.h>
|
2010-06-01 17:52:07 +04:00
|
|
|
#include <errno.h>
|
2010-06-19 17:26:51 +04:00
|
|
|
#include <fcntl.h>
|
|
|
|
#define __SETLOCALE_SOURCE__
|
|
|
|
#include <locale.h>
|
|
|
|
#include <stddef.h>
|
2000-12-21 14:29:47 +03:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2010-06-01 17:52:07 +04:00
|
|
|
#include <string.h>
|
2010-06-19 17:26:51 +04:00
|
|
|
#include <unistd.h>
|
2003-03-05 23:18:14 +03:00
|
|
|
#include <wchar.h>
|
Fixes PR lib/39662, shortcomings in LC_{MONETARY,NUMERIC,TIME,MESSAGES} db format.
ok'ed by core and releng.
(thanks for agc@, snj@ and i'm sorry for long time patience).
[libc]
- localeio.[ch] and lc*.[ch] in src/lib/libc/locale was replaced by
new locale-db implementation using citrus_db backend,
see src/lib/libc/citrus/citrus_lc_*.[ch].
- add citrus_bcs_strtou?l.c. don't use strtou?l locale implementation
internally, because they're locale-aware function.
- add some stubs for multi-locale issue, see {current,global}_locale.c.
- remove some obsolete file, setrunelocale.c, ___runetype_mb.c.
- remove __savectype() from ctypeio.[ch].
[tools]
- mklocale(1): add new option ``-t'' that generates new style
LC_{MONETARY,NUMERIC,TIME,MESSAGES} locale-db format.
- chrtbl(1): added ctypeio.[ch] for __savectype().
[locale-db]
- added en_US.US-ASCII locale.
- removed some shareable locale definition file:
en_US.US-ASCII -> en_US.ISO8859-1, en_US.UTF-8
zh_CN.eucCN -> zh_CN.GB18030
and more...see src/share/locale/*/Makefile.
- remove obsoleted locale sr_YU, added new locale sr_ME, sr_RS.
- change locale name ja_JP.ISO2022-JP* -> ja_JP.ISO-2022-JP*
for X11's locale.alias file alignments.
- fix regression test, wrong wcs?width(3), NAN/INF usage.
i tested release-build following arch:
i386, amd64, hpc{mips,arm,sh}, sparc64, vax.
citrus_lc_*.[ch] also can read old-plain-text style locale-db.
so that backward compatibility is keeped, but lc*.[ch] can't read
new citrus_db'ed locale-db and localeio.c never check sanity,
so forward compatibility is broken ;-<
old mklocale(1) doesn't know -t option, so you have to rebuild toolchain.
2009-01-02 03:20:18 +03:00
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
#include "setlocale_local.h"
|
|
|
|
|
Fixes PR lib/39662, shortcomings in LC_{MONETARY,NUMERIC,TIME,MESSAGES} db format.
ok'ed by core and releng.
(thanks for agc@, snj@ and i'm sorry for long time patience).
[libc]
- localeio.[ch] and lc*.[ch] in src/lib/libc/locale was replaced by
new locale-db implementation using citrus_db backend,
see src/lib/libc/citrus/citrus_lc_*.[ch].
- add citrus_bcs_strtou?l.c. don't use strtou?l locale implementation
internally, because they're locale-aware function.
- add some stubs for multi-locale issue, see {current,global}_locale.c.
- remove some obsolete file, setrunelocale.c, ___runetype_mb.c.
- remove __savectype() from ctypeio.[ch].
[tools]
- mklocale(1): add new option ``-t'' that generates new style
LC_{MONETARY,NUMERIC,TIME,MESSAGES} locale-db format.
- chrtbl(1): added ctypeio.[ch] for __savectype().
[locale-db]
- added en_US.US-ASCII locale.
- removed some shareable locale definition file:
en_US.US-ASCII -> en_US.ISO8859-1, en_US.UTF-8
zh_CN.eucCN -> zh_CN.GB18030
and more...see src/share/locale/*/Makefile.
- remove obsoleted locale sr_YU, added new locale sr_ME, sr_RS.
- change locale name ja_JP.ISO2022-JP* -> ja_JP.ISO-2022-JP*
for X11's locale.alias file alignments.
- fix regression test, wrong wcs?width(3), NAN/INF usage.
i tested release-build following arch:
i386, amd64, hpc{mips,arm,sh}, sparc64, vax.
citrus_lc_*.[ch] also can read old-plain-text style locale-db.
so that backward compatibility is keeped, but lc*.[ch] can't read
new citrus_db'ed locale-db and localeio.c never check sanity,
so forward compatibility is broken ;-<
old mklocale(1) doesn't know -t option, so you have to rebuild toolchain.
2009-01-02 03:20:18 +03:00
|
|
|
#include "citrus_module.h"
|
|
|
|
#include "citrus_ctype.h"
|
|
|
|
|
2010-06-13 08:14:56 +04:00
|
|
|
#include "runetype_local.h"
|
2000-12-21 14:29:47 +03:00
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
#include "multibyte.h"
|
2000-12-21 14:29:47 +03:00
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
#include "_wctype_local.h"
|
|
|
|
#include "_wctrans_local.h"
|
2000-12-21 14:29:47 +03:00
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
typedef struct {
|
|
|
|
_RuneLocale rl;
|
|
|
|
unsigned char rlp_ctype_tab [_CTYPE_NUM_CHARS + 1];
|
|
|
|
short rlp_tolower_tab[_CTYPE_NUM_CHARS + 1];
|
|
|
|
short rlp_toupper_tab[_CTYPE_NUM_CHARS + 1];
|
|
|
|
char rlp_codeset[33]; /* XXX */
|
|
|
|
} _RuneLocalePriv;
|
2000-12-21 14:29:47 +03:00
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
static __inline void
|
|
|
|
_rune_wctype_init(_RuneLocale *rl)
|
2000-12-21 14:29:47 +03:00
|
|
|
{
|
2010-06-19 17:26:51 +04:00
|
|
|
memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
|
|
|
|
sizeof(rl->rl_wctype));
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
static __inline void
|
|
|
|
_rune_wctrans_init(_RuneLocale *rl)
|
2001-03-26 23:55:42 +04:00
|
|
|
{
|
2010-06-19 17:26:51 +04:00
|
|
|
rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name = "tolower";
|
|
|
|
rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0];
|
|
|
|
rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext;
|
|
|
|
rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name = "toupper";
|
|
|
|
rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0];
|
|
|
|
rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext;
|
2001-03-26 23:55:42 +04:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
static __inline void
|
|
|
|
_rune_init_priv(_RuneLocalePriv *rlp)
|
2000-12-21 14:29:47 +03:00
|
|
|
{
|
2010-06-19 17:26:51 +04:00
|
|
|
#if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS
|
|
|
|
int i;
|
2000-12-21 14:29:47 +03:00
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) {
|
|
|
|
rlp->rlp_ctype_tab [i + 1] = 0;
|
|
|
|
rlp->rlp_tolower_tab[i + 1] = i;
|
|
|
|
rlp->rlp_toupper_tab[i + 1] = i;
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
2010-06-19 17:26:51 +04:00
|
|
|
#endif
|
|
|
|
rlp->rlp_ctype_tab [0] = 0;
|
|
|
|
rlp->rlp_tolower_tab[0] = EOF;
|
|
|
|
rlp->rlp_toupper_tab[0] = EOF;
|
|
|
|
|
|
|
|
rlp->rl.rl_ctype_tab = (const unsigned char *)&rlp->rlp_ctype_tab[0];
|
|
|
|
rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0];
|
|
|
|
rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0];
|
|
|
|
rlp->rl.rl_codeset = (const char *)&rlp->rlp_codeset[0];
|
|
|
|
|
|
|
|
_rune_wctype_init(&rlp->rl);
|
|
|
|
_rune_wctrans_init(&rlp->rl);
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
static __inline void
|
|
|
|
_rune_find_codeset(char *s, size_t n,
|
2010-11-30 18:25:05 +03:00
|
|
|
char *var, size_t *plenvar)
|
2003-03-03 01:18:11 +03:00
|
|
|
{
|
2010-11-30 18:25:05 +03:00
|
|
|
size_t lenvar;
|
2010-06-19 17:26:51 +04:00
|
|
|
const char *endvar;
|
|
|
|
|
|
|
|
#define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1)
|
|
|
|
|
2010-11-30 18:25:05 +03:00
|
|
|
lenvar = *plenvar;
|
2010-06-19 17:26:51 +04:00
|
|
|
for (/**/; lenvar > _RUNE_CODESET_LEN; ++var, --lenvar) {
|
|
|
|
if (!memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN)) {
|
2010-11-30 18:25:05 +03:00
|
|
|
*var = '\0';
|
|
|
|
*plenvar -= lenvar;
|
2010-06-19 17:26:51 +04:00
|
|
|
endvar = &var[_RUNE_CODESET_LEN];
|
|
|
|
while (n-- > 1 && lenvar-- > _RUNE_CODESET_LEN) {
|
|
|
|
if (*endvar == ' ' || *endvar == '\t')
|
|
|
|
break;
|
|
|
|
*s++ = *endvar++;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*s = '\0';
|
2003-03-03 01:18:11 +03:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
static __inline int
|
|
|
|
_rune_read_file(const char * __restrict var, size_t lenvar,
|
|
|
|
_RuneLocale ** __restrict prl)
|
2000-12-21 14:29:47 +03:00
|
|
|
{
|
2010-06-19 17:26:51 +04:00
|
|
|
int ret, i;
|
|
|
|
const _FileRuneLocale *frl;
|
|
|
|
const _FileRuneEntry *fre;
|
|
|
|
const uint32_t *frune;
|
|
|
|
_RuneLocalePriv *rlp;
|
2000-12-21 14:29:47 +03:00
|
|
|
_RuneLocale *rl;
|
2010-06-19 17:26:51 +04:00
|
|
|
_RuneEntry *re;
|
|
|
|
uint32_t *rune;
|
|
|
|
uint32_t runetype_len, maplower_len, mapupper_len, variable_len;
|
|
|
|
size_t len, n;
|
|
|
|
|
|
|
|
if (lenvar < sizeof(*frl))
|
|
|
|
return EFTYPE;
|
|
|
|
lenvar -= sizeof(*frl);
|
|
|
|
frl = (const _FileRuneLocale *)(const void *)var;
|
|
|
|
if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic)))
|
|
|
|
return EFTYPE;
|
|
|
|
|
|
|
|
runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges);
|
|
|
|
maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges);
|
|
|
|
mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges);
|
|
|
|
len = runetype_len + maplower_len + mapupper_len;
|
|
|
|
|
|
|
|
fre = (const _FileRuneEntry *)(const void *)(frl + 1);
|
|
|
|
frune = (const uint32_t *)(const void *)(fre + len);
|
|
|
|
|
|
|
|
variable_len = be32toh((uint32_t)frl->frl_variable_len);
|
|
|
|
|
|
|
|
n = (len * sizeof(*fre)) + variable_len;
|
|
|
|
if (lenvar < n)
|
|
|
|
return EFTYPE;
|
|
|
|
lenvar -= n;
|
|
|
|
|
|
|
|
n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar;
|
|
|
|
rlp = (_RuneLocalePriv *)malloc(n);
|
|
|
|
if (rlp == NULL)
|
|
|
|
return ENOMEM;
|
|
|
|
_rune_init_priv(rlp);
|
|
|
|
|
|
|
|
rl = &rlp->rl;
|
|
|
|
re = (_RuneEntry *)(void *)(rlp + 1);
|
|
|
|
rune = (uint32_t *)(void *)(re + len);
|
|
|
|
|
|
|
|
for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
|
|
|
|
rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]);
|
|
|
|
rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]);
|
|
|
|
rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]);
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
#define READ_RANGE(name) \
|
|
|
|
do { \
|
|
|
|
const _FileRuneEntry *end_fre; \
|
|
|
|
const uint32_t *end_frune; \
|
|
|
|
\
|
|
|
|
rl->rl_##name##_ext.rr_nranges = name##_len; \
|
|
|
|
rl->rl_##name##_ext.rr_rune_ranges = re; \
|
|
|
|
\
|
|
|
|
end_fre = fre + name##_len; \
|
|
|
|
while (fre < end_fre) { \
|
|
|
|
re->re_min = be32toh((uint32_t)fre->fre_min); \
|
|
|
|
re->re_max = be32toh((uint32_t)fre->fre_max); \
|
|
|
|
re->re_map = be32toh((uint32_t)fre->fre_map); \
|
|
|
|
if (re->re_map != 0) { \
|
|
|
|
re->re_rune_types = NULL; \
|
|
|
|
} else { \
|
|
|
|
re->re_rune_types = rune; \
|
|
|
|
len = re->re_max - re->re_min + 1; \
|
|
|
|
n = len * sizeof(*frune); \
|
|
|
|
if (lenvar < n) { \
|
|
|
|
ret = EFTYPE; \
|
|
|
|
goto err; \
|
|
|
|
} \
|
|
|
|
lenvar -= n; \
|
|
|
|
end_frune = frune + len; \
|
|
|
|
while (frune < end_frune) \
|
|
|
|
*rune++ = be32toh(*frune++); \
|
|
|
|
} \
|
|
|
|
++fre, ++re; \
|
|
|
|
} \
|
|
|
|
} while (/*CONSTCOND*/0)
|
|
|
|
|
|
|
|
READ_RANGE(runetype);
|
|
|
|
READ_RANGE(maplower);
|
|
|
|
READ_RANGE(mapupper);
|
|
|
|
|
|
|
|
memcpy((void *)rune, (void const *)frune, variable_len);
|
|
|
|
rl->rl_variable_len = variable_len;
|
|
|
|
rl->rl_variable = (void *)rune;
|
|
|
|
|
|
|
|
if (lenvar > 0) {
|
|
|
|
ret = EFTYPE;
|
|
|
|
goto err;
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
_rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset),
|
2010-11-30 18:25:05 +03:00
|
|
|
(char *)rl->rl_variable, &rl->rl_variable_len);
|
2010-06-19 17:26:51 +04:00
|
|
|
|
|
|
|
ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding,
|
|
|
|
rl->rl_variable, rl->rl_variable_len, _PRIVSIZE);
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
if (__mb_len_max_runtime <
|
|
|
|
_citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) {
|
|
|
|
ret = EINVAL;
|
|
|
|
goto err;
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
|
|
|
|
wint_t wc;
|
|
|
|
|
|
|
|
ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc);
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
if (wc == WEOF) {
|
|
|
|
rlp->rlp_ctype_tab[i + 1] = 0;
|
|
|
|
rlp->rlp_tolower_tab[i + 1] = i;
|
|
|
|
rlp->rlp_toupper_tab[i + 1] = i;
|
|
|
|
} else {
|
|
|
|
rlp->rlp_ctype_tab[i + 1] = (unsigned char)
|
|
|
|
_runetype_to_ctype(_runetype_priv(rl, wc));
|
|
|
|
|
|
|
|
#define CONVERT_MAP(name) \
|
|
|
|
do { \
|
|
|
|
wint_t map; \
|
|
|
|
int c; \
|
|
|
|
\
|
|
|
|
map = _towctrans_priv(wc, _wctrans_##name(rl)); \
|
|
|
|
if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype, \
|
|
|
|
map, &c) || c == EOF)) \
|
|
|
|
c = i; \
|
|
|
|
rlp->rlp_to##name##_tab[i + 1] = (short)c; \
|
|
|
|
} while (/*CONSTCOND*/0)
|
|
|
|
|
|
|
|
CONVERT_MAP(lower);
|
|
|
|
CONVERT_MAP(upper);
|
|
|
|
}
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
2010-06-19 17:26:51 +04:00
|
|
|
*prl = rl;
|
|
|
|
return 0;
|
2000-12-21 14:29:47 +03:00
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
err:
|
|
|
|
free(rlp);
|
|
|
|
return ret;
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
|
|
|
|
2010-06-19 17:26:51 +04:00
|
|
|
int
|
|
|
|
_rune_load(const char * __restrict var, size_t lenvar,
|
|
|
|
_RuneLocale ** __restrict prl)
|
2000-12-21 14:29:47 +03:00
|
|
|
{
|
2010-06-19 17:26:51 +04:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
_DIAGASSERT(var != NULL || lenvar < 1);
|
|
|
|
_DIAGASSERT(prl != NULL);
|
|
|
|
|
|
|
|
if (lenvar < 1)
|
|
|
|
return EFTYPE;
|
|
|
|
switch (*var) {
|
|
|
|
case 'R':
|
|
|
|
ret = _rune_read_file(var, lenvar, prl);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ret = EFTYPE;
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|
2010-06-19 17:26:51 +04:00
|
|
|
return ret;
|
2000-12-21 14:29:47 +03:00
|
|
|
}
|