NetBSD/lib/libc/citrus/modules/citrus_iconv_std.c
2006-11-13 19:08:19 +00:00

587 lines
14 KiB
C

/* $NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $ */
/*-
* Copyright (c)2003 Citrus Project,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $");
#endif /* LIBC_SCCS and not lint */
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <machine/endian.h>
#include <sys/queue.h>
#include "citrus_namespace.h"
#include "citrus_types.h"
#include "citrus_module.h"
#include "citrus_region.h"
#include "citrus_mmap.h"
#include "citrus_hash.h"
#include "citrus_iconv.h"
#include "citrus_stdenc.h"
#include "citrus_mapper.h"
#include "citrus_csmapper.h"
#include "citrus_memstream.h"
#include "citrus_iconv_std.h"
#include "citrus_esdb.h"
/* ---------------------------------------------------------------------- */
/*
 * NOTE(review): both macros are supplied by the citrus iconv headers and
 * are not visible in this file.  Presumably _CITRUS_ICONV_DECLS() declares
 * the module's operation functions and _CITRUS_ICONV_DEF_OPS() defines the
 * operation table _citrus_iconv_std_iconv_ops that the getops routine
 * below copies out -- confirm against the header.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
/* ---------------------------------------------------------------------- */
/*
 * _citrus_iconv_std_iconv_getops:
 *	Copy this module's operation table into the caller-supplied buffer.
 *
 *	Returns EINVAL when expected_version is lower than
 *	_CITRUS_ICONV_ABI_VERSION or when lenops is smaller than *ops;
 *	otherwise copies the table and returns 0.
 */
int
_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
    u_int32_t expected_version)
{

	/* refuse callers built against an older ABI */
	if (expected_version < _CITRUS_ICONV_ABI_VERSION)
		return (EINVAL);
	/* refuse a buffer that cannot hold the whole table */
	if (lenops < sizeof(*ops))
		return (EINVAL);

	memcpy(ops, &_citrus_iconv_std_iconv_ops,
	    sizeof(_citrus_iconv_std_iconv_ops));

	return (0);
}
/* ---------------------------------------------------------------------- */
/*
* convenience routines for stdenc.
*/
/*
 * Copy the live conversion state (se_ps) into the backup buffer
 * (se_pssaved).  Nothing is done when the encoding has no state buffer.
 */
static __inline void
save_encoding_state(struct _citrus_iconv_std_encoding *se)
{
	size_t len;

	if (se->se_ps == NULL)
		return;

	len = _stdenc_get_state_size(se->se_handle);
	memcpy(se->se_pssaved, se->se_ps, len);
}
/*
 * Copy the backup buffer (se_pssaved) back over the live conversion state
 * (se_ps).  Nothing is done when the encoding has no state buffer.
 */
static __inline void
restore_encoding_state(struct _citrus_iconv_std_encoding *se)
{
	size_t len;

	if (se->se_ps == NULL)
		return;

	len = _stdenc_get_state_size(se->se_handle);
	memcpy(se->se_ps, se->se_pssaved, len);
}
/*
 * Re-initialise the live conversion state through _stdenc_init_state().
 * Nothing is done when the encoding has no state buffer; the result of
 * the initialisation is not reported to the caller.
 */
static __inline void
init_encoding_state(struct _citrus_iconv_std_encoding *se)
{

	if (se->se_ps == NULL)
		return;

	_stdenc_init_state(se->se_handle, se->se_ps);
}
/*
 * Thin wrapper around _stdenc_mbtocs() that supplies the encoding handle
 * and the live state held in *se.  All other arguments and the return
 * value are passed through unchanged.
 */
static __inline int
mbtocsx(struct _citrus_iconv_std_encoding *se,
    _csid_t *csid, _index_t *idx, const char **s, size_t n,
    size_t *nresult)
{
	int error;

	error = _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
	    nresult);

	return error;
}
/*
 * Thin wrapper around _stdenc_cstomb() that supplies the encoding handle
 * and the live state held in *se.  All other arguments and the return
 * value are passed through unchanged.
 */
static __inline int
cstombx(struct _citrus_iconv_std_encoding *se,
    char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
{
	int error;

	error = _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
	    nresult);

	return error;
}
/*
 * Thin wrapper around _stdenc_wctomb() that supplies the encoding handle
 * and the live state held in *se.  All other arguments and the return
 * value are passed through unchanged.
 */
static __inline int
wctombx(struct _citrus_iconv_std_encoding *se,
    char *s, size_t n, _wc_t wc, size_t *nresult)
{
	int error;

	error = _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);

	return error;
}
static __inline int
put_state_resetx(struct _citrus_iconv_std_encoding *se,
char *s, size_t n, size_t *nresult)
{
return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
}
static __inline int
get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
{
int ret;
struct _stdenc_state_desc ssd;
ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
_STDENC_SDID_GENERIC, &ssd);
if (!ret)
*rstate = ssd.u.generic.state;
return ret;
}
/*
* init encoding context
*/
/*
 * init_encoding:
 *	Bind an encoding context to the encoding handle cs and to its two
 *	state buffers: ps1 becomes the live state (se_ps) and ps2 the backup
 *	used by save/restore (se_pssaved).
 *
 *	When ps1 is NULL the encoding is treated as stateless: nothing is
 *	initialised and 0 is returned, because there is nothing that can
 *	fail.  (This used to return -1, which is not an errno value and
 *	made a stateless encoding look like a failure.)
 *
 *	Otherwise the live state is initialised first and, if that
 *	succeeds and ps2 is non-NULL, the backup state as well.  The first
 *	non-zero result of _stdenc_init_state() is returned.
 */
static int
init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
    void *ps1, void *ps2)
{
	int ret;

	se->se_handle = cs;
	se->se_ps = ps1;
	se->se_pssaved = ps2;

	/* stateless encoding: no buffers to set up */
	if (se->se_ps == NULL)
		return 0;

	ret = _stdenc_init_state(cs, se->se_ps);
	if (ret == 0 && se->se_pssaved != NULL)
		ret = _stdenc_init_state(cs, se->se_pssaved);

	return ret;
}
/*
 * open_csmapper:
 *	Open the charset mapper from src to dst and hand it back through
 *	*rcm.  The normalisation value reported by _csmapper_open() is
 *	stored through rnorm.
 *
 *	Only mappers whose source maximum and destination maximum are both
 *	1 and whose state size is 0 are accepted; any other mapper is
 *	closed again and EINVAL is returned.  A failure of _csmapper_open()
 *	is returned as is.  *rcm is written only on success.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	/* unsupported mapper shape */
	_csmapper_close(mapper);
	return EINVAL;
}
/*
 * close_dsts:
 *	Empty a destination list: every entry is unlinked, its mapper is
 *	closed and the entry itself is freed.
 */
static void
close_dsts(struct _citrus_iconv_std_dst_list *dl)
{
	struct _citrus_iconv_std_dst *head;

	for (;;) {
		head = TAILQ_FIRST(dl);
		if (head == NULL)
			break;
		TAILQ_REMOVE(dl, head, sd_entry);
		_csmapper_close(head->sd_mapper);
		free(head);
	}
}
static int
open_dsts(struct _citrus_iconv_std_dst_list *dl,
const struct _esdb_charset *ec, const struct _esdb *dbdst)
{
int i, ret;
struct _citrus_iconv_std_dst *sd, *sdtmp;
unsigned long norm;
sd = malloc(sizeof(*sd));
if (sd == NULL)
return errno;
for (i=0; i<dbdst->db_num_charsets; i++) {
ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
dbdst->db_charsets[i].ec_csname, &norm);
if (ret == 0) {
sd->sd_csid = dbdst->db_charsets[i].ec_csid;
sd->sd_norm = norm;
/* insert this mapper by sorted order. */
TAILQ_FOREACH(sdtmp, dl, sd_entry) {
if (sdtmp->sd_norm > norm) {
TAILQ_INSERT_BEFORE(sdtmp, sd,
sd_entry);
sd = NULL;
break;
}
}
if (sd)
TAILQ_INSERT_TAIL(dl, sd, sd_entry);
sd = malloc(sizeof(*sd));
if (sd == NULL) {
ret = errno;
close_dsts(dl);
return ret;
}
} else if (ret != ENOENT) {
close_dsts(dl);
free(sd);
return ret;
}
}
free(sd);
return 0;
}
/*
 * close_srcs:
 *	Empty a source list: every entry is unlinked, its destination list
 *	is closed with close_dsts() and the entry itself is freed.
 */
static void
close_srcs(struct _citrus_iconv_std_src_list *sl)
{
	struct _citrus_iconv_std_src *head;

	for (;;) {
		head = TAILQ_FIRST(sl);
		if (head == NULL)
			break;
		TAILQ_REMOVE(sl, head, ss_entry);
		close_dsts(&head->ss_dsts);
		free(head);
	}
}
static int
open_srcs(struct _citrus_iconv_std_src_list *sl,
const struct _esdb *dbsrc, const struct _esdb *dbdst)
{
int i, ret, count = 0;
struct _citrus_iconv_std_src *ss;
ss = malloc(sizeof(*ss));
if (ss == NULL)
return errno;
TAILQ_INIT(&ss->ss_dsts);
for (i=0; i<dbsrc->db_num_charsets; i++) {
ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
if (ret)
goto err;
if (!TAILQ_EMPTY(&ss->ss_dsts)) {
ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
TAILQ_INSERT_TAIL(sl, ss, ss_entry);
ss = malloc(sizeof(*ss));
if (ss == NULL) {
ret = errno;
goto err;
}
count++;
TAILQ_INIT(&ss->ss_dsts);
}
}
free(ss);
return count ? 0 : ENOENT;
err:
free(ss);
close_srcs(sl);
return ret;
}
/* convert one character */
#define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
/*
 * do_conv:
 *	Map the character (*csid, *idx) of the source encoding to a
 *	character of the destination encoding.
 *
 *	The first entry of is->is_srcs whose charset id equals *csid is
 *	selected and its mappers are tried in list order.  On the first
 *	_MAPPER_CONVERT_SUCCESS, *csid and *idx are replaced by the
 *	destination charset id and the mapped index and 0 is returned.
 *
 *	Returns EILSEQ for _MAPPER_CONVERT_ILSEQ, EINVAL for
 *	_MAPPER_CONVERT_SRC_MORE, _MAPPER_CONVERT_DST_MORE and
 *	_MAPPER_CONVERT_FATAL, and E_NO_CORRESPONDING_CHAR when no source
 *	entry matches or no mapper succeeds.  sc is not used.
 */
static int
/*ARGSUSED*/
do_conv(const struct _citrus_iconv_std_shared *is,
    struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
{
	struct _citrus_iconv_std_src *src;
	struct _citrus_iconv_std_dst *dst;
	_index_t mapped;
	int rv;

	/* locate the first source entry for this charset */
	TAILQ_FOREACH(src, &is->is_srcs, ss_entry) {
		if (src->ss_csid == *csid)
			break;
	}
	if (src == NULL)
		return E_NO_CORRESPONDING_CHAR;

	TAILQ_FOREACH(dst, &src->ss_dsts, sd_entry) {
		rv = _csmapper_convert(dst->sd_mapper, &mapped, *idx, NULL);

		if (rv == _MAPPER_CONVERT_SUCCESS) {
			*csid = dst->sd_csid;
			*idx = mapped;
			return 0;
		}
		if (rv == _MAPPER_CONVERT_ILSEQ)
			return EILSEQ;
		if (rv == _MAPPER_CONVERT_SRC_MORE ||
		    rv == _MAPPER_CONVERT_DST_MORE ||
		    rv == _MAPPER_CONVERT_FATAL)
			return EINVAL;

		/*
		 * _MAPPER_CONVERT_NONIDENTICAL (or any other result):
		 * move on to the next mapper.
		 */
	}

	return E_NO_CORRESPONDING_CHAR;
}
/* ---------------------------------------------------------------------- */
static int
/*ARGSUSED*/
_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
const char * __restrict curdir,
const char * __restrict src,
const char * __restrict dst,
const void * __restrict var, size_t lenvar)
{
int ret;
struct _citrus_iconv_std_shared *is;
struct _citrus_esdb esdbsrc, esdbdst;
is = malloc(sizeof(*is));
if (is==NULL) {
ret = errno;
goto err0;
}
ret = _citrus_esdb_open(&esdbsrc, src);
if (ret)
goto err1;
ret = _citrus_esdb_open(&esdbdst, dst);
if (ret)
goto err2;
ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
esdbsrc.db_variable, esdbsrc.db_len_variable);
if (ret)
goto err3;
ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
esdbdst.db_variable, esdbdst.db_len_variable);
if (ret)
goto err4;
is->is_use_invalid = esdbdst.db_use_invalid;
is->is_invalid = esdbdst.db_invalid;
TAILQ_INIT(&is->is_srcs);
ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
if (ret)
goto err5;
_esdb_close(&esdbsrc);
_esdb_close(&esdbdst);
ci->ci_closure = is;
return 0;
err5:
_stdenc_close(is->is_dst_encoding);
err4:
_stdenc_close(is->is_src_encoding);
err3:
_esdb_close(&esdbdst);
err2:
_esdb_close(&esdbsrc);
err1:
free(is);
err0:
return ret;
}
/*
 * _citrus_iconv_std_iconv_uninit_shared:
 *	Release the shared data stored in ci->ci_closure: both encodings
 *	are closed, the mapper lists are closed and the structure is freed.
 *	A NULL closure is ignored.
 */
static void
_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
{
	struct _citrus_iconv_std_shared *shared;

	shared = ci->ci_closure;
	if (shared != NULL) {
		_stdenc_close(shared->is_src_encoding);
		_stdenc_close(shared->is_dst_encoding);
		close_srcs(&shared->is_srcs);
		free(shared);
	}
}
/*
 * _citrus_iconv_std_iconv_init_context:
 *	Allocate and initialise the per-descriptor conversion context and
 *	store it in cv->cv_closure.
 *
 *	The context structure and the four state buffers (live and backup
 *	state for the source encoding, then live and backup state for the
 *	destination encoding) are carved out of one allocation, so a single
 *	free() releases everything.
 *
 *	Returns 0 on success, errno when the allocation fails, or the error
 *	reported while initialising an encoding state.  In the failure
 *	cases the allocation is released and cv->cv_closure is not written.
 */
static int
_citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
{
	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
	struct _citrus_iconv_std_context *sc;
	int ret;
	size_t szpssrc, szpsdst, sz;
	char *ptr;

	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);

	/* each encoding needs two buffers: live state + saved copy */
	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
	sc = malloc(sz);
	if (sc == NULL)
		return errno;

	/* the state buffers start right behind the context structure */
	ptr = (char *)&sc[1];
	if (szpssrc) {
		ret = init_encoding(&sc->sc_src_encoding,
		    is->is_src_encoding, ptr, ptr+szpssrc);
		if (ret)
			goto err;
	} else {
		/*
		 * Stateless encoding: only the handle is recorded, so the
		 * return value carries no failure information.
		 */
		(void)init_encoding(&sc->sc_src_encoding,
		    is->is_src_encoding, NULL, NULL);
	}

	ptr += szpssrc*2;
	if (szpsdst) {
		ret = init_encoding(&sc->sc_dst_encoding,
		    is->is_dst_encoding, ptr, ptr+szpsdst);
		if (ret)
			goto err;
	} else {
		/* stateless encoding, see above */
		(void)init_encoding(&sc->sc_dst_encoding,
		    is->is_dst_encoding, NULL, NULL);
	}

	cv->cv_closure = (void *)sc;

	return 0;

err:
	/* do not hand out a context whose state could not be set up */
	free(sc);
	return ret;
}
/*
 * _citrus_iconv_std_iconv_uninit_context:
 *	Release the per-descriptor context stored in cv->cv_closure with a
 *	single free().
 */
static void
_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
{
	void *closure;

	closure = cv->cv_closure;
	free(closure);
}
/*
 * _citrus_iconv_std_iconv_convert:
 *	Convert characters from *in to *out, one character per loop
 *	iteration: multibyte -> (csid, index) -> mapped (csid, index) ->
 *	multibyte.
 *
 *	in/inbytes	input cursor and remaining byte count; both are
 *			advanced past every character that was consumed.
 *	out/outbytes	output cursor and remaining room; both are advanced
 *			past every byte that was produced.
 *	flags		when _CITRUS_ICONV_F_HIDE_INVALID is set, characters
 *			without a mapping produce no output; otherwise the
 *			destination's "invalid" character is written if the
 *			destination database enabled one.
 *	invalids	receives the number of characters that had no
 *			mapping.
 *
 *	If in or *in is NULL the call resets the conversion state instead:
 *	with a non-NULL output buffer the destination's state-reset sequence
 *	is stored first, otherwise the destination state is simply
 *	re-initialised.  The source state is re-initialised in both cases.
 *
 *	Returns 0 on success or an errno value.  When a step of the current
 *	character fails, both encoding states are rolled back to what they
 *	were before that character.
 */
static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
    const char * __restrict * __restrict in,
    size_t * __restrict inbytes,
    char * __restrict * __restrict out,
    size_t * __restrict outbytes, u_int32_t flags,
    size_t * __restrict invalids)
{
	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
	struct _citrus_iconv_std_context *sc = cv->cv_closure;
	_index_t idx;
	_csid_t csid;
	int ret, state;
	size_t szrin, szrout;
	size_t inval;
	const char *tmpin;

	inval = 0;
	if (in==NULL || *in==NULL) {
		/* special cases */
		if (out!=NULL && *out!=NULL) {
			/* init output state and store the shift sequence */
			save_encoding_state(&sc->sc_src_encoding);
			save_encoding_state(&sc->sc_dst_encoding);
			szrout = 0;

			ret = put_state_resetx(&sc->sc_dst_encoding,
			    *out, *outbytes,
			    &szrout);
			if (ret)
				goto err;

			if (szrout == (size_t)-2) {
				/* too small to store the character */
				ret = EINVAL;
				goto err;
			}
			*out += szrout;
			*outbytes -= szrout;
		} else {
			/* otherwise, discard the shift sequence */
			init_encoding_state(&sc->sc_dst_encoding);
		}
		/* the source state is reset in both cases */
		init_encoding_state(&sc->sc_src_encoding);
		*invalids = 0;
		return 0;
	}

	/* normal case */
	for (;;) {
		if (*inbytes==0) {
			/*
			 * Input is exhausted: we are done as soon as the
			 * source state holds no partial character.
			 */
			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
			if (ret) {
				/*
				 * `state' was not filled in, so it must not
				 * be examined.  No state has been saved for
				 * this iteration yet, hence no restore.
				 */
				ret = EINVAL;
				goto err_norestore;
			}
			if (state == _STDENC_SDGEN_INITIAL ||
			    state == _STDENC_SDGEN_STABLE)
				break;
		}

		/* save the encoding states for the error recovery */
		save_encoding_state(&sc->sc_src_encoding);
		save_encoding_state(&sc->sc_dst_encoding);

		/* mb -> csid/index */
		tmpin = *in;
		szrin = szrout = 0;
		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
		    &tmpin, *inbytes, &szrin);
		if (ret)
			goto err;

		if (szrin == (size_t)-2) {
			/* incomplete character */
			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
			if (ret) {
				ret = EINVAL;
				goto err;
			}
			switch (state) {
			case _STDENC_SDGEN_INITIAL:
			case _STDENC_SDGEN_STABLE:
				/* fetch shift sequences only. */
				goto next;
			}
			ret = EINVAL;
			goto err;
		}
		/* convert the character */
		ret = do_conv(is, sc, &csid, &idx);
		if (ret) {
			if (ret == E_NO_CORRESPONDING_CHAR) {
				/* count it; optionally emit a replacement */
				inval++;
				szrout = 0;
				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
				    is->is_use_invalid) {
					ret = wctombx(&sc->sc_dst_encoding,
					    *out, *outbytes,
					    is->is_invalid,
					    &szrout);
					if (ret)
						goto err;
				}
				goto next;
			} else {
				goto err;
			}
		}
		/* csid/index -> mb */
		ret = cstombx(&sc->sc_dst_encoding,
		    *out, *outbytes, csid, idx, &szrout);
		if (ret)
			goto err;
next:
		/*
		 * szrin is (size_t)-2 when we get here through the
		 * shift-sequence-only path, so check the byte count that
		 * was really consumed rather than szrin.
		 */
		_DIAGASSERT((size_t)(tmpin-*in)<=*inbytes &&
		    *outbytes>=szrout);
		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
		*in = tmpin;
		*outbytes -= szrout;
		*out += szrout;
	}
	*invalids = inval;

	return 0;

err:
	/* roll both states back to before the failing character */
	restore_encoding_state(&sc->sc_src_encoding);
	restore_encoding_state(&sc->sc_dst_encoding);
err_norestore:
	*invalids = inval;

	return ret;
}