Add support for localized character sets (a.k.a. LC_CTYPE).

Thanks go to Matthias Scheler <tron@lyssa.owl.de> for contributing his initial
work in PR/3592, and to Christos Zoulas for refining it!
This commit is contained in:
kleink 1997-06-02 09:52:36 +00:00
parent 6aa07ba92c
commit dbda158f1f
7 changed files with 287 additions and 92 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: ctype.h,v 1.14 1994/10/26 00:55:47 cgd Exp $ */
/* $NetBSD: ctype.h,v 1.15 1997/06/02 09:52:36 kleink Exp $ */
/*
* Copyright (c) 1989 The Regents of the University of California.
@ -53,10 +53,11 @@
#define _X 0x40
#define _B 0x80
extern const char *_ctype_;
extern const unsigned char *_ctype_;
extern const short *_tolower_tab_;
extern const short *_toupper_tab_;
__BEGIN_DECLS
extern int isalnum __P ((int));
extern int isalpha __P ((int));
@ -81,23 +82,23 @@ extern int _toupper __P ((int));
#endif
__END_DECLS
#define isdigit(c) ((_ctype_ + 1)[c] & _N)
#define islower(c) ((_ctype_ + 1)[c] & _L)
#define isspace(c) ((_ctype_ + 1)[c] & _S)
#define ispunct(c) ((_ctype_ + 1)[c] & _P)
#define isupper(c) ((_ctype_ + 1)[c] & _U)
#define isalpha(c) ((_ctype_ + 1)[c] & (_U|_L))
#define isxdigit(c) ((_ctype_ + 1)[c] & (_N|_X))
#define isalnum(c) ((_ctype_ + 1)[c] & (_U|_L|_N))
#define isprint(c) ((_ctype_ + 1)[c] & (_P|_U|_L|_N|_B))
#define isgraph(c) ((_ctype_ + 1)[c] & (_P|_U|_L|_N))
#define iscntrl(c) ((_ctype_ + 1)[c] & _C)
#define tolower(c) ((_tolower_tab_ + 1)[c])
#define toupper(c) ((_toupper_tab_ + 1)[c])
#define isdigit(c) ((int)((_ctype_ + 1)[c] & _N))
#define islower(c) ((int)((_ctype_ + 1)[c] & _L))
#define isspace(c) ((int)((_ctype_ + 1)[c] & _S))
#define ispunct(c) ((int)((_ctype_ + 1)[c] & _P))
#define isupper(c) ((int)((_ctype_ + 1)[c] & _U))
#define isalpha(c) ((int)((_ctype_ + 1)[c] & (_U|_L)))
#define isxdigit(c) ((int)((_ctype_ + 1)[c] & (_N|_X)))
#define isalnum(c) ((int)((_ctype_ + 1)[c] & (_U|_L|_N)))
#define isprint(c) ((int)((_ctype_ + 1)[c] & (_P|_U|_L|_N|_B)))
#define isgraph(c) ((int)((_ctype_ + 1)[c] & (_P|_U|_L|_N)))
#define iscntrl(c) ((int)((_ctype_ + 1)[c] & _C))
#define tolower(c) ((int)((_tolower_tab_ + 1)[c]))
#define toupper(c) ((int)((_toupper_tab_ + 1)[c]))
#if !defined(_ANSI_SOURCE) && !defined (_POSIX_SOURCE)
#if notyet
#define isblank(c) ((_ctype_ + 1)[c] & _B)
#define isblank(c) ((int)((_ctype_ + 1)[c] & _B))
#endif
#define isascii(c) ((unsigned)(c) <= 0177)
#define toascii(c) ((c) & 0177)
@ -105,4 +106,15 @@ __END_DECLS
#define _toupper(c) ((c) - 'a' + 'A')
#endif
/*
 * Private definitions for the ctype/locale implementation.  They are only
 * visible to a file that defines _CTYPE_PRIVATE before including this header.
 * NOTE(review): u_int8_t and int16_t are not declared here; presumably the
 * includer must pull in <sys/types.h> first (ctype_.c does) -- confirm.
 */
#ifdef _CTYPE_PRIVATE
/*
 * Number of entries per table, not counting slot 0:
 * 1 << (sizeof(char) * 8), i.e. one entry per char value with 8-bit bytes.
 */
#define _CTYPE_NUM_CHARS (1<<(sizeof(char)<<3))
/*
 * Magic string at the start of a ctype file.  __savectype() writes it
 * (without the terminating NUL) and __loadctype() rejects files that do
 * not begin with it.
 */
#define _CTYPE_ID "BSDCTYPE"
/* File format revision stored after the magic; __loadctype() accepts only this value. */
#define _CTYPE_REV 2
/*
 * Built-in tables.  loadlocale() points _ctype_, _toupper_tab_ and
 * _tolower_tab_ back at these for the "C" and "POSIX" locales, and the
 * code compares against them to decide whether a table may be free()d.
 */
extern const u_int8_t _C_ctype_[];
extern const int16_t _C_toupper_[];
extern const int16_t _C_tolower_[];
#endif
#endif /* !_CTYPE_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: ctype_.c,v 1.12 1995/02/25 15:39:28 cgd Exp $ */
/* $NetBSD: ctype_.c,v 1.13 1997/06/02 09:52:42 kleink Exp $ */
/*
* Copyright (c) 1989 The Regents of the University of California.
@ -42,13 +42,16 @@
#if 0
/*static char *sccsid = "from: @(#)ctype_.c 5.6 (Berkeley) 6/1/90";*/
#else
static char rcsid[] = "$NetBSD: ctype_.c,v 1.12 1995/02/25 15:39:28 cgd Exp $";
static char rcsid[] = "$NetBSD: ctype_.c,v 1.13 1997/06/02 09:52:42 kleink Exp $";
#endif
#endif /* LIBC_SCCS and not lint */
#define _CTYPE_PRIVATE
#include <sys/types.h>
#include <ctype.h>
const char _C_ctype_[1 + 256] = {
const unsigned char _C_ctype_[1 + _CTYPE_NUM_CHARS] = {
0,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C,
@ -68,4 +71,4 @@ const char _C_ctype_[1 + 256] = {
_L, _L, _L, _P, _P, _P, _P, _C
};
const char *_ctype_ = _C_ctype_;
const unsigned char *_ctype_ = _C_ctype_;

View File

@ -1,10 +1,10 @@
# from: @(#)Makefile.inc 5.1 (Berkeley) 2/18/91
# $NetBSD: Makefile.inc,v 1.12 1997/04/29 16:40:13 kleink Exp $
# $NetBSD: Makefile.inc,v 1.13 1997/06/02 09:52:45 kleink Exp $
# locale sources
.PATH: ${.CURDIR}/arch/${MACHINE_ARCH}/locale ${.CURDIR}/locale
SRCS+= _def_messages.c _def_monetary.c _def_numeric.c _def_time.c \
localeconv.c nl_langinfo.c setlocale.c
ctypeio.c localeconv.c nl_langinfo.c setlocale.c
MAN+= nl_langinfo.3

162
lib/libc/locale/ctypeio.c Normal file
View File

@ -0,0 +1,162 @@
/* $NetBSD: ctypeio.c,v 1.1 1997/06/02 09:52:47 kleink Exp $ */
/*
* Copyright (c) 1997 Christos Zoulas. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Christos Zoulas.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#define _CTYPE_PRIVATE
#include <ctype.h>
#include "ctypeio.h"
int
__loadctype(name)
const char *name;
{
FILE *fp;
char id[sizeof(_CTYPE_ID) - 1];
u_int32_t i, len;
unsigned char *new_ctype = NULL;
short *new_toupper = NULL, *new_tolower = NULL;
if ((fp = fopen(name, "r")) == NULL)
return 0;
if (fread(id, sizeof(id), 1, fp) != 1)
goto bad;
if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0)
goto bad;
if (fread(&i, sizeof(u_int32_t), 1, fp) != 1)
goto bad;
if ((i = ntohl(i)) != _CTYPE_REV)
goto bad;
if (fread(&len, sizeof(u_int32_t), 1, fp) != 1)
goto bad;
if ((len = ntohl(len)) != _CTYPE_NUM_CHARS)
goto bad;
if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL)
goto bad;
new_ctype[0] = 0;
if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len)
goto bad;
if ((new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL)
goto bad;
new_toupper[0] = EOF;
if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len)
goto bad;
if ((new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL)
goto bad;
new_tolower[0] = EOF;
if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len)
goto bad;
#if BYTE_ORDER == LITTLE_ENDIAN
for (i = 1; i <= len; i++) {
new_toupper[i] = ntohs(new_toupper[i]);
new_tolower[i] = ntohs(new_tolower[i]);
}
#endif
(void) fclose(fp);
if (_ctype_ != _C_ctype_)
free((void *) _ctype_);
_ctype_ = new_ctype;
if (_toupper_tab_ != _C_toupper_)
free((void *) _toupper_tab_);
_toupper_tab_ = new_toupper;
if (_tolower_tab_ != _C_tolower_)
free((void *) _tolower_tab_);
_tolower_tab_ = new_tolower;
return 1;
bad:
free(new_tolower);
free(new_toupper);
free(new_ctype);
(void) fclose(fp);
return 0;
}
/*
 * savetable --
 *	Write entries 1 .. _CTYPE_NUM_CHARS of the 16-bit case-mapping table
 *	`tab' to `fp', each converted with htons().  The conversion is done
 *	in a scratch buffer so the caller's table is never modified.
 *	Returns 1 if every entry was written, 0 otherwise.
 */
static int
savetable(fp, tab)
	FILE *fp;
	const short *tab;
{
	int16_t out[_CTYPE_NUM_CHARS];
	u_int32_t i;

	/* Slot 0 of the table is not part of the file format; skip it. */
	for (i = 0; i < _CTYPE_NUM_CHARS; i++)
		out[i] = htons(tab[i + 1]);

	return fwrite(out, sizeof(int16_t), _CTYPE_NUM_CHARS, fp) ==
	    _CTYPE_NUM_CHARS;
}

/*
 * __savectype --
 *	Write a character-class description to the file `name' in the
 *	layout __loadctype() reads: the _CTYPE_ID magic (without its NUL),
 *	the revision and entry count as 32-bit values passed through
 *	htonl(), then entries 1 .. _CTYPE_NUM_CHARS of new_ctype (8-bit),
 *	new_toupper and new_tolower (16-bit, passed through htons()).
 *
 *	The three input tables are only read; they are left exactly as the
 *	caller supplied them whether or not the save succeeds.
 *
 *	Returns 1 if the whole file was written and closed successfully,
 *	0 if the file could not be opened, a write came up short, or the
 *	final flush in fclose() failed.
 */
int
__savectype(name, new_ctype, new_toupper, new_tolower)
	const char *name;
	unsigned char *new_ctype;
	short *new_toupper, *new_tolower;
{
	FILE *fp;
	u_int32_t i, len = _CTYPE_NUM_CHARS;

	if ((fp = fopen(name, "w")) == NULL)
		return 0;

	if (fwrite(_CTYPE_ID, sizeof(_CTYPE_ID) - 1, 1, fp) != 1)
		goto bad;

	i = htonl(_CTYPE_REV);
	if (fwrite(&i, sizeof(u_int32_t), 1, fp) != 1)
		goto bad;

	i = htonl(len);
	if (fwrite(&i, sizeof(u_int32_t), 1, fp) != 1)
		goto bad;

	if (fwrite(&new_ctype[1], sizeof(u_int8_t), len, fp) != len)
		goto bad;

	if (!savetable(fp, new_toupper))
		goto bad;
	if (!savetable(fp, new_tolower))
		goto bad;

	/*
	 * fclose() flushes whatever stdio still has buffered; if that fails
	 * the file on disk is incomplete, so report failure.
	 */
	if (fclose(fp) != 0)
		return 0;
	return 1;

bad:
	(void) fclose(fp);
	return 0;
}

35
lib/libc/locale/ctypeio.h Normal file
View File

@ -0,0 +1,35 @@
/* $NetBSD: ctypeio.h,v 1.1 1997/06/02 09:52:48 kleink Exp $ */
/*
* Copyright (c) 1997 Christos Zoulas. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Christos Zoulas.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Internal routines for reading and writing binary ctype table files.
 * Both return 1 on success and 0 on failure.
 */
__BEGIN_DECLS
/*
 * Load the tables stored in the named file and install them as the active
 * _ctype_, _toupper_tab_ and _tolower_tab_.
 */
int __loadctype __P((const char *));
/*
 * Write a ctype file.  Arguments are, in order: the file name, the
 * classification table, the toupper table and the tolower table.  Entry 0
 * of each table is not written; output starts at entry 1.
 */
int __savectype __P((const char *, unsigned char *, short *, short *));
__END_DECLS

View File

@ -1,4 +1,4 @@
/* $NetBSD: setlocale.c,v 1.10 1997/04/29 16:40:19 kleink Exp $ */
/* $NetBSD: setlocale.c,v 1.11 1997/06/02 09:52:50 kleink Exp $ */
/*
* Copyright (c) 1991, 1993
@ -40,17 +40,21 @@
#if 0
static char sccsid[] = "@(#)setlocale.c 8.1 (Berkeley) 7/4/93";
#else
static char rcsid[] = "$NetBSD: setlocale.c,v 1.10 1997/04/29 16:40:19 kleink Exp $";
static char rcsid[] = "$NetBSD: setlocale.c,v 1.11 1997/06/02 09:52:50 kleink Exp $";
#endif
#endif /* LIBC_SCCS and not lint */
#define _CTYPE_PRIVATE
#include <sys/localedef.h>
#include <locale.h>
#include <ctype.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <paths.h>
#include "ctypeio.h"
/*
* Category names for getenv()
@ -202,28 +206,38 @@ currentlocale()
return (current_locale_string);
}
static char *
char *
loadlocale(category)
int category;
{
char name[PATH_MAX];
if (strcmp(new_categories[category],
current_categories[category]) == 0)
if (strcmp(new_categories[category], current_categories[category]) == 0)
return (current_categories[category]);
if (!strcmp(new_categories[category], "C") ||
!strcmp(new_categories[category], "POSIX")) {
!strcmp(new_categories[category], "POSIX")) {
switch (category) {
case LC_CTYPE:
if (_ctype_ != _C_ctype_) {
free((void *)_ctype_);
_ctype_ = _C_ctype_;
}
if (_toupper_tab_ != _C_toupper_) {
free((void *)_toupper_tab_);
_toupper_tab_ = _C_toupper_;
}
if (_tolower_tab_ != _C_tolower_) {
free((void *)_tolower_tab_);
_tolower_tab_ = _C_tolower_;
}
}
/*
* Some day this will need to reset the locale to the default
* C locale. Since we have no way to change them as of yet,
* there is no need to reset them.
*/
(void)strncpy(current_categories[category],
new_categories[category],
sizeof(current_categories[category]) - 1);
return (current_categories[category]);
return current_categories[category];
}
/*
@ -233,12 +247,22 @@ loadlocale(category)
PathLocale, new_categories[category], categories[category]);
switch (category) {
case LC_CTYPE:
case LC_COLLATE:
case LC_MESSAGES:
case LC_MONETARY:
case LC_NUMERIC:
case LC_TIME:
return (NULL);
case LC_CTYPE:
if (__loadctype(name)) {
(void)strncpy(current_categories[category],
new_categories[category],
sizeof(current_categories[category]) - 1);
return current_categories[category];
}
return NULL;
case LC_COLLATE:
case LC_MESSAGES:
case LC_MONETARY:
case LC_NUMERIC:
case LC_TIME:
return NULL;
}
return NULL;
}

View File

@ -33,65 +33,25 @@
#if defined(LIBC_SCCS) && !defined(lint)
/*static const char sccsid[] = "from: @(#)strcasecmp.c 5.10 (Berkeley) 1/26/91";*/
static char *rcsid = "$Id: strcasecmp.c,v 1.4 1995/06/15 00:07:37 jtc Exp $";
static char *rcsid = "$Id: strcasecmp.c,v 1.5 1997/06/02 09:52:39 kleink Exp $";
#endif /* LIBC_SCCS and not lint */
#include <ctype.h>
#include <string.h>
typedef unsigned char u_char;
/*
* This array is designed for mapping upper and lower case letter
* together for a case independent comparison. The mappings are
* based upon ascii character sequences.
*/
static const u_char charmap[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
int
strcasecmp(s1, s2)
const char *s1, *s2;
{
register const u_char *cm = charmap,
*us1 = (const u_char *)s1,
register const u_char *us1 = (const u_char *)s1,
*us2 = (const u_char *)s2;
while (cm[*us1] == cm[*us2++])
while (tolower(*us1) == tolower(*us2++))
if (*us1++ == '\0')
return (0);
return (cm[*us1] - cm[*--us2]);
return (tolower(*us1) - tolower(*--us2));
}
int
@ -100,13 +60,12 @@ strncasecmp(s1, s2, n)
register size_t n;
{
if (n != 0) {
register const u_char *cm = charmap,
*us1 = (const u_char *)s1,
register const u_char *us1 = (const u_char *)s1,
*us2 = (const u_char *)s2;
do {
if (cm[*us1] != cm[*us2++])
return (cm[*us1] - cm[*--us2]);
if (tolower(*us1) != tolower(*us2++))
return (tolower(*us1) - tolower(*--us2));
if (*us1++ == '\0')
break;
} while (--n != 0);