Add support for multibyte charsets in the "tolower" and "toupper" awk
functions.  Code contributed by Aleksey Cheusov in PR#36394, and slightly
tweaked for closer-to-KNF conventions by me.  Also slightly improved checks
of error returns.
This commit is contained in:
he 2007-10-25 14:44:49 +00:00
parent c6b19d4d1d
commit 50d663aa6a
3 changed files with 78 additions and 11 deletions

1
dist/nawk/proto.h vendored
View File

@ -112,6 +112,7 @@ extern double getfval(Cell *);
extern char *getsval(Cell *);
extern char *getpssval(Cell *); /* for print */
extern char *tostring(const char *); /* make a copy of a string */
extern char *tostringN(const char *, size_t n); /* copy a string into a malloc'ed buffer of n bytes */
extern char *qstring(const char *, int);
extern void recinit(unsigned int);

77
dist/nawk/run.c vendored
View File

@ -25,6 +25,8 @@ THIS SOFTWARE.
#define DEBUG
#include <stdio.h>
#include <ctype.h>
#include <wchar.h>
#include <wctype.h>
#include <setjmp.h>
#include <limits.h>
#include <math.h>
@ -1461,12 +1463,71 @@ Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
void flush_all(void);
/*
 * nawk_toXXX: return a newly allocated copy of s with every character passed
 * through a case-mapping function.
 *
 *   s      - NUL-terminated input string; it is not modified.
 *   fun_c  - per-byte mapping, used when MB_CUR_MAX is 1.
 *   fun_wc - per-wide-character mapping, used otherwise.
 *
 * When MB_CUR_MAX is greater than 1 the input is decoded one multibyte
 * character at a time with mbrtowc(), mapped, and re-encoded with wcrtomb().
 * FATAL is called if the input cannot be decoded or a mapped character
 * cannot be encoded.
 */
static char *nawk_toXXX(const char *s,
	int (*fun_c)(int),
	wint_t (*fun_wc)(wint_t))
{
	const size_t maxlen = MB_CUR_MAX;
	mbstate_t in_state, out_state;
	const char *src = NULL;
	char *result = NULL;
	char *dst = NULL;
	wchar_t wch;
	size_t len = 0;

	if (maxlen == 1) {
		/* one byte per character: map the copy in place */
		result = tostring(s);
		dst = result;
		while (*dst) {
			*dst = fun_c((uschar)*dst);
			dst++;
		}
		return result;
	}

	/*
	 * The mapped form of a character may be encoded shorter or longer
	 * than the original, so reserve MB_CUR_MAX bytes per input byte.
	 */
	result = tostringN(s, strlen(s) * maxlen + 1);

	memset(&in_state, 0, sizeof(in_state));
	memset(&out_state, 0, sizeof(out_state));

	src = s;
	dst = result;
	for (;;) {
		len = mbrtowc(&wch, src, maxlen, &in_state);
		/* 0: reached the terminating NUL; -1 / -2: undecodable input */
		if (len == 0 || len == (size_t)-1 || len == (size_t)-2)
			break;
		src += len;

		len = wcrtomb(dst, fun_wc(wch), &out_state);
		if (len == (size_t)-1 || len == (size_t)-2)
			FATAL("illegal wide character %s", s);
		dst += len;
	}
	*dst = 0;

	/* here len is the last mbrtowc() result; non-zero means a decode error */
	if (len)
		FATAL("illegal byte sequence %s", s);

	return result;
}
/*
 * nawk_toupper: upper-case copy of s, produced by nawk_toXXX with toupper()
 * as the per-byte mapping and towupper() as the wide-character mapping.
 */
static char *nawk_toupper(const char *s)
{
	int (*const narrow_map)(int) = toupper;
	wint_t (*const wide_map)(wint_t) = towupper;

	return nawk_toXXX(s, narrow_map, wide_map);
}
/*
 * nawk_tolower: lower-case copy of s, produced by nawk_toXXX with tolower()
 * as the per-byte mapping and towlower() as the wide-character mapping.
 */
static char *nawk_tolower(const char *s)
{
	int (*const narrow_map)(int) = tolower;
	wint_t (*const wide_map)(wint_t) = towlower;

	return nawk_toXXX(s, narrow_map, wide_map);
}
Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
{
Cell *x, *y;
Awkfloat u;
int t, sz;
char *p, *buf, *fmt;
char *buf, *fmt;
Node *nextarg;
FILE *fp;
time_t tv;
@ -1522,16 +1583,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
break;
case FTOUPPER:
case FTOLOWER:
buf = tostring(getsval(x));
if (t == FTOUPPER) {
for (p = buf; *p; p++)
if (islower((uschar) *p))
*p = toupper((uschar)*p);
} else {
for (p = buf; *p; p++)
if (isupper((uschar) *p))
*p = tolower((uschar)*p);
}
if (t == FTOUPPER)
buf = nawk_toupper(getsval(x));
else
buf = nawk_tolower(getsval(x));
tempfree(x);
x = gettemp();
setsval(x, buf);

11
dist/nawk/tran.c vendored
View File

@ -411,6 +411,17 @@ char *tostring(const char *s) /* make a copy of string s */
return(p);
}
/*
 * tostringN: copy string s into a freshly malloc'ed buffer of n bytes.
 *
 *   s - NUL-terminated string to copy.
 *   n - size of the buffer to allocate, in bytes.  Callers pass a value
 *       larger than strlen(s) when they intend to rewrite the result in
 *       place with something longer.
 *
 * If n is too small to hold all of s plus its terminator, the copy is
 * truncated to n - 1 bytes rather than written past the allocation.  The
 * result is always NUL-terminated; a request for 0 bytes still allocates
 * one byte so the terminator fits.  Calls FATAL if malloc fails.
 */
char *tostringN(const char *s, size_t n) /* make a copy of string s */
{
	char *p;
	size_t len;

	if (n == 0)
		n = 1;	/* always leave room for the terminating NUL */
	p = malloc(n);
	if (p == NULL)
		FATAL("out of space in tostringN on %s", s);
	len = strlen(s);
	if (len >= n)
		len = n - 1;	/* truncate instead of overrunning the buffer */
	memcpy(p, s, len);
	p[len] = '\0';
	return(p);
}
char *qstring(const char *is, int delim) /* collect string up to next delim */
{
const char *os = is;