Add support for multibyte charsets in the "tolower" and "toupper" awk
functions. Code contributed by Aleksey Cheusov in PR#36394, and slightly tweaked for closer-to-KNF conventions by me. Also slightly improved checks of error returns.
This commit is contained in:
parent
c6b19d4d1d
commit
50d663aa6a
1
dist/nawk/proto.h
vendored
1
dist/nawk/proto.h
vendored
@ -112,6 +112,7 @@ extern double getfval(Cell *);
|
||||
extern char *getsval(Cell *);
|
||||
extern char *getpssval(Cell *); /* for print */
|
||||
extern char *tostring(const char *);
|
||||
extern char *tostringN(const char *, size_t n);
|
||||
extern char *qstring(const char *, int);
|
||||
|
||||
extern void recinit(unsigned int);
|
||||
|
77
dist/nawk/run.c
vendored
77
dist/nawk/run.c
vendored
@ -25,6 +25,8 @@ THIS SOFTWARE.
|
||||
#define DEBUG
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
#include <setjmp.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
@ -1461,12 +1463,71 @@ Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
|
||||
|
||||
void flush_all(void);
|
||||
|
||||
static char *nawk_toXXX(const char *s,
|
||||
int (*fun_c)(int),
|
||||
wint_t (*fun_wc)(wint_t))
|
||||
{
|
||||
char *buf = NULL;
|
||||
char *pbuf = NULL;
|
||||
const char *ps = NULL;
|
||||
size_t n = 0;
|
||||
mbstate_t mbs, mbs2;
|
||||
wchar_t wc;
|
||||
size_t sz = MB_CUR_MAX;
|
||||
|
||||
if (sz == 1) {
|
||||
buf = tostring(s);
|
||||
|
||||
for (pbuf = buf; *pbuf; pbuf++)
|
||||
*pbuf = fun_c((uschar)*pbuf);
|
||||
|
||||
return buf;
|
||||
} else {
|
||||
/* upper/lower character may be shorter/longer */
|
||||
buf = tostringN(s, strlen(s) * sz + 1);
|
||||
|
||||
memset(&mbs, 0, sizeof(mbs));
|
||||
memset(&mbs2, 0, sizeof(mbs2));
|
||||
|
||||
ps = s;
|
||||
pbuf = buf;
|
||||
while (n = mbrtowc(&wc, ps, sz, &mbs),
|
||||
n > 0 && n != (size_t)-1 && n != (size_t)-2)
|
||||
{
|
||||
ps += n;
|
||||
|
||||
n = wcrtomb(pbuf, fun_wc(wc), &mbs2);
|
||||
if (n == (size_t)-1 || n == (size_t)-2)
|
||||
FATAL("illegal wide character %s", s);
|
||||
|
||||
pbuf += n;
|
||||
}
|
||||
|
||||
*pbuf = 0;
|
||||
|
||||
if (n)
|
||||
FATAL("illegal byte sequence %s", s);
|
||||
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * nawk_toupper: upper-case conversion of s.  Delegates to nawk_toXXX()
 * with toupper() for bytes and towupper() for wide characters, and
 * returns whatever string that call produces.
 */
static char *nawk_toupper(const char *s)
{
	char *upper;

	upper = nawk_toXXX(s, toupper, towupper);
	return upper;
}
|
||||
|
||||
/*
 * nawk_tolower: lower-case conversion of s.  Delegates to nawk_toXXX()
 * with tolower() for bytes and towlower() for wide characters, and
 * returns whatever string that call produces.
 */
static char *nawk_tolower(const char *s)
{
	char *lower;

	lower = nawk_toXXX(s, tolower, towlower);
	return lower;
}
|
||||
|
||||
Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
|
||||
{
|
||||
Cell *x, *y;
|
||||
Awkfloat u;
|
||||
int t, sz;
|
||||
char *p, *buf, *fmt;
|
||||
char *buf, *fmt;
|
||||
Node *nextarg;
|
||||
FILE *fp;
|
||||
time_t tv;
|
||||
@ -1522,16 +1583,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
||||
break;
|
||||
case FTOUPPER:
|
||||
case FTOLOWER:
|
||||
buf = tostring(getsval(x));
|
||||
if (t == FTOUPPER) {
|
||||
for (p = buf; *p; p++)
|
||||
if (islower((uschar) *p))
|
||||
*p = toupper((uschar)*p);
|
||||
} else {
|
||||
for (p = buf; *p; p++)
|
||||
if (isupper((uschar) *p))
|
||||
*p = tolower((uschar)*p);
|
||||
}
|
||||
if (t == FTOUPPER)
|
||||
buf = nawk_toupper(getsval(x));
|
||||
else
|
||||
buf = nawk_tolower(getsval(x));
|
||||
tempfree(x);
|
||||
x = gettemp();
|
||||
setsval(x, buf);
|
||||
|
11
dist/nawk/tran.c
vendored
11
dist/nawk/tran.c
vendored
@ -411,6 +411,17 @@ char *tostring(const char *s) /* make a copy of string s */
|
||||
return(p);
|
||||
}
|
||||
|
||||
/*
 * tostringN: make a copy of string s in a malloc'ed buffer of at least
 * n bytes.
 *
 * The buffer is never smaller than strlen(s) + 1, so the copy (including
 * its terminating NUL) always fits even if the caller passes a smaller n.
 * Calls FATAL() if the allocation fails.  The caller owns the returned
 * buffer.
 */
char *tostringN(const char *s, size_t n)
{
	char *p;
	size_t need = strlen(s) + 1;

	/* never allocate less than the copy itself requires */
	if (n < need)
		n = need;

	p = malloc(n);
	if (p == NULL)
		FATAL("out of space in tostringN on %s", s);
	strcpy(p, s);
	return(p);
}
|
||||
|
||||
char *qstring(const char *is, int delim) /* collect string up to next delim */
|
||||
{
|
||||
const char *os = is;
|
||||
|
Loading…
Reference in New Issue
Block a user