560 lines
12 KiB
C
560 lines
12 KiB
C
/* $NetBSD: chrtbl.c,v 1.3 1997/10/19 09:23:29 mrg Exp $ */
|
|
|
|
/*
|
|
* Copyright (c) 1997 Christos Zoulas. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by Christos Zoulas.
|
|
* 4. The name of the author may not be used to endorse or promote products
|
|
* derived from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <err.h>
|
|
|
|
#include "ctypeio.h"
|
|
|
|
/*
 * Working state for one character set description.
 *
 * main() allocates the three tables with maxchar + 1 entries each; the
 * directive handlers store the entry for character code c at index c + 1.
 */
struct chartbl {
	/* limit that character codes are range-checked against */
	size_t		 maxchar;

	/* copy of the name given to LC_CTYPE, NULL until one is seen */
	char		*ctypefilename;

	/* attribute bits OR-ed in by the is* directives */
	unsigned char	*ctype;

	/* case tables written by "ul"; emitted as _toupper_tab_/_tolower_tab_ */
	unsigned short	*uptab, *lotab;

	/* copy of the name given to LC_NUMERIC, NULL until one is seen */
	char		*numericfilename;

	/* numeric separators; main() presets them to '.' and ',' */
	unsigned char	 decimal_point, thousands_sep;
};
|
|
|
|
/*
 * Directive handlers.  All share one signature so they can be stored in
 * the token table: (table, directive name, per-entry argument, remainder
 * of the input line, line number).  A non-zero return marks the run as
 * failed.
 */
static int	setfilename __P((struct chartbl *cs, const char *token,
		    int arg, char *line, size_t lnum));
static int	addattr __P((struct chartbl *cs, const char *token,
		    int arg, char *line, size_t lnum));
static int	uplow __P((struct chartbl *cs, const char *token,
		    int arg, char *line, size_t lnum));
static int	cswidth __P((struct chartbl *cs, const char *token,
		    int arg, char *line, size_t lnum));
static int	numeric __P((struct chartbl *cs, const char *token,
		    int arg, char *line, size_t lnum));

/* Output generation. */
static void	printctype __P((FILE *fp, unsigned int ct));
static int	output_ascii __P((const char *fn, const struct chartbl *ct));
static int	output_binary __P((const struct chartbl *ct));

/* Input and command line helpers. */
static char	*getline __P((FILE *fp, size_t *size, size_t *lineno));
static void	usage __P((void));

int		main __P((int argc, char *argv[]));
|
|
|
|
static const struct toklist {
|
|
char *name;
|
|
int (*func) __P((struct chartbl *, const char *, int arg,
|
|
char *, size_t lno));
|
|
int arg;
|
|
} tokens[] = {
|
|
{ "LC_CTYPE", setfilename, 0 },
|
|
{ "isupper", addattr, _U },
|
|
{ "islower", addattr, _L },
|
|
{ "isdigit", addattr, _N },
|
|
{ "isspace", addattr, _S },
|
|
{ "ispunct", addattr, _P },
|
|
{ "iscntrl", addattr, _C },
|
|
{ "isblank", addattr, _B },
|
|
{ "isxdigit", addattr, _X },
|
|
{ "ul", uplow, 0 },
|
|
{ "cswidth", cswidth, 0 },
|
|
{ "LC_NUMERIC", setfilename, 1 },
|
|
{ "decimal_point", numeric, 0 },
|
|
{ "thousands_sep", numeric, 0 },
|
|
{ NULL, NULL, 0 }
|
|
};
|
|
|
|
/* usage():
 *	Write the command synopsis to stderr and terminate the program
 *	with exit status 1.  Does not return.
 */
static void
usage()
{
	extern char *__progname;
	FILE *out = stderr;

	(void) fprintf(out, "Usage: %s [-o <filename>] <description>\n",
	    __progname);
	exit(1);
}
|
|
|
|
|
|
/* numeric():
 *	Handler for the decimal_point and thousands_sep directives.
 *
 *	Parsing is not implemented: `cs', `arg' and `line' are ignored and
 *	nothing is stored.  The directive is reported on stderr so that the
 *	resulting non-zero exit status is not left unexplained.
 *
 *	Returns 1 (failure) unconditionally.
 */
static int
numeric(cs, token, arg, line, lnum)
	struct chartbl *cs;
	const char *token;
	int arg;
	char *line;
	size_t lnum;
{
	(void) cs;
	(void) arg;
	(void) line;

	warnx("%s: Not supported at line %lu", token, (unsigned long)lnum);
	return 1;
}
|
|
|
|
|
|
/* cswidth():
 *	Handler for the cswidth directive, which has the form:
 *		cswidth: <n1>:<s1>,<n2>:<s2>,<n3>:<s3>
 *	Where:
 *		n1,n2,n3: byte widths of the supplementary codes 1,2,3
 *		s1,s2,s3: screen widths of the same codes
 *
 *	Parsing is not implemented: `cs', `arg' and `line' are ignored and
 *	nothing is stored.  The directive is reported on stderr so that the
 *	resulting non-zero exit status is not left unexplained.
 *
 *	Returns 1 (failure) unconditionally.
 */
static int
cswidth(cs, token, arg, line, lnum)
	struct chartbl *cs;
	const char *token;
	int arg;
	char *line;
	size_t lnum;
{
	(void) cs;
	(void) arg;
	(void) line;

	warnx("%s: Not supported at line %lu", token, (unsigned long)lnum);
	return 1;
}
|
|
|
|
/* setfilename():
|
|
* Set the output file name for LC_CTYPE or LC_NUMERIC
|
|
*/
|
|
static int
|
|
setfilename(cs, token, arg, line, lnum)
|
|
struct chartbl *cs;
|
|
const char *token;
|
|
int arg;
|
|
char *line;
|
|
size_t lnum;
|
|
{
|
|
char *p = strtok(line, " \t");
|
|
|
|
if (p == NULL || *p == '\0')
|
|
return 0;
|
|
|
|
if ((p = strdup(p)) == NULL)
|
|
err(1, "Out of memory at line %lu", (u_long)lnum);
|
|
|
|
switch (arg) {
|
|
case 0:
|
|
cs->ctypefilename = p;
|
|
return 0;
|
|
case 1:
|
|
cs->numericfilename = p;
|
|
return 0;
|
|
default:
|
|
warn("%s: Bad filename argument %d at line %lu", token, arg,
|
|
(u_long)lnum);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
|
|
/* addattr():
|
|
* Parse a character attribute line
|
|
* The line is of the form <attribute>: <char> | <char> - <char>
|
|
*/
|
|
static int
|
|
addattr(cs, token, arg, line, lnum)
|
|
struct chartbl *cs;
|
|
const char *token;
|
|
int arg;
|
|
char *line;
|
|
size_t lnum;
|
|
{
|
|
static const char sep[] = "\t ";
|
|
size_t b = 0, e = 0, n;
|
|
int st = 0;
|
|
char *ptr, *ep;
|
|
|
|
for (ptr = strtok(line, sep); ptr; ptr = strtok(NULL, sep)) {
|
|
if (strcmp(ptr, "-") == 0) {
|
|
if (st++ == 0)
|
|
goto badrange;
|
|
continue;
|
|
}
|
|
|
|
n = (size_t) strtoul(ptr, &ep, 0);
|
|
if (ptr == ep || *ep) {
|
|
warnx("%s: Bad number `%s' at line %lu", token,
|
|
ptr, (u_long)lnum);
|
|
return 1;
|
|
}
|
|
switch (++st) {
|
|
case 1:
|
|
b = n;
|
|
continue;
|
|
case 2:
|
|
if (b > cs->maxchar) {
|
|
n = b;
|
|
goto oorange;
|
|
}
|
|
cs->ctype[b+1] |= arg;
|
|
st = 1;
|
|
b = n;
|
|
continue;
|
|
case 3:
|
|
e = n;
|
|
if (e > cs->maxchar) {
|
|
n = e;
|
|
goto oorange;
|
|
}
|
|
for (n = b; n <= e; n++)
|
|
cs->ctype[n+1] |= arg;
|
|
st = 0;
|
|
continue;
|
|
default:
|
|
goto badstate;
|
|
}
|
|
}
|
|
switch (st) {
|
|
case 0:
|
|
return 0;
|
|
case 1:
|
|
if (b > cs->maxchar) {
|
|
n = b;
|
|
goto oorange;
|
|
}
|
|
cs->ctype[b+1] |= arg;
|
|
return 0;
|
|
case 2:
|
|
goto badrange;
|
|
default:
|
|
goto badstate;
|
|
}
|
|
|
|
oorange:
|
|
warnx("%s: Character %lu out of range at line %lu", token, (u_long)n,
|
|
(u_long)lnum);
|
|
return 1;
|
|
badstate:
|
|
warnx("%s: Unexpected state %d at line %lu", token, st,
|
|
(u_long)lnum);
|
|
return 1;
|
|
badrange:
|
|
warnx("%s: Missing %s range at line %lu", token,
|
|
st == 1 ? "begin" : "end", (u_long)lnum);
|
|
return 1;
|
|
}
|
|
|
|
|
|
/* uplow():
|
|
* Parse an upper<->lower case transformation. The format of the line
|
|
* is ul <upper lower> ...
|
|
*/
|
|
static int
|
|
uplow(cs, token, arg, line, lnum)
|
|
struct chartbl *cs;
|
|
const char *token;
|
|
int arg;
|
|
char *line;
|
|
size_t lnum;
|
|
{
|
|
size_t lo, up;
|
|
char *p, *ep;
|
|
|
|
for (p = line;;) {
|
|
while (*p && isspace((u_char) *p))
|
|
p++;
|
|
switch (*p) {
|
|
case '\0':
|
|
return 0;
|
|
case '<':
|
|
p++;
|
|
break;
|
|
default:
|
|
goto badtoken;
|
|
}
|
|
while (*p && isspace((u_char) *p))
|
|
p++;
|
|
if (*p == '\0')
|
|
goto badtoken;
|
|
lo = (size_t) strtol(p, &ep, 0);
|
|
if (p == ep || !isspace((u_char) *ep))
|
|
goto badtoken;
|
|
p = ep + 1;
|
|
while (*p && isspace((u_char) *p))
|
|
p++;
|
|
up = (size_t) strtol(p, &ep, 0);
|
|
if (p == ep)
|
|
goto badtoken;
|
|
if (lo > cs->maxchar)
|
|
goto oorange;
|
|
if (up > cs->maxchar) {
|
|
lo = up;
|
|
goto oorange;
|
|
}
|
|
cs->lotab[up + 1] = lo;
|
|
cs->uptab[lo + 1] = up;
|
|
p = ep;
|
|
switch (*ep) {
|
|
case '\0':
|
|
return 0;
|
|
case ' ':
|
|
case '\t':
|
|
case '>':
|
|
p++;
|
|
break;
|
|
default:
|
|
goto badtoken;
|
|
}
|
|
}
|
|
|
|
badtoken:
|
|
warnx("%s: Bad token `%s' at line %lu", token, p, (u_long)lnum);
|
|
return 1;
|
|
oorange:
|
|
warnx("%s: Out of range character %lx at line %lu", token, (u_long)lo,
|
|
(u_long)lnum);
|
|
return 1;
|
|
}
|
|
|
|
|
|
/* printctype():
|
|
* Symbolically print an ascii character.
|
|
*/
|
|
static void
|
|
printctype(fp, ct)
|
|
FILE *fp;
|
|
unsigned int ct;
|
|
{
|
|
int did = 0;
|
|
|
|
#define DO(a) if (__CONCAT(_,a) & ct) { \
|
|
if (did) \
|
|
(void) fputc('|', fp); \
|
|
did = 1; \
|
|
(void) fputc('_', fp); \
|
|
(void) fputs(__STRING(a), fp); \
|
|
}
|
|
DO(U)
|
|
DO(L)
|
|
DO(N)
|
|
DO(S)
|
|
DO(P)
|
|
DO(C)
|
|
DO(B)
|
|
DO(X)
|
|
if (!did)
|
|
(void) fputc('0', fp);
|
|
}
|
|
|
|
|
|
/* output_ascii():
|
|
* Print a `c' symbolic description of the character set
|
|
*/
|
|
static int
|
|
output_ascii(fn, ct)
|
|
const char *fn;
|
|
const struct chartbl *ct;
|
|
{
|
|
int i;
|
|
FILE *fp;
|
|
|
|
if ((fp = fopen(fn, "w")) == NULL) {
|
|
warn("Cannot open `%s'", fn);
|
|
return 1;
|
|
}
|
|
|
|
(void) fprintf(fp, "/* Automatically generated file; do not edit */\n");
|
|
(void) fprintf(fp, "#include <ctype.h>\n");
|
|
(void) fprintf(fp, "unsigned char _ctype_[] = { 0");
|
|
for (i = 1; i <= ct->maxchar; i++) {
|
|
if (((i - 1) & 7) == 0)
|
|
(void) fputs(",\n\t", fp);
|
|
printctype(fp, ct->ctype[i]);
|
|
if ((i & 7) != 0)
|
|
(void) fputs(",\t", fp);
|
|
}
|
|
(void) fprintf(fp, "\n};\n");
|
|
|
|
(void) fprintf(fp, "short _tolower_tab_[] = { -1");
|
|
for (i = 1; i <= ct->maxchar; i++) {
|
|
if (((i - 1) & 7) == 0)
|
|
(void) fputs(",\n\t", fp);
|
|
(void) fprintf(fp, "0x%x", ct->lotab[i]);
|
|
if ((i & 7) != 0)
|
|
(void) fputs(",\t", fp);
|
|
}
|
|
(void) fprintf(fp, "\n};\n");
|
|
|
|
(void) fprintf(fp, "short _toupper_tab_[] = { -1");
|
|
for (i = 1; i <= ct->maxchar; i++) {
|
|
if (((i - 1) & 7) == 0)
|
|
(void) fputs(",\n\t", fp);
|
|
(void) fprintf(fp, "0x%x", ct->uptab[i]);
|
|
if ((i & 7) != 0)
|
|
(void) fputs(",\t", fp);
|
|
}
|
|
(void) fprintf(fp, "\n};\n");
|
|
(void) fclose(fp);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* output_binary():
|
|
* Print a binary description of the requested character set.
|
|
*/
|
|
static int
|
|
output_binary(ct)
|
|
const struct chartbl *ct;
|
|
{
|
|
int error = 0;
|
|
|
|
if (ct->ctypefilename != NULL) {
|
|
if (!__savectype(ct->ctypefilename, ct->ctype, ct->lotab,
|
|
ct->uptab))
|
|
err(1, "Cannot creating/writing `%s'",
|
|
ct->ctypefilename);
|
|
}
|
|
else {
|
|
warnx("No output file for LC_CTYPE specified");
|
|
error |= 1;
|
|
}
|
|
return error;
|
|
}
|
|
|
|
|
|
/* getline():
 *	Read one logical line from `fp'.  A physical line whose last
 *	character (before the newline) is a backslash is joined with the
 *	line that follows it; the backslash and the newlines are dropped.
 *
 *	*lineno is incremented once for every newline consumed and *size is
 *	set to the length of the result.
 *
 *	Returns a NUL terminated buffer obtained from realloc() that the
 *	caller must free, or NULL when nothing could be read.  Exits via
 *	err() if memory runs out.
 */
static char *
getline(fp, size, lineno)
	FILE *fp;
	size_t *size, *lineno;
{
	size_t s, len = 0;
	char *buf = NULL;
	char *ptr;
	int cnt = 1;

	while (cnt) {
		/* EOF or error: hand back whatever has been collected */
		if ((ptr = fgetln(fp, &s)) == NULL)
			break;

		/* the newline may be missing at EOF */
		if (s > 0 && ptr[s - 1] == '\n') {
			s--;		/* forget newline */
			*lineno += 1;
		}

		/*
		 * Decide afresh for every physical line.  An empty line has
		 * no trailing backslash and therefore ends the logical line
		 * instead of inheriting the previous "continue" decision.
		 */
		cnt = (s > 0 && ptr[s - 1] == '\\');
		if (cnt)
			s--;		/* forget backslash */

		buf = realloc(buf, len + s + 1);
		if (buf == NULL)
			err(1, "Out of memory");
		(void) memcpy(buf + len, ptr, s);
		len += s;
		buf[len] = '\0';
	}

	*size = len;
	return buf;
}
|
|
|
|
|
|
int
|
|
main(argc, argv)
|
|
int argc;
|
|
char *argv[];
|
|
{
|
|
size_t lnum, size;
|
|
FILE *fp;
|
|
char *line, *token, *p;
|
|
const struct toklist *t;
|
|
struct chartbl ct;
|
|
int c;
|
|
char *ifname, *ofname = NULL;
|
|
int error = 0;
|
|
|
|
while ((c = getopt(argc, argv, "o:")) != -1)
|
|
switch (c) {
|
|
case 'o':
|
|
ofname = optarg;
|
|
break;
|
|
default:
|
|
usage();
|
|
break;
|
|
}
|
|
|
|
if (argc - 1 != optind)
|
|
usage();
|
|
|
|
ifname = argv[optind];
|
|
|
|
if ((fp = fopen(ifname, "r")) == NULL)
|
|
err(1, "Cannot open `%s'", ifname);
|
|
|
|
ct.maxchar = 256;
|
|
ct.ctype = malloc(sizeof(ct.ctype[0]) * (ct.maxchar + 1));
|
|
ct.uptab = malloc(sizeof(ct.uptab[0]) * (ct.maxchar + 1));
|
|
ct.lotab = malloc(sizeof(ct.lotab[0]) * (ct.maxchar + 1));
|
|
ct.ctypefilename = NULL;
|
|
ct.numericfilename = NULL;
|
|
ct.decimal_point = '.';
|
|
ct.thousands_sep = ',';
|
|
|
|
if (ct.ctype == NULL || ct.uptab == NULL || ct.lotab == NULL)
|
|
err(1, "Out of memory");
|
|
|
|
(void) memset(ct.ctype, 0, sizeof(ct.ctype[0]) * (ct.maxchar * 1));
|
|
(void) memset(ct.uptab, 0, sizeof(ct.uptab[0]) * (ct.maxchar * 1));
|
|
(void) memset(ct.lotab, 0, sizeof(ct.lotab[0]) * (ct.maxchar * 1));
|
|
|
|
for (lnum = 1; (line = getline(fp, &size, &lnum)) != NULL; free(line)) {
|
|
if (*line == '#')
|
|
continue;
|
|
for (token = line; *token && isspace((u_char) *token); token++)
|
|
continue;
|
|
if (*token == '\0')
|
|
continue;
|
|
for (p = token; *p && !isspace((u_char) *p); p++)
|
|
continue;
|
|
if (*p == '\0')
|
|
continue;
|
|
*p = '\0';
|
|
for (p++; *p && isspace((u_char) *p); p++)
|
|
continue;
|
|
for (t = tokens; t->name != NULL; t++)
|
|
if (strcmp(t->name, token) == 0)
|
|
break;
|
|
if (t->name == NULL) {
|
|
warnx("Unknown token %s at line %lu", token,
|
|
(u_long)lnum);
|
|
error |= 1;
|
|
continue;
|
|
}
|
|
error |= (*t->func)(&ct, token, t->arg, p, lnum);
|
|
}
|
|
(void) fclose(fp);
|
|
|
|
for (c = 1; c <= ct.maxchar; c++) {
|
|
if (ct.uptab[c] == 0)
|
|
ct.uptab[c] = c - 1;
|
|
if (ct.lotab[c] == 0)
|
|
ct.lotab[c] = c - 1;
|
|
}
|
|
|
|
if (ofname != NULL)
|
|
error |= output_ascii(ofname, &ct);
|
|
error |= output_binary(&ct);
|
|
return error;
|
|
}
|