Update to Henry's Nov 2, 1993 alpha3.2 release.

This commit is contained in:
jtc 1993-11-11 02:02:57 +00:00
parent 5797eb6fc3
commit 6931099e77
15 changed files with 91 additions and 47 deletions

View File

@ -3,21 +3,16 @@
# internal assertion checking). Take -Dconst= out for an ANSI compiler.
# Do not take -DPOSIX_MISTAKE out. REGCFLAGS isn't important to you (it's
# for my use in some special contexts).
#CFLAGS = -I. -Dconst= -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
CFLAGS = -I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
# If you have an ANSI compiler, take -o out of MKHFLAGS. If you want
# the Berkeley __P macro, put -b in.
#MKHFLAGS = -o
MKHFLAGS = -b
LDFLAGS =
# If you have an ANSI environment, take limits.h and stdlib.h out of
# HMISSING and take memmove out of SRCMISSING and OBJMISSING.
#HMISSING = limits.h stdlib.h
#SRCMISSING = split.c memmove.c
#OBJMISSING = split.o memmove.o
HMISSING =
SRCMISSING = split.c
OBJMISSING = split.o
@ -118,4 +113,4 @@ cio: $(DTR)
cio $(DTR)
rdf: $(DTR)
rcsdiff -c $(DTR)
rcsdiff -c $(DTR) 2>&1 | p

View File

@ -1,5 +1,5 @@
alpha3.1 release.
Sat Aug 14 01:38:38 EDT 1993
alpha3.2 release.
Tue Nov 2 02:06:08 EST 1993
henry@zoo.toronto.edu
See WHATSNEW for change listing.

View File

@ -1,3 +1,15 @@
New in alpha3.2: Assorted bits of cleanup and portability improvement
(the development base is now a BSDI system using GCC instead of an ancient
Sun system, and the newer compiler exposed some glitches). Fix for a
serious bug that affected REs using many [] (including REG_ICASE REs
because of the way they are implemented), *sometimes*, depending on
memory-allocation patterns. The header-file prototypes no longer name
the parameters, avoiding possible name conflicts. The possibility that
some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
now handled gracefully. "uchar" is no longer used as an internal type
name (too many people have the same idea). Still the same old lousy
performance, alas.
New in alpha3.1: Basically nothing, this release is just a bookkeeping
convenience. Stay tuned.

View File

@ -393,10 +393,12 @@ sopno stopst;
break;
case OLPAREN:
i = OPND(m->g->strip[ss]);
assert(0 < i && i <= m->g->nsub);
m->pmatch[i].rm_so = sp - m->offp;
break;
case ORPAREN:
i = OPND(m->g->strip[ss]);
assert(0 < i && i <= m->g->nsub);
m->pmatch[i].rm_eo = sp - m->offp;
break;
default: /* uh oh */
@ -522,6 +524,7 @@ sopno lev; /* PLUS nesting level */
switch (OP(s)) {
case OBACK_: /* the vilest depths */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
if (m->pmatch[i].rm_eo == -1)
return(NULL);
assert(m->pmatch[i].rm_so != -1);
@ -582,6 +585,7 @@ sopno lev; /* PLUS nesting level */
break;
case OLPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev);
@ -592,6 +596,7 @@ sopno lev; /* PLUS nesting level */
break;
case ORPAREN: /* must undo assignment if rest fails */
i = OPND(s);
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev);

View File

@ -69,7 +69,7 @@ static int never = 0; /* for use in asserts; shuts lint up */
/*
- regcomp - interface for parser and compilation
= extern int regcomp(regex_t *preg, const char *pattern, int cflags);
= extern int regcomp(regex_t *, const char *, int);
= #define REG_BASIC 0000
= #define REG_EXTENDED 0001
= #define REG_ICASE 0002
@ -606,7 +606,7 @@ register struct parse *p;
CHadd(cs, ci);
}
if (cs->multis != NULL)
mccase(cs);
mccase(p, cs);
}
if (invert) {
register int i;
@ -619,7 +619,7 @@ register struct parse *p;
if (p->g->cflags&REG_NEWLINE)
CHsub(cs, '\n');
if (cs->multis != NULL)
mcinvert(cs);
mcinvert(p, cs);
}
assert(cs->multis == NULL); /* xxx */
@ -728,7 +728,7 @@ register cset *cs;
while ((c = *u++) != '\0')
CHadd(cs, c);
for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
MCadd(cs, u);
MCadd(p, cs, u);
}
/*
@ -980,6 +980,7 @@ register struct parse *p;
register size_t nbytes;
register cset *cs;
register size_t css = (size_t)p->g->csetsize;
register int i;
if (no >= p->ncsalloc) { /* need another column of space */
p->ncsalloc += CHAR_BIT;
@ -992,10 +993,14 @@ register struct parse *p;
p->g->sets = (cset *)realloc((char *)p->g->sets,
nc * sizeof(cset));
if (p->g->setbits == NULL)
p->g->setbits = (uchar *)malloc(nbytes);
else
p->g->setbits = (uchar *)realloc((char *)p->g->setbits,
p->g->setbits = (uch *)malloc(nbytes);
else {
p->g->setbits = (uch *)realloc((char *)p->g->setbits,
nbytes);
/* xxx this isn't right if realloc failed and setbits is now NULL */
for (i = 0; i < no; i++)
p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
}
if (p->g->sets != NULL && p->g->setbits != NULL)
(void) memset((char *)p->g->setbits + (nbytes - css),
0, css);
@ -1051,7 +1056,7 @@ freezeset(p, cs)
register struct parse *p;
register cset *cs;
{
register uchar h = cs->hash;
register uch h = cs->hash;
register int i;
register cset *top = &p->g->sets[p->g->ncsets];
register cset *cs2;
@ -1201,13 +1206,14 @@ register char *cp;
/*
- mcinvert - invert the list of collating elements in a cset
== static void mcinvert(register cset *cs);
== static void mcinvert(register struct parse *p, register cset *cs);
*
* This would have to know the set of possibilities. Implementation
* is deferred.
*/
static void
mcinvert(cs)
mcinvert(p, cs)
register struct parse *p;
register cset *cs;
{
assert(cs->multis == NULL); /* xxx */
@ -1215,13 +1221,14 @@ register cset *cs;
/*
- mccase - add case counterparts of the list of collating elements in a cset
== static void mccase(register cset *cs);
== static void mccase(register struct parse *p, register cset *cs);
*
* This would have to know the set of possibilities. Implementation
* is deferred.
*/
static void
mccase(cs)
mccase(p, cs)
register struct parse *p;
register cset *cs;
{
assert(cs->multis == NULL); /* xxx */
@ -1236,7 +1243,7 @@ isinsets(g, c)
register struct re_guts *g;
int c;
{
register uchar *col;
register uch *col;
register int i;
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
register unsigned uc = (unsigned char)c;
@ -1257,7 +1264,7 @@ register struct re_guts *g;
int c1;
int c2;
{
register uchar *col;
register uch *col;
register int i;
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
register unsigned uc1 = (unsigned char)c1;
@ -1442,7 +1449,7 @@ register struct parse *p;
register struct re_guts *g;
{
g->nstates = p->slen;
g->strip = (sop *)realloc((sop *)p->strip, p->slen * sizeof(sop));
g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
if (g->strip == NULL) {
SETERROR(REG_ESPACE);
g->strip = p->strip;
@ -1529,8 +1536,10 @@ register struct re_guts *g;
for (i = g->mlen; i > 0; i--) {
while (OP(s = *scan++) != OCHAR)
continue;
assert(cp < g->must + g->mlen);
*cp++ = (char)OPND(s);
}
assert(cp == g->must + g->mlen);
*cp++ = '\0'; /* just on general principles */
}

View File

@ -31,8 +31,8 @@ static void mcadd __P((register struct parse *p, register cset *cs, register cha
static void mcsub __P((register cset *cs, register char *cp));
static int mcin __P((register cset *cs, register char *cp));
static char *mcfind __P((register cset *cs, register char *cp));
static void mcinvert __P((register cset *cs));
static void mccase __P((register cset *cs));
static void mcinvert __P((register struct parse *p, register cset *cs));
static void mccase __P((register struct parse *p, register cset *cs));
static int isinsets __P((register struct re_guts *g, int c));
static int samesets __P((register struct re_guts *g, int c1, int c2));
static void categorize __P((struct parse *p, register struct re_guts *g));

View File

@ -55,8 +55,7 @@ static struct rerr {
/*
- regerror - the interface to error numbers
= extern size_t regerror(int errcode, const regex_t *preg, char *errbuf, \
= size_t errbuf_size);
= extern size_t regerror(int, const regex_t *, char *, size_t);
*/
/* ARGSUSED */
size_t
@ -105,7 +104,7 @@ size_t errbuf_size;
/*
- regatoi - internal routine to implement REG_ATOI
= static char *regatoi(const regex_t *preg, char *localbuf);
== static char *regatoi(const regex_t *preg, char *localbuf);
*/
static char *
regatoi(preg, localbuf)

View File

@ -4,6 +4,7 @@ extern "C" {
#endif
/* === regerror.c === */
static char *regatoi __P((const regex_t *preg, char *localbuf));
#ifdef __cplusplus
}

View File

@ -20,7 +20,7 @@ typedef struct {
/* === regcomp.c === */
extern int regcomp __P((regex_t *preg, const char *pattern, int cflags));
extern int regcomp __P((regex_t *, const char *, int));
#define REG_BASIC 0000
#define REG_EXTENDED 0001
#define REG_ICASE 0002
@ -50,12 +50,11 @@ extern int regcomp __P((regex_t *preg, const char *pattern, int cflags));
#define REG_INVARG 16
#define REG_ATOI 255 /* convert name to number (!) */
#define REG_ITOA 0400 /* convert number to name (!) */
extern size_t regerror __P((int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size));
static char *regatoi __P((const regex_t *preg, char *localbuf));
extern size_t regerror __P((int, const regex_t *, char *, size_t));
/* === regexec.c === */
extern int regexec __P((const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
extern int regexec __P((const regex_t *, const char *, size_t, regmatch_t [], int));
#define REG_NOTBOL 00001
#define REG_NOTEOL 00002
#define REG_STARTEND 00004
@ -65,7 +64,7 @@ extern int regexec __P((const regex_t *preg, const char *string, size_t nmatch,
/* === regfree.c === */
extern void regfree __P((regex_t *preg));
extern void regfree __P((regex_t *));
#ifdef __cplusplus
}

View File

@ -80,19 +80,19 @@ typedef long sopno;
* vectors at run time.
*/
typedef struct {
uchar *ptr; /* -> uchar [csetsize] */
uchar mask; /* bit within array */
uchar hash; /* hash code */
uch *ptr; /* -> uch [csetsize] */
uch mask; /* bit within array */
uch hash; /* hash code */
size_t smultis;
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
} cset;
/* note that CHadd and CHsub are unsafe (they evaluate their arguments more than once), and CHIN doesn't yield 0/1 */
#define CHadd(cs, c) ((cs)->ptr[(uchar)(c)] |= (cs)->mask, (cs)->hash += (c))
#define CHsub(cs, c) ((cs)->ptr[(uchar)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
#define CHIN(cs, c) ((cs)->ptr[(uchar)(c)] & (cs)->mask)
#define MCadd(cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
#define MCsub(cs, cp) mcsub(cs, cp)
#define MCin(cs, cp) mcin(cs, cp)
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
#define MCin(p, cs, cp) mcin(p, cs, cp)
/* stuff for character categories */
typedef unsigned char cat_t;
@ -107,7 +107,7 @@ struct re_guts {
int csetsize; /* number of bits in a cset vector */
int ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
uchar *setbits; /* -> uchar[csetsize][ncsets/CHAR_BIT] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int cflags; /* copy of regcomp() cflags argument */
sopno nstates; /* = number of sops */
sopno firststate; /* the initial OEND (normally 0) */

View File

@ -96,8 +96,8 @@ static int nope = 0; /* for use in asserts; shuts lint up */
/*
- regexec - interface for matching
= extern int regexec(const regex_t *preg, const char *string, size_t nmatch, \
= regmatch_t pmatch[], int eflags);
= extern int regexec(const regex_t *, const char *, size_t, \
= regmatch_t [], int);
= #define REG_NOTBOL 00001
= #define REG_NOTEOL 00002
= #define REG_STARTEND 00004

View File

@ -8,7 +8,7 @@
/*
- regfree - free everything
= extern void regfree(regex_t *preg);
= extern void regfree(regex_t *);
*/
void
regfree(preg)

View File

@ -318,6 +318,11 @@ a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab
123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
# and one really big one, beyond any plausible word width
1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
# fish for problems as brackets go past 8
[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
# subtleties of matching
abc & xabcy abc
@ -434,3 +439,10 @@ a[[:>:]] & a- a
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
# past problems
(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11

View File

@ -3,7 +3,7 @@
#define INFINITY (DUPMAX + 1)
#define NC (CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uchar;
typedef unsigned char uch;
#ifndef REDEBUG
#ifndef NDEBUG

View File

@ -318,6 +318,11 @@ a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab
123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
# and one really big one, beyond any plausible word width
1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
# fish for problems as brackets go past 8
[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
# subtleties of matching
abc & xabcy abc
@ -434,3 +439,10 @@ a[[:>:]] & a- a
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
# past problems
(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11