Update to Henry's Nov 2, 1993 alpha3.2 release.
This commit is contained in:
parent
5797eb6fc3
commit
6931099e77
|
@ -3,21 +3,16 @@
|
|||
# internal assertion checking). Take -Dconst= out for an ANSI compiler.
|
||||
# Do not take -DPOSIX_MISTAKE out. REGCFLAGS isn't important to you (it's
|
||||
# for my use in some special contexts).
|
||||
#CFLAGS = -I. -Dconst= -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
|
||||
CFLAGS = -I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
|
||||
|
||||
# If you have an ANSI compiler, take -o out of MKHFLAGS. If you want
|
||||
# the Berkeley __P macro, put -b in.
|
||||
#MKHFLAGS = -o
|
||||
MKHFLAGS = -b
|
||||
|
||||
LDFLAGS =
|
||||
|
||||
# If you have an ANSI environment, take limits.h and stdlib.h out of
|
||||
# HMISSING and take memmove out of SRCMISSING and OBJMISSING.
|
||||
#HMISSING = limits.h stdlib.h
|
||||
#SRCMISSING = split.c memmove.c
|
||||
#OBJMISSING = split.o memmove.o
|
||||
HMISSING =
|
||||
SRCMISSING = split.c
|
||||
OBJMISSING = split.o
|
||||
|
@ -118,4 +113,4 @@ cio: $(DTR)
|
|||
cio $(DTR)
|
||||
|
||||
rdf: $(DTR)
|
||||
rcsdiff -c $(DTR)
|
||||
rcsdiff -c $(DTR) 2>&1 | p
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
alpha3.1 release.
|
||||
Sat Aug 14 01:38:38 EDT 1993
|
||||
alpha3.2 release.
|
||||
Tue Nov 2 02:06:08 EST 1993
|
||||
henry@zoo.toronto.edu
|
||||
|
||||
See WHATSNEW for change listing.
|
||||
|
|
|
@ -1,3 +1,15 @@
|
|||
New in alpha3.2: Assorted bits of cleanup and portability improvement
|
||||
(the development base is now a BSDI system using GCC instead of an ancient
|
||||
Sun system, and the newer compiler exposed some glitches). Fix for a
|
||||
serious bug that affected REs using many [] (including REG_ICASE REs
|
||||
because of the way they are implemented), *sometimes*, depending on
|
||||
memory-allocation patterns. The header-file prototypes no longer name
|
||||
the parameters, avoiding possible name conflicts. The possibility that
|
||||
some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
|
||||
now handled gracefully. "uchar" is no longer used as an internal type
|
||||
name (too many people have the same idea). Still the same old lousy
|
||||
performance, alas.
|
||||
|
||||
New in alpha3.1: Basically nothing, this release is just a bookkeeping
|
||||
convenience. Stay tuned.
|
||||
|
||||
|
|
|
@ -393,10 +393,12 @@ sopno stopst;
|
|||
break;
|
||||
case OLPAREN:
|
||||
i = OPND(m->g->strip[ss]);
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
m->pmatch[i].rm_so = sp - m->offp;
|
||||
break;
|
||||
case ORPAREN:
|
||||
i = OPND(m->g->strip[ss]);
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
m->pmatch[i].rm_eo = sp - m->offp;
|
||||
break;
|
||||
default: /* uh oh */
|
||||
|
@ -522,6 +524,7 @@ sopno lev; /* PLUS nesting level */
|
|||
switch (OP(s)) {
|
||||
case OBACK_: /* the vilest depths */
|
||||
i = OPND(s);
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
if (m->pmatch[i].rm_eo == -1)
|
||||
return(NULL);
|
||||
assert(m->pmatch[i].rm_so != -1);
|
||||
|
@ -582,6 +585,7 @@ sopno lev; /* PLUS nesting level */
|
|||
break;
|
||||
case OLPAREN: /* must undo assignment if rest fails */
|
||||
i = OPND(s);
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
offsave = m->pmatch[i].rm_so;
|
||||
m->pmatch[i].rm_so = sp - m->offp;
|
||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
||||
|
@ -592,6 +596,7 @@ sopno lev; /* PLUS nesting level */
|
|||
break;
|
||||
case ORPAREN: /* must undo assignment if rest fails */
|
||||
i = OPND(s);
|
||||
assert(0 < i && i <= m->g->nsub);
|
||||
offsave = m->pmatch[i].rm_eo;
|
||||
m->pmatch[i].rm_eo = sp - m->offp;
|
||||
dp = backref(m, sp, stop, ss+1, stopst, lev);
|
||||
|
|
|
@ -69,7 +69,7 @@ static int never = 0; /* for use in asserts; shuts lint up */
|
|||
|
||||
/*
|
||||
- regcomp - interface for parser and compilation
|
||||
= extern int regcomp(regex_t *preg, const char *pattern, int cflags);
|
||||
= extern int regcomp(regex_t *, const char *, int);
|
||||
= #define REG_BASIC 0000
|
||||
= #define REG_EXTENDED 0001
|
||||
= #define REG_ICASE 0002
|
||||
|
@ -606,7 +606,7 @@ register struct parse *p;
|
|||
CHadd(cs, ci);
|
||||
}
|
||||
if (cs->multis != NULL)
|
||||
mccase(cs);
|
||||
mccase(p, cs);
|
||||
}
|
||||
if (invert) {
|
||||
register int i;
|
||||
|
@ -619,7 +619,7 @@ register struct parse *p;
|
|||
if (p->g->cflags&REG_NEWLINE)
|
||||
CHsub(cs, '\n');
|
||||
if (cs->multis != NULL)
|
||||
mcinvert(cs);
|
||||
mcinvert(p, cs);
|
||||
}
|
||||
|
||||
assert(cs->multis == NULL); /* xxx */
|
||||
|
@ -728,7 +728,7 @@ register cset *cs;
|
|||
while ((c = *u++) != '\0')
|
||||
CHadd(cs, c);
|
||||
for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
|
||||
MCadd(cs, u);
|
||||
MCadd(p, cs, u);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -980,6 +980,7 @@ register struct parse *p;
|
|||
register size_t nbytes;
|
||||
register cset *cs;
|
||||
register size_t css = (size_t)p->g->csetsize;
|
||||
register int i;
|
||||
|
||||
if (no >= p->ncsalloc) { /* need another column of space */
|
||||
p->ncsalloc += CHAR_BIT;
|
||||
|
@ -992,10 +993,14 @@ register struct parse *p;
|
|||
p->g->sets = (cset *)realloc((char *)p->g->sets,
|
||||
nc * sizeof(cset));
|
||||
if (p->g->setbits == NULL)
|
||||
p->g->setbits = (uchar *)malloc(nbytes);
|
||||
else
|
||||
p->g->setbits = (uchar *)realloc((char *)p->g->setbits,
|
||||
p->g->setbits = (uch *)malloc(nbytes);
|
||||
else {
|
||||
p->g->setbits = (uch *)realloc((char *)p->g->setbits,
|
||||
nbytes);
|
||||
/* xxx this isn't right if setbits is now NULL */
|
||||
for (i = 0; i < no; i++)
|
||||
p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
|
||||
}
|
||||
if (p->g->sets != NULL && p->g->setbits != NULL)
|
||||
(void) memset((char *)p->g->setbits + (nbytes - css),
|
||||
0, css);
|
||||
|
@ -1051,7 +1056,7 @@ freezeset(p, cs)
|
|||
register struct parse *p;
|
||||
register cset *cs;
|
||||
{
|
||||
register uchar h = cs->hash;
|
||||
register uch h = cs->hash;
|
||||
register int i;
|
||||
register cset *top = &p->g->sets[p->g->ncsets];
|
||||
register cset *cs2;
|
||||
|
@ -1201,13 +1206,14 @@ register char *cp;
|
|||
|
||||
/*
|
||||
- mcinvert - invert the list of collating elements in a cset
|
||||
== static void mcinvert(register cset *cs);
|
||||
== static void mcinvert(register struct parse *p, register cset *cs);
|
||||
*
|
||||
* This would have to know the set of possibilities. Implementation
|
||||
* is deferred.
|
||||
*/
|
||||
static void
|
||||
mcinvert(cs)
|
||||
mcinvert(p, cs)
|
||||
register struct parse *p;
|
||||
register cset *cs;
|
||||
{
|
||||
assert(cs->multis == NULL); /* xxx */
|
||||
|
@ -1215,13 +1221,14 @@ register cset *cs;
|
|||
|
||||
/*
|
||||
- mccase - add case counterparts of the list of collating elements in a cset
|
||||
== static void mccase(register cset *cs);
|
||||
== static void mccase(register struct parse *p, register cset *cs);
|
||||
*
|
||||
* This would have to know the set of possibilities. Implementation
|
||||
* is deferred.
|
||||
*/
|
||||
static void
|
||||
mccase(cs)
|
||||
mccase(p, cs)
|
||||
register struct parse *p;
|
||||
register cset *cs;
|
||||
{
|
||||
assert(cs->multis == NULL); /* xxx */
|
||||
|
@ -1236,7 +1243,7 @@ isinsets(g, c)
|
|||
register struct re_guts *g;
|
||||
int c;
|
||||
{
|
||||
register uchar *col;
|
||||
register uch *col;
|
||||
register int i;
|
||||
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
|
||||
register unsigned uc = (unsigned char)c;
|
||||
|
@ -1257,7 +1264,7 @@ register struct re_guts *g;
|
|||
int c1;
|
||||
int c2;
|
||||
{
|
||||
register uchar *col;
|
||||
register uch *col;
|
||||
register int i;
|
||||
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
|
||||
register unsigned uc1 = (unsigned char)c1;
|
||||
|
@ -1442,7 +1449,7 @@ register struct parse *p;
|
|||
register struct re_guts *g;
|
||||
{
|
||||
g->nstates = p->slen;
|
||||
g->strip = (sop *)realloc((sop *)p->strip, p->slen * sizeof(sop));
|
||||
g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
|
||||
if (g->strip == NULL) {
|
||||
SETERROR(REG_ESPACE);
|
||||
g->strip = p->strip;
|
||||
|
@ -1529,8 +1536,10 @@ register struct re_guts *g;
|
|||
for (i = g->mlen; i > 0; i--) {
|
||||
while (OP(s = *scan++) != OCHAR)
|
||||
continue;
|
||||
assert(cp < g->must + g->mlen);
|
||||
*cp++ = (char)OPND(s);
|
||||
}
|
||||
assert(cp == g->must + g->mlen);
|
||||
*cp++ = '\0'; /* just on general principles */
|
||||
}
|
||||
|
||||
|
|
|
@ -31,8 +31,8 @@ static void mcadd __P((register struct parse *p, register cset *cs, register cha
|
|||
static void mcsub __P((register cset *cs, register char *cp));
|
||||
static int mcin __P((register cset *cs, register char *cp));
|
||||
static char *mcfind __P((register cset *cs, register char *cp));
|
||||
static void mcinvert __P((register cset *cs));
|
||||
static void mccase __P((register cset *cs));
|
||||
static void mcinvert __P((register struct parse *p, register cset *cs));
|
||||
static void mccase __P((register struct parse *p, register cset *cs));
|
||||
static int isinsets __P((register struct re_guts *g, int c));
|
||||
static int samesets __P((register struct re_guts *g, int c1, int c2));
|
||||
static void categorize __P((struct parse *p, register struct re_guts *g));
|
||||
|
|
|
@ -55,8 +55,7 @@ static struct rerr {
|
|||
|
||||
/*
|
||||
- regerror - the interface to error numbers
|
||||
= extern size_t regerror(int errcode, const regex_t *preg, char *errbuf, \
|
||||
= size_t errbuf_size);
|
||||
= extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
size_t
|
||||
|
@ -105,7 +104,7 @@ size_t errbuf_size;
|
|||
|
||||
/*
|
||||
- regatoi - internal routine to implement REG_ATOI
|
||||
= static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
== static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
*/
|
||||
static char *
|
||||
regatoi(preg, localbuf)
|
||||
|
|
|
@ -4,6 +4,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
/* === regerror.c === */
|
||||
static char *regatoi __P((const regex_t *preg, char *localbuf));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ typedef struct {
|
|||
|
||||
|
||||
/* === regcomp.c === */
|
||||
extern int regcomp __P((regex_t *preg, const char *pattern, int cflags));
|
||||
extern int regcomp __P((regex_t *, const char *, int));
|
||||
#define REG_BASIC 0000
|
||||
#define REG_EXTENDED 0001
|
||||
#define REG_ICASE 0002
|
||||
|
@ -50,12 +50,11 @@ extern int regcomp __P((regex_t *preg, const char *pattern, int cflags));
|
|||
#define REG_INVARG 16
|
||||
#define REG_ATOI 255 /* convert name to number (!) */
|
||||
#define REG_ITOA 0400 /* convert number to name (!) */
|
||||
extern size_t regerror __P((int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size));
|
||||
static char *regatoi __P((const regex_t *preg, char *localbuf));
|
||||
extern size_t regerror __P((int, const regex_t *, char *, size_t));
|
||||
|
||||
|
||||
/* === regexec.c === */
|
||||
extern int regexec __P((const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
|
||||
extern int regexec __P((const regex_t *, const char *, size_t, regmatch_t [], int));
|
||||
#define REG_NOTBOL 00001
|
||||
#define REG_NOTEOL 00002
|
||||
#define REG_STARTEND 00004
|
||||
|
@ -65,7 +64,7 @@ extern int regexec __P((const regex_t *preg, const char *string, size_t nmatch,
|
|||
|
||||
|
||||
/* === regfree.c === */
|
||||
extern void regfree __P((regex_t *preg));
|
||||
extern void regfree __P((regex_t *));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -80,19 +80,19 @@ typedef long sopno;
|
|||
* vectors at run time.
|
||||
*/
|
||||
typedef struct {
|
||||
uchar *ptr; /* -> uchar [csetsize] */
|
||||
uchar mask; /* bit within array */
|
||||
uchar hash; /* hash code */
|
||||
uch *ptr; /* -> uch [csetsize] */
|
||||
uch mask; /* bit within array */
|
||||
uch hash; /* hash code */
|
||||
size_t smultis;
|
||||
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
|
||||
} cset;
|
||||
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
|
||||
#define CHadd(cs, c) ((cs)->ptr[(uchar)(c)] |= (cs)->mask, (cs)->hash += (c))
|
||||
#define CHsub(cs, c) ((cs)->ptr[(uchar)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
||||
#define CHIN(cs, c) ((cs)->ptr[(uchar)(c)] & (cs)->mask)
|
||||
#define MCadd(cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
|
||||
#define MCsub(cs, cp) mcsub(cs, cp)
|
||||
#define MCin(cs, cp) mcin(cs, cp)
|
||||
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
|
||||
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
||||
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
|
||||
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
|
||||
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
|
||||
#define MCin(p, cs, cp) mcin(p, cs, cp)
|
||||
|
||||
/* stuff for character categories */
|
||||
typedef unsigned char cat_t;
|
||||
|
@ -107,7 +107,7 @@ struct re_guts {
|
|||
int csetsize; /* number of bits in a cset vector */
|
||||
int ncsets; /* number of csets in use */
|
||||
cset *sets; /* -> cset [ncsets] */
|
||||
uchar *setbits; /* -> uchar[csetsize][ncsets/CHAR_BIT] */
|
||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
||||
int cflags; /* copy of regcomp() cflags argument */
|
||||
sopno nstates; /* = number of sops */
|
||||
sopno firststate; /* the initial OEND (normally 0) */
|
||||
|
|
|
@ -96,8 +96,8 @@ static int nope = 0; /* for use in asserts; shuts lint up */
|
|||
|
||||
/*
|
||||
- regexec - interface for matching
|
||||
= extern int regexec(const regex_t *preg, const char *string, size_t nmatch, \
|
||||
= regmatch_t pmatch[], int eflags);
|
||||
= extern int regexec(const regex_t *, const char *, size_t, \
|
||||
= regmatch_t [], int);
|
||||
= #define REG_NOTBOL 00001
|
||||
= #define REG_NOTEOL 00002
|
||||
= #define REG_STARTEND 00004
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
/*
|
||||
- regfree - free everything
|
||||
= extern void regfree(regex_t *preg);
|
||||
= extern void regfree(regex_t *);
|
||||
*/
|
||||
void
|
||||
regfree(preg)
|
||||
|
|
|
@ -318,6 +318,11 @@ a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab
|
|||
123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
|
||||
# and one really big one, beyond any plausible word width
|
||||
1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
|
||||
# fish for problems as brackets go past 8
|
||||
[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
|
||||
[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
|
||||
|
||||
# subtleties of matching
|
||||
abc & xabcy abc
|
||||
|
@ -434,3 +439,10 @@ a[[:>:]] & a- a
|
|||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
|
||||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
|
||||
[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
|
||||
|
||||
# past problems
|
||||
(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
|
||||
abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
|
||||
abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
|
||||
(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
|
||||
CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#define INFINITY (DUPMAX + 1)
|
||||
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned char uch;
|
||||
|
||||
#ifndef REDEBUG
|
||||
#ifndef NDEBUG
|
||||
|
|
|
@ -318,6 +318,11 @@ a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab
|
|||
123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
|
||||
# and one really big one, beyond any plausible word width
|
||||
1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
|
||||
# fish for problems as brackets go past 8
|
||||
[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
|
||||
[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
|
||||
[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
|
||||
|
||||
# subtleties of matching
|
||||
abc & xabcy abc
|
||||
|
@ -434,3 +439,10 @@ a[[:>:]] & a- a
|
|||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
|
||||
[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
|
||||
[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
|
||||
|
||||
# past problems
|
||||
(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
|
||||
abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
|
||||
abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
|
||||
(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
|
||||
CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
|
||||
|
|
Loading…
Reference in New Issue