mirror of
https://github.com/lua/lua
synced 2025-04-19 11:22:48 +03:00
new implementation for '*' in patterns + new option '+'
This commit is contained in:
parent
732741b62f
commit
7808ea3a5f
26
liolib.c
26
liolib.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
** $Id: liolib.c,v 1.37 1999/04/05 19:47:05 roberto Exp roberto $
|
** $Id: liolib.c,v 1.38 1999/04/14 20:40:32 roberto Exp $
|
||||||
** Standard I/O (and system) library
|
** Standard I/O (and system) library
|
||||||
** See Copyright Notice in lua.h
|
** See Copyright Notice in lua.h
|
||||||
*/
|
*/
|
||||||
@ -244,23 +244,25 @@ static int read_pattern (FILE *f, char *p) {
|
|||||||
p++;
|
p++;
|
||||||
continue;
|
continue;
|
||||||
default: {
|
default: {
|
||||||
char *ep; /* get what is next */
|
char *ep = luaI_classend(p); /* get what is next */
|
||||||
int m; /* match result */
|
int m; /* match result */
|
||||||
if (c == NEED_OTHER) c = getc(f);
|
if (c == NEED_OTHER) c = getc(f);
|
||||||
if (c != EOF)
|
m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
|
||||||
m = luaI_singlematch(c, p, &ep);
|
|
||||||
else {
|
|
||||||
luaI_singlematch(0, p, &ep); /* to set "ep" */
|
|
||||||
m = 0; /* EOF matches no pattern */
|
|
||||||
}
|
|
||||||
if (m) {
|
if (m) {
|
||||||
if (!inskip) luaL_addchar(c);
|
if (!inskip) luaL_addchar(c);
|
||||||
c = NEED_OTHER;
|
c = NEED_OTHER;
|
||||||
}
|
}
|
||||||
switch (*ep) {
|
switch (*ep) {
|
||||||
case '*': /* repetition */
|
case '+': /* repetition (1 or more) */
|
||||||
if (!m) p = ep+1; /* else stay in (repeat) the same item */
|
if (!m) goto break_while; /* pattern fails? */
|
||||||
continue;
|
/* else go through */
|
||||||
|
case '*': /* repetition (0 or more) */
|
||||||
|
while (m) { /* reads the same item until it fails */
|
||||||
|
c = getc(f);
|
||||||
|
m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
|
||||||
|
if (m && !inskip) luaL_addchar(c);
|
||||||
|
}
|
||||||
|
/* go through to continue reading the pattern */
|
||||||
case '?': /* optional */
|
case '?': /* optional */
|
||||||
p = ep+1; /* continues reading the pattern */
|
p = ep+1; /* continues reading the pattern */
|
||||||
continue;
|
continue;
|
||||||
@ -336,7 +338,7 @@ static void io_read (void) {
|
|||||||
success = 1; /* always success */
|
success = 1; /* always success */
|
||||||
break;
|
break;
|
||||||
case 4: /* word */
|
case 4: /* word */
|
||||||
success = read_pattern(f, "{%s*}%S%S*");
|
success = read_pattern(f, "{%s*}%S+");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
success = read_pattern(f, p);
|
success = read_pattern(f, p);
|
||||||
|
238
lstrlib.c
238
lstrlib.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
** $Id: lstrlib.c,v 1.28 1999/02/26 15:49:53 roberto Exp roberto $
|
** $Id: lstrlib.c,v 1.29 1999/04/30 14:12:05 roberto Exp roberto $
|
||||||
** Standard library for strings and pattern-matching
|
** Standard library for strings and pattern-matching
|
||||||
** See Copyright Notice in lua.h
|
** See Copyright Notice in lua.h
|
||||||
*/
|
*/
|
||||||
@ -130,7 +130,7 @@ struct Capture {
|
|||||||
|
|
||||||
|
|
||||||
#define ESC '%'
|
#define ESC '%'
|
||||||
#define SPECIALS "^$*?.([%-"
|
#define SPECIALS "^$*+?.([%-"
|
||||||
|
|
||||||
|
|
||||||
static void push_captures (struct Capture *cap) {
|
static void push_captures (struct Capture *cap) {
|
||||||
@ -160,8 +160,21 @@ static int capture_to_close (struct Capture *cap) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static char *bracket_end (char *p) {
|
char *luaI_classend (char *p) {
|
||||||
return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']');
|
switch (*p++) {
|
||||||
|
case ESC:
|
||||||
|
if (*p == '\0')
|
||||||
|
luaL_verror("incorrect pattern (ends with `%c')", ESC);
|
||||||
|
return p+1;
|
||||||
|
case '[':
|
||||||
|
if (*p == '^') p++;
|
||||||
|
if (*p == ']') p++;
|
||||||
|
p = strchr(p, ']');
|
||||||
|
if (!p) lua_error("incorrect pattern (missing `]')");
|
||||||
|
return p+1;
|
||||||
|
default:
|
||||||
|
return p;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -184,48 +197,55 @@ static int matchclass (int c, int cl) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int luaI_singlematch (int c, char *p, char **ep) {
|
|
||||||
|
static int matchbracketclass (int c, char *p, char *end) {
|
||||||
|
int sig = 1;
|
||||||
|
if (*(p+1) == '^') {
|
||||||
|
sig = 0;
|
||||||
|
p++; /* skip the '^' */
|
||||||
|
}
|
||||||
|
while (++p < end) {
|
||||||
|
if (*p == ESC) {
|
||||||
|
p++;
|
||||||
|
if ((p < end) && matchclass(c, (unsigned char)*p))
|
||||||
|
return sig;
|
||||||
|
}
|
||||||
|
else if ((*(p+1) == '-') && (p+2 < end)) {
|
||||||
|
p+=2;
|
||||||
|
if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
|
||||||
|
return sig;
|
||||||
|
}
|
||||||
|
else if ((unsigned char)*p == c) return sig;
|
||||||
|
}
|
||||||
|
return !sig;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int luaI_singlematch (int c, char *p, char *ep) {
|
||||||
switch (*p) {
|
switch (*p) {
|
||||||
case '.': /* matches any char */
|
case '.': /* matches any char */
|
||||||
*ep = p+1;
|
|
||||||
return 1;
|
return 1;
|
||||||
case '\0': /* end of pattern; matches nothing */
|
|
||||||
*ep = p;
|
|
||||||
return 0;
|
|
||||||
case ESC:
|
case ESC:
|
||||||
if (*(++p) == '\0')
|
return matchclass(c, (unsigned char)*(p+1));
|
||||||
luaL_verror("incorrect pattern (ends with `%c')", ESC);
|
case '[':
|
||||||
*ep = p+1;
|
return matchbracketclass(c, p, ep-1);
|
||||||
return matchclass(c, (unsigned char)*p);
|
|
||||||
case '[': {
|
|
||||||
char *end = bracket_end(p+1);
|
|
||||||
int sig = *(p+1) == '^' ? (p++, 0) : 1;
|
|
||||||
if (end == NULL) lua_error("incorrect pattern (missing `]')");
|
|
||||||
*ep = end+1;
|
|
||||||
while (++p < end) {
|
|
||||||
if (*p == ESC) {
|
|
||||||
if (((p+1) < end) && matchclass(c, (unsigned char)*++p))
|
|
||||||
return sig;
|
|
||||||
}
|
|
||||||
else if ((*(p+1) == '-') && (p+2 < end)) {
|
|
||||||
p+=2;
|
|
||||||
if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
|
|
||||||
return sig;
|
|
||||||
}
|
|
||||||
else if ((unsigned char)*p == c) return sig;
|
|
||||||
}
|
|
||||||
return !sig;
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
*ep = p+1;
|
|
||||||
return ((unsigned char)*p == c);
|
return ((unsigned char)*p == c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
|
static char *match (char *s, char *p, struct Capture *cap);
|
||||||
if (*s != b) return NULL;
|
|
||||||
|
|
||||||
|
static char *matchbalance (char *s, char *p, struct Capture *cap) {
|
||||||
|
if (*p == 0 || *(p+1) == 0)
|
||||||
|
lua_error("unbalanced pattern");
|
||||||
|
if (*s != *p) return NULL;
|
||||||
else {
|
else {
|
||||||
|
int b = *p;
|
||||||
|
int e = *(p+1);
|
||||||
int cont = 1;
|
int cont = 1;
|
||||||
while (++s < cap->src_end) {
|
while (++s < cap->src_end) {
|
||||||
if (*s == e) {
|
if (*s == e) {
|
||||||
@ -238,89 +258,109 @@ static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) {
|
static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) {
|
||||||
if (*p == ESC) {
|
int i = 0; /* counts maximum expand for item */
|
||||||
p++;
|
while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep))
|
||||||
if (isdigit((unsigned char)*p)) { /* capture */
|
i++;
|
||||||
int l = check_cap(*p, cap);
|
/* keeps trying to match with the maximum repetitions */
|
||||||
int len = cap->capture[l].len;
|
while (i>=0) {
|
||||||
*ep = p+1;
|
char *res = match((s+i), ep+1, cap);
|
||||||
if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0)
|
if (res) return res;
|
||||||
return s+len;
|
i--; /* else didn't match; reduce 1 repetition to try again */
|
||||||
else return NULL;
|
|
||||||
}
|
|
||||||
else if (*p == 'b') { /* balanced string */
|
|
||||||
p++;
|
|
||||||
if (*p == 0 || *(p+1) == 0)
|
|
||||||
lua_error("unbalanced pattern");
|
|
||||||
*ep = p+2;
|
|
||||||
return matchbalance(s, *p, *(p+1), cap);
|
|
||||||
}
|
|
||||||
else p--; /* and go through */
|
|
||||||
}
|
}
|
||||||
/* "luaI_singlematch" sets "ep" (so must be called even at the end of "s" */
|
return NULL;
|
||||||
return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ?
|
}
|
||||||
s+1 : NULL;
|
|
||||||
|
|
||||||
|
static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) {
|
||||||
|
for (;;) {
|
||||||
|
char *res = match(s, ep+1, cap);
|
||||||
|
if (res != NULL)
|
||||||
|
return res;
|
||||||
|
else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep))
|
||||||
|
s++; /* try with one more repetition */
|
||||||
|
else return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static char *start_capt (char *s, char *p, struct Capture *cap) {
|
||||||
|
char *res;
|
||||||
|
int level = cap->level;
|
||||||
|
if (level >= MAX_CAPT) lua_error("too many captures");
|
||||||
|
cap->capture[level].init = s;
|
||||||
|
cap->capture[level].len = -1;
|
||||||
|
cap->level = level+1;
|
||||||
|
if ((res=match(s, p+1, cap)) == NULL) /* match failed? */
|
||||||
|
cap->level--; /* undo capture */
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static char *end_capt (char *s, char *p, struct Capture *cap) {
|
||||||
|
int l = capture_to_close(cap);
|
||||||
|
char *res;
|
||||||
|
cap->capture[l].len = s - cap->capture[l].init; /* close capture */
|
||||||
|
if ((res = match(s, p+1, cap)) == NULL) /* match failed? */
|
||||||
|
cap->capture[l].len = -1; /* undo capture */
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static char *match_capture (char *s, int level, struct Capture *cap) {
|
||||||
|
int l = check_cap(level, cap);
|
||||||
|
int len = cap->capture[l].len;
|
||||||
|
if (cap->src_end-s >= len &&
|
||||||
|
memcmp(cap->capture[l].init, s, len) == 0)
|
||||||
|
return s+len;
|
||||||
|
else return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static char *match (char *s, char *p, struct Capture *cap) {
|
static char *match (char *s, char *p, struct Capture *cap) {
|
||||||
init: /* using goto's to optimize tail recursion */
|
init: /* using goto's to optimize tail recursion */
|
||||||
switch (*p) {
|
switch (*p) {
|
||||||
case '(': { /* start capture */
|
case '(': /* start capture */
|
||||||
char *res;
|
return start_capt(s, p, cap);
|
||||||
if (cap->level >= MAX_CAPT) lua_error("too many captures");
|
case ')': /* end capture */
|
||||||
cap->capture[cap->level].init = s;
|
return end_capt(s, p, cap);
|
||||||
cap->capture[cap->level].len = -1;
|
case ESC: /* may be %[0-9] or %b */
|
||||||
cap->level++;
|
if (isdigit((unsigned char)(*(p+1)))) { /* capture? */
|
||||||
if ((res=match(s, p+1, cap)) == NULL) /* match failed? */
|
s = match_capture(s, *(p+1), cap);
|
||||||
cap->level--; /* undo capture */
|
if (s == NULL) return NULL;
|
||||||
return res;
|
p+=2; goto init; /* else return match(s, p+2, cap) */
|
||||||
}
|
}
|
||||||
case ')': { /* end capture */
|
else if (*(p+1) == 'b') { /* balanced string? */
|
||||||
int l = capture_to_close(cap);
|
s = matchbalance(s, p+2, cap);
|
||||||
char *res;
|
if (s == NULL) return NULL;
|
||||||
cap->capture[l].len = s - cap->capture[l].init; /* close capture */
|
p+=4; goto init; /* else return match(s, p+4, cap); */
|
||||||
if ((res = match(s, p+1, cap)) == NULL) /* match failed? */
|
}
|
||||||
cap->capture[l].len = -1; /* undo capture */
|
else goto dflt; /* case default */
|
||||||
return res;
|
|
||||||
}
|
|
||||||
case '\0': /* end of pattern */
|
case '\0': /* end of pattern */
|
||||||
return s; /* match succeeded */
|
return s; /* match succeeded */
|
||||||
case '$':
|
case '$':
|
||||||
if (*(p+1) == '\0') /* is the '$' the last char in pattern? */
|
if (*(p+1) == '\0') /* is the '$' the last char in pattern? */
|
||||||
return (s == cap->src_end) ? s : NULL; /* check end of string */
|
return (s == cap->src_end) ? s : NULL; /* check end of string */
|
||||||
/* else is a regular '$'; go through */
|
else goto dflt;
|
||||||
default: { /* it is a pattern item */
|
default: dflt: { /* it is a pattern item */
|
||||||
char *ep; /* will point to what is next */
|
char *ep = luaI_classend(p); /* points to what is next */
|
||||||
char *s1 = matchitem(s, p, cap, &ep);
|
int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep);
|
||||||
switch (*ep) {
|
switch (*ep) {
|
||||||
case '*': { /* repetition */
|
|
||||||
char *res;
|
|
||||||
if (s1 && s1>s && ((res=match(s1, p, cap)) != NULL))
|
|
||||||
return res;
|
|
||||||
p=ep+1; goto init; /* else return match(s, ep+1, cap); */
|
|
||||||
}
|
|
||||||
case '?': { /* optional */
|
case '?': { /* optional */
|
||||||
char *res;
|
char *res;
|
||||||
if (s1 && ((res=match(s1, ep+1, cap)) != NULL))
|
if (m && ((res=match(s+1, ep+1, cap)) != NULL))
|
||||||
return res;
|
return res;
|
||||||
p=ep+1; goto init; /* else return match(s, ep+1, cap); */
|
p=ep+1; goto init; /* else return match(s, ep+1, cap); */
|
||||||
}
|
}
|
||||||
case '-': { /* repetition */
|
case '*': /* 0 or more repetitions */
|
||||||
char *res;
|
return max_expand(s, p, ep, cap);
|
||||||
if ((res = match(s, ep+1, cap)) != NULL)
|
case '+': /* 1 or more repetitions */
|
||||||
return res;
|
return (m ? max_expand(s+1, p, ep, cap) : NULL);
|
||||||
else if (s1 && s1>s) {
|
case '-': /* 0 or more repetitions (minimum) */
|
||||||
s = s1;
|
return min_expand(s, p, ep, cap);
|
||||||
goto init; /* return match(s1, p, cap); */
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, cap); */
|
if (!m) return NULL;
|
||||||
else return NULL;
|
s++; p=ep; goto init; /* else return match(s+1, ep, cap); */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
5
lualib.h
5
lualib.h
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
** $Id: lualib.h,v 1.4 1998/06/19 16:14:09 roberto Exp roberto $
|
** $Id: lualib.h,v 1.5 1999/01/08 16:47:44 roberto Exp roberto $
|
||||||
** Lua standard libraries
|
** Lua standard libraries
|
||||||
** See Copyright Notice in lua.h
|
** See Copyright Notice in lua.h
|
||||||
*/
|
*/
|
||||||
@ -29,7 +29,8 @@ void lua_userinit (void);
|
|||||||
|
|
||||||
/* Auxiliary functions (private) */
|
/* Auxiliary functions (private) */
|
||||||
|
|
||||||
int luaI_singlematch (int c, char *p, char **ep);
|
char *luaI_classend (char *p);
|
||||||
|
int luaI_singlematch (int c, char *p, char *ep);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user