mirror of
https://github.com/lua/lua
synced 2024-11-22 21:01:26 +03:00
patterns now accept '\0' as a regular character
This commit is contained in:
parent
9100f7479a
commit
4541243355
66
lstrlib.c
66
lstrlib.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
** $Id: lstrlib.c,v 1.148 2010/01/04 16:37:19 roberto Exp roberto $
|
** $Id: lstrlib.c,v 1.149 2010/04/09 16:14:46 roberto Exp roberto $
|
||||||
** Standard library for string operations and pattern-matching
|
** Standard library for string operations and pattern-matching
|
||||||
** See Copyright Notice in lua.h
|
** See Copyright Notice in lua.h
|
||||||
*/
|
*/
|
||||||
@ -180,7 +180,8 @@ static int str_dump (lua_State *L) {
|
|||||||
|
|
||||||
typedef struct MatchState {
|
typedef struct MatchState {
|
||||||
const char *src_init; /* init of source string */
|
const char *src_init; /* init of source string */
|
||||||
const char *src_end; /* end (`\0') of source string */
|
const char *src_end; /* end ('\0') of source string */
|
||||||
|
const char *p_end; /* end ('\0') of pattern */
|
||||||
lua_State *L;
|
lua_State *L;
|
||||||
int level; /* total number of captures (finished or unfinished) */
|
int level; /* total number of captures (finished or unfinished) */
|
||||||
struct {
|
struct {
|
||||||
@ -213,16 +214,16 @@ static int capture_to_close (MatchState *ms) {
|
|||||||
static const char *classend (MatchState *ms, const char *p) {
|
static const char *classend (MatchState *ms, const char *p) {
|
||||||
switch (*p++) {
|
switch (*p++) {
|
||||||
case L_ESC: {
|
case L_ESC: {
|
||||||
if (*p == '\0')
|
if (p == ms->p_end)
|
||||||
luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")");
|
luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")");
|
||||||
return p+1;
|
return p+1;
|
||||||
}
|
}
|
||||||
case '[': {
|
case '[': {
|
||||||
if (*p == '^') p++;
|
if (*p == '^') p++;
|
||||||
do { /* look for a `]' */
|
do { /* look for a `]' */
|
||||||
if (*p == '\0')
|
if (p == ms->p_end)
|
||||||
luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")");
|
luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")");
|
||||||
if (*(p++) == L_ESC && *p != '\0')
|
if (*(p++) == L_ESC && p < ms->p_end)
|
||||||
p++; /* skip escapes (e.g. `%]') */
|
p++; /* skip escapes (e.g. `%]') */
|
||||||
} while (*p != ']');
|
} while (*p != ']');
|
||||||
return p+1;
|
return p+1;
|
||||||
@ -246,7 +247,7 @@ static int match_class (int c, int cl) {
|
|||||||
case 'u' : res = isupper(c); break;
|
case 'u' : res = isupper(c); break;
|
||||||
case 'w' : res = isalnum(c); break;
|
case 'w' : res = isalnum(c); break;
|
||||||
case 'x' : res = isxdigit(c); break;
|
case 'x' : res = isxdigit(c); break;
|
||||||
case 'z' : res = (c == 0); break;
|
case 'z' : res = (c == 0); break; /* deprecated option */
|
||||||
default: return (cl == c);
|
default: return (cl == c);
|
||||||
}
|
}
|
||||||
return (islower(cl) ? res : !res);
|
return (islower(cl) ? res : !res);
|
||||||
@ -291,8 +292,9 @@ static const char *match (MatchState *ms, const char *s, const char *p);
|
|||||||
|
|
||||||
static const char *matchbalance (MatchState *ms, const char *s,
|
static const char *matchbalance (MatchState *ms, const char *s,
|
||||||
const char *p) {
|
const char *p) {
|
||||||
if (*p == 0 || *(p+1) == 0)
|
if (p >= ms->p_end - 1)
|
||||||
luaL_error(ms->L, "unbalanced pattern");
|
luaL_error(ms->L, "malformed pattern "
|
||||||
|
"(missing arguments to " LUA_QL("%%b") ")");
|
||||||
if (*s != *p) return NULL;
|
if (*s != *p) return NULL;
|
||||||
else {
|
else {
|
||||||
int b = *p;
|
int b = *p;
|
||||||
@ -375,6 +377,8 @@ static const char *match_capture (MatchState *ms, const char *s, int l) {
|
|||||||
|
|
||||||
static const char *match (MatchState *ms, const char *s, const char *p) {
|
static const char *match (MatchState *ms, const char *s, const char *p) {
|
||||||
init: /* using goto's to optimize tail recursion */
|
init: /* using goto's to optimize tail recursion */
|
||||||
|
if (p == ms->p_end) /* end of pattern? */
|
||||||
|
return s; /* match succeeded */
|
||||||
switch (*p) {
|
switch (*p) {
|
||||||
case '(': { /* start capture */
|
case '(': { /* start capture */
|
||||||
if (*(p+1) == ')') /* position capture? */
|
if (*(p+1) == ')') /* position capture? */
|
||||||
@ -385,11 +389,8 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
|
|||||||
case ')': { /* end capture */
|
case ')': { /* end capture */
|
||||||
return end_capture(ms, s, p+1);
|
return end_capture(ms, s, p+1);
|
||||||
}
|
}
|
||||||
case '\0': { /* end of pattern */
|
|
||||||
return s; /* match succeeded */
|
|
||||||
}
|
|
||||||
case '$': {
|
case '$': {
|
||||||
if (*(p+1) == '\0') /* is the `$' the last char in pattern? */
|
if ((p+1) == ms->p_end) /* is the `$' the last char in pattern? */
|
||||||
return (s == ms->src_end) ? s : NULL; /* check end of string */
|
return (s == ms->src_end) ? s : NULL; /* check end of string */
|
||||||
else goto dflt;
|
else goto dflt;
|
||||||
}
|
}
|
||||||
@ -419,12 +420,12 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
|
|||||||
if (s == NULL) return NULL;
|
if (s == NULL) return NULL;
|
||||||
p+=2; goto init; /* else return match(ms, s, p+2) */
|
p+=2; goto init; /* else return match(ms, s, p+2) */
|
||||||
}
|
}
|
||||||
default: break; /* go through to 'dflt' */
|
default: goto dflt;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
default: dflt: { /* pattern class plus optional sufix */
|
default: dflt: { /* pattern class plus optional sufix */
|
||||||
const char *ep = classend(ms, p); /* points to what is next */
|
const char *ep = classend(ms, p); /* points to what is next */
|
||||||
int m = s<ms->src_end && singlematch(uchar(*s), p, ep);
|
int m = s < ms->src_end && singlematch(uchar(*s), p, ep);
|
||||||
switch (*ep) {
|
switch (*ep) {
|
||||||
case '?': { /* optional */
|
case '?': { /* optional */
|
||||||
const char *res;
|
const char *res;
|
||||||
@ -504,32 +505,36 @@ static int push_captures (MatchState *ms, const char *s, const char *e) {
|
|||||||
|
|
||||||
|
|
||||||
static int str_find_aux (lua_State *L, int find) {
|
static int str_find_aux (lua_State *L, int find) {
|
||||||
size_t l1, l2;
|
size_t ls, lp;
|
||||||
const char *s = luaL_checklstring(L, 1, &l1);
|
const char *s = luaL_checklstring(L, 1, &ls);
|
||||||
const char *p = luaL_checklstring(L, 2, &l2);
|
const char *p = luaL_checklstring(L, 2, &lp);
|
||||||
size_t init = posrelat(luaL_optinteger(L, 3, 1), l1);
|
size_t init = posrelat(luaL_optinteger(L, 3, 1), ls);
|
||||||
if (init < 1) init = 1;
|
if (init < 1) init = 1;
|
||||||
else if (init > l1 + 1) { /* start after string's end? */
|
else if (init > ls + 1) { /* start after string's end? */
|
||||||
lua_pushnil(L); /* cannot find anything */
|
lua_pushnil(L); /* cannot find anything */
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (find && (lua_toboolean(L, 4) || /* explicit request? */
|
if (find && (lua_toboolean(L, 4) || /* explicit request? */
|
||||||
strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
|
strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
|
||||||
/* do a plain search */
|
/* do a plain search */
|
||||||
const char *s2 = lmemfind(s + init - 1, l1 - init + 1, p, l2);
|
const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp);
|
||||||
if (s2) {
|
if (s2) {
|
||||||
lua_pushinteger(L, s2 - s + 1);
|
lua_pushinteger(L, s2 - s + 1);
|
||||||
lua_pushinteger(L, s2 - s + l2);
|
lua_pushinteger(L, s2 - s + lp);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
MatchState ms;
|
MatchState ms;
|
||||||
int anchor = (*p == '^') ? (p++, 1) : 0;
|
|
||||||
const char *s1 = s + init - 1;
|
const char *s1 = s + init - 1;
|
||||||
|
int anchor = (*p == '^');
|
||||||
|
if (anchor) {
|
||||||
|
p++; lp--; /* skip anchor character */
|
||||||
|
}
|
||||||
ms.L = L;
|
ms.L = L;
|
||||||
ms.src_init = s;
|
ms.src_init = s;
|
||||||
ms.src_end = s + l1;
|
ms.src_end = s + ls;
|
||||||
|
ms.p_end = p + lp;
|
||||||
do {
|
do {
|
||||||
const char *res;
|
const char *res;
|
||||||
ms.level = 0;
|
ms.level = 0;
|
||||||
@ -561,13 +566,14 @@ static int str_match (lua_State *L) {
|
|||||||
|
|
||||||
static int gmatch_aux (lua_State *L) {
|
static int gmatch_aux (lua_State *L) {
|
||||||
MatchState ms;
|
MatchState ms;
|
||||||
size_t ls;
|
size_t ls, lp;
|
||||||
const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
|
const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
|
||||||
const char *p = lua_tostring(L, lua_upvalueindex(2));
|
const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp);
|
||||||
const char *src;
|
const char *src;
|
||||||
ms.L = L;
|
ms.L = L;
|
||||||
ms.src_init = s;
|
ms.src_init = s;
|
||||||
ms.src_end = s+ls;
|
ms.src_end = s+ls;
|
||||||
|
ms.p_end = p + lp;
|
||||||
for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3));
|
for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3));
|
||||||
src <= ms.src_end;
|
src <= ms.src_end;
|
||||||
src++) {
|
src++) {
|
||||||
@ -659,12 +665,12 @@ static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
|
|||||||
|
|
||||||
|
|
||||||
static int str_gsub (lua_State *L) {
|
static int str_gsub (lua_State *L) {
|
||||||
size_t srcl;
|
size_t srcl, lp;
|
||||||
const char *src = luaL_checklstring(L, 1, &srcl);
|
const char *src = luaL_checklstring(L, 1, &srcl);
|
||||||
const char *p = luaL_checkstring(L, 2);
|
const char *p = luaL_checklstring(L, 2, &lp);
|
||||||
int tr = lua_type(L, 3);
|
int tr = lua_type(L, 3);
|
||||||
size_t max_s = luaL_optinteger(L, 4, srcl+1);
|
size_t max_s = luaL_optinteger(L, 4, srcl+1);
|
||||||
int anchor = (*p == '^') ? (p++, 1) : 0;
|
int anchor = (*p == '^');
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
MatchState ms;
|
MatchState ms;
|
||||||
luaL_Buffer b;
|
luaL_Buffer b;
|
||||||
@ -672,9 +678,13 @@ static int str_gsub (lua_State *L) {
|
|||||||
tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
|
tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
|
||||||
"string/function/table expected");
|
"string/function/table expected");
|
||||||
luaL_buffinit(L, &b);
|
luaL_buffinit(L, &b);
|
||||||
|
if (anchor) {
|
||||||
|
p++; lp--; /* skip anchor character */
|
||||||
|
}
|
||||||
ms.L = L;
|
ms.L = L;
|
||||||
ms.src_init = src;
|
ms.src_init = src;
|
||||||
ms.src_end = src+srcl;
|
ms.src_end = src+srcl;
|
||||||
|
ms.p_end = p + lp;
|
||||||
while (n < max_s) {
|
while (n < max_s) {
|
||||||
const char *e;
|
const char *e;
|
||||||
ms.level = 0;
|
ms.level = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user