mirror of
https://github.com/lua/lua
synced 2024-11-22 12:51:30 +03:00
Small optimizations in 'string.gsub'
Avoid creating extra strings when possible: (1) avoid creating a new resulting string when the subject was not modified (instead, return the subject itself); (2) avoid creating strings representing the captured substrings when handling replacements like '%1' (instead, add the substring directly to the buffer).
This commit is contained in:
parent
a93e014447
commit
b0810c51c3
130
lstrlib.c
130
lstrlib.c
@ -660,25 +660,46 @@ static const char *lmemfind (const char *s1, size_t l1,
|
||||
}
|
||||
|
||||
|
||||
static void push_onecapture (MatchState *ms, int i, const char *s,
|
||||
const char *e) {
|
||||
/*
|
||||
** get information about the i-th capture. If there are no captures
|
||||
** and 'i==0', return information about the whole match, which
|
||||
** is the range 's'..'e'. If the capture is a string, return
|
||||
** its length and put its address in '*cap'. If it is an integer
|
||||
** (a position), push it on the stack and return CAP_POSITION.
|
||||
*/
|
||||
static size_t get_onecapture (MatchState *ms, int i, const char *s,
|
||||
const char *e, const char **cap) {
|
||||
if (i >= ms->level) {
|
||||
if (i == 0) /* ms->level == 0, too */
|
||||
lua_pushlstring(ms->L, s, e - s); /* add whole match */
|
||||
else
|
||||
if (i != 0)
|
||||
luaL_error(ms->L, "invalid capture index %%%d", i + 1);
|
||||
*cap = s;
|
||||
return e - s;
|
||||
}
|
||||
else {
|
||||
ptrdiff_t l = ms->capture[i].len;
|
||||
if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture");
|
||||
if (l == CAP_POSITION)
|
||||
ptrdiff_t capl = ms->capture[i].len;
|
||||
*cap = ms->capture[i].init;
|
||||
if (capl == CAP_UNFINISHED)
|
||||
luaL_error(ms->L, "unfinished capture");
|
||||
else if (capl == CAP_POSITION)
|
||||
lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1);
|
||||
else
|
||||
lua_pushlstring(ms->L, ms->capture[i].init, l);
|
||||
return capl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Push the i-th capture on the stack.
|
||||
*/
|
||||
static void push_onecapture (MatchState *ms, int i, const char *s,
|
||||
const char *e) {
|
||||
const char *cap;
|
||||
ptrdiff_t l = get_onecapture(ms, i, s, e, &cap);
|
||||
if (l != CAP_POSITION)
|
||||
lua_pushlstring(ms->L, cap, l);
|
||||
/* else position was already pushed */
|
||||
}
|
||||
|
||||
|
||||
static int push_captures (MatchState *ms, const char *s, const char *e) {
|
||||
int i;
|
||||
int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
|
||||
@ -817,60 +838,72 @@ static int gmatch (lua_State *L) {
|
||||
|
||||
static void add_s (MatchState *ms, luaL_Buffer *b, const char *s,
|
||||
const char *e) {
|
||||
size_t l, i;
|
||||
size_t l;
|
||||
lua_State *L = ms->L;
|
||||
const char *news = lua_tolstring(L, 3, &l);
|
||||
for (i = 0; i < l; i++) {
|
||||
if (news[i] != L_ESC)
|
||||
luaL_addchar(b, news[i]);
|
||||
else {
|
||||
i++; /* skip ESC */
|
||||
if (!isdigit(uchar(news[i]))) {
|
||||
if (news[i] != L_ESC)
|
||||
luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
|
||||
luaL_addchar(b, news[i]);
|
||||
}
|
||||
else if (news[i] == '0')
|
||||
luaL_addlstring(b, s, e - s);
|
||||
else {
|
||||
push_onecapture(ms, news[i] - '1', s, e);
|
||||
luaL_tolstring(L, -1, NULL); /* if number, convert it to string */
|
||||
lua_remove(L, -2); /* remove original value */
|
||||
luaL_addvalue(b); /* add capture to accumulated result */
|
||||
}
|
||||
const char *p;
|
||||
while ((p = (char *)memchr(news, L_ESC, l)) != NULL) {
|
||||
luaL_addlstring(b, news, p - news);
|
||||
p++; /* skip ESC */
|
||||
if (*p == L_ESC) /* '%%' */
|
||||
luaL_addchar(b, *p);
|
||||
else if (*p == '0') /* '%0' */
|
||||
luaL_addlstring(b, s, e - s);
|
||||
else if (isdigit(uchar(*p))) { /* '%n' */
|
||||
const char *cap;
|
||||
ptrdiff_t resl = get_onecapture(ms, *p - '1', s, e, &cap);
|
||||
if (resl == CAP_POSITION)
|
||||
luaL_addvalue(b); /* add position to accumulated result */
|
||||
else
|
||||
luaL_addlstring(b, cap, resl);
|
||||
}
|
||||
else
|
||||
luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
|
||||
l -= p + 1 - news;
|
||||
news = p + 1;
|
||||
}
|
||||
luaL_addlstring(b, news, l);
|
||||
}
|
||||
|
||||
|
||||
static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
|
||||
const char *e, int tr) {
|
||||
/*
|
||||
** Add the replacement value to the string buffer 'b'.
|
||||
** Return true if the original string was changed. (Function calls and
|
||||
** table indexing resulting in nil or false do not change the subject.)
|
||||
*/
|
||||
static int add_value (MatchState *ms, luaL_Buffer *b, const char *s,
|
||||
const char *e, int tr) {
|
||||
lua_State *L = ms->L;
|
||||
switch (tr) {
|
||||
case LUA_TFUNCTION: {
|
||||
case LUA_TFUNCTION: { /* call the function */
|
||||
int n;
|
||||
lua_pushvalue(L, 3);
|
||||
n = push_captures(ms, s, e);
|
||||
lua_call(L, n, 1);
|
||||
lua_pushvalue(L, 3); /* push the function */
|
||||
n = push_captures(ms, s, e); /* all captures as arguments */
|
||||
lua_call(L, n, 1); /* call it */
|
||||
break;
|
||||
}
|
||||
case LUA_TTABLE: {
|
||||
push_onecapture(ms, 0, s, e);
|
||||
case LUA_TTABLE: { /* index the table */
|
||||
push_onecapture(ms, 0, s, e); /* first capture is the index */
|
||||
lua_gettable(L, 3);
|
||||
break;
|
||||
}
|
||||
default: { /* LUA_TNUMBER or LUA_TSTRING */
|
||||
add_s(ms, b, s, e);
|
||||
return;
|
||||
add_s(ms, b, s, e); /* add value to the buffer */
|
||||
return 1; /* something changed */
|
||||
}
|
||||
}
|
||||
if (!lua_toboolean(L, -1)) { /* nil or false? */
|
||||
lua_pop(L, 1);
|
||||
lua_pushlstring(L, s, e - s); /* keep original text */
|
||||
lua_pop(L, 1); /* remove value */
|
||||
luaL_addlstring(b, s, e - s); /* keep original text */
|
||||
return 0; /* no changes */
|
||||
}
|
||||
else if (!lua_isstring(L, -1))
|
||||
luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
|
||||
luaL_addvalue(b); /* add result to accumulator */
|
||||
return luaL_error(L, "invalid replacement value (a %s)",
|
||||
luaL_typename(L, -1));
|
||||
else {
|
||||
luaL_addvalue(b); /* add result to accumulator */
|
||||
return 1; /* something changed */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -883,6 +916,7 @@ static int str_gsub (lua_State *L) {
|
||||
lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); /* max replacements */
|
||||
int anchor = (*p == '^');
|
||||
lua_Integer n = 0; /* replacement count */
|
||||
int changed = 0; /* change flag */
|
||||
MatchState ms;
|
||||
luaL_Buffer b;
|
||||
luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
|
||||
@ -898,7 +932,7 @@ static int str_gsub (lua_State *L) {
|
||||
reprepstate(&ms); /* (re)prepare state for new match */
|
||||
if ((e = match(&ms, src, p)) != NULL && e != lastmatch) { /* match? */
|
||||
n++;
|
||||
add_value(&ms, &b, src, e, tr); /* add replacement to buffer */
|
||||
changed = add_value(&ms, &b, src, e, tr) | changed;
|
||||
src = lastmatch = e;
|
||||
}
|
||||
else if (src < ms.src_end) /* otherwise, skip one character */
|
||||
@ -906,8 +940,12 @@ static int str_gsub (lua_State *L) {
|
||||
else break; /* end of subject */
|
||||
if (anchor) break;
|
||||
}
|
||||
luaL_addlstring(&b, src, ms.src_end-src);
|
||||
luaL_pushresult(&b);
|
||||
if (!changed) /* no changes? */
|
||||
lua_pushvalue(L, 1); /* return original string */
|
||||
else { /* something changed */
|
||||
luaL_addlstring(&b, src, ms.src_end-src);
|
||||
luaL_pushresult(&b); /* create and return new string */
|
||||
}
|
||||
lua_pushinteger(L, n); /* number of substitutions */
|
||||
return 2;
|
||||
}
|
||||
|
@ -113,7 +113,7 @@ do
|
||||
contCreate = 0
|
||||
while contCreate <= limit do
|
||||
a = contCreate .. "b";
|
||||
a = string.gsub(a, '(%d%d*)', string.upper)
|
||||
a = string.gsub(a, '(%d%d*)', "%1 %1")
|
||||
a = "a"
|
||||
contCreate = contCreate+1
|
||||
end
|
||||
|
@ -387,5 +387,35 @@ assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
|
||||
assert(string.find("abc\0\0","\0.") == 4)
|
||||
assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
|
||||
|
||||
|
||||
do -- test reuse of original string in gsub
|
||||
local s = string.rep("a", 100)
|
||||
local r = string.gsub(s, "b", "c") -- no match
|
||||
assert(string.format("%p", s) == string.format("%p", r))
|
||||
|
||||
r = string.gsub(s, ".", {x = "y"}) -- no substitutions
|
||||
assert(string.format("%p", s) == string.format("%p", r))
|
||||
|
||||
local count = 0
|
||||
r = string.gsub(s, ".", function (x)
|
||||
assert(x == "a")
|
||||
count = count + 1
|
||||
return nil -- no substitution
|
||||
end)
|
||||
r = string.gsub(r, ".", {b = 'x'}) -- "a" is not a key; no subst.
|
||||
assert(count == 100)
|
||||
assert(string.format("%p", s) == string.format("%p", r))
|
||||
|
||||
count = 0
|
||||
r = string.gsub(s, ".", function (x)
|
||||
assert(x == "a")
|
||||
count = count + 1
|
||||
return x -- substitution...
|
||||
end)
|
||||
assert(count == 100)
|
||||
-- no reuse in this case
|
||||
assert(r == s and string.format("%p", s) ~= string.format("%p", r))
|
||||
end
|
||||
|
||||
print('OK')
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user