Small optimizations in 'string.gsub'

Avoid creating extra strings when possible:

- avoid creating new resulting string when subject was not modified
(instead, return the subject itself);

- avoid creating strings representing the captured substrings when
handling replacements like '%1' (instead, add the substring directly
to the buffer).
This commit is contained in:
Roberto Ierusalimschy 2019-04-11 11:29:16 -03:00
parent a93e014447
commit b0810c51c3
3 changed files with 115 additions and 47 deletions

130
lstrlib.c
View File

@ -660,25 +660,46 @@ static const char *lmemfind (const char *s1, size_t l1,
}
static void push_onecapture (MatchState *ms, int i, const char *s,
const char *e) {
/*
** get information about the i-th capture. If there are no captures
** and 'i==0', return information about the whole match, which
** is the range 's'..'e'. If the capture is a string, return
** its length and put its address in '*cap'. If it is an integer
** (a position), push it on the stack and return CAP_POSITION.
*/
static size_t get_onecapture (MatchState *ms, int i, const char *s,
const char *e, const char **cap) {
if (i >= ms->level) {
if (i == 0) /* ms->level == 0, too */
lua_pushlstring(ms->L, s, e - s); /* add whole match */
else
if (i != 0)
luaL_error(ms->L, "invalid capture index %%%d", i + 1);
*cap = s;
return e - s;
}
else {
ptrdiff_t l = ms->capture[i].len;
if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture");
if (l == CAP_POSITION)
ptrdiff_t capl = ms->capture[i].len;
*cap = ms->capture[i].init;
if (capl == CAP_UNFINISHED)
luaL_error(ms->L, "unfinished capture");
else if (capl == CAP_POSITION)
lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1);
else
lua_pushlstring(ms->L, ms->capture[i].init, l);
return capl;
}
}
/*
** Push the i-th capture on the stack.
*/
static void push_onecapture (MatchState *ms, int i, const char *s,
const char *e) {
const char *cap;
ptrdiff_t l = get_onecapture(ms, i, s, e, &cap);
if (l != CAP_POSITION)
lua_pushlstring(ms->L, cap, l);
/* else position was already pushed */
}
static int push_captures (MatchState *ms, const char *s, const char *e) {
int i;
int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
@ -817,60 +838,72 @@ static int gmatch (lua_State *L) {
static void add_s (MatchState *ms, luaL_Buffer *b, const char *s,
const char *e) {
size_t l, i;
size_t l;
lua_State *L = ms->L;
const char *news = lua_tolstring(L, 3, &l);
for (i = 0; i < l; i++) {
if (news[i] != L_ESC)
luaL_addchar(b, news[i]);
else {
i++; /* skip ESC */
if (!isdigit(uchar(news[i]))) {
if (news[i] != L_ESC)
luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
luaL_addchar(b, news[i]);
}
else if (news[i] == '0')
luaL_addlstring(b, s, e - s);
else {
push_onecapture(ms, news[i] - '1', s, e);
luaL_tolstring(L, -1, NULL); /* if number, convert it to string */
lua_remove(L, -2); /* remove original value */
luaL_addvalue(b); /* add capture to accumulated result */
}
const char *p;
while ((p = (char *)memchr(news, L_ESC, l)) != NULL) {
luaL_addlstring(b, news, p - news);
p++; /* skip ESC */
if (*p == L_ESC) /* '%%' */
luaL_addchar(b, *p);
else if (*p == '0') /* '%0' */
luaL_addlstring(b, s, e - s);
else if (isdigit(uchar(*p))) { /* '%n' */
const char *cap;
ptrdiff_t resl = get_onecapture(ms, *p - '1', s, e, &cap);
if (resl == CAP_POSITION)
luaL_addvalue(b); /* add position to accumulated result */
else
luaL_addlstring(b, cap, resl);
}
else
luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
l -= p + 1 - news;
news = p + 1;
}
luaL_addlstring(b, news, l);
}
static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
const char *e, int tr) {
/*
** Add the replacement value to the string buffer 'b'.
** Return true if the original string was changed. (Function calls and
** table indexing resulting in nil or false do not change the subject.)
*/
static int add_value (MatchState *ms, luaL_Buffer *b, const char *s,
const char *e, int tr) {
lua_State *L = ms->L;
switch (tr) {
case LUA_TFUNCTION: {
case LUA_TFUNCTION: { /* call the function */
int n;
lua_pushvalue(L, 3);
n = push_captures(ms, s, e);
lua_call(L, n, 1);
lua_pushvalue(L, 3); /* push the function */
n = push_captures(ms, s, e); /* all captures as arguments */
lua_call(L, n, 1); /* call it */
break;
}
case LUA_TTABLE: {
push_onecapture(ms, 0, s, e);
case LUA_TTABLE: { /* index the table */
push_onecapture(ms, 0, s, e); /* first capture is the index */
lua_gettable(L, 3);
break;
}
default: { /* LUA_TNUMBER or LUA_TSTRING */
add_s(ms, b, s, e);
return;
add_s(ms, b, s, e); /* add value to the buffer */
return 1; /* something changed */
}
}
if (!lua_toboolean(L, -1)) { /* nil or false? */
lua_pop(L, 1);
lua_pushlstring(L, s, e - s); /* keep original text */
lua_pop(L, 1); /* remove value */
luaL_addlstring(b, s, e - s); /* keep original text */
return 0; /* no changes */
}
else if (!lua_isstring(L, -1))
luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
luaL_addvalue(b); /* add result to accumulator */
return luaL_error(L, "invalid replacement value (a %s)",
luaL_typename(L, -1));
else {
luaL_addvalue(b); /* add result to accumulator */
return 1; /* something changed */
}
}
@ -883,6 +916,7 @@ static int str_gsub (lua_State *L) {
lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1); /* max replacements */
int anchor = (*p == '^');
lua_Integer n = 0; /* replacement count */
int changed = 0; /* change flag */
MatchState ms;
luaL_Buffer b;
luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
@ -898,7 +932,7 @@ static int str_gsub (lua_State *L) {
reprepstate(&ms); /* (re)prepare state for new match */
if ((e = match(&ms, src, p)) != NULL && e != lastmatch) { /* match? */
n++;
add_value(&ms, &b, src, e, tr); /* add replacement to buffer */
changed = add_value(&ms, &b, src, e, tr) | changed;
src = lastmatch = e;
}
else if (src < ms.src_end) /* otherwise, skip one character */
@ -906,8 +940,12 @@ static int str_gsub (lua_State *L) {
else break; /* end of subject */
if (anchor) break;
}
luaL_addlstring(&b, src, ms.src_end-src);
luaL_pushresult(&b);
if (!changed) /* no changes? */
lua_pushvalue(L, 1); /* return original string */
else { /* something changed */
luaL_addlstring(&b, src, ms.src_end-src);
luaL_pushresult(&b); /* create and return new string */
}
lua_pushinteger(L, n); /* number of substitutions */
return 2;
}

View File

@ -113,7 +113,7 @@ do
contCreate = 0
while contCreate <= limit do
a = contCreate .. "b";
a = string.gsub(a, '(%d%d*)', string.upper)
a = string.gsub(a, '(%d%d*)', "%1 %1")
a = "a"
contCreate = contCreate+1
end

View File

@ -387,5 +387,35 @@ assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
assert(string.find("abc\0\0","\0.") == 4)
assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
do -- test reuse of original string in gsub
local s = string.rep("a", 100)
local r = string.gsub(s, "b", "c") -- no match
assert(string.format("%p", s) == string.format("%p", r))
r = string.gsub(s, ".", {x = "y"}) -- no substitutions
assert(string.format("%p", s) == string.format("%p", r))
local count = 0
r = string.gsub(s, ".", function (x)
assert(x == "a")
count = count + 1
return nil -- no substitution
end)
r = string.gsub(r, ".", {b = 'x'}) -- "a" is not a key; no subst.
assert(count == 100)
assert(string.format("%p", s) == string.format("%p", r))
count = 0
r = string.gsub(s, ".", function (x)
assert(x == "a")
count = count + 1
return x -- substitution...
end)
assert(count == 100)
-- no reuse in this case
assert(r == s and string.format("%p", s) ~= string.format("%p", r))
end
print('OK')