External strings

Strings can use external buffers to store their contents.
This commit is contained in:
Roberto Ierusalimschy 2023-11-09 17:05:42 -03:00
parent 7f4906f565
commit 024f9064f1
9 changed files with 195 additions and 14 deletions

14
lapi.c
View File

@ -535,6 +535,20 @@ LUA_API const char *lua_pushlstring (lua_State *L, const char *s, size_t len) {
}
/*
** Pushes onto the stack a string whose content is the external buffer
** 's' (length 'len'; the buffer must satisfy s[len] == '\0') and
** returns a pointer to the content of the resulting string object.
** 'falloc' and 'ud' are forwarded unchanged to 'luaS_newextlstr'.
*/
LUA_API const char *lua_pushextlstring (lua_State *L,
                const char *s, size_t len, lua_Alloc falloc, void *ud) {
  TString *ext;
  lua_lock(L);
  api_check(L, s[len] == '\0', "string not ending with zero");
  ext = luaS_newextlstr(L, s, len, falloc, ud);
  setsvalue2s(L, L->top.p, ext);  /* store new string at the stack top */
  api_incr_top(L);
  luaC_checkGC(L);
  lua_unlock(L);
  return getstr(ext);
}
LUA_API const char *lua_pushstring (lua_State *L, const char *s) {
lua_lock(L);
if (s == NULL)

4
lgc.c
View File

@ -813,7 +813,9 @@ static void freeobj (lua_State *L, GCObject *o) {
}
case LUA_VLNGSTR: {
TString *ts = gco2ts(o);
luaM_freemem(L, ts, sizestrlng(ts->u.lnglen));
if (ts->shrlen == LSTRMEM) /* must free external string? */
(*ts->falloc)(ts->ud, ts->contents, ts->u.lnglen + 1, 0);
luaM_freemem(L, ts, luaS_sizelngstr(ts->u.lnglen, ts->shrlen));
break;
}
default: lua_assert(0);

View File

@ -382,6 +382,12 @@ typedef struct GCObject {
#define setsvalue2n setsvalue
/*
** Kinds of long strings (stored in 'shrlen'). All kinds are negative
** values.
*/
#define LSTRREG -1 /* regular long string (content in the same block as the header) */
#define LSTRFIX -2 /* fixed external long string (no deallocation function) */
#define LSTRMEM -3 /* external long string with deallocation ('falloc' called when freed) */
/*
** Header for a string value.
*/
@ -395,6 +401,8 @@ typedef struct TString {
struct TString *hnext; /* linked list for hash table */
} u;
char *contents; /* pointer to content in long strings */
lua_Alloc falloc; /* deallocation function for external strings */
void *ud; /* user data for external strings */
} TString;

View File

@ -136,6 +136,20 @@ void luaS_init (lua_State *L) {
}
/*
** Size, in bytes, of the memory block for a long string of the given
** kind holding 'len' characters. Only regular long strings reserve
** room for the content (plus the ending '\0') in the block itself;
** only strings with a deallocation function keep 'falloc'/'ud'.
*/
size_t luaS_sizelngstr (size_t len, int kind) {
  if (kind == LSTRREG)  /* header without 'falloc'/'ud', plus content */
    return offsetof(TString, falloc) + (len + 1) * sizeof(char);
  if (kind == LSTRFIX)  /* header without 'falloc'/'ud' */
    return offsetof(TString, falloc);
  /* otherwise it must be an external string with deallocation */
  lua_assert(kind == LSTRMEM);
  return sizeof(TString);  /* full header */
}
/*
** creates a new string object
@ -153,11 +167,11 @@ static TString *createstrobj (lua_State *L, size_t totalsize, int tag,
/*
** Creates a long string object able to hold 'l' characters, records
** the length in 'u.lnglen', and writes the ending '\0' at position 'l'
** of its content. The content itself is not filled here.
** NOTE(review): the lines computing 'totalsize', 'shrlen', and
** 'contents' each appear twice in this span -- presumably the old
** version followed by its replacement from a diff; confirm which
** lines belong to the final file.
*/
TString *luaS_createlngstrobj (lua_State *L, size_t l) {
size_t totalsize = sizestrlng(l);
size_t totalsize = luaS_sizelngstr(l, LSTRREG);
TString *ts = createstrobj(L, totalsize, LUA_VLNGSTR, G(L)->seed);
ts->u.lnglen = l;
ts->shrlen = -1; /* signals that it is a long string */
ts->contents = cast_charp(ts) + sizeof(TString);
ts->shrlen = LSTRREG; /* signals that it is a regular long string */
ts->contents = cast_charp(ts) + offsetof(TString, falloc);
ts->contents[l] = '\0'; /* ending 0 */
return ts;
}
@ -275,3 +289,61 @@ Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue) {
return u;
}
/*
** Arguments and result for the functions 'f_newext' and 'f_pintern',
** which receive a pointer to this structure through their 'void *ud'
** parameter (so that they can be run by 'luaD_rawrunprotected').
*/
struct NewExt {
int kind; /* kind of long string to create (read by 'f_newext') */
const char *s; /* string content (read by 'f_pintern') */
size_t len; /* string length (read by 'f_pintern') */
TString *ts; /* output */
};
/*
** Creates the string object for an external long string of kind
** 'kind' (taken from the 'struct NewExt' in 'ud') and stores it in
** the 'ts' field of that structure. The size is computed for a
** content length of zero.
*/
static void f_newext (lua_State *L, void *ud) {
  struct NewExt *args = cast(struct NewExt *, ud);
  args->ts = createstrobj(L, luaS_sizelngstr(0, args->kind),
                             LUA_VLNGSTR, G(L)->seed);
}
/*
** Internalizes the short string described by fields 's' and 'len' of
** the 'struct NewExt' in 'ud', storing the result in its 'ts' field.
*/
static void f_pintern (lua_State *L, void *ud) {
  struct NewExt *args = cast(struct NewExt *, ud);
  TString *interned = internshrstr(L, args->s, args->len);
  args->ts = interned;
}
/*
** Creates a string from the external buffer 's' with length 'len'.
** - Strings up to LUAI_MAXSHORTLEN are internalized; if 'falloc' is
**   given, the buffer is released right after that (whether or not
**   the internalization succeeded).
** - Longer strings get only a header, whose 'contents' points to 's'.
**   With a 'falloc', the header also records 'falloc' and 'ud'.
** Whenever 'falloc' is given, object creation runs in protected mode,
** so that a memory error releases the buffer, through
** (*falloc)(ud, s, len + 1, 0), before being raised again.
*/
TString *luaS_newextlstr (lua_State *L,
                  const char *s, size_t len, lua_Alloc falloc, void *ud) {
  struct NewExt ne;
  if (len > LUAI_MAXSHORTLEN) {  /* "normal" case: long strings */
    if (falloc == NULL) {
      ne.kind = LSTRFIX;
      f_newext(L, &ne);  /* nothing to release; no protection needed */
    }
    else {
      ne.kind = LSTRMEM;
      if (luaD_rawrunprotected(L, f_newext, &ne) != LUA_OK) {
        (*falloc)(ud, cast_voidp(s), len + 1, 0);  /* release buffer */
        luaM_error(L);  /* raise the memory error again */
      }
      ne.ts->falloc = falloc;
      ne.ts->ud = ud;
    }
    ne.ts->u.lnglen = len;
    ne.ts->shrlen = ne.kind;
    ne.ts->contents = cast_charp(s);  /* content stays in the buffer */
    return ne.ts;
  }
  /* short string: internalize it */
  ne.s = s;
  ne.len = len;
  if (falloc == NULL)
    f_pintern(L, &ne);
  else {
    int status = luaD_rawrunprotected(L, f_pintern, &ne);
    (*falloc)(ud, cast_voidp(s), len + 1, 0);  /* buffer no longer used */
    if (status != LUA_OK)
      luaM_error(L);  /* raise the memory error again */
  }
  return ne.ts;
}

View File

@ -26,12 +26,6 @@
/*
** Size of a short TString with 'l' characters: the header up to (not
** including) field 'contents', plus 'l' + 1 chars (the extra one is
** presumably for the ending '\0').
*/
#define sizestrshr(l) \
(offsetof(TString, contents) + ((l) + 1) * sizeof(char))
/*
** Size of a long TString: Size of the header plus space for the string
** itself (including final '\0').
*/
#define sizestrlng(l) (sizeof(TString) + ((l) + 1) * sizeof(char))
/*
** Creates a string from a literal: the concatenation '"" s' only
** compiles when 's' is a string literal, and the length is computed
** with 'sizeof', discounting the ending '\0'.
*/
#define luaS_newliteral(L, s) (luaS_newlstr(L, "" s, \
(sizeof(s)/sizeof(char))-1))
@ -60,6 +54,8 @@ LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue);
LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
LUAI_FUNC TString *luaS_new (lua_State *L, const char *str);
LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l);
/* creates a string from an external buffer 's'; 'falloc' (when not
   NULL) is called with 'ud' to release that buffer */
LUAI_FUNC TString *luaS_newextlstr (lua_State *L,
const char *s, size_t len, lua_Alloc falloc, void *ud);
/* size of the memory block for a long string of the given kind
   (LSTRREG, LSTRFIX, or LSTRMEM) */
LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind);
#endif

View File

@ -1277,6 +1277,37 @@ static int checkpanic (lua_State *L) {
}
/*
** Create an external string without a deallocation function, using
** as its buffer the content of the string given as first argument.
*/
static int externKstr (lua_State *L) {
  size_t l;
  const char *content = luaL_checklstring(L, 1, &l);
  lua_pushextlstring(L, content, l, NULL, NULL);
  return 1;  /* the new string */
}
/*
** Copy the string given as first argument into a fresh buffer and
** push an external string that uses that buffer. The buffer comes
** from the allocation function of the Lua state, and that same
** function (with its user data) is handed to 'lua_pushextlstring'
** as the deallocation function.
*/
static int externstr (lua_State *L) {
  size_t len;
  const char *s = luaL_checklstring(L, 1, &len);
  size_t bsize = len + 1;  /* content plus ending 0 */
  void *ud;
  lua_Alloc allocf = lua_getallocf(L, &ud);
  char *buff = cast_charp(allocf(ud, NULL, 0, bsize));  /* new block */
  if (!buff) {  /* allocation failed? */
    lua_pushliteral(L, "not enough memory");
    lua_error(L);
  }
  memcpy(buff, s, bsize * sizeof(char));  /* copies the ending 0 too */
  lua_pushextlstring(L, buff, len, allocf, ud);
  return 1;  /* the new string */
}
/*
** {====================================================================
@ -1949,6 +1980,8 @@ static const struct luaL_Reg tests_funcs[] = {
{"udataval", udataval},
{"unref", unref},
{"upvalue", upvalue},
{"externKstr", externKstr},
{"externstr", externstr},
{NULL, NULL}
};

2
lua.h
View File

@ -244,6 +244,8 @@ LUA_API void (lua_pushnil) (lua_State *L);
LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n);
LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n);
LUA_API const char *(lua_pushlstring) (lua_State *L, const char *s, size_t len);
/* pushes a string that uses the external buffer 's' (which must have
   s[len] == '\0'); 'falloc', if not NULL, is called with 'ud' to
   release the buffer */
LUA_API const char *(lua_pushextlstring) (lua_State *L,
const char *s, size_t len, lua_Alloc falloc, void *ud);
LUA_API const char *(lua_pushstring) (lua_State *L, const char *s);
LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt,
va_list argp);

View File

@ -3908,6 +3908,40 @@ This function is equivalent to @Lid{lua_pushcclosure} with no upvalues.
}
@APIEntry{const char *(lua_pushextlstring) (lua_State *L,
const char *s, size_t len, lua_Alloc falloc, void *ud);|
@apii{0,1,m}
Creates an @emphx{external string},
that is, a string that uses memory not managed by Lua.
The pointer @id{s} points to the external buffer
holding the string content,
and @id{len} is the length of the string.
The string should have a zero at its end,
that is, the condition @T{s[len] == '\0'} should hold.
If @id{falloc} is different from @id{NULL},
that function will be called by Lua
when the external buffer is no longer needed.
The contents of the buffer should not change before this call.
The function will be called with the given @id{ud},
the string @id{s} as the block,
the length plus one (to account for the ending zero) as the old size,
and 0 as the new size.
Lua always @x{internalizes} strings with lengths up to 40 characters.
So, for strings in that range,
this function will immediately internalize the string
and call @id{falloc} to free the buffer.
Even when using an external buffer,
Lua still has to allocate a header for the string.
In case of a memory-allocation error,
Lua will call @id{falloc} before raising the error.
}
@APIEntry{const char *lua_pushfstring (lua_State *L, const char *fmt, ...);|
@apii{0,1,v}

View File

@ -157,6 +157,12 @@ else -- compatible coercion
assert(tostring(-1203 + 0.0) == "-1203")
end
-- Formats a value with the '%p' option of 'string.format'.
local function topointer (v)
  local addr = string.format("%p", v)
  return addr
end
do -- tests for '%p' format
-- not much to test, as C does not specify what '%p' does.
-- ("The value of the pointer is converted to a sequence of printing
@ -180,18 +186,18 @@ do -- tests for '%p' format
do
local t1 = {}; local t2 = {}
assert(string.format("%p", t1) ~= string.format("%p", t2))
assert(topointer(t1) ~= topointer(t2))
end
do -- short strings are internalized
local s1 = string.rep("a", 10)
local s2 = string.rep("aa", 5)
assert(string.format("%p", s1) == string.format("%p", s2))
assert(topointer(s1) == topointer(s2))
end
do -- long strings aren't internalized
local s1 = string.rep("a", 300); local s2 = string.rep("a", 300)
assert(string.format("%p", s1) ~= string.format("%p", s2))
assert(topointer(s1) ~= topointer(s2))
end
end
@ -521,6 +527,20 @@ else
testpfs("P", str, {})
end
-- External strings can only be created through the test library 'T'.
if T == nil then
  (Message or print)('\n >>> testC not active: skipping external strings tests <<<\n')
else
  print("testing external strings")

  -- short strings
  local fixedshort = T.externKstr("hello")   -- external fixed short string
  assert(fixedshort == "hello")
  local allocshort = T.externstr("hello")    -- external allocated short string
  assert(allocshort == "hello")

  -- long strings
  local long = string.rep("a", 100)
  local fixedlong = T.externKstr(long)       -- external fixed long string
  assert(fixedlong == long)
  local alloclong = T.externstr(long)        -- external allocated long string
  assert(alloclong == fixedlong)
end
print('OK')