From d70a0c91ad42275af1f6f1b6e37c604442b3f0d1 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 15 Dec 2022 16:44:22 -0300 Subject: [PATCH] Dump/undump reuse strings A repeated string in a dump is represented as an index to its first occurrence, instead of another copy of the string. --- lapi.c | 29 +++++++++++++++++++++++++---- ldump.c | 37 +++++++++++++++++++++++++++++++------ lstrlib.c | 1 + lundump.c | 20 +++++++++++++++++++- lundump.h | 2 +- 5 files changed, 77 insertions(+), 12 deletions(-) diff --git a/lapi.c b/lapi.c index b2ac0c57..a1eb7dc6 100644 --- a/lapi.c +++ b/lapi.c @@ -1107,16 +1107,37 @@ LUA_API int lua_load (lua_State *L, lua_Reader reader, void *data, } +/* +** Dump a function, calling 'writer' to write its parts. Because the +** writer can use the stack in unknown ways, this function should not +** push things on the stack, but it must anchor an auxiliary table +** used by 'luaU_dump'. To do so, it creates the table, anchors the +** function that is on the stack in the table, and substitutes the +** table for the function in the stack. 
+*/ + LUA_API int lua_dump (lua_State *L, lua_Writer writer, void *data, int strip) { int status; + StkId fstk; /* pointer to function */ TValue *o; lua_lock(L); api_checknelems(L, 1); - o = s2v(L->top.p - 1); - if (isLfunction(o)) - status = luaU_dump(L, getproto(o), writer, data, strip); - else + fstk = L->top.p - 1; + o = s2v(fstk); + if (!isLfunction(o)) status = 1; + else { + LClosure *f = clLvalue(o); + ptrdiff_t fidx = savestack(L, fstk); /* function index */ + Table *h = luaH_new(L); /* auxiliary table used by 'luaU_dump' */ + sethvalue2s(L, L->top.p, h); /* anchor it (luaH_set may call GC) */ + L->top.p++; /* (assume extra slot) */ + luaH_set(L, h, o, o); /* anchor function into table */ + setobjs2s(L, fstk, L->top.p - 1); /* move table over function */ + L->top.p--; /* stack back to initial size */ + status = luaU_dump(L, f->p, writer, data, strip, h); + setclLvalue2s(L, restorestack(L, fidx), f); /* put function back */ + } lua_unlock(L); return status; } diff --git a/ldump.c b/ldump.c index f848b669..70c7adc6 100644 --- a/ldump.c +++ b/ldump.c @@ -14,8 +14,10 @@ #include "lua.h" +#include "lgc.h" #include "lobject.h" #include "lstate.h" +#include "ltable.h" #include "lundump.h" @@ -25,6 +27,8 @@ typedef struct { void *data; int strip; int status; + Table *h; /* table to track saved strings */ + lua_Integer nstr; /* counter to number saved strings */ } DumpState; @@ -85,14 +89,33 @@ static void dumpInteger (DumpState *D, lua_Integer x) { } -static void dumpString (DumpState *D, const TString *s) { +/* +** Dump a String. First dump its "size": size==0 means NULL; +** size==1 is followed by an index and means "reuse saved string with +** that index"; size>=2 is followed by the string contents with real +** size==size-2 and means that string, which will be saved with +** the next available index. 
+*/ +static void dumpString (DumpState *D, TString *s) { if (s == NULL) dumpSize(D, 0); else { - size_t size = tsslen(s); - const char *str = getstr(s); - dumpSize(D, size + 1); - dumpVector(D, str, size); + const TValue *idx = luaH_getstr(D->h, s); + if (ttisinteger(idx)) { /* string already saved? */ + dumpSize(D, 1); /* reuse a saved string */ + dumpInt(D, ivalue(idx)); /* index of saved string */ + } + else { /* must write and save the string */ + TValue key, value; /* to save the string in the hash */ + size_t size = tsslen(s); + dumpSize(D, size + 2); + dumpVector(D, getstr(s), size); + D->nstr++; /* one more saved string */ + setsvalue(D->L, &key, s); /* the string is the key */ + setivalue(&value, D->nstr); /* its index is the value */ + luaH_finishset(D->L, D->h, &key, idx, &value); /* h[s] = nstr */ + /* integer value does not need barrier */ + } } } @@ -211,13 +234,15 @@ static void dumpHeader (DumpState *D) { ** dump Lua function as precompiled chunk */ int luaU_dump(lua_State *L, const Proto *f, lua_Writer w, void *data, - int strip) { + int strip, Table *h) { DumpState D; D.L = L; D.writer = w; D.data = data; D.strip = strip; D.status = 0; + D.h = h; + D.nstr = 0; dumpHeader(&D); dumpByte(&D, f->sizeupvalues); dumpFunction(&D, f, NULL); diff --git a/lstrlib.c b/lstrlib.c index 0b4fdbb7..ce07d9bc 100644 --- a/lstrlib.c +++ b/lstrlib.c @@ -239,6 +239,7 @@ static int str_dump (lua_State *L) { if (l_unlikely(lua_dump(L, writer, &state, strip) != 0)) return luaL_error(L, "unable to dump given function"); luaL_pushresult(&state.B); + lua_assert(lua_isfunction(L, 1)); /* lua_dump kept that value */ return 1; } diff --git a/lundump.c b/lundump.c index aba93f82..4048fdea 100644 --- a/lundump.c +++ b/lundump.c @@ -21,6 +21,7 @@ #include "lmem.h" #include "lobject.h" #include "lstring.h" +#include "ltable.h" #include "lundump.h" #include "lzio.h" @@ -34,6 +35,8 @@ typedef struct { lua_State *L; ZIO *Z; const char *name; + Table *h; /* list for string reuse */ + 
lua_Integer nstr; /* number of strings in the list */ } LoadState; @@ -110,10 +113,16 @@ static lua_Integer loadInteger (LoadState *S) { static TString *loadStringN (LoadState *S, Proto *p) { lua_State *L = S->L; TString *ts; + TValue sv; size_t size = loadSize(S); if (size == 0) /* no string? */ return NULL; - else if (--size <= LUAI_MAXSHORTLEN) { /* short string? */ + else if (size == 1) { /* previously saved string? */ + int idx = loadInt(S); /* get its index */ + const TValue *stv = luaH_getint(S->h, idx); + return tsvalue(stv); + } + else if (size -= 2, size <= LUAI_MAXSHORTLEN) { /* short string? */ char buff[LUAI_MAXSHORTLEN]; loadVector(S, buff, size); /* load string into buffer */ ts = luaS_newlstr(L, buff, size); /* create string */ @@ -126,6 +135,10 @@ static TString *loadStringN (LoadState *S, Proto *p) { L->top.p--; /* pop string */ } luaC_objbarrier(L, p, ts); + S->nstr++; /* add string to list of saved strings */ + setsvalue(L, &sv, ts); + luaH_setint(L, S->h, S->nstr, &sv); + luaC_objbarrierback(L, obj2gco(S->h), ts); return ts; } @@ -323,11 +336,16 @@ LClosure *luaU_undump(lua_State *L, ZIO *Z, const char *name) { cl = luaF_newLclosure(L, loadByte(&S)); setclLvalue2s(L, L->top.p, cl); luaD_inctop(L); + S.h = luaH_new(L); /* create list of saved strings */ + S.nstr = 0; + sethvalue2s(L, L->top.p, S.h); /* anchor it */ + luaD_inctop(L); cl->p = luaF_newproto(L); luaC_objbarrier(L, cl, cl->p); loadFunction(&S, cl->p, NULL); lua_assert(cl->nupvalues == cl->p->sizeupvalues); luai_verifycode(L, cl->p); + L->top.p--; /* pop table */ return cl; } diff --git a/lundump.h b/lundump.h index f3748a99..7def905b 100644 --- a/lundump.h +++ b/lundump.h @@ -31,6 +31,6 @@ LUAI_FUNC LClosure* luaU_undump (lua_State* L, ZIO* Z, const char* name); /* dump one chunk; from ldump.c */ LUAI_FUNC int luaU_dump (lua_State* L, const Proto* f, lua_Writer w, - void* data, int strip); + void* data, int strip, Table *h); #endif