From 934e77a286aeb97ca02badf56956ccc78217e9d0 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 15 May 2023 10:07:25 -0300 Subject: [PATCH 01/19] Details - Better comments about short strings in opcodes. - luaH_newkey made static. --- lcode.c | 7 ++++--- lopcodes.h | 8 ++++---- ltable.c | 3 ++- ltable.h | 2 -- lvm.c | 8 ++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lcode.c b/lcode.c index 1a371ca9..eade2806 100644 --- a/lcode.c +++ b/lcode.c @@ -1215,7 +1215,7 @@ static void codenot (FuncState *fs, expdesc *e) { /* -** Check whether expression 'e' is a small literal string +** Check whether expression 'e' is a short literal string */ static int isKstr (FuncState *fs, expdesc *e) { return (e->k == VK && !hasjumps(e) && e->u.info <= MAXARG_B && @@ -1283,15 +1283,16 @@ void luaK_indexed (FuncState *fs, expdesc *t, expdesc *k) { if (t->k == VUPVAL && !isKstr(fs, k)) /* upvalue indexed by non 'Kstr'? */ luaK_exp2anyreg(fs, t); /* put it in a register */ if (t->k == VUPVAL) { + lua_assert(isKstr(fs, k)); t->u.ind.t = t->u.info; /* upvalue index */ - t->u.ind.idx = k->u.info; /* literal string */ + t->u.ind.idx = k->u.info; /* literal short string */ t->k = VINDEXUP; } else { /* register index of the table */ t->u.ind.t = (t->k == VLOCAL) ? 
t->u.var.ridx: t->u.info; if (isKstr(fs, k)) { - t->u.ind.idx = k->u.info; /* literal string */ + t->u.ind.idx = k->u.info; /* literal short string */ t->k = VINDEXSTR; } else if (isCint(k)) { diff --git a/lopcodes.h b/lopcodes.h index 4c551453..46911cac 100644 --- a/lopcodes.h +++ b/lopcodes.h @@ -210,15 +210,15 @@ OP_LOADNIL,/* A B R[A], R[A+1], ..., R[A+B] := nil */ OP_GETUPVAL,/* A B R[A] := UpValue[B] */ OP_SETUPVAL,/* A B UpValue[B] := R[A] */ -OP_GETTABUP,/* A B C R[A] := UpValue[B][K[C]:string] */ +OP_GETTABUP,/* A B C R[A] := UpValue[B][K[C]:shortstring] */ OP_GETTABLE,/* A B C R[A] := R[B][R[C]] */ OP_GETI,/* A B C R[A] := R[B][C] */ -OP_GETFIELD,/* A B C R[A] := R[B][K[C]:string] */ +OP_GETFIELD,/* A B C R[A] := R[B][K[C]:shortstring] */ -OP_SETTABUP,/* A B C UpValue[A][K[B]:string] := RK(C) */ +OP_SETTABUP,/* A B C UpValue[A][K[B]:shortstring] := RK(C) */ OP_SETTABLE,/* A B C R[A][R[B]] := RK(C) */ OP_SETI,/* A B C R[A][B] := RK(C) */ -OP_SETFIELD,/* A B C R[A][K[B]:string] := RK(C) */ +OP_SETFIELD,/* A B C R[A][K[B]:shortstring] := RK(C) */ OP_NEWTABLE,/* A B C k R[A] := {} */ diff --git a/ltable.c b/ltable.c index 3c690c5f..3fb575a1 100644 --- a/ltable.c +++ b/ltable.c @@ -662,7 +662,8 @@ static Node *getfreepos (Table *t) { ** put new key in its main position; otherwise (colliding node is in its main ** position), new key goes to an empty position. 
*/ -void luaH_newkey (lua_State *L, Table *t, const TValue *key, TValue *value) { +static void luaH_newkey (lua_State *L, Table *t, const TValue *key, + TValue *value) { Node *mp; TValue aux; if (l_unlikely(ttisnil(key))) diff --git a/ltable.h b/ltable.h index 75dd9e26..8e689034 100644 --- a/ltable.h +++ b/ltable.h @@ -41,8 +41,6 @@ LUAI_FUNC void luaH_setint (lua_State *L, Table *t, lua_Integer key, LUAI_FUNC const TValue *luaH_getshortstr (Table *t, TString *key); LUAI_FUNC const TValue *luaH_getstr (Table *t, TString *key); LUAI_FUNC const TValue *luaH_get (Table *t, const TValue *key); -LUAI_FUNC void luaH_newkey (lua_State *L, Table *t, const TValue *key, - TValue *value); LUAI_FUNC void luaH_set (lua_State *L, Table *t, const TValue *key, TValue *value); LUAI_FUNC void luaH_finishset (lua_State *L, Table *t, const TValue *key, diff --git a/lvm.c b/lvm.c index 8493a770..4c300a87 100644 --- a/lvm.c +++ b/lvm.c @@ -1253,7 +1253,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) { const TValue *slot; TValue *upval = cl->upvals[GETARG_B(i)]->v.p; TValue *rc = KC(i); - TString *key = tsvalue(rc); /* key must be a string */ + TString *key = tsvalue(rc); /* key must be a short string */ if (luaV_fastget(L, upval, key, slot, luaH_getshortstr)) { setobj2s(L, ra, slot); } @@ -1296,7 +1296,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) { const TValue *slot; TValue *rb = vRB(i); TValue *rc = KC(i); - TString *key = tsvalue(rc); /* key must be a string */ + TString *key = tsvalue(rc); /* key must be a short string */ if (luaV_fastget(L, rb, key, slot, luaH_getshortstr)) { setobj2s(L, ra, slot); } @@ -1309,7 +1309,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) { TValue *upval = cl->upvals[GETARG_A(i)]->v.p; TValue *rb = KB(i); TValue *rc = RKC(i); - TString *key = tsvalue(rb); /* key must be a string */ + TString *key = tsvalue(rb); /* key must be a short string */ if (luaV_fastget(L, upval, key, slot, luaH_getshortstr)) { luaV_finishfastset(L, upval, slot, rc); 
} @@ -1352,7 +1352,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) { const TValue *slot; TValue *rb = KB(i); TValue *rc = RKC(i); - TString *key = tsvalue(rb); /* key must be a string */ + TString *key = tsvalue(rb); /* key must be a short string */ if (luaV_fastget(L, s2v(ra), key, slot, luaH_getshortstr)) { luaV_finishfastset(L, s2v(ra), slot, rc); } From c197885cb00b85251c35cffdc4057efaee2d7a88 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 15 May 2023 10:20:13 -0300 Subject: [PATCH 02/19] Small improvements in tests --- testes/db.lua | 2 +- testes/main.lua | 43 +++++++++++++++++++++++-------------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/testes/db.lua b/testes/db.lua index 02b96aca..67b58934 100644 --- a/testes/db.lua +++ b/testes/db.lua @@ -928,7 +928,7 @@ do local cl = countlines(rest) -- at most 10 lines in first part, 11 in second, plus '...' assert(cl <= 10 + 11 + 1) - local brk = string.find(rest, "%.%.%.") + local brk = string.find(rest, "%.%.%.\t%(skip") if brk then -- does message have '...'? local rest1 = string.sub(rest, 1, brk) local rest2 = string.sub(rest, brk, #rest) diff --git a/testes/main.lua b/testes/main.lua index f59badcf..3fa94e97 100644 --- a/testes/main.lua +++ b/testes/main.lua @@ -27,17 +27,19 @@ do end print("progname: "..progname) -local prepfile = function (s, p) - p = p or prog - io.output(p) - io.write(s) - assert(io.close()) + +local prepfile = function (s, mod, p) + mod = mod and "wb" or "w" -- mod true means binary files + p = p or prog -- file to write the program + local f = io.open(p, mod) + f:write(s) + assert(f:close()) end local function getoutput () - io.input(out) - local t = io.read("a") - io.input():close() + local f = io.open(out) + local t = f:read("a") + f:close() assert(os.remove(out)) return t end @@ -65,10 +67,11 @@ local function RUN (p, ...) assert(os.execute(s)) end + local function NoRun (msg, p, ...) 
p = string.gsub(p, "lua", '"'..progname..'"', 1) local s = string.format(p, ...) - s = string.format("%s 2> %s", s, out) -- will send error to 'out' + s = string.format("%s >%s 2>&1", s, out) -- send output and error to 'out' assert(not os.execute(s)) assert(string.find(getoutput(), msg, 1, true)) -- check error message end @@ -108,17 +111,17 @@ RUN('lua %s > %s', prog, out) checkout("3\n") -- bad BOMs -prepfile("\xEF") -NoRun("unexpected symbol", 'lua %s > %s', prog, out) +prepfile("\xEF", true) +NoRun("unexpected symbol", 'lua %s', prog) -prepfile("\xEF\xBB") -NoRun("unexpected symbol", 'lua %s > %s', prog, out) +prepfile("\xEF\xBB", true) +NoRun("unexpected symbol", 'lua %s', prog) -prepfile("\xEFprint(3)") -NoRun("unexpected symbol", 'lua %s > %s', prog, out) +prepfile("\xEFprint(3)", true) +NoRun("unexpected symbol", 'lua %s', prog) -prepfile("\xEF\xBBprint(3)") -NoRun("unexpected symbol", 'lua %s > %s', prog, out) +prepfile("\xEF\xBBprint(3)", true) +NoRun("unexpected symbol", 'lua %s', prog) -- test option '-' @@ -213,7 +216,7 @@ convert("a;b;;c") -- test -l over multiple libraries prepfile("print(1); a=2; return {x=15}") -prepfile(("print(a); print(_G['%s'].x)"):format(prog), otherprog) +prepfile(("print(a); print(_G['%s'].x)"):format(prog), false, otherprog) RUN('env LUA_PATH="?;;" lua -l %s -l%s -lstring -l io %s > %s', prog, otherprog, otherprog, out) checkout("1\n2\n15\n2\n15\n") @@ -237,7 +240,7 @@ RUN('lua "-e " -- %s a b c', prog) -- "-e " runs an empty command -- test 'arg' availability in libraries prepfile"assert(arg)" -prepfile("assert(arg)", otherprog) +prepfile("assert(arg)", false, otherprog) RUN('env LUA_PATH="?;;" lua -l%s - < %s', prog, otherprog) -- test messing up the 'arg' table @@ -413,7 +416,7 @@ prepfile[[#comment in 1st line without \n at the end]] RUN('lua %s', prog) -- first-line comment with binary file -prepfile("#comment\n" .. string.dump(load("print(3)"))) +prepfile("#comment\n" .. 
string.dump(load("print(3)")), true) RUN('lua %s > %s', prog, out) checkout('3\n') From 09f3c2372f5dbeaec9f50614a26c1b5761726a88 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 15 May 2023 13:46:38 -0300 Subject: [PATCH 03/19] Option '-l' discards version suffix from file name Like 'require', the command-line option '-l' discards an optional version suffix (everything after a hyphen) from a file name when creating the module name. --- loadlib.c | 9 --------- lua.c | 12 ++++++++++-- luaconf.h | 9 +++++++++ testes/main.lua | 7 +++++++ 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/loadlib.c b/loadlib.c index d792dffa..6d289fce 100644 --- a/loadlib.c +++ b/loadlib.c @@ -24,15 +24,6 @@ #include "lualib.h" -/* -** LUA_IGMARK is a mark to ignore all before it when building the -** luaopen_ function name. -*/ -#if !defined (LUA_IGMARK) -#define LUA_IGMARK "-" -#endif - - /* ** LUA_CSUBSEP is the character that replaces dots in submodule names ** when searching for a C loader. diff --git a/lua.c b/lua.c index 0ff88454..3af5ce6a 100644 --- a/lua.c +++ b/lua.c @@ -210,12 +210,17 @@ static int dostring (lua_State *L, const char *s, const char *name) { /* ** Receives 'globname[=modname]' and runs 'globname = require(modname)'. +** If there is no explicit modname and globname contains a '-', cut +** the sufix after '-' (the "version") to make the global name. */ static int dolibrary (lua_State *L, char *globname) { int status; + char *suffix = NULL; char *modname = strchr(globname, '='); - if (modname == NULL) /* no explicit name? */ + if (modname == NULL) { /* no explicit name? 
*/ modname = globname; /* module name is equal to global name */ + suffix = strchr(modname, *LUA_IGMARK); /* look for a suffix mark */ + } else { *modname = '\0'; /* global name ends here */ modname++; /* module name starts after the '=' */ @@ -223,8 +228,11 @@ static int dolibrary (lua_State *L, char *globname) { lua_getglobal(L, "require"); lua_pushstring(L, modname); status = docall(L, 1, 1); /* call 'require(modname)' */ - if (status == LUA_OK) + if (status == LUA_OK) { + if (suffix != NULL) /* is there a suffix mark? */ + *suffix = '\0'; /* remove sufix from global name */ lua_setglobal(L, globname); /* globname = require(modname) */ + } return report(L, status); } diff --git a/luaconf.h b/luaconf.h index 137103ed..acebe29c 100644 --- a/luaconf.h +++ b/luaconf.h @@ -257,6 +257,15 @@ #endif + +/* +** LUA_IGMARK is a mark to ignore all after it when building the +** module name (e.g., used to build the luaopen_ function name). +** Typically, the sufix after the mark is the module version, +** as in "mod-v1.2.so". +*/ +#define LUA_IGMARK "-" + /* }================================================================== */ diff --git a/testes/main.lua b/testes/main.lua index 3fa94e97..11b14b44 100644 --- a/testes/main.lua +++ b/testes/main.lua @@ -225,6 +225,13 @@ prepfile("print(str.upper'alo alo', m.max(10, 20))") RUN("lua -l 'str=string' '-lm=math' -e 'print(m.sin(0))' %s > %s", prog, out) checkout("0.0\nALO ALO\t20\n") + +-- test module names with version sufix ("libs/lib2-v2") +RUN("env LUA_CPATH='./libs/?.so' lua -l lib2-v2 -e 'print(lib2.id())' > %s", + out) +checkout("true\n") + + -- test 'arg' table local a = [[ assert(#arg == 3 and arg[1] == 'a' and From 9be74ccc214eb6f4d9d0b9496fd973542c7377d9 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 22 May 2023 14:47:54 -0300 Subject: [PATCH 04/19] Several functions turned 'static' Several functions that were already being used only inside their own file have been declared as 'static'. 
--- lcode.c | 22 +++++++++++----------- lcode.h | 3 --- ldo.c | 6 +++--- ldo.h | 1 - lgc.c | 4 ++-- lstate.c | 4 ++-- lstate.h | 1 - 7 files changed, 18 insertions(+), 23 deletions(-) diff --git a/lcode.c b/lcode.c index eade2806..caac6ba3 100644 --- a/lcode.c +++ b/lcode.c @@ -415,7 +415,7 @@ int luaK_codeABx (FuncState *fs, OpCode o, int a, unsigned int bc) { /* ** Format and emit an 'iAsBx' instruction. */ -int luaK_codeAsBx (FuncState *fs, OpCode o, int a, int bc) { +static int codeAsBx (FuncState *fs, OpCode o, int a, int bc) { unsigned int b = bc + OFFSET_sBx; lua_assert(getOpMode(o) == iAsBx); lua_assert(a <= MAXARG_A && b <= MAXARG_Bx); @@ -671,7 +671,7 @@ static int fitsBx (lua_Integer i) { void luaK_int (FuncState *fs, int reg, lua_Integer i) { if (fitsBx(i)) - luaK_codeAsBx(fs, OP_LOADI, reg, cast_int(i)); + codeAsBx(fs, OP_LOADI, reg, cast_int(i)); else luaK_codek(fs, reg, luaK_intK(fs, i)); } @@ -680,7 +680,7 @@ void luaK_int (FuncState *fs, int reg, lua_Integer i) { static void luaK_float (FuncState *fs, int reg, lua_Number f) { lua_Integer fi; if (luaV_flttointeger(f, &fi, F2Ieq) && fitsBx(fi)) - luaK_codeAsBx(fs, OP_LOADF, reg, cast_int(fi)); + codeAsBx(fs, OP_LOADF, reg, cast_int(fi)); else luaK_codek(fs, reg, luaK_numberK(fs, f)); } @@ -1025,7 +1025,7 @@ static int luaK_exp2K (FuncState *fs, expdesc *e) { ** in the range of R/K indices). ** Returns 1 iff expression is K. */ -int luaK_exp2RK (FuncState *fs, expdesc *e) { +static int exp2RK (FuncState *fs, expdesc *e) { if (luaK_exp2K(fs, e)) return 1; else { /* not a constant in the right range: put it in a register */ @@ -1037,7 +1037,7 @@ int luaK_exp2RK (FuncState *fs, expdesc *e) { static void codeABRK (FuncState *fs, OpCode o, int a, int b, expdesc *ec) { - int k = luaK_exp2RK(fs, ec); + int k = exp2RK(fs, ec); luaK_codeABCk(fs, o, a, b, ec->u.info, k); } @@ -1225,7 +1225,7 @@ static int isKstr (FuncState *fs, expdesc *e) { /* ** Check whether expression 'e' is a literal integer. 
*/ -int luaK_isKint (expdesc *e) { +static int isKint (expdesc *e) { return (e->k == VKINT && !hasjumps(e)); } @@ -1235,7 +1235,7 @@ int luaK_isKint (expdesc *e) { ** proper range to fit in register C */ static int isCint (expdesc *e) { - return luaK_isKint(e) && (l_castS2U(e->u.ival) <= l_castS2U(MAXARG_C)); + return isKint(e) && (l_castS2U(e->u.ival) <= l_castS2U(MAXARG_C)); } @@ -1244,7 +1244,7 @@ static int isCint (expdesc *e) { ** proper range to fit in register sC */ static int isSCint (expdesc *e) { - return luaK_isKint(e) && fitsC(e->u.ival); + return isKint(e) && fitsC(e->u.ival); } @@ -1460,7 +1460,7 @@ static void codebinK (FuncState *fs, BinOpr opr, */ static int finishbinexpneg (FuncState *fs, expdesc *e1, expdesc *e2, OpCode op, int line, TMS event) { - if (!luaK_isKint(e2)) + if (!isKint(e2)) return 0; /* not an integer constant */ else { lua_Integer i2 = e2->u.ival; @@ -1593,7 +1593,7 @@ static void codeeq (FuncState *fs, BinOpr opr, expdesc *e1, expdesc *e2) { op = OP_EQI; r2 = im; /* immediate operand */ } - else if (luaK_exp2RK(fs, e2)) { /* 2nd expression is constant? */ + else if (exp2RK(fs, e2)) { /* 2nd expression is constant? 
*/ op = OP_EQK; r2 = e2->u.info; /* constant index */ } @@ -1659,7 +1659,7 @@ void luaK_infix (FuncState *fs, BinOpr op, expdesc *v) { } case OPR_EQ: case OPR_NE: { if (!tonumeral(v, NULL)) - luaK_exp2RK(fs, v); + exp2RK(fs, v); /* else keep numeral, which may be an immediate operand */ break; } diff --git a/lcode.h b/lcode.h index 32658244..0b971fc4 100644 --- a/lcode.h +++ b/lcode.h @@ -61,10 +61,8 @@ typedef enum UnOpr { OPR_MINUS, OPR_BNOT, OPR_NOT, OPR_LEN, OPR_NOUNOPR } UnOpr; LUAI_FUNC int luaK_code (FuncState *fs, Instruction i); LUAI_FUNC int luaK_codeABx (FuncState *fs, OpCode o, int A, unsigned int Bx); -LUAI_FUNC int luaK_codeAsBx (FuncState *fs, OpCode o, int A, int Bx); LUAI_FUNC int luaK_codeABCk (FuncState *fs, OpCode o, int A, int B, int C, int k); -LUAI_FUNC int luaK_isKint (expdesc *e); LUAI_FUNC int luaK_exp2const (FuncState *fs, const expdesc *e, TValue *v); LUAI_FUNC void luaK_fixline (FuncState *fs, int line); LUAI_FUNC void luaK_nil (FuncState *fs, int from, int n); @@ -76,7 +74,6 @@ LUAI_FUNC int luaK_exp2anyreg (FuncState *fs, expdesc *e); LUAI_FUNC void luaK_exp2anyregup (FuncState *fs, expdesc *e); LUAI_FUNC void luaK_exp2nextreg (FuncState *fs, expdesc *e); LUAI_FUNC void luaK_exp2val (FuncState *fs, expdesc *e); -LUAI_FUNC int luaK_exp2RK (FuncState *fs, expdesc *e); LUAI_FUNC void luaK_self (FuncState *fs, expdesc *e, expdesc *key); LUAI_FUNC void luaK_indexed (FuncState *fs, expdesc *t, expdesc *k); LUAI_FUNC void luaK_goiftrue (FuncState *fs, expdesc *e); diff --git a/ldo.c b/ldo.c index 2a0017ca..bd8d965f 100644 --- a/ldo.c +++ b/ldo.c @@ -409,7 +409,7 @@ static void rethook (lua_State *L, CallInfo *ci, int nres) { ** stack, below original 'func', so that 'luaD_precall' can call it. Raise ** an error if there is no '__call' metafield. 
*/ -StkId luaD_tryfuncTM (lua_State *L, StkId func) { +static StkId tryfuncTM (lua_State *L, StkId func) { const TValue *tm; StkId p; checkstackGCp(L, 1, func); /* space for metamethod */ @@ -568,7 +568,7 @@ int luaD_pretailcall (lua_State *L, CallInfo *ci, StkId func, return -1; } default: { /* not a function */ - func = luaD_tryfuncTM(L, func); /* try to get '__call' metamethod */ + func = tryfuncTM(L, func); /* try to get '__call' metamethod */ /* return luaD_pretailcall(L, ci, func, narg1 + 1, delta); */ narg1++; goto retry; /* try again */ @@ -609,7 +609,7 @@ CallInfo *luaD_precall (lua_State *L, StkId func, int nresults) { return ci; } default: { /* not a function */ - func = luaD_tryfuncTM(L, func); /* try to get '__call' metamethod */ + func = tryfuncTM(L, func); /* try to get '__call' metamethod */ /* return luaD_precall(L, func, nresults); */ goto retry; /* try again with metamethod */ } diff --git a/ldo.h b/ldo.h index 1aa446ad..56008ab3 100644 --- a/ldo.h +++ b/ldo.h @@ -71,7 +71,6 @@ LUAI_FUNC int luaD_pretailcall (lua_State *L, CallInfo *ci, StkId func, LUAI_FUNC CallInfo *luaD_precall (lua_State *L, StkId func, int nResults); LUAI_FUNC void luaD_call (lua_State *L, StkId func, int nResults); LUAI_FUNC void luaD_callnoyield (lua_State *L, StkId func, int nResults); -LUAI_FUNC StkId luaD_tryfuncTM (lua_State *L, StkId func); LUAI_FUNC int luaD_closeprotected (lua_State *L, ptrdiff_t level, int status); LUAI_FUNC int luaD_pcall (lua_State *L, Pfunc func, void *u, ptrdiff_t oldtop, ptrdiff_t ef); diff --git a/lgc.c b/lgc.c index a3094ff5..dd824e77 100644 --- a/lgc.c +++ b/lgc.c @@ -1409,7 +1409,7 @@ static void stepgenfull (lua_State *L, global_State *g) { setminordebt(g); } else { /* another bad collection; stay in incremental mode */ - g->GCestimate = gettotalbytes(g); /* first estimate */; + g->GCestimate = gettotalbytes(g); /* first estimate */ entersweep(L); luaC_runtilstate(L, bitmask(GCSpause)); /* finish collection */ setpause(g); @@ -1604,7 
+1604,7 @@ static lu_mem singlestep (lua_State *L) { case GCSenteratomic: { work = atomic(L); /* work is what was traversed by 'atomic' */ entersweep(L); - g->GCestimate = gettotalbytes(g); /* first estimate */; + g->GCestimate = gettotalbytes(g); /* first estimate */ break; } case GCSswpallgc: { /* sweep "regular" objects */ diff --git a/lstate.c b/lstate.c index 1e925e5a..06667dac 100644 --- a/lstate.c +++ b/lstate.c @@ -119,7 +119,7 @@ CallInfo *luaE_extendCI (lua_State *L) { /* ** free all CallInfo structures not in use by a thread */ -void luaE_freeCI (lua_State *L) { +static void freeCI (lua_State *L) { CallInfo *ci = L->ci; CallInfo *next = ci->next; ci->next = NULL; @@ -204,7 +204,7 @@ static void freestack (lua_State *L) { if (L->stack.p == NULL) return; /* stack not completely built yet */ L->ci = &L->base_ci; /* free the entire 'ci' list */ - luaE_freeCI(L); + freeCI(L); lua_assert(L->nci == 0); luaM_freearray(L, L->stack.p, stacksize(L) + EXTRA_STACK); /* free stack */ } diff --git a/lstate.h b/lstate.h index 8bf6600e..40ff89aa 100644 --- a/lstate.h +++ b/lstate.h @@ -396,7 +396,6 @@ union GCUnion { LUAI_FUNC void luaE_setdebt (global_State *g, l_mem debt); LUAI_FUNC void luaE_freethread (lua_State *L, lua_State *L1); LUAI_FUNC CallInfo *luaE_extendCI (lua_State *L); -LUAI_FUNC void luaE_freeCI (lua_State *L); LUAI_FUNC void luaE_shrinkCI (lua_State *L); LUAI_FUNC void luaE_checkcstack (lua_State *L); LUAI_FUNC void luaE_incCstack (lua_State *L); From f623b969325be736297bc1dff48e763c08778243 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 14 Jun 2023 14:38:07 -0300 Subject: [PATCH 05/19] Bug: read overflow in 'l_strcmp' Equality according to 'strcoll' does not imply that strings have the same length. 
--- lvm.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/lvm.c b/lvm.c index 4c300a87..2b437bdf 100644 --- a/lvm.c +++ b/lvm.c @@ -366,30 +366,32 @@ void luaV_finishset (lua_State *L, const TValue *t, TValue *key, /* -** Compare two strings 'ls' x 'rs', returning an integer less-equal- -** -greater than zero if 'ls' is less-equal-greater than 'rs'. +** Compare two strings 'ts1' x 'ts2', returning an integer less-equal- +** -greater than zero if 'ts1' is less-equal-greater than 'ts2'. ** The code is a little tricky because it allows '\0' in the strings -** and it uses 'strcoll' (to respect locales) for each segments -** of the strings. +** and it uses 'strcoll' (to respect locales) for each segment +** of the strings. Note that segments can compare equal but still +** have different lengths. */ -static int l_strcmp (const TString *ls, const TString *rs) { - const char *l = getstr(ls); - size_t ll = tsslen(ls); - const char *r = getstr(rs); - size_t lr = tsslen(rs); +static int l_strcmp (const TString *ts1, const TString *ts2) { + const char *s1 = getstr(ts1); + size_t rl1 = tsslen(ts1); /* real length */ + const char *s2 = getstr(ts2); + size_t rl2 = tsslen(ts2); for (;;) { /* for each segment */ - int temp = strcoll(l, r); + int temp = strcoll(s1, s2); if (temp != 0) /* not equal? */ return temp; /* done */ else { /* strings are equal up to a '\0' */ - size_t len = strlen(l); /* index of first '\0' in both strings */ - if (len == lr) /* 'rs' is finished? */ - return (len == ll) ? 0 : 1; /* check 'ls' */ - else if (len == ll) /* 'ls' is finished? */ - return -1; /* 'ls' is less than 'rs' ('rs' is not finished) */ - /* both strings longer than 'len'; go on comparing after the '\0' */ - len++; - l += len; ll -= len; r += len; lr -= len; + size_t zl1 = strlen(s1); /* index of first '\0' in 's1' */ + size_t zl2 = strlen(s2); /* index of first '\0' in 's2' */ + if (zl2 == rl2) /* 's2' is finished? 
*/ + return (zl1 == rl1) ? 0 : 1; /* check 's1' */ + else if (zl1 == rl1) /* 's1' is finished? */ + return -1; /* 's1' is less than 's2' ('s2' is not finished) */ + /* both strings longer than 'zl'; go on comparing after the '\0' */ + zl1++; zl2++; + s1 += zl1; rl1 -= zl1; s2 += zl2; rl2 -= zl2; } } } From 05ec55f16b389a4377adab84efe374437da8dbd2 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Fri, 16 Jun 2023 11:52:14 -0300 Subject: [PATCH 06/19] Avoid inclusion loop in 'ltm.h' --- ltm.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ltm.h b/ltm.h index c309e2ae..73b833c6 100644 --- a/ltm.h +++ b/ltm.h @@ -9,7 +9,6 @@ #include "lobject.h" -#include "lstate.h" /* @@ -96,8 +95,8 @@ LUAI_FUNC int luaT_callorderiTM (lua_State *L, const TValue *p1, int v2, int inv, int isfloat, TMS event); LUAI_FUNC void luaT_adjustvarargs (lua_State *L, int nfixparams, - CallInfo *ci, const Proto *p); -LUAI_FUNC void luaT_getvarargs (lua_State *L, CallInfo *ci, + struct CallInfo *ci, const Proto *p); +LUAI_FUNC void luaT_getvarargs (lua_State *L, struct CallInfo *ci, StkId where, int wanted); From ea39042e13645f63713425c05cc9ee4cfdcf0a40 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 21 Jun 2023 15:04:24 -0300 Subject: [PATCH 07/19] Removed redundancy in definitions of version/release String rendering now derived from the numeric original definitions. 
--- lua.h | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/lua.h b/lua.h index fd16cf80..040cc8e4 100644 --- a/lua.h +++ b/lua.h @@ -1,7 +1,7 @@ /* ** $Id: lua.h $ ** Lua - A Scripting Language -** Lua.org, PUC-Rio, Brazil (http://www.lua.org) +** Lua.org, PUC-Rio, Brazil (www.lua.org) ** See Copyright Notice at the end of this file */ @@ -13,22 +13,21 @@ #include -#include "luaconf.h" - - -#define LUA_VERSION_MAJOR "5" -#define LUA_VERSION_MINOR "4" -#define LUA_VERSION_RELEASE "6" - -#define LUA_VERSION_NUM 504 -#define LUA_VERSION_RELEASE_NUM (LUA_VERSION_NUM * 100 + 6) - -#define LUA_VERSION "Lua " LUA_VERSION_MAJOR "." LUA_VERSION_MINOR -#define LUA_RELEASE LUA_VERSION "." LUA_VERSION_RELEASE #define LUA_COPYRIGHT LUA_RELEASE " Copyright (C) 1994-2023 Lua.org, PUC-Rio" #define LUA_AUTHORS "R. Ierusalimschy, L. H. de Figueiredo, W. Celes" +#define LUA_VERSION_MAJOR_N 5 +#define LUA_VERSION_MINOR_N 4 +#define LUA_VERSION_RELEASE_N 6 + +#define LUA_VERSION_NUM (LUA_VERSION_MAJOR_N * 100 + LUA_VERSION_MINOR_N) +#define LUA_VERSION_RELEASE_NUM (LUA_VERSION_NUM * 100 + LUA_VERSION_RELEASE_N) + + +#include "luaconf.h" + + /* mark for precompiled code ('Lua') */ #define LUA_SIGNATURE "\x1bLua" @@ -496,6 +495,17 @@ struct lua_Debug { /* }====================================================================== */ +#define LUAI_TOSTRAUX(x) #x +#define LUAI_TOSTR(x) LUAI_TOSTRAUX(x) + +#define LUA_VERSION_MAJOR LUAI_TOSTR(LUA_VERSION_MAJOR_N) +#define LUA_VERSION_MINOR LUAI_TOSTR(LUA_VERSION_MINOR_N) +#define LUA_VERSION_RELEASE LUAI_TOSTR(LUA_VERSION_RELEASE_N) + +#define LUA_VERSION "Lua " LUA_VERSION_MAJOR "." LUA_VERSION_MINOR +#define LUA_RELEASE LUA_VERSION "." LUA_VERSION_RELEASE + + /****************************************************************************** * Copyright (C) 1994-2023 Lua.org, PUC-Rio. 
* From cbae01620278f9b568805db16a96d0631ced473d Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 3 Jul 2023 14:12:54 -0300 Subject: [PATCH 08/19] Details --- lundump.h | 3 +-- testes/calls.lua | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lundump.h b/lundump.h index f3748a99..bc71ced8 100644 --- a/lundump.h +++ b/lundump.h @@ -21,8 +21,7 @@ /* ** Encode major-minor version in one byte, one nibble for each */ -#define MYINT(s) (s[0]-'0') /* assume one-digit numerals */ -#define LUAC_VERSION (MYINT(LUA_VERSION_MAJOR)*16+MYINT(LUA_VERSION_MINOR)) +#define LUAC_VERSION (LUA_VERSION_MAJOR_N*16+LUA_VERSION_MINOR_N) #define LUAC_FORMAT 0 /* this is the official format */ diff --git a/testes/calls.lua b/testes/calls.lua index 2d562a24..664be1b4 100644 --- a/testes/calls.lua +++ b/testes/calls.lua @@ -342,7 +342,7 @@ do -- another bug (in 5.4.0) end -do -- another bug (since 5.2) +if not _port then -- another bug (since 5.2) -- corrupted binary dump: list of upvalue names is larger than number -- of upvalues, overflowing the array of upvalues. local code = From 6b51133a988587f34ee9581d799ea9913581afd3 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 13 Jul 2023 14:55:46 -0300 Subject: [PATCH 09/19] Thread stacks resized in the atomic phase Although stack resize can be a little expensive, it seems unusual to have too many threads needing resize during one GC cycle. On the other hand, the change allows full collections to skip the propagate phase, going straight from a pause to the atomic phase. --- lgc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lgc.c b/lgc.c index dd824e77..f8f43393 100644 --- a/lgc.c +++ b/lgc.c @@ -638,7 +638,9 @@ static int traversethread (global_State *g, lua_State *th) { for (uv = th->openupval; uv != NULL; uv = uv->u.open.next) markobject(g, uv); /* open upvalues cannot be collected */ if (g->gcstate == GCSatomic) { /* final traversal? 
*/ - for (; o < th->stack_last.p + EXTRA_STACK; o++) + if (!g->gcemergency) + luaD_shrinkstack(th); /* do not change stack in emergency cycle */ + for (o = th->top.p; o < th->stack_last.p + EXTRA_STACK; o++) setnilvalue(s2v(o)); /* clear dead stack slice */ /* 'remarkupvals' may have removed thread from 'twups' list */ if (!isintwups(th) && th->openupval != NULL) { @@ -646,8 +648,6 @@ static int traversethread (global_State *g, lua_State *th) { g->twups = th; } } - else if (!g->gcemergency) - luaD_shrinkstack(th); /* do not change stack in emergency cycle */ return 1 + stacksize(th); } @@ -1710,6 +1710,8 @@ static void fullinc (lua_State *L, global_State *g) { entersweep(L); /* sweep everything to turn them back to white */ /* finish any pending sweep phase to start a new cycle */ luaC_runtilstate(L, bitmask(GCSpause)); + luaC_runtilstate(L, bitmask(GCSpropagate)); /* start new cycle */ + g->gcstate = GCSenteratomic; /* go straight to atomic phase ??? */ luaC_runtilstate(L, bitmask(GCScallfin)); /* run up to finalizers */ /* estimate must be correct after a full GC cycle */ lua_assert(g->GCestimate == gettotalbytes(g)); From 1b3f507f620d996ffb69da7476a19251acfb89ca Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Tue, 25 Jul 2023 16:50:44 -0300 Subject: [PATCH 10/19] Bug: Call hook may be called twice when count hook yields Took the opportunity and moved the code that controls call hooks in 'luaV_execute' into a function. --- ldebug.c | 22 ++++++++++++++++++++++ ldebug.h | 1 + lstate.h | 2 +- lvm.c | 13 +++---------- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/ldebug.c b/ldebug.c index 28b1caab..195d02f8 100644 --- a/ldebug.c +++ b/ldebug.c @@ -865,6 +865,28 @@ static int changedline (const Proto *p, int oldpc, int newpc) { } +/* +** Traces Lua calls. If code is running the first instruction of a function, +** and function is not vararg, and it is not coming from an yield, +** calls 'luaD_hookcall'. 
(Vararg functions will call 'luaD_hookcall' +** after adjusting its variable arguments; otherwise, they could call +** a line/count hook before the call hook. Functions coming from +** an yield already called 'luaD_hookcall' before yielding.) +*/ +int luaG_tracecall (lua_State *L) { + CallInfo *ci = L->ci; + Proto *p = ci_func(ci)->p; + ci->u.l.trap = 1; /* ensure hooks will be checked */ + if (ci->u.l.savedpc == p->code) { /* first instruction (not resuming)? */ + if (p->is_vararg) + return 0; /* hooks will start at VARARGPREP instruction */ + else if (!(ci->callstatus & CIST_HOOKYIELD)) /* not yieded? */ + luaD_hookcall(L, ci); /* check 'call' hook */ + } + return 1; /* keep 'trap' on */ +} + + /* ** Traces the execution of a Lua function. Called before the execution ** of each opcode, when debug is on. 'L->oldpc' stores the last diff --git a/ldebug.h b/ldebug.h index 2c3074c6..2bfce3cb 100644 --- a/ldebug.h +++ b/ldebug.h @@ -58,6 +58,7 @@ LUAI_FUNC const char *luaG_addinfo (lua_State *L, const char *msg, TString *src, int line); LUAI_FUNC l_noret luaG_errormsg (lua_State *L); LUAI_FUNC int luaG_traceexec (lua_State *L, const Instruction *pc); +LUAI_FUNC int luaG_tracecall (lua_State *L); #endif diff --git a/lstate.h b/lstate.h index 40ff89aa..007704c8 100644 --- a/lstate.h +++ b/lstate.h @@ -181,7 +181,7 @@ struct CallInfo { union { struct { /* only for Lua functions */ const Instruction *savedpc; - volatile l_signalT trap; + volatile l_signalT trap; /* function is tracing lines/counts */ int nextraargs; /* # of extra arguments in vararg functions */ } l; struct { /* only for C functions */ diff --git a/lvm.c b/lvm.c index 2b437bdf..a98aaceb 100644 --- a/lvm.c +++ b/lvm.c @@ -1157,18 +1157,11 @@ void luaV_execute (lua_State *L, CallInfo *ci) { startfunc: trap = L->hookmask; returning: /* trap already set */ - cl = clLvalue(s2v(ci->func.p)); + cl = ci_func(ci); k = cl->p->k; pc = ci->u.l.savedpc; - if (l_unlikely(trap)) { - if (pc == cl->p->code) { /* first 
instruction (not resuming)? */ - if (cl->p->is_vararg) - trap = 0; /* hooks will start after VARARGPREP instruction */ - else /* check 'call' hook */ - luaD_hookcall(L, ci); - } - ci->u.l.trap = 1; /* assume trap is on, for now */ - } + if (l_unlikely(trap)) + trap = luaG_tracecall(L); base = ci->func.p + 1; /* main loop of interpreter */ for (;;) { From f4211a5ea4e235ccfa8b8dfa46031c23e9e839e2 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 17 Aug 2023 10:42:56 -0300 Subject: [PATCH 11/19] More control over encoding of test files The few UTF-8 test files are commented as such, and there is only one non UTF-8 test file (to test non UTF-8 sources). --- testes/db.lua | 6 ++--- testes/files.lua | 8 +++---- testes/pm.lua | 56 +++++++++++++++++++++++++++++----------------- testes/sort.lua | 2 +- testes/strings.lua | 3 +++ testes/utf8.lua | 2 ++ 6 files changed, 49 insertions(+), 28 deletions(-) diff --git a/testes/db.lua b/testes/db.lua index 67b58934..d3758c41 100644 --- a/testes/db.lua +++ b/testes/db.lua @@ -345,7 +345,7 @@ function f(a,b) local _, y = debug.getlocal(1, 2) assert(x == a and y == b) assert(debug.setlocal(2, 3, "pera") == "AA".."AA") - assert(debug.setlocal(2, 4, "maçã") == "B") + assert(debug.setlocal(2, 4, "manga") == "B") x = debug.getinfo(2) assert(x.func == g and x.what == "Lua" and x.name == 'g' and x.nups == 2 and string.find(x.source, "^@.*db%.lua$")) @@ -373,9 +373,9 @@ function g (...) 
local arg = {...} do local a,b,c; a=math.sin(40); end local feijao - local AAAA,B = "xuxu", "mamão" + local AAAA,B = "xuxu", "abacate" f(AAAA,B) - assert(AAAA == "pera" and B == "maçã") + assert(AAAA == "pera" and B == "manga") do local B = 13 local x,y = debug.getlocal(1,5) diff --git a/testes/files.lua b/testes/files.lua index be00bf3f..1476006e 100644 --- a/testes/files.lua +++ b/testes/files.lua @@ -92,8 +92,8 @@ assert(io.output():seek("end") == string.len("alo joao")) assert(io.output():seek("set") == 0) -assert(io.write('"álo"', "{a}\n", "second line\n", "third line \n")) -assert(io.write('çfourth_line')) +assert(io.write('"alo"', "{a}\n", "second line\n", "third line \n")) +assert(io.write('Xfourth_line')) io.output(io.stdout) collectgarbage() -- file should be closed by GC assert(io.input() == io.stdin and rawequal(io.output(), io.stdout)) @@ -300,14 +300,14 @@ do -- test error returns end checkerr("invalid format", io.read, "x") assert(io.read(0) == "") -- not eof -assert(io.read(5, 'l') == '"álo"') +assert(io.read(5, 'l') == '"alo"') assert(io.read(0) == "") assert(io.read() == "second line") local x = io.input():seek() assert(io.read() == "third line ") assert(io.input():seek("set", x)) assert(io.read('L') == "third line \n") -assert(io.read(1) == "ç") +assert(io.read(1) == "X") assert(io.read(string.len"fourth_line") == "fourth_line") assert(io.input():seek("cur", -string.len"fourth_line")) assert(io.read() == "fourth_line") diff --git a/testes/pm.lua b/testes/pm.lua index 795596d4..44454dff 100644 --- a/testes/pm.lua +++ b/testes/pm.lua @@ -1,6 +1,9 @@ -- $Id: testes/pm.lua $ -- See Copyright Notice in file all.lua +-- UTF-8 file + + print('testing pattern matching') local function checkerror (msg, f, ...) @@ -50,6 +53,19 @@ assert(f('aLo_ALO', '%a*') == 'aLo') assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu") + +-- Adapt a pattern to UTF-8 +local function PU (p) + -- break '?' into each individual byte of a character + p = string.gsub(p, "(" .. 
utf8.charpattern .. ")%?", function (c) + return string.gsub(c, ".", "%0?") + end) + -- change '.' to utf-8 character patterns + p = string.gsub(p, "%.", utf8.charpattern) + return p +end + + assert(f('aaab', 'a*') == 'aaa'); assert(f('aaa', '^.*$') == 'aaa'); assert(f('aaa', 'b*') == ''); @@ -73,16 +89,16 @@ assert(f('aaa', '^.-$') == 'aaa') assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab') assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab') assert(f('alo xo', '.o$') == 'xo') -assert(f(' \n isto é assim', '%S%S*') == 'isto') -assert(f(' \n isto é assim', '%S*$') == 'assim') -assert(f(' \n isto é assim', '[a-z]*$') == 'assim') +assert(f(' \n isto é assim', '%S%S*') == 'isto') +assert(f(' \n isto é assim', '%S*$') == 'assim') +assert(f(' \n isto é assim', '[a-z]*$') == 'assim') assert(f('um caracter ? extra', '[^%sa-z]') == '?') assert(f('', 'a?') == '') -assert(f('á', 'á?') == 'á') -assert(f('ábl', 'á?b?l?') == 'ábl') -assert(f(' ábl', 'á?b?l?') == '') +assert(f('á', PU'á?') == 'á') +assert(f('ábl', PU'á?b?l?') == 'ábl') +assert(f(' ábl', PU'á?b?l?') == '') assert(f('aa', '^aa?a?a') == 'aa') -assert(f(']]]áb', '[^]]') == 'á') +assert(f(']]]áb', '[^]]+') == 'áb') assert(f("0alo alo", "%x*") == "0a") assert(f("alo alo", "%C+") == "alo alo") print('+') @@ -136,28 +152,28 @@ assert(string.match("alo xyzK", "(%w+)K") == "xyz") assert(string.match("254 K", "(%d*)K") == "") assert(string.match("alo ", "(%w*)$") == "") assert(not string.match("alo ", "(%w+)$")) -assert(string.find("(álo)", "%(á") == 1) -local a, b, c, d, e = string.match("âlo alo", "^(((.).).* (%w*))$") -assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) +assert(string.find("(álo)", "%(á") == 1) +local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). 
(%w*))$") +assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) a, b, c, d = string.match('0123456789', '(.+(.?)())') assert(a == '0123456789' and b == '' and c == 11 and d == nil) print('+') -assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo') -assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim +assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo') +assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim assert(string.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo') -- double trim assert(string.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ') -local t = "abç d" -a, b = string.gsub(t, '(.)', '%1@') -assert('@'..a == string.gsub(t, '', '@') and b == 5) -a, b = string.gsub('abçd', '(.)', '%0@', 2) -assert(a == 'a@b@çd' and b == 2) +local t = "abç d" +a, b = string.gsub(t, PU'(.)', '%1@') +assert(a == "a@b@ç@ @d@" and b == 5) +a, b = string.gsub('abçd', PU'(.)', '%0@', 2) +assert(a == 'a@b@çd' and b == 2) assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o') assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") == "xyz=abc-abc=xyz") assert(string.gsub("abc", "%w", "%1%0") == "aabbcc") assert(string.gsub("abc", "%w+", "%0%1") == "abcabc") -assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú') +assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú') assert(string.gsub('', '^', 'r') == 'r') assert(string.gsub('', '$', 'r') == 'r') print('+') @@ -188,8 +204,8 @@ do end function f(a,b) return string.gsub(a,'.',b) end -assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) == - "trocar tudo em bbbbb é alalalalalal") +assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) == + "trocar tudo em bbbbb é alalalalalal") local function dostring (s) return load(s, "")() or "" end assert(string.gsub("alo $a='x'$ novamente $return a$", diff --git a/testes/sort.lua b/testes/sort.lua index 52919b8c..40bb2d8a 100644 --- a/testes/sort.lua +++ b/testes/sort.lua 
@@ -289,7 +289,7 @@ timesort(a, limit, function(x,y) return nil end, "equal") for i,v in pairs(a) do assert(v == false) end -AA = {"álo", "\0first :-)", "alo", "then this one", "45", "and a new"} +AA = {"\xE1lo", "\0first :-)", "alo", "then this one", "45", "and a new"} table.sort(AA) check(AA) diff --git a/testes/strings.lua b/testes/strings.lua index b033c6ab..90983edd 100644 --- a/testes/strings.lua +++ b/testes/strings.lua @@ -1,6 +1,9 @@ -- $Id: testes/strings.lua $ -- See Copyright Notice in file all.lua +-- ISO Latin encoding + + print('testing strings and string library') local maxi = math.maxinteger diff --git a/testes/utf8.lua b/testes/utf8.lua index c5a9dd3f..efadbd5c 100644 --- a/testes/utf8.lua +++ b/testes/utf8.lua @@ -1,6 +1,8 @@ -- $Id: testes/utf8.lua $ -- See Copyright Notice in file all.lua +-- UTF-8 file + print "testing UTF-8 library" local utf8 = require'utf8' From 9b4f39ab14fb2e55345c3d23537d129dac23b091 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 17 Aug 2023 15:59:28 -0300 Subject: [PATCH 12/19] More disciplined use of 'getstr' and 'tsslen' We may want to add other string variants in the future; this change documents better where the code may need to handle those variants. 
--- lapi.c | 4 ++-- ldebug.c | 6 +++--- lgc.c | 8 +++++--- lobject.c | 2 +- lobject.h | 18 ++++++++---------- lstate.c | 2 +- lstring.c | 11 ++++++----- lundump.c | 2 +- lvm.c | 17 ++++++++++------- 9 files changed, 37 insertions(+), 33 deletions(-) diff --git a/lapi.c b/lapi.c index 34e64af1..332e97d1 100644 --- a/lapi.c +++ b/lapi.c @@ -417,9 +417,9 @@ LUA_API const char *lua_tolstring (lua_State *L, int idx, size_t *len) { o = index2value(L, idx); /* previous call may reallocate the stack */ } if (len != NULL) - *len = vslen(o); + *len = tsslen(tsvalue(o)); lua_unlock(L); - return svalue(o); + return getstr(tsvalue(o)); } diff --git a/ldebug.c b/ldebug.c index 195d02f8..690ac38f 100644 --- a/ldebug.c +++ b/ldebug.c @@ -426,7 +426,7 @@ static const char *getobjname (const Proto *p, int lastpc, int reg, */ static void kname (const Proto *p, int c, const char **name) { TValue *kvalue = &p->k[c]; - *name = (ttisstring(kvalue)) ? svalue(kvalue) : "?"; + *name = (ttisstring(kvalue)) ? getstr(tsvalue(kvalue)) : "?"; } @@ -569,7 +569,7 @@ static const char *getobjname (const Proto *p, int lastpc, int reg, int b = (op == OP_LOADK) ? GETARG_Bx(i) : GETARG_Ax(p->code[pc + 1]); if (ttisstring(&p->k[b])) { - *name = svalue(&p->k[b]); + *name = getstr(tsvalue(&p->k[b])); return "constant"; } break; @@ -627,7 +627,7 @@ static const char *funcnamefromcode (lua_State *L, const Proto *p, default: return NULL; /* cannot find a reasonable name */ } - *name = getstr(G(L)->tmname[tm]) + 2; + *name = getshrstr(G(L)->tmname[tm]) + 2; return "metamethod"; } diff --git a/lgc.c b/lgc.c index f8f43393..253a2892 100644 --- a/lgc.c +++ b/lgc.c @@ -542,10 +542,12 @@ static void traversestrongtable (global_State *g, Table *h) { static lu_mem traversetable (global_State *g, Table *h) { const char *weakkey, *weakvalue; const TValue *mode = gfasttm(g, h->metatable, TM_MODE); + TString *smode; markobjectN(g, h->metatable); - if (mode && ttisstring(mode) && /* is there a weak mode? 
*/ - (cast_void(weakkey = strchr(svalue(mode), 'k')), - cast_void(weakvalue = strchr(svalue(mode), 'v')), + if (mode && ttisshrstring(mode) && /* is there a weak mode? */ + (cast_void(smode = tsvalue(mode)), + cast_void(weakkey = strchr(getshrstr(smode), 'k')), + cast_void(weakvalue = strchr(getshrstr(smode), 'v')), (weakkey || weakvalue))) { /* is really weak? */ if (!weakkey) /* strong keys? */ traverseweakvalue(g, h); diff --git a/lobject.c b/lobject.c index f73ffc6d..9cfa5227 100644 --- a/lobject.c +++ b/lobject.c @@ -542,7 +542,7 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) { addstr2buff(&buff, fmt, strlen(fmt)); /* rest of 'fmt' */ clearbuff(&buff); /* empty buffer into the stack */ lua_assert(buff.pushed == 1); - return svalue(s2v(L->top.p - 1)); + return getstr(tsvalue(s2v(L->top.p - 1))); } diff --git a/lobject.h b/lobject.h index 556608e4..980e42f8 100644 --- a/lobject.h +++ b/lobject.h @@ -386,7 +386,7 @@ typedef struct GCObject { typedef struct TString { CommonHeader; lu_byte extra; /* reserved words for short strings; "has hash" for longs */ - lu_byte shrlen; /* length for short strings */ + lu_byte shrlen; /* length for short strings, 0xFF for long strings */ unsigned int hash; union { size_t lnglen; /* length for long strings */ @@ -398,19 +398,17 @@ typedef struct TString { /* -** Get the actual string (array of bytes) from a 'TString'. +** Get the actual string (array of bytes) from a 'TString'. (Generic +** version and specialized versions for long and short strings.) */ -#define getstr(ts) ((ts)->contents) +#define getstr(ts) ((ts)->contents) +#define getlngstr(ts) check_exp((ts)->shrlen == 0xFF, (ts)->contents) +#define getshrstr(ts) check_exp((ts)->shrlen != 0xFF, (ts)->contents) -/* get the actual string (array of bytes) from a Lua value */ -#define svalue(o) getstr(tsvalue(o)) - /* get string length from 'TString *s' */ -#define tsslen(s) ((s)->tt == LUA_VSHRSTR ? 
(s)->shrlen : (s)->u.lnglen) - -/* get string length from 'TValue *o' */ -#define vslen(o) tsslen(tsvalue(o)) +#define tsslen(s) \ + ((s)->shrlen != 0xFF ? (s)->shrlen : (s)->u.lnglen) /* }================================================================== */ diff --git a/lstate.c b/lstate.c index 06667dac..7fefacba 100644 --- a/lstate.c +++ b/lstate.c @@ -433,7 +433,7 @@ void luaE_warning (lua_State *L, const char *msg, int tocont) { void luaE_warnerror (lua_State *L, const char *where) { TValue *errobj = s2v(L->top.p - 1); /* error object */ const char *msg = (ttisstring(errobj)) - ? svalue(errobj) + ? getstr(tsvalue(errobj)) : "error object is not a string"; /* produce warning "error in %s (%s)" (where, msg) */ luaE_warning(L, "error in ", 1); diff --git a/lstring.c b/lstring.c index 13dcaf42..1032ad86 100644 --- a/lstring.c +++ b/lstring.c @@ -36,7 +36,7 @@ int luaS_eqlngstr (TString *a, TString *b) { lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR); return (a == b) || /* same instance or... */ ((len == b->u.lnglen) && /* equal length and ... */ - (memcmp(getstr(a), getstr(b), len) == 0)); /* equal contents */ + (memcmp(getlngstr(a), getlngstr(b), len) == 0)); /* equal contents */ } @@ -52,7 +52,7 @@ unsigned int luaS_hashlongstr (TString *ts) { lua_assert(ts->tt == LUA_VLNGSTR); if (ts->extra == 0) { /* no hash? 
*/ size_t len = ts->u.lnglen; - ts->hash = luaS_hash(getstr(ts), len, ts->hash); + ts->hash = luaS_hash(getlngstr(ts), len, ts->hash); ts->extra = 1; /* now it has its hash */ } return ts->hash; @@ -157,6 +157,7 @@ static TString *createstrobj (lua_State *L, size_t l, int tag, unsigned int h) { TString *luaS_createlngstrobj (lua_State *L, size_t l) { TString *ts = createstrobj(L, l, LUA_VLNGSTR, G(L)->seed); ts->u.lnglen = l; + ts->shrlen = 0xFF; /* signals that it is a long string */ return ts; } @@ -193,7 +194,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { TString **list = &tb->hash[lmod(h, tb->size)]; lua_assert(str != NULL); /* otherwise 'memcmp'/'memcpy' are undefined */ for (ts = *list; ts != NULL; ts = ts->u.hnext) { - if (l == ts->shrlen && (memcmp(str, getstr(ts), l * sizeof(char)) == 0)) { + if (l == ts->shrlen && (memcmp(str, getshrstr(ts), l * sizeof(char)) == 0)) { /* found! */ if (isdead(g, ts)) /* dead (but not collected yet)? */ changewhite(ts); /* resurrect it */ @@ -206,7 +207,7 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { list = &tb->hash[lmod(h, tb->size)]; /* rehash with new size */ } ts = createstrobj(L, l, LUA_VSHRSTR, h); - memcpy(getstr(ts), str, l * sizeof(char)); + memcpy(getshrstr(ts), str, l * sizeof(char)); ts->shrlen = cast_byte(l); ts->u.hnext = *list; *list = ts; @@ -226,7 +227,7 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { if (l_unlikely(l >= (MAX_SIZE - sizeof(TString))/sizeof(char))) luaM_toobig(L); ts = luaS_createlngstrobj(L, l); - memcpy(getstr(ts), str, l * sizeof(char)); + memcpy(getlngstr(ts), str, l * sizeof(char)); return ts; } } diff --git a/lundump.c b/lundump.c index 02aed64f..f1852c35 100644 --- a/lundump.c +++ b/lundump.c @@ -122,7 +122,7 @@ static TString *loadStringN (LoadState *S, Proto *p) { ts = luaS_createlngstrobj(L, size); /* create string */ setsvalue2s(L, L->top.p, ts); /* anchor it ('loadVector' can GC) */ luaD_inctop(L); 
- loadVector(S, getstr(ts), size); /* load directly in final place */ + loadVector(S, getlngstr(ts), size); /* load directly in final place */ L->top.p--; /* pop string */ } luaC_objbarrier(L, p, ts); diff --git a/lvm.c b/lvm.c index a98aaceb..4d71cfff 100644 --- a/lvm.c +++ b/lvm.c @@ -91,8 +91,10 @@ static int l_strton (const TValue *obj, TValue *result) { lua_assert(obj != result); if (!cvt2num(obj)) /* is object not a string? */ return 0; - else - return (luaO_str2num(svalue(obj), result) == vslen(obj) + 1); + else { + TString *st = tsvalue(obj); + return (luaO_str2num(getstr(st), result) == tsslen(st) + 1); + } } @@ -626,8 +628,9 @@ int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) { static void copy2buff (StkId top, int n, char *buff) { size_t tl = 0; /* size already copied */ do { - size_t l = vslen(s2v(top - n)); /* length of string being copied */ - memcpy(buff + tl, svalue(s2v(top - n)), l * sizeof(char)); + TString *st = tsvalue(s2v(top - n)); + size_t l = tsslen(st); /* length of string being copied */ + memcpy(buff + tl, getstr(st), l * sizeof(char)); tl += l; } while (--n > 0); } @@ -653,11 +656,11 @@ void luaV_concat (lua_State *L, int total) { } else { /* at least two non-empty string values; get as many as possible */ - size_t tl = vslen(s2v(top - 1)); + size_t tl = tsslen(tsvalue(s2v(top - 1))); TString *ts; /* collect total length and number of strings */ for (n = 1; n < total && tostring(L, s2v(top - n - 1)); n++) { - size_t l = vslen(s2v(top - n - 1)); + size_t l = tsslen(tsvalue(s2v(top - n - 1))); if (l_unlikely(l >= (MAX_SIZE/sizeof(char)) - tl)) { L->top.p = top - total; /* pop strings to avoid wasting stack */ luaG_runerror(L, "string length overflow"); @@ -671,7 +674,7 @@ void luaV_concat (lua_State *L, int total) { } else { /* long string; copy strings directly to final result */ ts = luaS_createlngstrobj(L, tl); - copy2buff(top, n, getstr(ts)); + copy2buff(top, n, getlngstr(ts)); } setsvalue2s(L, top - n, ts); /* 
create result */ } From 5ab6a5756b3c50c99f1388885e9a48a7da8cbe2d Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 23 Aug 2023 13:49:27 -0300 Subject: [PATCH 13/19] Bug: Wrong line number for function calls --- lparser.c | 12 ++++++------ testes/errors.lua | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lparser.c b/lparser.c index b745f236..2b888c7c 100644 --- a/lparser.c +++ b/lparser.c @@ -1022,10 +1022,11 @@ static int explist (LexState *ls, expdesc *v) { } -static void funcargs (LexState *ls, expdesc *f, int line) { +static void funcargs (LexState *ls, expdesc *f) { FuncState *fs = ls->fs; expdesc args; int base, nparams; + int line = ls->linenumber; switch (ls->t.token) { case '(': { /* funcargs -> '(' [ explist ] ')' */ luaX_next(ls); @@ -1063,8 +1064,8 @@ static void funcargs (LexState *ls, expdesc *f, int line) { } init_exp(f, VCALL, luaK_codeABC(fs, OP_CALL, base, nparams+1, 2)); luaK_fixline(fs, line); - fs->freereg = base+1; /* call remove function and arguments and leaves - (unless changed) one result */ + fs->freereg = base+1; /* call removes function and arguments and leaves + one result (unless changed later) */ } @@ -1103,7 +1104,6 @@ static void suffixedexp (LexState *ls, expdesc *v) { /* suffixedexp -> primaryexp { '.' 
NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */ FuncState *fs = ls->fs; - int line = ls->linenumber; primaryexp(ls, v); for (;;) { switch (ls->t.token) { @@ -1123,12 +1123,12 @@ static void suffixedexp (LexState *ls, expdesc *v) { luaX_next(ls); codename(ls, &key); luaK_self(fs, v, &key); - funcargs(ls, v, line); + funcargs(ls, v); break; } case '(': case TK_STRING: case '{': { /* funcargs */ luaK_exp2nextreg(fs, v); - funcargs(ls, v, line); + funcargs(ls, v); break; } default: return; diff --git a/testes/errors.lua b/testes/errors.lua index bf6f389d..b777a329 100644 --- a/testes/errors.lua +++ b/testes/errors.lua @@ -392,19 +392,19 @@ lineerror("a\n=\n-\n\nprint\n;", 3) lineerror([[ a -( +( -- << 23) -]], 1) +]], 2) lineerror([[ local a = {x = 13} a . x -( +( -- << 23 ) -]], 2) +]], 5) lineerror([[ local a = {x = 13} From 9363a8b9901a5643c9da061ea8dda8a86cdc7ef1 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 23 Aug 2023 13:50:38 -0300 Subject: [PATCH 14/19] Documentation for "LUA_NOENV" Registry field "LUA_NOENV" (that signals to libraries that option -E is on) now part of the "official" API of Lua stand-alone. --- manual/manual.of | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/manual/manual.of b/manual/manual.of index f8d8ddd4..ad120f5e 100644 --- a/manual/manual.of +++ b/manual/manual.of @@ -9026,6 +9026,10 @@ Lua does not consult any environment variables. In particular, the values of @Lid{package.path} and @Lid{package.cpath} are set with the default paths defined in @id{luaconf.h}. +To signal to the libraries that this option is on, +the stand-alone interpreter sets the field +@idx{"LUA_NOENV"} in the registry to a true value. +Other libraries may consult this field for the same purpose. The options @T{-e}, @T{-l}, and @T{-W} are handled in the order they appear. 
From 07a9eab23ac073362f231ddc7215688cf221ff45 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Fri, 25 Aug 2023 15:55:14 -0300 Subject: [PATCH 15/19] Cannot use 'getshrstr' before setting 'shrlen' --- lstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lstring.c b/lstring.c index 1032ad86..e921dd0f 100644 --- a/lstring.c +++ b/lstring.c @@ -207,8 +207,8 @@ static TString *internshrstr (lua_State *L, const char *str, size_t l) { list = &tb->hash[lmod(h, tb->size)]; /* rehash with new size */ } ts = createstrobj(L, l, LUA_VSHRSTR, h); - memcpy(getshrstr(ts), str, l * sizeof(char)); ts->shrlen = cast_byte(l); + memcpy(getshrstr(ts), str, l * sizeof(char)); ts->u.hnext = *list; *list = ts; tb->nuse++; From edd8589f478e784bb8d1a8e9a3bb2bb3ca51738c Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Fri, 8 Sep 2023 11:34:39 -0300 Subject: [PATCH 16/19] Avoid casts from unsigned long to floating-point Old Microsoft compilers do not support those casts. --- lmathlib.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/lmathlib.c b/lmathlib.c index d0b1e1e5..f140d623 100644 --- a/lmathlib.c +++ b/lmathlib.c @@ -249,6 +249,15 @@ static int math_type (lua_State *L) { ** =================================================================== */ +/* +** This code uses lots of shifts. ANSI C does not allow shifts greater +** than or equal to the width of the type being shifted, so some shifts +** are written in convoluted ways to match that restriction. For +** preprocessor tests, it assumes a width of 32 bits, so the maximum +** shift there is 31 bits. 
+*/ + + /* number of binary digits in the mantissa of a float */ #define FIGS l_floatatt(MANT_DIG) @@ -271,16 +280,19 @@ static int math_type (lua_State *L) { /* 'long' has at least 64 bits */ #define Rand64 unsigned long +#define SRand64 long #elif !defined(LUA_USE_C89) && defined(LLONG_MAX) /* there is a 'long long' type (which must have at least 64 bits) */ #define Rand64 unsigned long long +#define SRand64 long long #elif ((LUA_MAXUNSIGNED >> 31) >> 31) >= 3 /* 'lua_Unsigned' has at least 64 bits */ #define Rand64 lua_Unsigned +#define SRand64 lua_Integer #endif @@ -319,23 +331,30 @@ static Rand64 nextrand (Rand64 *state) { } -/* must take care to not shift stuff by more than 63 slots */ - - /* ** Convert bits from a random integer into a float in the ** interval [0,1), getting the higher FIG bits from the ** random unsigned integer and converting that to a float. +** Some old Microsoft compilers cannot cast an unsigned long +** to a floating-point number, so we use a signed long as an +** intermediary. When lua_Number is float or double, the shift ensures +** that 'sx' is non negative; in that case, a good compiler will remove +** the correction. 
*/ /* must throw out the extra (64 - FIGS) bits */ #define shift64_FIG (64 - FIGS) -/* to scale to [0, 1), multiply by scaleFIG = 2^(-FIGS) */ +/* 2^(-FIGS) == 2^-1 / 2^(FIGS-1) */ #define scaleFIG (l_mathop(0.5) / ((Rand64)1 << (FIGS - 1))) static lua_Number I2d (Rand64 x) { - return (lua_Number)(trim64(x) >> shift64_FIG) * scaleFIG; + SRand64 sx = (SRand64)(trim64(x) >> shift64_FIG); + lua_Number res = (lua_Number)(sx) * scaleFIG; + if (sx < 0) + res += 1.0; /* correct the two's complement if negative */ + lua_assert(0 <= res && res < 1); + return res; } /* convert a 'Rand64' to a 'lua_Unsigned' */ @@ -471,8 +490,6 @@ static lua_Number I2d (Rand64 x) { #else /* 32 < FIGS <= 64 */ -/* must take care to not shift stuff by more than 31 slots */ - /* 2^(-FIGS) = 1.0 / 2^30 / 2^3 / 2^(FIGS-33) */ #define scaleFIG \ (l_mathop(1.0) / (UONE << 30) / l_mathop(8.0) / (UONE << (FIGS - 33))) From 6baee9ef9d5657ab582c8a4b9f885ec58ed502d0 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Fri, 8 Sep 2023 16:19:21 -0300 Subject: [PATCH 17/19] Removed test for "corrupted binary dump" Test is too non portable. (For instance, it does not work for different number types.) --- lundump.c | 2 +- testes/calls.lua | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/lundump.c b/lundump.c index f1852c35..e8d92a85 100644 --- a/lundump.c +++ b/lundump.c @@ -81,7 +81,7 @@ static size_t loadUnsigned (LoadState *S, size_t limit) { static size_t loadSize (LoadState *S) { - return loadUnsigned(S, ~(size_t)0); + return loadUnsigned(S, MAX_SIZET); } diff --git a/testes/calls.lua b/testes/calls.lua index 664be1b4..a1938584 100644 --- a/testes/calls.lua +++ b/testes/calls.lua @@ -342,20 +342,6 @@ do -- another bug (in 5.4.0) end -if not _port then -- another bug (since 5.2) - -- corrupted binary dump: list of upvalue names is larger than number - -- of upvalues, overflowing the array of upvalues. 
- local code = - "\x1b\x4c\x75\x61\x54\x00\x19\x93\x0d\x0a\x1a\x0a\x04\x08\x08\x78\x56\z - \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x77\x40\x00\x86\x40\z - \x74\x65\x6d\x70\x81\x81\x01\x00\x02\x82\x48\x00\x02\x00\xc7\x00\x01\z - \x00\x80\x80\x80\x82\x00\x00\x80\x81\x82\x78\x80\x82\x81\x86\x40\x74\z - \x65\x6d\x70" - - assert(load(code)) -- segfaults in previous versions -end - - x = string.dump(load("x = 1; return x")) a = assert(load(read1(x), nil, "b")) assert(a() == 1 and _G.x == 1) From 81e4fce5303fdb274bc5572fb168dd766fb8208e Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 26 Oct 2023 16:12:25 -0300 Subject: [PATCH 18/19] Simpler test in 'luaH_getint' The test whether key is inside the array part of a table uses a bit trick to avoid computing the real size of the array part. --- ltable.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/ltable.c b/ltable.c index 3fb575a1..3353c047 100644 --- a/ltable.c +++ b/ltable.c @@ -252,7 +252,7 @@ LUAI_FUNC unsigned int luaH_realasize (const Table *t) { return t->alimit; /* this is the size */ else { unsigned int size = t->alimit; - /* compute the smallest power of 2 not smaller than 'n' */ + /* compute the smallest power of 2 not smaller than 'size' */ size |= (size >> 1); size |= (size >> 2); size |= (size >> 4); @@ -722,22 +722,36 @@ static void luaH_newkey (lua_State *L, Table *t, const TValue *key, /* ** Search function for integers. If integer is inside 'alimit', get it -** directly from the array part. Otherwise, if 'alimit' is not equal to -** the real size of the array, key still can be in the array part. In -** this case, try to avoid a call to 'luaH_realasize' when key is just -** one more than the limit (so that it can be incremented without -** changing the real size of the array). +** directly from the array part. Otherwise, if 'alimit' is not +** the real size of the array, the key still can be in the array part. 
+** In this case, do the "Xmilia trick" to check whether 'key-1' is +** smaller than the real size. +** The trick works as follows: let 'p' be an integer such that +** '2^(p+1) >= alimit > 2^p', or '2^(p+1) > alimit-1 >= 2^p'. +** That is, 2^(p+1) is the real size of the array, and 'p' is the highest +** bit on in 'alimit-1'. What we have to check becomes 'key-1 < 2^(p+1)'. +** We compute '(key-1) & ~(alimit-1)', which we call 'res'; it will +** have the 'p' bit cleared. If the key is outside the array, that is, +** 'key-1 >= 2^(p+1)', then 'res' will have some bit on higher than 'p', +** therefore it will be larger than or equal to 'alimit', and the check +** will fail. If 'key-1 < 2^(p+1)', then 'res' has no bit on higher than +** 'p', and as the bit 'p' itself was cleared, 'res' will be smaller +** than 2^p, therefore smaller than 'alimit', and the check succeeds. +** As special cases, when 'alimit' is 0 the condition is trivially false, +** and when 'alimit' is 1 the condition simplifies to 'key-1 < alimit'. +** If key is 0 or negative, 'res' will have its higher bit on, so that +** it cannot be smaller than alimit. */ const TValue *luaH_getint (Table *t, lua_Integer key) { - if (l_castS2U(key) - 1u < t->alimit) /* 'key' in [1, t->alimit]? */ + lua_Unsigned alimit = t->alimit; + if (l_castS2U(key) - 1u < alimit) /* 'key' in [1, t->alimit]? */ return &t->array[key - 1]; - else if (!limitequalsasize(t) && /* key still may be in the array part? */ - (l_castS2U(key) == t->alimit + 1 || - l_castS2U(key) - 1u < luaH_realasize(t))) { + else if (!isrealasize(t) && /* key still may be in the array part? 
*/ + (((l_castS2U(key) - 1u) & ~(alimit - 1u)) < alimit)) { t->alimit = cast_uint(key); /* probably '#t' is here now */ return &t->array[key - 1]; } - else { + else { /* key is not in the array part; check the hash */ Node *n = hashint(t, key); for (;;) { /* check whether 'key' is somewhere in the chain */ if (keyisinteger(n) && keyival(n) == key) From 7923dbbf72da303ca1cca17efd24725668992f15 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 1 Nov 2023 12:00:54 -0300 Subject: [PATCH 19/19] Bug: Recursion in 'getobjname' can stack overflow 'getobjname' now broken in two, a basic version that handles locals, upvalues, and constants, and a full version, which uses the basic version to handle table accesses (globals and fields). --- ldebug.c | 153 +++++++++++++++++++++++++--------------------- testes/errors.lua | 3 + 2 files changed, 87 insertions(+), 69 deletions(-) diff --git a/ldebug.c b/ldebug.c index 690ac38f..b1f16ac9 100644 --- a/ldebug.c +++ b/ldebug.c @@ -417,40 +417,6 @@ LUA_API int lua_getinfo (lua_State *L, const char *what, lua_Debug *ar) { ** ======================================================= */ -static const char *getobjname (const Proto *p, int lastpc, int reg, - const char **name); - - -/* -** Find a "name" for the constant 'c'. -*/ -static void kname (const Proto *p, int c, const char **name) { - TValue *kvalue = &p->k[c]; - *name = (ttisstring(kvalue)) ? getstr(tsvalue(kvalue)) : "?"; -} - - -/* -** Find a "name" for the register 'c'. -*/ -static void rname (const Proto *p, int pc, int c, const char **name) { - const char *what = getobjname(p, pc, c, name); /* search for 'c' */ - if (!(what && *what == 'c')) /* did not find a constant name? */ - *name = "?"; -} - - -/* -** Find a "name" for a 'C' value in an RK instruction. -*/ -static void rkname (const Proto *p, int pc, Instruction i, const char **name) { - int c = GETARG_C(i); /* key index */ - if (GETARG_k(i)) /* is 'c' a constant? 
*/ - kname(p, c, name); - else /* 'c' is a register */ - rname(p, pc, c, name); -} - static int filterpc (int pc, int jmptarget) { if (pc < jmptarget) /* is code conditional (inside a jump)? */ @@ -509,28 +475,29 @@ static int findsetreg (const Proto *p, int lastpc, int reg) { /* -** Check whether table being indexed by instruction 'i' is the -** environment '_ENV' +** Find a "name" for the constant 'c'. */ -static const char *gxf (const Proto *p, int pc, Instruction i, int isup) { - int t = GETARG_B(i); /* table index */ - const char *name; /* name of indexed variable */ - if (isup) /* is an upvalue? */ - name = upvalname(p, t); - else - getobjname(p, pc, t, &name); - return (name && strcmp(name, LUA_ENV) == 0) ? "global" : "field"; +static const char *kname (const Proto *p, int index, const char **name) { + TValue *kvalue = &p->k[index]; + if (ttisstring(kvalue)) { + *name = getstr(tsvalue(kvalue)); + return "constant"; + } + else { + *name = "?"; + return NULL; + } } -static const char *getobjname (const Proto *p, int lastpc, int reg, - const char **name) { - int pc; - *name = luaF_getlocalname(p, reg + 1, lastpc); +static const char *basicgetobjname (const Proto *p, int *ppc, int reg, + const char **name) { + int pc = *ppc; + *name = luaF_getlocalname(p, reg + 1, pc); if (*name) /* is a local? */ return "local"; /* else try symbolic execution */ - pc = findsetreg(p, lastpc, reg); + *ppc = pc = findsetreg(p, pc, reg); if (pc != -1) { /* could find instruction? 
*/ Instruction i = p->code[pc]; OpCode op = GET_OPCODE(i); @@ -538,18 +505,80 @@ static const char *getobjname (const Proto *p, int lastpc, int reg, case OP_MOVE: { int b = GETARG_B(i); /* move from 'b' to 'a' */ if (b < GETARG_A(i)) - return getobjname(p, pc, b, name); /* get name for 'b' */ + return basicgetobjname(p, ppc, b, name); /* get name for 'b' */ break; } + case OP_GETUPVAL: { + *name = upvalname(p, GETARG_B(i)); + return "upvalue"; + } + case OP_LOADK: return kname(p, GETARG_Bx(i), name); + case OP_LOADKX: return kname(p, GETARG_Ax(p->code[pc + 1]), name); + default: break; + } + } + return NULL; /* could not find reasonable name */ +} + + +/* +** Find a "name" for the register 'c'. +*/ +static void rname (const Proto *p, int pc, int c, const char **name) { + const char *what = basicgetobjname(p, &pc, c, name); /* search for 'c' */ + if (!(what && *what == 'c')) /* did not find a constant name? */ + *name = "?"; +} + + +/* +** Find a "name" for a 'C' value in an RK instruction. +*/ +static void rkname (const Proto *p, int pc, Instruction i, const char **name) { + int c = GETARG_C(i); /* key index */ + if (GETARG_k(i)) /* is 'c' a constant? */ + kname(p, c, name); + else /* 'c' is a register */ + rname(p, pc, c, name); +} + + +/* +** Check whether table being indexed by instruction 'i' is the +** environment '_ENV' +*/ +static const char *isEnv (const Proto *p, int pc, Instruction i, int isup) { + int t = GETARG_B(i); /* table index */ + const char *name; /* name of indexed variable */ + if (isup) /* is 't' an upvalue? */ + name = upvalname(p, t); + else /* 't' is a register */ + basicgetobjname(p, &pc, t, &name); + return (name && strcmp(name, LUA_ENV) == 0) ? 
"global" : "field"; +} + + +/* +** Extend 'basicgetobjname' to handle table accesses +*/ +static const char *getobjname (const Proto *p, int lastpc, int reg, + const char **name) { + const char *kind = basicgetobjname(p, &lastpc, reg, name); + if (kind != NULL) + return kind; + else if (lastpc != -1) { /* could find instruction? */ + Instruction i = p->code[lastpc]; + OpCode op = GET_OPCODE(i); + switch (op) { case OP_GETTABUP: { int k = GETARG_C(i); /* key index */ kname(p, k, name); - return gxf(p, pc, i, 1); + return isEnv(p, lastpc, i, 1); } case OP_GETTABLE: { int k = GETARG_C(i); /* key index */ - rname(p, pc, k, name); - return gxf(p, pc, i, 0); + rname(p, lastpc, k, name); + return isEnv(p, lastpc, i, 0); } case OP_GETI: { *name = "integer index"; @@ -558,24 +587,10 @@ static const char *getobjname (const Proto *p, int lastpc, int reg, case OP_GETFIELD: { int k = GETARG_C(i); /* key index */ kname(p, k, name); - return gxf(p, pc, i, 0); - } - case OP_GETUPVAL: { - *name = upvalname(p, GETARG_B(i)); - return "upvalue"; - } - case OP_LOADK: - case OP_LOADKX: { - int b = (op == OP_LOADK) ? GETARG_Bx(i) - : GETARG_Ax(p->code[pc + 1]); - if (ttisstring(&p->k[b])) { - *name = getstr(tsvalue(&p->k[b])); - return "constant"; - } - break; + return isEnv(p, lastpc, i, 0); } case OP_SELF: { - rkname(p, pc, i, name); + rkname(p, lastpc, i, name); return "method"; } default: break; /* go through to return NULL */ diff --git a/testes/errors.lua b/testes/errors.lua index b777a329..01cfe906 100644 --- a/testes/errors.lua +++ b/testes/errors.lua @@ -121,6 +121,9 @@ assert(not string.find(doit"aaa={13}; local bbbb=1; aaa[bbbb](3)", "'bbbb'")) checkmessage("aaa={13}; local bbbb=1; aaa[bbbb](3)", "number") checkmessage("aaa=(1)..{}", "a table value") +-- bug in 5.4.6 +checkmessage("a = {_ENV = {}}; print(a._ENV.x + 1)", "field 'x'") + _G.aaa, _G.bbbb = nil -- calls