From a4b96ce9a3305ae3585c0bb143fa7342c140f20b Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Wed, 25 Jan 2012 19:05:40 -0200 Subject: [PATCH] first implementation of long strings --- lgc.c | 26 +++++++++------ llimits.h | 11 ++++++- lobject.h | 15 +++++++-- lstate.h | 5 +-- lstring.c | 96 ++++++++++++++++++++++++++++++++++++++++++++----------- lstring.h | 9 ++++-- ltable.c | 21 ++++++++---- ltests.c | 5 +-- lvm.c | 3 +- 9 files changed, 144 insertions(+), 47 deletions(-) diff --git a/lgc.c b/lgc.c index e3fdc9fc..e9dac6c5 100644 --- a/lgc.c +++ b/lgc.c @@ -65,7 +65,11 @@ #define white2gray(x) resetbits(gch(x)->marked, WHITEBITS) #define black2gray(x) resetbit(gch(x)->marked, BLACKBIT) -#define stringmark(s) ((void)((s) && resetbits((s)->tsv.marked, WHITEBITS))) +/* +** dirty trick: we know that 'reallymarkobject' does not use 'g' when +** object is a string +*/ +#define stringmark(s) markobject(NULL, s) #define isfinalized(x) testbit(gch(x)->marked, FINALIZEDBIT) @@ -240,18 +244,18 @@ GCObject *luaC_newobj (lua_State *L, int tt, size_t sz, GCObject **list, /* -** mark an object. Userdata and closed upvalues are visited and turned -** black here. Strings remain gray (it is the same as making them -** black). Other objects are marked gray and added to appropriate list -** to be visited (and turned black) later. (Open upvalues are already -** linked in 'headuv' list.) +** mark an object. Userdata, strings, and closed upvalues are visited +** and turned black here. Other objects are marked gray and added +** to appropriate list to be visited (and turned black) later. (Open +** upvalues are already linked in 'headuv' list.) */ static void reallymarkobject (global_State *g, GCObject *o) { - lua_assert(iswhite(o) && !isdead(g, o)); white2gray(o); switch (gch(o)->tt) { - case LUA_TSTRING: { - return; /* for strings, gray is as good as black */ + case LUA_TSHRSTR: + case LUA_TLNGSTR: { + gray2black(o); + return; /* nothing else to mark */ } case LUA_TUSERDATA: { Table *mt = gco2u(o)->metatable; @@ -663,8 +667,10 @@ static void freeobj (lua_State *L, GCObject *o) { case LUA_TTABLE: luaH_free(L, gco2t(o)); break; case LUA_TTHREAD: luaE_freethread(L, gco2th(o)); break; case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break; - case LUA_TSTRING: { + case LUA_TSHRSTR: G(L)->strt.nuse--; + /* go through */ + case LUA_TLNGSTR: { luaM_freemem(L, o, sizestring(gco2ts(o))); break; } diff --git a/llimits.h b/llimits.h index 7cf2048b..5466a6b6 100644 --- a/llimits.h +++ b/llimits.h @@ -1,5 +1,5 @@ /* -** $Id: llimits.h,v 1.94 2011/11/29 15:39:48 roberto Exp roberto $ +** $Id: llimits.h,v 1.95 2011/12/06 16:58:36 roberto Exp roberto $ ** Limits, basic types, and some other `installation-dependent' definitions ** See Copyright Notice in lua.h */ @@ -124,6 +124,15 @@ typedef LUAI_UACNUMBER l_uacNumber; #define MAXUPVAL UCHAR_MAX +/* +** maximum length for short strings, that is, strings that are +** internalized. (Cannot be smaller than reserved words or tags +** for metamethods; #"function" = 8, #"__newindex" = 10; should +** not be larger than 255, to allow future changes) +*/ +#define LUA_MAXSHORTLEN (8 * sizeof(void*)) + + /* ** type for virtual-machine instructions ** must be an unsigned with (at least) 4 bytes (see details in lopcodes.h) diff --git a/lobject.h b/lobject.h index a5ace499..568aef3f 100644 --- a/lobject.h +++ b/lobject.h @@ -52,6 +52,12 @@ #define LUA_TCCL (LUA_TFUNCTION | (2 << 4)) /* C closure */ +/* +** LUA_TSTRING variants */ +#define LUA_TSHRSTR (LUA_TSTRING | (0 << 4)) /* short strings */ +#define LUA_TLNGSTR (LUA_TSTRING | (1 << 4)) /* long strings */ + + /* Bit mark for collectable types */ #define BIT_ISCOLLECTABLE (1 << 6) @@ -129,7 +135,9 @@ typedef struct lua_TValue TValue; #define ttisnil(o) checktag((o), LUA_TNIL) #define ttisboolean(o) checktag((o), LUA_TBOOLEAN) #define ttislightuserdata(o) checktag((o), LUA_TLIGHTUSERDATA) -#define ttisstring(o) checktag((o), ctb(LUA_TSTRING)) +#define ttisstring(o) checktype((o), LUA_TSTRING) +#define ttisshrstring(o) checktag((o), ctb(LUA_TSHRSTR)) +#define ttislngstring(o) checktag((o), ctb(LUA_TLNGSTR)) #define ttistable(o) checktag((o), ctb(LUA_TTABLE)) #define ttisfunction(o) checktype(o, LUA_TFUNCTION) #define ttisclosure(o) ((rttype(o) & 0x1F) == LUA_TFUNCTION) @@ -199,7 +207,8 @@ typedef struct lua_TValue TValue; #define setsvalue(L,obj,x) \ { TValue *io=(obj); \ - val_(io).gc=cast(GCObject *, (x)); settt_(io, ctb(LUA_TSTRING)); \ + TString *x_ = (x); \ + val_(io).gc=cast(GCObject *, x_); settt_(io, ctb(x_->tsv.tt)); \ checkliveness(G(L),io); } #define setuvalue(L,obj,x) \ @@ -409,7 +418,7 @@ typedef union TString { L_Umaxalign dummy; /* ensures maximum alignment for strings */ struct { CommonHeader; - lu_byte extra; /* reserved words for strings */ + lu_byte extra; /* reserved words for short strings; "has hash" for longs */ unsigned int hash; size_t len; /* number of characters in string */ } tsv; diff --git a/lstate.h b/lstate.h index cff8e202..38a7fb36 100644 --- a/lstate.h +++ b/lstate.h @@ -1,5 +1,5 @@ /* -** $Id: lstate.h,v 2.74 2011/09/30 12:45:07 roberto Exp roberto $ +** $Id: lstate.h,v 2.75 2012/01/20 22:05:50 roberto Exp roberto $ ** Global State ** See Copyright Notice in lua.h */ @@ -193,7 +193,8 @@ union GCObject { #define gch(o) (&(o)->gch) /* macros to convert a GCObject into a specific value */ -#define rawgco2ts(o) check_exp((o)->gch.tt == LUA_TSTRING, &((o)->ts)) +#define rawgco2ts(o) \ + check_exp(novariant((o)->gch.tt) == LUA_TSTRING, &((o)->ts)) #define gco2ts(o) (&rawgco2ts(o)->tsv) #define rawgco2u(o) check_exp((o)->gch.tt == LUA_TUSERDATA, &((o)->u)) #define gco2u(o) (&rawgco2u(o)->uv) diff --git a/lstring.c b/lstring.c index 95a8a6f1..75861cf4 100644 --- a/lstring.c +++ b/lstring.c @@ -18,7 +18,37 @@ #include "lstring.h" +/* +** equality for long strings +*/ +int luaS_eqlngstr (TString *a, TString *b) { + size_t len = a->tsv.len; + lua_assert(a->tsv.tt == LUA_TLNGSTR && b->tsv.tt == LUA_TLNGSTR); + return (len == b->tsv.len) && (memcmp(getstr(a), getstr(b), len) == 0); +} + +/* +** equality for strings +*/ +int luaS_eqstr (TString *a, TString *b) { + return (a->tsv.tt == b->tsv.tt) && + (a->tsv.tt == LUA_TSHRSTR ? eqshrstr(a, b) : luaS_eqlngstr(a, b)); +} + + +unsigned int luaS_hash (const char *str, size_t l) { + unsigned int h = cast(unsigned int, l); /* seed */ + size_t l1; + for (l1 = 0; l1 < l; l1++) + h = h ^ ((h<<5) + (h>>2) + cast_byte(str[l1])); + return h; +} + + +/* +** resizes the string table +*/ void luaS_resize (lua_State *L, int newsize) { int i; stringtable *tb = &G(L)->strt; @@ -50,36 +80,47 @@ void luaS_resize (lua_State *L, int newsize) { } -static TString *newlstr (lua_State *L, const char *str, size_t l, - unsigned int h) { - size_t totalsize; /* total size of TString object */ - GCObject **list; /* (pointer to) list where it will be inserted */ +/* +** creates a new string object +*/ +static TString *createstrobj (lua_State *L, const char *str, size_t l, + int tag, unsigned int h, GCObject **list) { TString *ts; - stringtable *tb = &G(L)->strt; - if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char)) - luaM_toobig(L); - if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2) - luaS_resize(L, tb->size*2); /* too crowded */ + size_t totalsize; /* total size of TString object */ totalsize = sizeof(TString) + ((l + 1) * sizeof(char)); - list = &tb->hash[lmod(h, tb->size)]; - ts = &luaC_newobj(L, LUA_TSTRING, totalsize, list, 0)->ts; + ts = &luaC_newobj(L, tag, totalsize, list, 0)->ts; ts->tsv.len = l; ts->tsv.hash = h; ts->tsv.extra = 0; memcpy(ts+1, str, l*sizeof(char)); ((char *)(ts+1))[l] = '\0'; /* ending 0 */ - tb->nuse++; return ts; } -TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { +/* +** creates a new short string, inserting it into string table +*/ +static TString *newshrstr (lua_State *L, const char *str, size_t l, + unsigned int h) { + GCObject **list; /* (pointer to) list where it will be inserted */ + stringtable *tb = &G(L)->strt; + TString *s; + if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2) + luaS_resize(L, tb->size*2); /* too crowded */ + list = &tb->hash[lmod(h, tb->size)]; + s = createstrobj(L, str, l, LUA_TSHRSTR, h, list); + tb->nuse++; + return s; +} + + +/* +** checks whether short string exists and reuses it or creates a new one +*/ +static TString *internshrstr (lua_State *L, const char *str, size_t l) { GCObject *o; - unsigned int h = cast(unsigned int, l); /* seed */ - size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */ - size_t l1; - for (l1=l; l1>=step; l1-=step) /* compute hash */ - h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); + unsigned int h = luaS_hash(str, l); for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)]; o != NULL; o = gch(o)->next) { @@ -92,10 +133,27 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { return ts; } } - return newlstr(L, str, l, h); /* not found; create a new string */ + return newshrstr(L, str, l, h); /* not found; create a new string */ } +/* +** new string (with explicit length) +*/ +TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { + if (l <= LUA_MAXSHORTLEN) /* short string? */ + return internshrstr(L, str, l); + else { + if (l + 1 > (MAX_SIZET - sizeof(TString))/sizeof(char)) + luaM_toobig(L); + return createstrobj(L, str, l, LUA_TLNGSTR, 0, NULL); + } +} + + +/* +** new zero-terminated string +*/ TString *luaS_new (lua_State *L, const char *str) { return luaS_newlstr(L, str, strlen(str)); } diff --git a/lstring.h b/lstring.h index 462bcc87..6cfcbfcc 100644 --- a/lstring.h +++ b/lstring.h @@ -25,15 +25,18 @@ /* ** test whether a string is a reserved word */ -#define isreserved(s) ((s)->tsv.extra > 0) +#define isreserved(s) ((s)->tsv.tt == LUA_TSHRSTR && (s)->tsv.extra > 0) /* -** equality for strings, which are always internalized +** equality for short strings, which are always internalized */ -#define luaS_eqstr(a,b) ((a) == (b)) +#define eqshrstr(a,b) check_exp((a)->tsv.tt == LUA_TSHRSTR, (a) == (b)) +LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l); +LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b); +LUAI_FUNC int luaS_eqstr (TString *a, TString *b); LUAI_FUNC void luaS_resize (lua_State *L, int newsize); LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e); LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l); diff --git a/ltable.c b/ltable.c index ce740ebe..3683fe60 100644 --- a/ltable.c +++ b/ltable.c @@ -50,7 +50,7 @@ #define hashpow2(t,n) (gnode(t, lmod((n), sizenode(t)))) -#define hashstr(t,str) hashpow2(t, (str)->tsv.hash) +#define hashstr(t,str) hashpow2(t, (str)->tsv.hash) #define hashboolean(t,p) hashpow2(t, p) @@ -98,7 +98,15 @@ static Node *mainposition (const Table *t, const TValue *key) { switch (ttype(key)) { case LUA_TNUMBER: return hashnum(t, nvalue(key)); - case LUA_TSTRING: + case LUA_TLNGSTR: { + TString *s = rawtsvalue(key); + if (s->tsv.extra == 0) { /* no hash? */ + s->tsv.hash = luaS_hash(getstr(s), s->tsv.len); + s->tsv.extra = 1; /* now it has its hash */ + } + return hashstr(t, rawtsvalue(key)); + } + case LUA_TSHRSTR: return hashstr(t, rawtsvalue(key)); case LUA_TBOOLEAN: return hashboolean(t, bvalue(key)); @@ -453,12 +461,13 @@ const TValue *luaH_getint (Table *t, int key) { /* -** search function for strings +** search function for short strings */ const TValue *luaH_getstr (Table *t, TString *key) { Node *n = hashstr(t, key); + lua_assert(key->tsv.tt == LUA_TSHRSTR); do { /* check whether `key' is somewhere in the chain */ - if (ttisstring(gkey(n)) && luaS_eqstr(rawtsvalue(gkey(n)), key)) + if (ttisshrstring(gkey(n)) && eqshrstr(rawtsvalue(gkey(n)), key)) return gval(n); /* that's it */ else n = gnext(n); } while (n); @@ -470,9 +479,9 @@ const TValue *luaH_getstr (Table *t, TString *key) { ** main search function */ const TValue *luaH_get (Table *t, const TValue *key) { - switch (ttypenv(key)) { + switch (ttype(key)) { case LUA_TNIL: return luaO_nilobject; - case LUA_TSTRING: return luaH_getstr(t, rawtsvalue(key)); + case LUA_TSHRSTR: return luaH_getstr(t, rawtsvalue(key)); case LUA_TNUMBER: { int k; lua_Number n = nvalue(key); diff --git a/ltests.c b/ltests.c index 7b4c0e17..4b65942b 100644 --- a/ltests.c +++ b/ltests.c @@ -1,5 +1,5 @@ /* -** $Id: ltests.c,v 2.124 2011/11/09 19:08:07 roberto Exp roberto $ +** $Id: ltests.c,v 2.125 2012/01/20 22:05:50 roberto Exp roberto $ ** Internal Module for Debugging of the Lua Implementation ** See Copyright Notice in lua.h */ @@ -360,7 +360,8 @@ static void checkobject (global_State *g, GCObject *o) { checkproto(g, gco2p(o)); break; } - case LUA_TSTRING: break; + case LUA_TSHRSTR: + case LUA_TLNGSTR: break; default: lua_assert(0); } } diff --git a/lvm.c b/lvm.c index fbc7c45a..3b7e24b7 100644 --- a/lvm.c +++ b/lvm.c @@ -258,7 +258,8 @@ int luaV_equalobj_ (lua_State *L, const TValue *t1, const TValue *t2) { case LUA_TBOOLEAN: return bvalue(t1) == bvalue(t2); /* true must be 1 !! */ case LUA_TLIGHTUSERDATA: return pvalue(t1) == pvalue(t2); case LUA_TLCF: return fvalue(t1) == fvalue(t2); - case LUA_TSTRING: return luaS_eqstr(rawtsvalue(t1), rawtsvalue(t2)); + case LUA_TSHRSTR: return eqshrstr(rawtsvalue(t1), rawtsvalue(t2)); + case LUA_TLNGSTR: return luaS_eqlngstr(rawtsvalue(t1), rawtsvalue(t2)); case LUA_TUSERDATA: { if (uvalue(t1) == uvalue(t2)) return 1; else if (L == NULL) return 0;