first implementation of long strings

This commit is contained in:
Roberto Ierusalimschy 2012-01-25 19:05:40 -02:00
parent 291f564485
commit a4b96ce9a3
9 changed files with 144 additions and 47 deletions

26
lgc.c
View File

@ -65,7 +65,11 @@
#define white2gray(x) resetbits(gch(x)->marked, WHITEBITS)
#define black2gray(x) resetbit(gch(x)->marked, BLACKBIT)
#define stringmark(s) ((void)((s) && resetbits((s)->tsv.marked, WHITEBITS)))
/*
** dirty trick: we know that 'reallymarkobject' does not use 'g' when
** object is a string
*/
#define stringmark(s) markobject(NULL, s)
#define isfinalized(x) testbit(gch(x)->marked, FINALIZEDBIT)
@ -240,18 +244,18 @@ GCObject *luaC_newobj (lua_State *L, int tt, size_t sz, GCObject **list,
/*
** mark an object. Userdata and closed upvalues are visited and turned
** black here. Strings remain gray (it is the same as making them
** black). Other objects are marked gray and added to appropriate list
** to be visited (and turned black) later. (Open upvalues are already
** linked in 'headuv' list.)
** mark an object. Userdata, strings, and closed upvalues are visited
** and turned black here. Other objects are marked gray and added
** to appropriate list to be visited (and turned black) later. (Open
** upvalues are already linked in 'headuv' list.)
*/
static void reallymarkobject (global_State *g, GCObject *o) {
lua_assert(iswhite(o) && !isdead(g, o));
white2gray(o);
switch (gch(o)->tt) {
case LUA_TSTRING: {
return; /* for strings, gray is as good as black */
case LUA_TSHRSTR:
case LUA_TLNGSTR: {
gray2black(o);
return; /* nothing else to mark */
}
case LUA_TUSERDATA: {
Table *mt = gco2u(o)->metatable;
@ -663,8 +667,10 @@ static void freeobj (lua_State *L, GCObject *o) {
case LUA_TTABLE: luaH_free(L, gco2t(o)); break;
case LUA_TTHREAD: luaE_freethread(L, gco2th(o)); break;
case LUA_TUSERDATA: luaM_freemem(L, o, sizeudata(gco2u(o))); break;
case LUA_TSTRING: {
case LUA_TSHRSTR:
G(L)->strt.nuse--;
/* fall through */
case LUA_TLNGSTR: {
luaM_freemem(L, o, sizestring(gco2ts(o)));
break;
}

View File

@ -1,5 +1,5 @@
/*
** $Id: llimits.h,v 1.94 2011/11/29 15:39:48 roberto Exp roberto $
** $Id: llimits.h,v 1.95 2011/12/06 16:58:36 roberto Exp roberto $
** Limits, basic types, and some other `installation-dependent' definitions
** See Copyright Notice in lua.h
*/
@ -124,6 +124,15 @@ typedef LUAI_UACNUMBER l_uacNumber;
#define MAXUPVAL UCHAR_MAX
/*
** Maximum length for short strings, that is, strings that are
** internalized. (It cannot be smaller than the longest reserved word
** or metamethod name: #"function" = 8, #"__newindex" = 10. It should
** not be larger than 255, to allow future changes.)
*/
#define LUA_MAXSHORTLEN (8 * sizeof(void*))
/*
** type for virtual-machine instructions
** must be an unsigned with (at least) 4 bytes (see details in lopcodes.h)

View File

@ -52,6 +52,12 @@
#define LUA_TCCL (LUA_TFUNCTION | (2 << 4)) /* C closure */
/*
** LUA_TSTRING variants */
#define LUA_TSHRSTR (LUA_TSTRING | (0 << 4)) /* short strings */
#define LUA_TLNGSTR (LUA_TSTRING | (1 << 4)) /* long strings */
/* Bit mark for collectable types */
#define BIT_ISCOLLECTABLE (1 << 6)
@ -129,7 +135,9 @@ typedef struct lua_TValue TValue;
#define ttisnil(o) checktag((o), LUA_TNIL)
#define ttisboolean(o) checktag((o), LUA_TBOOLEAN)
#define ttislightuserdata(o) checktag((o), LUA_TLIGHTUSERDATA)
#define ttisstring(o) checktag((o), ctb(LUA_TSTRING))
#define ttisstring(o) checktype((o), LUA_TSTRING)
#define ttisshrstring(o) checktag((o), ctb(LUA_TSHRSTR))
#define ttislngstring(o) checktag((o), ctb(LUA_TLNGSTR))
#define ttistable(o) checktag((o), ctb(LUA_TTABLE))
#define ttisfunction(o) checktype(o, LUA_TFUNCTION)
#define ttisclosure(o) ((rttype(o) & 0x1F) == LUA_TFUNCTION)
@ -199,7 +207,8 @@ typedef struct lua_TValue TValue;
#define setsvalue(L,obj,x) \
{ TValue *io=(obj); \
val_(io).gc=cast(GCObject *, (x)); settt_(io, ctb(LUA_TSTRING)); \
TString *x_ = (x); \
val_(io).gc=cast(GCObject *, x_); settt_(io, ctb(x_->tsv.tt)); \
checkliveness(G(L),io); }
#define setuvalue(L,obj,x) \
@ -409,7 +418,7 @@ typedef union TString {
L_Umaxalign dummy; /* ensures maximum alignment for strings */
struct {
CommonHeader;
lu_byte extra; /* reserved words for strings */
lu_byte extra; /* reserved words for short strings; "has hash" for longs */
unsigned int hash;
size_t len; /* number of characters in string */
} tsv;

View File

@ -1,5 +1,5 @@
/*
** $Id: lstate.h,v 2.74 2011/09/30 12:45:07 roberto Exp roberto $
** $Id: lstate.h,v 2.75 2012/01/20 22:05:50 roberto Exp roberto $
** Global State
** See Copyright Notice in lua.h
*/
@ -193,7 +193,8 @@ union GCObject {
#define gch(o) (&(o)->gch)
/* macros to convert a GCObject into a specific value */
#define rawgco2ts(o) check_exp((o)->gch.tt == LUA_TSTRING, &((o)->ts))
#define rawgco2ts(o) \
check_exp(novariant((o)->gch.tt) == LUA_TSTRING, &((o)->ts))
#define gco2ts(o) (&rawgco2ts(o)->tsv)
#define rawgco2u(o) check_exp((o)->gch.tt == LUA_TUSERDATA, &((o)->u))
#define gco2u(o) (&rawgco2u(o)->uv)

View File

@ -18,7 +18,37 @@
#include "lstring.h"
/*
** equality for long strings: true when 'a' and 'b' are the same object
** or hold the same bytes. (Both arguments must be long strings.)
*/
int luaS_eqlngstr (TString *a, TString *b) {
  size_t len = a->tsv.len;
  lua_assert(a->tsv.tt == LUA_TLNGSTR && b->tsv.tt == LUA_TLNGSTR);
  return (a == b) ||  /* same instance (skips the memcmp) or... */
    ((len == b->tsv.len) &&  /* equal length and ... */
     (memcmp(getstr(a), getstr(b), len) == 0));  /* equal contents */
}
/*
** equality for strings of any variant: strings of different variants
** are never equal; short strings are compared with 'eqshrstr' and
** long strings with 'luaS_eqlngstr'
*/
int luaS_eqstr (TString *a, TString *b) {
  if (a->tsv.tt != b->tsv.tt)
    return 0;  /* different variants */
  if (a->tsv.tt == LUA_TSHRSTR)
    return eqshrstr(a, b);
  return luaS_eqlngstr(a, b);
}
/*
** computes the hash of the 'l' bytes starting at 'str'; the length is
** the seed and every byte, in order, is folded into the result
*/
unsigned int luaS_hash (const char *str, size_t l) {
  unsigned int hash = cast(unsigned int, l);  /* seed */
  size_t i = 0;
  while (i < l) {
    hash ^= (hash<<5) + (hash>>2) + cast_byte(str[i]);
    i++;
  }
  return hash;
}
/*
** resizes the string table
*/
void luaS_resize (lua_State *L, int newsize) {
int i;
stringtable *tb = &G(L)->strt;
@ -50,36 +80,47 @@ void luaS_resize (lua_State *L, int newsize) {
}
static TString *newlstr (lua_State *L, const char *str, size_t l,
unsigned int h) {
size_t totalsize; /* total size of TString object */
GCObject **list; /* (pointer to) list where it will be inserted */
/*
** creates a new string object
*/
static TString *createstrobj (lua_State *L, const char *str, size_t l,
int tag, unsigned int h, GCObject **list) {
TString *ts;
stringtable *tb = &G(L)->strt;
if (l+1 > (MAX_SIZET - sizeof(TString))/sizeof(char))
luaM_toobig(L);
if (tb->nuse >= cast(lu_int32, tb->size) && tb->size <= MAX_INT/2)
luaS_resize(L, tb->size*2); /* too crowded */
size_t totalsize; /* total size of TString object */
totalsize = sizeof(TString) + ((l + 1) * sizeof(char));
list = &tb->hash[lmod(h, tb->size)];
ts = &luaC_newobj(L, LUA_TSTRING, totalsize, list, 0)->ts;
ts = &luaC_newobj(L, tag, totalsize, list, 0)->ts;
ts->tsv.len = l;
ts->tsv.hash = h;
ts->tsv.extra = 0;
memcpy(ts+1, str, l*sizeof(char));
((char *)(ts+1))[l] = '\0'; /* ending 0 */
tb->nuse++;
return ts;
}
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
/*
** creates a new short string with hash 'h' and links it into the
** string table, first doubling the table if it is too crowded
*/
static TString *newshrstr (lua_State *L, const char *str, size_t l,
                           unsigned int h) {
  stringtable *strt = &G(L)->strt;
  GCObject **bucket;  /* (pointer to) list where the string goes */
  TString *ts;
  if (strt->nuse >= cast(lu_int32, strt->size) && strt->size <= MAX_INT/2)
    luaS_resize(L, strt->size*2);  /* too crowded */
  /* pick the bucket only after a possible resize changed 'size' */
  bucket = &strt->hash[lmod(h, strt->size)];
  ts = createstrobj(L, str, l, LUA_TSHRSTR, h, bucket);
  strt->nuse++;
  return ts;
}
/*
** checks whether short string exists and reuses it or creates a new one
*/
static TString *internshrstr (lua_State *L, const char *str, size_t l) {
GCObject *o;
unsigned int h = cast(unsigned int, l); /* seed */
size_t step = (l>>5)+1; /* if string is too long, don't hash all its chars */
size_t l1;
for (l1=l; l1>=step; l1-=step) /* compute hash */
h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
unsigned int h = luaS_hash(str, l);
for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
o != NULL;
o = gch(o)->next) {
@ -92,10 +133,27 @@ TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
return ts;
}
}
return newlstr(L, str, l, h); /* not found; create a new string */
return newshrstr(L, str, l, h); /* not found; create a new string */
}
/*
** new string (with explicit length): strings of up to LUA_MAXSHORTLEN
** bytes go through 'internshrstr'; longer ones become a new
** LUA_TLNGSTR object, created with hash 0 and no list
*/
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
  if (l <= LUA_MAXSHORTLEN)  /* short string? */
    return internshrstr(L, str, l);
  /*
  ** 'l >= limit' rejects exactly the sizes that 'l + 1 > limit' does,
  ** but cannot wrap around to 0 when 'l' is the largest size_t value
  */
  if (l >= (MAX_SIZET - sizeof(TString))/sizeof(char))
    luaM_toobig(L);
  return createstrobj(L, str, l, LUA_TLNGSTR, 0, NULL);
}
/*
** new string from a zero-terminated C string; the length is taken
** with 'strlen' and the work is delegated to 'luaS_newlstr'
*/
TString *luaS_new (lua_State *L, const char *str) {
  size_t len = strlen(str);
  return luaS_newlstr(L, str, len);
}

View File

@ -25,15 +25,18 @@
/*
** test whether a string is a reserved word
*/
#define isreserved(s) ((s)->tsv.extra > 0)
#define isreserved(s) ((s)->tsv.tt == LUA_TSHRSTR && (s)->tsv.extra > 0)
/*
** equality for strings, which are always internalized
** equality for short strings, which are always internalized
*/
#define luaS_eqstr(a,b) ((a) == (b))
#define eqshrstr(a,b) check_exp((a)->tsv.tt == LUA_TSHRSTR, (a) == (b))
LUAI_FUNC unsigned int luaS_hash (const char *str, size_t l);
LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, Table *e);
LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);

View File

@ -50,7 +50,7 @@
#define hashpow2(t,n) (gnode(t, lmod((n), sizenode(t))))
#define hashstr(t,str) hashpow2(t, (str)->tsv.hash)
#define hashstr(t,str) hashpow2(t, (str)->tsv.hash)
#define hashboolean(t,p) hashpow2(t, p)
@ -98,7 +98,15 @@ static Node *mainposition (const Table *t, const TValue *key) {
switch (ttype(key)) {
case LUA_TNUMBER:
return hashnum(t, nvalue(key));
case LUA_TSTRING:
case LUA_TLNGSTR: {
TString *s = rawtsvalue(key);
if (s->tsv.extra == 0) { /* no hash? */
s->tsv.hash = luaS_hash(getstr(s), s->tsv.len);
s->tsv.extra = 1; /* now it has its hash */
}
return hashstr(t, rawtsvalue(key));
}
case LUA_TSHRSTR:
return hashstr(t, rawtsvalue(key));
case LUA_TBOOLEAN:
return hashboolean(t, bvalue(key));
@ -453,12 +461,13 @@ const TValue *luaH_getint (Table *t, int key) {
/*
** search function for strings
** search function for short strings
*/
const TValue *luaH_getstr (Table *t, TString *key) {
Node *n = hashstr(t, key);
lua_assert(key->tsv.tt == LUA_TSHRSTR);
do { /* check whether `key' is somewhere in the chain */
if (ttisstring(gkey(n)) && luaS_eqstr(rawtsvalue(gkey(n)), key))
if (ttisshrstring(gkey(n)) && eqshrstr(rawtsvalue(gkey(n)), key))
return gval(n); /* that's it */
else n = gnext(n);
} while (n);
@ -470,9 +479,9 @@ const TValue *luaH_getstr (Table *t, TString *key) {
** main search function
*/
const TValue *luaH_get (Table *t, const TValue *key) {
switch (ttypenv(key)) {
switch (ttype(key)) {
case LUA_TNIL: return luaO_nilobject;
case LUA_TSTRING: return luaH_getstr(t, rawtsvalue(key));
case LUA_TSHRSTR: return luaH_getstr(t, rawtsvalue(key));
case LUA_TNUMBER: {
int k;
lua_Number n = nvalue(key);

View File

@ -1,5 +1,5 @@
/*
** $Id: ltests.c,v 2.124 2011/11/09 19:08:07 roberto Exp roberto $
** $Id: ltests.c,v 2.125 2012/01/20 22:05:50 roberto Exp roberto $
** Internal Module for Debugging of the Lua Implementation
** See Copyright Notice in lua.h
*/
@ -360,7 +360,8 @@ static void checkobject (global_State *g, GCObject *o) {
checkproto(g, gco2p(o));
break;
}
case LUA_TSTRING: break;
case LUA_TSHRSTR:
case LUA_TLNGSTR: break;
default: lua_assert(0);
}
}

3
lvm.c
View File

@ -258,7 +258,8 @@ int luaV_equalobj_ (lua_State *L, const TValue *t1, const TValue *t2) {
case LUA_TBOOLEAN: return bvalue(t1) == bvalue(t2); /* true must be 1 !! */
case LUA_TLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
case LUA_TLCF: return fvalue(t1) == fvalue(t2);
case LUA_TSTRING: return luaS_eqstr(rawtsvalue(t1), rawtsvalue(t2));
case LUA_TSHRSTR: return eqshrstr(rawtsvalue(t1), rawtsvalue(t2));
case LUA_TLNGSTR: return luaS_eqlngstr(rawtsvalue(t1), rawtsvalue(t2));
case LUA_TUSERDATA: {
if (uvalue(t1) == uvalue(t2)) return 1;
else if (L == NULL) return 0;