lua/ltable.c
Roberto Ierusalimschy 853311e5b1 Table rehash can resize only the hash part
If there are no integer keys outside the array part, there is no
reason to resize it, saving the time to count its elements.  Moreover,
assignments to non-integer keys will not collapse a table created with
'table.create'.
2024-10-28 10:54:36 -03:00

1298 lines
41 KiB
C

/*
** $Id: ltable.c $
** Lua tables (hash)
** See Copyright Notice in lua.h
*/
#define ltable_c
#define LUA_CORE
#include "lprefix.h"
/*
** Implementation of tables (aka arrays, objects, or hash tables).
** Tables keep their elements in two parts: an array part and a hash part.
** Non-negative integer keys are all candidates to be kept in the array
** part. The actual size of the array is the largest 'n' such that
** more than half the slots between 1 and n are in use.
** Hash uses a mix of chained scatter table with Brent's variation.
** A main invariant of these tables is that, if an element is not
** in its main position (i.e. the 'original' position that its hash gives
** to it), then the colliding element is in its own main position.
** Hence even when the load factor reaches 100%, performance remains good.
*/
#include <math.h>
#include <limits.h>
#include <string.h>
#include "lua.h"
#include "ldebug.h"
#include "ldo.h"
#include "lgc.h"
#include "lmem.h"
#include "lobject.h"
#include "lstate.h"
#include "lstring.h"
#include "ltable.h"
#include "lvm.h"
/*
** Only tables with hash parts larger than 2^LIMFORLAST have a 'lastfree'
** field that optimizes finding a free slot. That field is stored just
** before the array of nodes, in the same block. Smaller tables do a
** complete search when looking for a free slot.
*/
#define LIMFORLAST 2 /* log2 of real limit */
/*
** The union 'Limbox' stores 'lastfree' and ensures that what follows it
** is properly aligned to store a Node.
** 'Limbox_aux' exists only to compute that alignment: the offset of a
** Node placed right after a 'Node *' is the number of bytes the union
** must occupy ('padding').
*/
typedef struct { Node *dummy; Node follows_pNode; } Limbox_aux;
typedef union {
  Node *lastfree;
  char padding[offsetof(Limbox_aux, follows_pNode)];
} Limbox;
/* true when the hash part of 't' is large enough to have a 'lastfree' */
#define haslastfree(t) ((t)->lsizenode > LIMFORLAST)
/* 'lastfree' lives in the Limbox located just before 't->node' */
#define getlastfree(t) ((cast(Limbox *, (t)->node) - 1)->lastfree)
/*
** MAXABITS is the largest integer such that 2^MAXABITS fits in an
** unsigned int.
*/
#define MAXABITS cast_int(sizeof(int) * CHAR_BIT - 1)
/*
** MAXASIZEB is the maximum number of elements in the array part such
** that the size of the array fits in 'size_t'. (Each element costs
** 'sizeof(Value) + 1' bytes: one Value plus one byte in the tag array;
** see 'concretesize'.)
*/
#define MAXASIZEB (MAX_SIZET/(sizeof(Value) + 1))
/*
** MAXASIZE is the maximum size of the array part. It is the minimum
** between 2^MAXABITS and MAXASIZEB.
*/
#define MAXASIZE \
(((1u << MAXABITS) < MAXASIZEB) ? (1u << MAXABITS) : cast_uint(MAXASIZEB))
/*
** MAXHBITS is the largest integer such that 2^MAXHBITS fits in a
** signed int. (It is one bit less than MAXABITS.)
*/
#define MAXHBITS (MAXABITS - 1)
/*
** MAXHSIZE is the maximum size of the hash part. It is the minimum
** between 2^MAXHBITS and the maximum size such that, measured in bytes,
** it fits in a 'size_t'.
*/
#define MAXHSIZE luaM_limitN(1u << MAXHBITS, Node)
/*
** When the original hash value is good, hashing by a power of 2
** avoids the cost of '%'.
*/
#define hashpow2(t,n) (gnode(t, lmod((n), sizenode(t))))
/*
** for other types, it is better to avoid modulo by power of 2, as
** they can have many 2 factors. (The '|1u' makes the divisor odd, and
** therefore never zero.)
*/
#define hashmod(t,n) (gnode(t, ((n) % ((sizenode(t)-1u)|1u))))
/* strings hashed this way use the value stored in their 'hash' field */
#define hashstr(t,str) hashpow2(t, (str)->hash)
/* booleans are hashed directly by the integer 'p' given by the caller */
#define hashboolean(t,p) hashpow2(t, p)
/* pointers are converted by 'point2uint' and then hashed with '%' */
#define hashpointer(t,p) hashmod(t, point2uint(p))
/*
** Common node used as the hash part of every table whose hash part has
** size zero (see 'setnodevector'). Its value is empty and its key is nil.
*/
#define dummynode (&dummynode_)
static const Node dummynode_ = {
  {{NULL}, LUA_VEMPTY, /* value's value and type */
   LUA_VNIL, 0, {NULL}} /* key type, next, and key value */
};
/* value returned by the search functions when the key is not found */
static const TValue absentkey = {ABSTKEYCONSTANT};
/*
** Hash for integers. To allow a good hash, use the remainder operator
** ('%'). If integer fits as a non-negative int, compute an int
** remainder, which is faster. Otherwise, use an unsigned-integer
** remainder, which uses all bits and ensures a non-negative result.
*/
static Node *hashint (const Table *t, lua_Integer i) {
  lua_Unsigned ui = l_castS2U(i);
  if (ui > cast_uint(INT_MAX))  /* does not fit a non-negative int? */
    return hashmod(t, ui);  /* unsigned remainder, uses all bits */
  else {
    /* value fits an int: use the (faster) int remainder */
    int divisor = cast_int((sizenode(t)-1) | 1);
    return gnode(t, cast_int(ui) % divisor);
  }
}
/*
** Hash for floating-point numbers.
** The main computation should be just
** n = frexp(n, &i); return (n * INT_MAX) + i
** but there are some numerical subtleties.
** In a two's-complement representation, INT_MAX does not have an exact
** representation as a float, but INT_MIN does; because the absolute
** value of 'frexp' is smaller than 1 (unless 'n' is inf/NaN), the
** absolute value of the product 'frexp * -INT_MIN' is smaller or equal
** to INT_MAX. Next, the use of 'unsigned int' avoids overflows when
** adding 'i'; the use of '~u' (instead of '-u') avoids problems with
** INT_MIN.
*/
#if !defined(l_hashfloat)
static unsigned l_hashfloat (lua_Number n) {
  int ex;  /* exponent given by 'frexp' */
  lua_Integer scaled;  /* scaled mantissa, as an integer */
  n = l_mathop(frexp)(n, &ex) * -cast_num(INT_MIN);
  if (lua_numbertointeger(n, &scaled)) {  /* normal case */
    unsigned int u = cast_uint(ex) + cast_uint(scaled);
    /* fold results above INT_MAX back into [0, INT_MAX] */
    return (u > cast_uint(INT_MAX)) ? ~u : u;
  }
  /* no integer conversion: 'n' must be inf/-inf/NaN */
  lua_assert(luai_numisnan(n) || l_mathop(fabs)(n) == cast_num(HUGE_VAL));
  return 0;
}
#endif
/*
** returns the 'main' position of an element in a table (that is,
** the index of its hash value).
*/
static Node *mainpositionTV (const Table *t, const TValue *key) {
  switch (ttypetag(key)) {
    case LUA_VNUMINT:
      return hashint(t, ivalue(key));
    case LUA_VNUMFLT:
      return hashmod(t, l_hashfloat(fltvalue(key)));
    case LUA_VSHRSTR:
      return hashstr(t, tsvalue(key));
    case LUA_VLNGSTR:
      return hashpow2(t, luaS_hashlongstr(tsvalue(key)));
    case LUA_VFALSE:
      return hashboolean(t, 0);
    case LUA_VTRUE:
      return hashboolean(t, 1);
    case LUA_VLIGHTUSERDATA: {
      void *udata = pvalue(key);
      return hashpointer(t, udata);
    }
    case LUA_VLCF: {
      lua_CFunction cfunc = fvalue(key);
      return hashpointer(t, cfunc);
    }
    default: {  /* all other keys are hashed by their object address */
      GCObject *obj = gcvalue(key);
      return hashpointer(t, obj);
    }
  }
}
/*
** Main position, in table 't', of the key stored in node 'nd'.
*/
l_sinline Node *mainpositionfromnode (const Table *t, Node *nd) {
  TValue nodekey;  /* copy of the key kept in 'nd' */
  getnodekey(cast(lua_State *, NULL), &nodekey, nd);
  return mainpositionTV(t, &nodekey);
}
/*
** Check whether key 'k1' is equal to the key in node 'n2'. This
** equality is raw, so there are no metamethods. Floats with integer
** values have been normalized, so integers cannot be equal to
** floats. It is assumed that 'eqshrstr' is simply pointer equality, so
** that short strings are handled in the default case.
** A true 'deadok' means to accept dead keys as equal to their original
** values. All dead keys are compared in the default case, by pointer
** identity. (Only collectable objects can produce dead keys.) Note that
** dead long strings are also compared by identity.
** Once a key is dead, its corresponding value may be collected, and
** then another value can be created with the same address. If this
** other value is given to 'next', 'equalkey' will signal a false
** positive. In a regular traversal, this situation should never happen,
** as all keys given to 'next' came from the table itself, and therefore
** could not have been collected. Outside a regular traversal, we
** have garbage in, garbage out. What is relevant is that this false
** positive does not break anything. (In particular, 'next' will return
** some other valid item on the table or nil.)
*/
static int equalkey (const TValue *k1, const Node *n2, int deadok) {
  if (rawtt(k1) != keytt(n2)) {  /* not the same variants? */
    /* only acceptable mismatch: a dead key against a collectable one */
    if (!deadok || !keyisdead(n2) || !iscollectable(k1))
      return 0;  /* cannot be same key */
  }
  switch (keytt(n2)) {
    case LUA_VNIL:
    case LUA_VFALSE:
    case LUA_VTRUE:
      return 1;  /* same tag is enough */
    case LUA_VNUMINT:
      return (ivalue(k1) == keyival(n2));
    case LUA_VNUMFLT:
      return luai_numeq(fltvalue(k1), fltvalueraw(keyval(n2)));
    case LUA_VLIGHTUSERDATA:
      return pvalue(k1) == pvalueraw(keyval(n2));
    case LUA_VLCF:
      return fvalue(k1) == fvalueraw(keyval(n2));
    case ctb(LUA_VLNGSTR):
      return luaS_eqlngstr(tsvalue(k1), keystrval(n2));
    default:  /* other collectable objects and dead keys: identity */
      return gcvalue(k1) == gcvalueraw(keyval(n2));
  }
}
/*
** True if value of 'alimit' is equal to the real size of the array
** part of table 't'. (Otherwise, the array part must be larger than
** 'alimit'.) The test succeeds when 'isrealasize' holds or when
** 'alimit' is a power of 2.
*/
#define limitequalsasize(t) (isrealasize(t) || ispow2((t)->alimit))
/*
** Returns the real size of the 'array' array
*/
unsigned int luaH_realasize (const Table *t) {
  unsigned int size;
  if (limitequalsasize(t))
    return t->alimit;  /* this is the size */
  /* otherwise, round 'alimit' up to the next power of 2: first
     propagate its highest 1-bit to all lower positions... */
  size = t->alimit;
  size |= (size >> 1);
  size |= (size >> 2);
  size |= (size >> 4);
  size |= (size >> 8);
#if (UINT_MAX >> 14) > 3  /* unsigned int has more than 16 bits */
  size |= (size >> 16);
#if (UINT_MAX >> 30) > 3
  size |= (size >> 32);  /* unsigned int has more than 32 bits */
#endif
#endif
  size++;  /* ...then add one to get the power of 2 */
  lua_assert(ispow2(size) && size/2 < t->alimit && t->alimit < size);
  return size;
}
/*
** Check whether real size of the array is a power of 2.
** (If it is not, 'alimit' cannot be changed to any other value
** without changing the real size.)
*/
static int ispow2realasize (const Table *t) {
  if (!isrealasize(t))
    return 1;  /* 'alimit' is not the real size */
  return (ispow2(t->alimit) != 0);  /* 'alimit' is the real size */
}
/*
** Make 'alimit' hold the real size of the array part of 't', mark the
** table accordingly, and return the resulting 'alimit'.
*/
static unsigned int setlimittosize (Table *t) {
  unsigned int realsize = luaH_realasize(t);
  t->alimit = realsize;
  setrealasize(t);  /* flag that 'alimit' is now the real size */
  return t->alimit;
}
/*
** Value of 'alimit' used as the array size; 'check_exp' guards the
** access with the condition 'isrealasize(t)'.
*/
#define limitasasize(t) check_exp(isrealasize(t), t->alimit)
/*
** "Generic" get version. (Not that generic: not valid for integers,
** which may be in array part, nor for floats with integral values.)
** See explanation about 'deadok' in function 'equalkey'.
*/
static const TValue *getgeneric (Table *t, const TValue *key, int deadok) {
  Node *n = mainpositionTV(t, key);
  /* walk the collision chain until 'key' shows up or the chain ends */
  while (!equalkey(key, n, deadok)) {
    int step = gnext(n);
    if (step == 0)
      return &absentkey;  /* not found */
    n += step;
  }
  return gval(n);  /* that's it */
}
/*
** returns the index for 'k' if 'k' is an appropriate key to live in
** the array part of a table, 0 otherwise.
*/
static unsigned int arrayindex (lua_Integer k) {
  /* the unsigned subtraction wraps for k <= 0, so a single comparison
     checks whether 'k' is in [1, MAXASIZE] */
  return (l_castS2U(k) - 1u < MAXASIZE) ? cast_uint(k) : 0u;
}
/*
** returns the index of a 'key' for table traversals. First go all
** elements in the array part, then elements in the hash part. The
** beginning of a traversal is signaled by 0.
*/
static unsigned findindex (lua_State *L, Table *t, TValue *key,
                           unsigned asize) {
  unsigned int i = 0;
  const TValue *slot;
  if (ttisnil(key))
    return 0;  /* first iteration */
  if (ttisinteger(key))
    i = arrayindex(ivalue(key));
  if (i - 1u < asize)  /* is 'key' inside array part? */
    return i;  /* yes; that's the index */
  /* otherwise the key must live in the hash part (dead keys accepted) */
  slot = getgeneric(t, key, 1);
  if (l_unlikely(isabstkey(slot)))
    luaG_runerror(L, "invalid key to 'next'");  /* key not found */
  i = cast_uint(nodefromval(slot) - gnode(t, 0));  /* key index in hash table */
  /* hash elements are numbered after array ones */
  return (i + 1) + asize;
}
/*
** Table traversal step. 'key' is the stack slot with the previous key
** (nil to start). On success, stores the next key in 'key' and its
** value in 'key + 1' and returns 1; returns 0 when there are no more
** elements.
*/
int luaH_next (lua_State *L, Table *t, StkId key) {
  unsigned int asize = luaH_realasize(t);
  unsigned int i = findindex(L, t, s2v(key), asize);  /* find original key */
  while (i < asize) {  /* try first array part */
    lu_byte tag = *getArrTag(t, i);
    if (!tagisempty(tag)) {  /* a non-empty entry? */
      setivalue(s2v(key), cast_int(i) + 1);
      farr2val(t, i, tag, s2v(key + 1));
      return 1;
    }
    i++;
  }
  i -= asize;  /* convert to an index into the hash part */
  for (; i < sizenode(t); i++) {
    Node *n = gnode(t, i);
    if (isempty(gval(n)))
      continue;  /* skip empty entries */
    getnodekey(L, s2v(key), n);
    setobj2s(L, key + 1, gval(n));
    return 1;
  }
  return 0;  /* no more elements */
}
/*
** Free the hash part of table 't', unless it is the shared dummy node.
*/
static void freehash (lua_State *L, Table *t) {
  size_t nbytes;  /* size of the block being released */
  char *block;  /* start of that block */
  if (isdummy(t))
    return;  /* nothing was allocated */
  nbytes = cast_sizet(sizenode(t)) * sizeof(Node);
  block = cast_charp(t->node);
  if (haslastfree(t)) {  /* block starts with a 'Limbox'? */
    nbytes += sizeof(Limbox);
    block -= sizeof(Limbox);
  }
  luaM_freearray(L, block, nbytes);
}
/*
** Check whether an integer key is in the array part. If 'alimit' is
** not the real size of the array, the key still can be in the array
** part. In this case, do the "Xmilia trick" to check whether 'key-1'
** is smaller than the real size.
** The trick works as follows: let 'p' be the integer such that
** '2^(p+1) >= alimit > 2^p', or '2^(p+1) > alimit-1 >= 2^p'. That is,
** 'p' is the highest 1-bit in 'alimit-1', and 2^(p+1) is the real size
** of the array. What we have to check becomes 'key-1 < 2^(p+1)'. We
** compute '(key-1) & ~(alimit-1)', which we call 'res'; it will have
** the 'p' bit cleared. (It may also clear other bits smaller than 'p',
** but no bit higher than 'p'.) If the key is outside the array, that
** is, 'key-1 >= 2^(p+1)', then 'res' will have some 1-bit higher than
** 'p', therefore it will be larger or equal to 'alimit', and the check
** will fail. If 'key-1 < 2^(p+1)', then 'res' has no 1-bit higher than
** 'p', and as the bit 'p' itself was cleared, 'res' will be smaller
** than 2^p, therefore smaller than 'alimit', and the check succeeds.
** As special cases, when 'alimit' is 0 the condition is trivially false,
** and when 'alimit' is 1 the condition simplifies to 'key-1 < alimit'.
** If key is 0 or negative, 'res' will have its higher bit on, so that
** it cannot be smaller than 'alimit'.
*/
static int keyinarray (Table *t, lua_Integer key) {
  lua_Unsigned alimit = t->alimit;
  lua_Unsigned idx = l_castS2U(key) - 1u;  /* 'key - 1', unsigned */
  if (idx < alimit)  /* 'key' in [1, t->alimit]? */
    return 1;
  if (isrealasize(t))
    return 0;  /* nothing in the array beyond 'alimit' */
  if ((idx & ~(alimit - 1u)) >= alimit)
    return 0;  /* key is outside the real array size, too */
  t->alimit = cast_uint(key);  /* probably '#t' is here now */
  return 1;
}
/*
** {=============================================================
** Rehash
** ==============================================================
*/
/*
** Compute the optimal size for the array part of table 't'. 'nums' is a
** "count array" where 'nums[i]' is the number of integers in the table
** between 2^(i - 1) + 1 and 2^i. 'pna' enters with the total number of
** integer keys in the table and leaves with the number of keys that
** will go to the array part; return the optimal size. (The condition
** 'twotoi > 0' in the for loop stops the loop if 'twotoi' overflows.)
*/
static unsigned computesizes (unsigned nums[], unsigned *pna) {
  const unsigned int total = *pna;  /* all integer keys in the table */
  unsigned int candidate = 1;  /* 2^i (candidate for optimal size) */
  unsigned int below = 0;  /* number of elements smaller than 2^i */
  unsigned int inarray = 0;  /* number of elements to go to array part */
  unsigned int best = 0;  /* optimal size for array part */
  int i = 0;
  /* loop while keys can fill more than half of total size */
  while (candidate > 0 && total > candidate / 2) {
    below += nums[i];
    if (below > candidate / 2) {  /* more than half elements present? */
      best = candidate;  /* optimal size (till now) */
      inarray = below;  /* all elements up to 'best' go to array part */
    }
    i++;
    candidate *= 2;
  }
  lua_assert((best == 0 || best / 2 < inarray) && inarray <= best);
  *pna = inarray;
  return best;
}
/*
** If 'key' is an appropriate array index, count it in the proper slice
** of 'nums' and return 1; otherwise return 0.
*/
static unsigned countint (lua_Integer key, unsigned int *nums) {
  unsigned int k = arrayindex(key);
  if (k == 0)
    return 0;  /* not a candidate for the array part */
  nums[luaO_ceillog2(k)]++;  /* count as such */
  return 1;
}
/*
** True if the array slot for (1-based) 'key' holds an empty tag.
*/
l_sinline int arraykeyisempty (const Table *t, lua_Unsigned key) {
  int slottag = *getArrTag(t, key - 1);
  return tagisempty(slottag);
}
/*
** Count keys in array part of table 't': Fill 'nums[i]' with
** number of keys that will go into corresponding slice and return
** total number of non-nil keys.
*/
static unsigned numusearray (const Table *t, unsigned *nums) {
  unsigned int asize = limitasasize(t);  /* real array size */
  unsigned int total = 0;  /* summation of 'nums' */
  unsigned int key = 1;  /* index to traverse all array keys */
  unsigned int top = 1;  /* 2^slice */
  int slice;
  /* traverse each slice */
  for (slice = 0; slice <= MAXABITS; slice++, top *= 2) {
    unsigned int upper = top;
    unsigned int used = 0;  /* non-empty entries in this slice */
    if (upper > asize) {
      upper = asize;  /* adjust upper limit */
      if (key > upper)
        break;  /* no more elements to count */
    }
    /* count elements in range (2^(slice - 1), 2^slice] */
    while (key <= upper) {
      if (!arraykeyisempty(t, key))
        used++;
      key++;
    }
    nums[slice] += used;
    total += used;
  }
  return total;
}
/*
** Count the non-empty entries in the hash part of 't' and return that
** total. Integer keys that are valid array indices are also counted
** into 'nums', and their number is added to '*pna'.
*/
static unsigned numusehash (const Table *t, unsigned *nums, unsigned *pna) {
  unsigned inuse = 0;  /* total number of elements */
  unsigned candidates = 0;  /* elements added to 'nums' */
  unsigned i;
  for (i = sizenode(t); i > 0; i--) {
    Node *n = &t->node[i - 1];
    if (isempty(gval(n)))
      continue;  /* free slot */
    if (keyisinteger(n))
      candidates += countint(keyival(n), nums);
    inuse++;
  }
  *pna += candidates;
  return inuse;
}
/*
** Convert an "abstract size" (number of slots in an array) to
** "concrete size" (number of bytes in the array).
*/
static size_t concretesize (unsigned int size) {
  size_t slots = size;
  /* each slot needs one Value plus one byte in the second array */
  return slots * (sizeof(Value) + 1);
}
/*
** Resize the array part of a table. If new size is equal to the old,
** do nothing. Else, if new size is zero, free the old array. (It must
** be present, as the sizes are different.) Otherwise, allocate a new
** array, move the common elements to new proper position, and then
** frees old array.
** When array grows, we could reallocate it, but we still would need
** to move the elements to their new position, so the copy implicit
** in realloc is a waste. When array shrinks, it always erases some
** elements that should still be in the array, so we must reallocate in
** two steps anyway. It is simpler to always reallocate in two steps.
*/
static Value *resizearray (lua_State *L , Table *t,
                           unsigned oldasize,
                           unsigned newasize) {
  size_t newbytes;
  Value *fresh;
  if (oldasize == newasize)
    return t->array;  /* nothing to be done */
  if (newasize == 0) {  /* erasing array? */
    Value *base = t->array - oldasize;  /* original array's real address */
    luaM_freemem(L, base, concretesize(oldasize));  /* free it */
    return NULL;
  }
  newbytes = concretesize(newasize);
  fresh = cast(Value *,
               luaM_reallocvector(L, NULL, 0, newbytes, lu_byte));
  if (fresh == NULL)  /* allocation error? */
    return NULL;
  if (oldasize > 0) {
    Value *base = t->array - oldasize;  /* real original array */
    unsigned common = (oldasize < newasize) ? oldasize : newasize;
    lua_assert(common > 0);
    /* move common elements to new position */
    memcpy(fresh + newasize - common,
           base + oldasize - common,
           concretesize(common));
    luaM_freemem(L, base, concretesize(oldasize));
  }
  return fresh + newasize;  /* shift pointer to the end of value segment */
}
/*
** Creates an array for the hash part of a table with the given
** size, or reuses the dummy node if size is zero.
** The computation for size overflow is in two steps: the first
** comparison ensures that the shift in the second one does not
** overflow.
*/
static void setnodevector (lua_State *L, Table *t, unsigned size) {
  int lsize;  /* log2 of the final size */
  int i;
  if (size == 0) {  /* no elements to hash part? */
    t->node = cast(Node *, dummynode);  /* use common 'dummynode' */
    t->lsizenode = 0;
    setdummy(t);  /* signal that it is using dummy node */
    return;
  }
  lsize = luaO_ceillog2(size);
  if (lsize > MAXHBITS || (1u << lsize) > MAXHSIZE)
    luaG_runerror(L, "table overflow");
  size = twoto(lsize);  /* round size up to a power of 2 */
  if (lsize > LIMFORLAST) {  /* block needs a 'lastfree' field? */
    size_t bsize = size * sizeof(Node) + sizeof(Limbox);
    char *block = luaM_newblock(L, bsize);
    t->node = cast(Node *, block + sizeof(Limbox));
    getlastfree(t) = gnode(t, size);  /* all positions are free */
  }
  else
    t->node = luaM_newvector(L, size, Node);
  t->lsizenode = cast_byte(lsize);
  setnodummy(t);
  /* initialize every node as free: no chain, nil key, empty value */
  for (i = 0; i < cast_int(size); i++) {
    Node *n = gnode(t, i);
    gnext(n) = 0;
    setnilkey(n);
    setempty(gval(n));
  }
}
/*
** (Re)insert all elements from the hash part of 'ot' into table 't'.
*/
static void reinsert (lua_State *L, Table *ot, Table *t) {
  unsigned total = sizenode(ot);
  unsigned pos;
  for (pos = 0; pos < total; pos++) {
    Node *src = gnode(ot, pos);
    TValue k;
    if (isempty(gval(src)))
      continue;  /* nothing stored in this node */
    /* doesn't need barrier/invalidate cache, as entry was
       already present in the table */
    getnodekey(L, &k, src);
    luaH_set(L, t, &k, gval(src));
  }
}
/*
** Exchange the hash part of 't1' and 't2'. (In 'flags', only the
** dummy bit must be exchanged: The 'isrealasize' is not related
** to the hash part, and the metamethod bits do not change during
** a resize, so the "real" table can keep their values.)
*/
static void exchangehashpart (Table *t1, Table *t2) {
  /* snapshot of what belongs to the hash part of each table */
  Node *node1 = t1->node;
  lu_byte lsize1 = t1->lsizenode;
  int dummy1 = t1->flags & BITDUMMY;
  int dummy2 = t2->flags & BITDUMMY;
  /* 't1' receives the hash part of 't2' */
  t1->node = t2->node;
  t1->lsizenode = t2->lsizenode;
  t1->flags = cast_byte((t1->flags & NOTBITDUMMY) | dummy2);
  /* 't2' receives what used to be in 't1' */
  t2->node = node1;
  t2->lsizenode = lsize1;
  t2->flags = cast_byte((t2->flags & NOTBITDUMMY) | dummy1);
}
/*
** Re-insert into the new hash part of a table the elements from the
** vanishing slice of the array part.
*/
static void reinsertOldSlice (lua_State *L, Table *t, unsigned oldasize,
                              unsigned newasize) {
  unsigned pos = newasize;
  t->alimit = newasize;  /* pretend array has new size... */
  while (pos < oldasize) {  /* traverse vanishing slice */
    lu_byte tag = *getArrTag(t, pos);
    if (!tagisempty(tag)) {  /* a non-empty entry? */
      TValue copy;
      farr2val(t, pos, tag, &copy);  /* copy entry into 'copy' */
      /* re-insert it into the table */
      luaH_setint(L, t, cast_int(pos) + 1, &copy);
    }
    pos++;
  }
  t->alimit = oldasize;  /* restore current size... */
}
/*
** Clear new slice of the array.
*/
static void clearNewSlice (Table *t, unsigned oldasize, unsigned newasize) {
  unsigned pos = oldasize;
  while (pos < newasize) {  /* does nothing when array did not grow */
    *getArrTag(t, pos) = LUA_VEMPTY;
    pos++;
  }
}
/*
** Resize table 't' for the new given sizes. Both allocations (for
** the hash part and for the array part) can fail, which creates some
** subtleties. If the first allocation, for the hash part, fails, an
** error is raised and that is it. Otherwise, it copies the elements from
** the shrinking part of the array (if it is shrinking) into the new
** hash. Then it reallocates the array part. If that fails, the table
** is in its original state; the function frees the new hash part and then
** raises the allocation error. Otherwise, it sets the new hash part
** into the table, initializes the new part of the array (if any) with
** nils and reinserts the elements of the old hash back into the new
** parts of the table.
*/
/*
** Parameters: 'newasize' is the new size for the array part and
** 'nhsize' the requested size for the hash part (see 'setnodevector'
** for how that size is adjusted). Raises "table overflow" if
** 'newasize' exceeds MAXASIZE.
*/
void luaH_resize (lua_State *L, Table *t, unsigned newasize,
                  unsigned nhsize) {
  Table newt; /* to keep the new hash part */
  unsigned int oldasize = setlimittosize(t);
  Value *newarray;
  if (newasize > MAXASIZE)
    luaG_runerror(L, "table overflow");
  /* create new hash part with appropriate size into 'newt' */
  newt.flags = 0;
  setnodevector(L, &newt, nhsize);
  if (newasize < oldasize) { /* will array shrink? */
    /* re-insert into the new hash the elements from vanishing slice */
    exchangehashpart(t, &newt); /* pretend table has new hash */
    reinsertOldSlice(L, t, oldasize, newasize);
    exchangehashpart(t, &newt); /* restore old hash (in case of errors) */
  }
  /* allocate new array */
  newarray = resizearray(L, t, oldasize, newasize);
  /* a NULL result is an error only when a non-empty array was requested
     ('resizearray' also returns NULL when 'newasize' is zero) */
  if (l_unlikely(newarray == NULL && newasize > 0)) { /* allocation failed? */
    freehash(L, &newt); /* release new hash part */
    luaM_error(L); /* raise error (with array unchanged) */
  }
  /* allocation ok; initialize new part of the array */
  exchangehashpart(t, &newt); /* 't' has the new hash ('newt' has the old) */
  t->array = newarray; /* set new array part */
  t->alimit = newasize;
  clearNewSlice(t, oldasize, newasize);
  /* re-insert elements from old hash part into new parts */
  reinsert(L, &newt, t); /* 'newt' now has the old hash */
  freehash(L, &newt); /* free old hash part */
}
/*
** Resize only the array part of 't' to 'nasize', asking for a hash
** part with the size given by 'allocsizenode'.
*/
void luaH_resizearray (lua_State *L, Table *t, unsigned int nasize) {
  unsigned hashsize = allocsizenode(t);
  luaH_resize(L, t, nasize, hashsize);
}
/*
** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
*/
static void rehash (lua_State *L, Table *t, const TValue *ek) {
  unsigned nums[MAXABITS + 1];  /* counts per slice */
  unsigned na = 0;  /* number of keys candidate for the array part */
  unsigned totaluse = 1;  /* total number of keys; starts with extra key */
  unsigned asize;  /* optimal size for array part */
  unsigned i;
  for (i = 0; i <= MAXABITS; i++)
    nums[i] = 0;  /* reset counts */
  setlimittosize(t);
  if (ttisinteger(ek))
    na += countint(ivalue(ek), nums);  /* extra key may go to array */
  totaluse += numusehash(t, nums, &na);  /* count keys in hash part */
  if (na != 0) {  /* compute best size for array part */
    unsigned inarray = numusearray(t, nums);  /* count keys in array part */
    totaluse += inarray;  /* all keys in array part are keys */
    na += inarray;  /* ...and candidates for the new array part */
    asize = computesizes(nums, &na);  /* compute new size for array part */
  }
  else {
    /* no new keys to enter array part; keep it with the same size */
    asize = luaH_realasize(t);
  }
  /* resize the table to new computed sizes */
  luaH_resize(L, t, asize, totaluse - na);
}
/*
** }=============================================================
*/
/*
** Create a new table with no array part, no metatable, and the dummy
** node as its hash part.
*/
Table *luaH_new (lua_State *L) {
  GCObject *obj = luaC_newobj(L, LUA_VTABLE, sizeof(Table));
  Table *t = gco2t(obj);
  t->array = NULL;
  t->alimit = 0;
  t->metatable = NULL;
  t->flags = maskflags;  /* table has no metamethod fields */
  setnodevector(L, t, 0);
  return t;
}
/*
** Size in bytes of table 't': the Table structure, its array part, and
** its hash part (when it is not the dummy node).
*/
size_t luaH_size (Table *t) {
  size_t total = sizeof(Table)
               + luaH_realasize(t) * (sizeof(Value) + 1);
  if (isdummy(t))
    return total;  /* no memory allocated for the hash part */
  total += sizenode(t) * sizeof(Node);
  if (haslastfree(t))
    total += sizeof(Limbox);
  return total;
}
/*
** Frees a table.
*/
void luaH_free (lua_State *L, Table *t) {
  unsigned int asize = luaH_realasize(t);
  freehash(L, t);  /* release hash part */
  resizearray(L, t, asize, 0);  /* release array part */
  luaM_free(L, t);  /* release the table itself */
}
/*
** Find a free node (one with a nil key) in the hash part of 't';
** returns NULL if none is found.
*/
static Node *getfreepos (Table *t) {
  unsigned i;
  if (haslastfree(t)) {  /* does it have 'lastfree' information? */
    /* look for a spot before 'lastfree', updating 'lastfree' */
    while (getlastfree(t) > t->node) {
      Node *cand = --getlastfree(t);
      if (keyisnil(cand))
        return cand;
    }
    return NULL;  /* could not find a free place */
  }
  /* no 'lastfree' information */
  if (isdummy(t))
    return NULL;  /* dummy node has no usable positions */
  for (i = sizenode(t); i > 0; i--) {  /* do a linear search */
    Node *cand = gnode(t, i - 1);
    if (keyisnil(cand))
      return cand;
  }
  return NULL;  /* could not find a free place */
}
/*
** Inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place
** and put new key in its main position; otherwise (colliding node is in
** its main position), new key goes to an empty position.
*/
/*
** Parameters: 'key' must not be already present in the hash part (the
** callers in this file call this function only after a failed search);
** 'value' is the value to be stored. Raises an error for nil and NaN
** keys. Float keys with integral values are inserted as integers.
** A nil 'value' is never inserted.
*/
static void luaH_newkey (lua_State *L, Table *t, const TValue *key,
                         TValue *value) {
  Node *mp;
  TValue aux;
  if (l_unlikely(ttisnil(key)))
    luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {
    lua_Number f = fltvalue(key);
    lua_Integer k;
    if (luaV_flttointeger(f, &k, F2Ieq)) { /* does key fit in an integer? */
      setivalue(&aux, k);
      key = &aux; /* insert it as an integer */
    }
    else if (l_unlikely(luai_numisnan(f)))
      luaG_runerror(L, "table index is NaN");
  }
  if (ttisnil(value))
    return; /* do not insert nil values */
  mp = mainpositionTV(t, key);
  if (!isempty(gval(mp)) || isdummy(t)) { /* main position is taken? */
    Node *othern;
    Node *f = getfreepos(t); /* get a free place */
    if (f == NULL) { /* cannot find a free place? */
      rehash(L, t, key); /* grow table */
      /* whatever called 'newkey' takes care of TM cache */
      luaH_set(L, t, key, value); /* insert key into grown table */
      return;
    }
    lua_assert(!isdummy(t));
    /* main position of the key currently occupying 'mp' */
    othern = mainpositionfromnode(t, mp);
    if (othern != mp) { /* is colliding node out of its main position? */
      /* yes; move colliding node into free position */
      while (othern + gnext(othern) != mp) /* find previous */
        othern += gnext(othern);
      gnext(othern) = cast_int(f - othern); /* rechain to point to 'f' */
      *f = *mp; /* copy colliding node into free pos. (mp->next also goes) */
      if (gnext(mp) != 0) {
        /* 'next' is a relative offset; adjust it for the new position */
        gnext(f) += cast_int(mp - f); /* correct 'next' */
        gnext(mp) = 0; /* now 'mp' is free */
      }
      setempty(gval(mp));
    }
    else { /* colliding node is in its own main position */
      /* new node will go into free position */
      if (gnext(mp) != 0)
        gnext(f) = cast_int((mp + gnext(mp)) - f); /* chain new position */
      else lua_assert(gnext(f) == 0);
      gnext(mp) = cast_int(f - mp);
      mp = f;
    }
  }
  /* 'mp' is now the (free) node that receives the new key */
  setnodekey(L, mp, key);
  luaC_barrierback(L, obj2gco(t), key);
  lua_assert(isempty(gval(mp)));
  setobj2t(L, gval(mp), value);
}
/*
** Search for integer 'key' in the hash part of 't'. Returns its value
** slot, or 'absentkey' if the key is not there. (The assertion checks
** that the key is outside the array part.)
*/
static const TValue *getintfromhash (Table *t, lua_Integer key) {
  Node *n = hashint(t, key);
  int step;
  lua_assert(l_castS2U(key) - 1u >= luaH_realasize(t));
  do {  /* check whether 'key' is somewhere in the chain */
    if (keyisinteger(n) && keyival(n) == key)
      return gval(n);  /* that's it */
    step = gnext(n);
    n += step;
  } while (step != 0);
  return &absentkey;
}
/*
** True if integer 'key' has no value in the hash part of 't'.
*/
static int hashkeyisempty (Table *t, lua_Unsigned key) {
  const TValue *slot = getintfromhash(t, l_castU2S(key));
  return isempty(slot);
}
/*
** Finish a "get" from the hash part: copy 'val' into 'res' unless it
** is nil, and return the type tag of 'val'.
*/
static lu_byte finishnodeget (const TValue *val, TValue *res) {
  if (ttisnil(val))
    return ttypetag(val);  /* nothing to copy */
  setobj(((lua_State*)NULL), res, val);
  return ttypetag(val);
}
/*
** Get the value for integer 'key'. The value is copied into 'res' when
** it is not empty/nil; the function returns the tag of the entry.
*/
lu_byte luaH_getint (Table *t, lua_Integer key, TValue *res) {
  lu_byte tag;
  if (!keyinarray(t, key))  /* key lives (if at all) in the hash part */
    return finishnodeget(getintfromhash(t, key), res);
  tag = *getArrTag(t, key - 1);
  if (!tagisempty(tag))
    farr2val(t, cast_uint(key) - 1, tag, res);
  return tag;
}
/*
** search function for short strings
*/
const TValue *luaH_Hgetshortstr (Table *t, TString *key) {
  Node *n = hashstr(t, key);
  lua_assert(key->tt == LUA_VSHRSTR);
  /* follow the chain until a node with this very string shows up */
  while (!(keyisshrstr(n) && eqshrstr(keystrval(n), key))) {
    int step = gnext(n);
    if (step == 0)
      return &absentkey;  /* not found */
    n += step;
  }
  return gval(n);  /* that's it */
}
/*
** Get the value for short string 'key', copying it into 'res' (when
** not nil); returns the tag of the slot found.
*/
lu_byte luaH_getshortstr (Table *t, TString *key, TValue *res) {
  const TValue *slot = luaH_Hgetshortstr(t, key);
  return finishnodeget(slot, res);
}
/*
** Search for a string key (short or long) and return its value slot,
** or 'absentkey' if not found.
*/
static const TValue *Hgetstr (Table *t, TString *key) {
  if (key->tt != LUA_VSHRSTR) {  /* for long strings, use generic case */
    TValue boxed;
    setsvalue(cast(lua_State *, NULL), &boxed, key);
    return getgeneric(t, &boxed, 0);
  }
  return luaH_Hgetshortstr(t, key);
}
/*
** Get the value for string 'key', copying it into 'res' (when not
** nil); returns the tag of the slot found.
*/
lu_byte luaH_getstr (Table *t, TString *key, TValue *res) {
  const TValue *slot = Hgetstr(t, key);
  return finishnodeget(slot, res);
}
/*
** If a string equal to 'key' is a key in 't', return the string object
** stored in the table; otherwise return NULL.
*/
TString *luaH_getstrkey (Table *t, TString *key) {
  const TValue *slot = Hgetstr(t, key);
  if (isabstkey(slot))
    return NULL;  /* string is not a key in the table */
  return keystrval(nodefromval(slot));  /* get saved copy */
}
/*
** main search function
*/
lu_byte luaH_get (Table *t, const TValue *key, TValue *res) {
  switch (ttypetag(key)) {
    case LUA_VSHRSTR:
      return finishnodeget(luaH_Hgetshortstr(t, tsvalue(key)), res);
    case LUA_VNUMINT:
      return luaH_getint(t, ivalue(key), res);
    case LUA_VNIL:
      return finishnodeget(&absentkey, res);  /* nil is never a key */
    case LUA_VNUMFLT: {
      lua_Integer k;
      if (luaV_flttointeger(fltvalue(key), &k, F2Ieq))  /* integral index? */
        return luaH_getint(t, k, res);  /* use specialized version */
      break;  /* else use the generic search */
    }
    default:
      break;
  }
  return finishnodeget(getgeneric(t, key, 0), res);
}
/*
** Finish a "pset" on the hash part. If 'slot' is not nil, store 'val'
** there and return HOK. If 'slot' is the absent key, return HNOTFOUND.
** Otherwise return the index of the slot's node plus HFIRSTNODE.
*/
static int finishnodeset (Table *t, const TValue *slot, TValue *val) {
  if (!ttisnil(slot)) {
    setobj(((lua_State*)NULL), cast(TValue*, slot), val);
    return HOK;  /* success */
  }
  if (isabstkey(slot))
    return HNOTFOUND;  /* no slot with that key */
  /* return node encoded */
  return cast_int((cast(Node*, slot) - t->node)) + HFIRSTNODE;
}
/*
** Store 'val' into 'slot' unless 'slot' is the absent key. Returns 1
** if the value was stored, 0 otherwise.
*/
static int rawfinishnodeset (const TValue *slot, TValue *val) {
  if (!isabstkey(slot)) {
    setobj(((lua_State*)NULL), cast(TValue*, slot), val);
    return 1;  /* success */
  }
  return 0;  /* no slot with that key */
}
/*
** Try to set integer 'key' to 'val'. For a key in the array part, the
** value is stored (returning HOK) when the slot is not empty or when
** 'checknoTM' holds for TM_NEWINDEX on the metatable; otherwise the
** result is '~(key - 1)', which encodes the array index. For other
** keys, the result comes from 'finishnodeset'.
*/
int luaH_psetint (Table *t, lua_Integer key, TValue *val) {
  lu_byte *tag;
  if (!keyinarray(t, key))
    return finishnodeset(t, getintfromhash(t, key), val);
  tag = getArrTag(t, key - 1);
  if (!tagisempty(*tag) || checknoTM(t->metatable, TM_NEWINDEX)) {
    fval2arr(t, cast_uint(key) - 1, tag, val);
    return HOK;  /* success */
  }
  return ~cast_int(key - 1);  /* empty slot in the array part */
}
/*
** Try to set short string 'key' to 'val'; see 'finishnodeset' for the
** possible results.
*/
int luaH_psetshortstr (Table *t, TString *key, TValue *val) {
  const TValue *slot = luaH_Hgetshortstr(t, key);
  return finishnodeset(t, slot, val);
}
/*
** Try to set string 'key' (short or long) to 'val'; see
** 'finishnodeset' for the possible results.
*/
int luaH_psetstr (Table *t, TString *key, TValue *val) {
  const TValue *slot = Hgetstr(t, key);
  return finishnodeset(t, slot, val);
}
/*
** Try to assign 'val' to 'key' in table 't' without creating a new
** entry, dispatching on the type tag of the key.
** - A nil key is never found (HNOTFOUND).
** - Integer keys, and float keys that convert to an integer under mode
**   F2Ieq, are handled by 'luaH_psetint'.
** - Short strings are handled by 'luaH_psetshortstr'.
** - Any other key goes through the generic search followed by
**   'finishnodeset'.
** The result uses the encoding produced by those functions.
*/
int luaH_pset (Table *t, const TValue *key, TValue *val) {
  switch (ttypetag(key)) {
    case LUA_VNIL:
      return HNOTFOUND;
    case LUA_VNUMINT:
      return luaH_psetint(t, ivalue(key), val);
    case LUA_VSHRSTR:
      return luaH_psetshortstr(t, tsvalue(key), val);
    case LUA_VNUMFLT: {
      lua_Integer n;
      if (luaV_flttointeger(fltvalue(key), &n, F2Ieq))  /* integral index? */
        return luaH_psetint(t, n, val);  /* use specialized version */
      break;  /* non-integral float: use the generic search below */
    }
    default:
      break;
  }
  return finishnodeset(t, getgeneric(t, key, 0), val);
}
/*
** Complete a raw "set table" operation that a previous attempt could
** not finish. 'hres' is the (non-HOK) result of that attempt and tells
** where the value should go:
** - HNOTFOUND: the key is not in the table; insert it as a new key.
** - other positive values: a node of the hash part, whose index is
**   'hres - HFIRSTNODE'.
** - remaining values: an entry in the array part, whose index is the
**   bitwise complement of 'hres'.
** Beware: when using this function you probably need to check a GC
** barrier and invalidate the TM cache.
*/
void luaH_finishset (lua_State *L, Table *t, const TValue *key,
                                   TValue *value, int hres) {
  lua_assert(hres != HOK);
  if (hres == HNOTFOUND) {  /* key absent from the table? */
    luaH_newkey(L, t, key, value);
  }
  else if (hres <= 0) {  /* array entry? */
    unsigned int idx = cast_uint(~hres);  /* real index */
    obj2arr(t, idx, value);
  }
  else {  /* regular Node */
    setobj2t(L, gval(gnode(t, hres - HFIRSTNODE)), value);
  }
}
/*
** Raw assignment of 'value' to 'key' in table 't'. First try the
** assignment with 'luaH_pset'; if that does not report HOK, complete
** it with 'luaH_finishset'.
** Beware: when using this function you probably need to check a GC
** barrier and invalidate the TM cache.
*/
void luaH_set (lua_State *L, Table *t, const TValue *key, TValue *value) {
  int hres = luaH_pset(t, key, value);
  if (hres == HOK)
    return;  /* value already stored */
  luaH_finishset(L, t, key, value, hres);
}
/*
** Raw assignment of 'value' to the integer 'key' in table 't'. A key
** in the array part is written in place; otherwise the hash part is
** searched and, if the key is not there, it is inserted as a new key.
** Beware: when using this function you probably need to check a GC
** barrier. (No need to invalidate the TM cache, as integers cannot be
** keys to metamethods.)
*/
void luaH_setint (lua_State *L, Table *t, lua_Integer key, TValue *value) {
  if (keyinarray(t, key)) {
    obj2arr(t, cast_uint(key) - 1, value);
    return;
  }
  if (!rawfinishnodeset(getintfromhash(t, key), value)) {
    TValue k;  /* key not in the hash part: insert it */
    setivalue(&k, key);
    luaH_newkey(L, t, &k, value);
  }
}
/*
** Look for a boundary in the hash part of table 't'. The caller
** guarantees that 'j' is zero or a present key and that 'j + 1' is
** present. The search has two phases.
** First, an upper candidate is doubled until it names an absent key,
** while the last candidate seen present is remembered as the lower
** end. If doubling would overflow, LUA_MAXINTEGER is probed instead:
** if it is absent, it becomes the upper end (it cannot be equal to the
** lower end, which is present, so it is strictly larger); if it is
** present, it is itself a boundary, because the following index is
** not a valid integer in Lua.
** Second, a binary search runs between the present lower end and the
** absent upper end until they are adjacent; the lower end is then a
** boundary.
*/
static lua_Unsigned hash_search (Table *t, lua_Unsigned j) {
  const lua_Unsigned half = l_castS2U(LUA_MAXINTEGER) / 2;
  lua_Unsigned lo;  /* last index known to be present */
  lua_Unsigned hi = (j == 0) ? 1 : j;  /* caller ensures 'j + 1' is present */
  for (;;) {
    lo = hi;  /* 'lo' is a present index */
    if (hi > half) {  /* cannot double without passing max integer? */
      hi = LUA_MAXINTEGER;
      if (!hashkeyisempty(t, hi))  /* weird case: t[maxinteger] present */
        return hi;  /* well, max integer is a boundary... */
      break;  /* 'hi' now is an absent index */
    }
    hi *= 2;
    if (hashkeyisempty(t, hi))  /* reached an absent t[hi]? */
      break;
  }
  /* lo < hi  &&  t[lo] present  &&  t[hi] absent */
  while (hi - lo > 1u) {  /* do a binary search between them */
    lua_Unsigned mid = (lo + hi) / 2;
    if (hashkeyisempty(t, mid))
      hi = mid;
    else
      lo = mid;
  }
  return lo;
}
/*
** Binary search in the array part of table 'array', between indices
** 'i' and 'j'. At each step the middle index is tested: if it is empty
** it becomes the new 'j', otherwise it becomes the new 'i'. The loop
** stops when the two are adjacent (or equal) and returns 'i'.
** (In 'luaH_getn', the callers pass an 'i' that is zero or present and
** a 'j' that is empty, so the result is a boundary.)
*/
static unsigned int binsearch (Table *array, unsigned int i, unsigned int j) {
  while (j - i > 1u) {
    unsigned int mid = (i + j) / 2;
    if (!arraykeyisempty(array, mid))
      i = mid;  /* 'mid' is present: boundary is at or after it */
    else
      j = mid;  /* 'mid' is empty: boundary is before it */
  }
  return i;
}
/*
** Try to find a boundary in table 't'. (A 'boundary' is an integer index
** such that t[i] is present and t[i+1] is absent, or 0 if t[1] is absent
** and 'maxinteger' if t[maxinteger] is present.)
** (In the next explanation, we use Lua indices, that is, with base 1.
** The code itself uses base 0 when indexing the array part of the table.)
** The code starts with 'limit = t->alimit', a position in the array
** part that may be a boundary.
**
** (1) If 't[limit]' is empty, there must be a boundary before it.
** As a common case (e.g., after 't[#t]=nil'), check whether 'limit-1'
** is present. If so, it is a boundary. Otherwise, do a binary search
** between 0 and limit to find a boundary. In both cases, try to
** use this boundary as the new 'alimit', as a hint for the next call.
**
** (2) If 't[limit]' is not empty and the array has more elements
** after 'limit', try to find a boundary there. Again, try first
** the special case (which should be quite frequent) where 'limit+1'
** is empty, so that 'limit' is a boundary. Otherwise, check the
** last element of the array part. If it is empty, there must be a
** boundary between the old limit (present) and the last element
** (absent), which is found with a binary search. (This boundary always
** can be a new limit.)
**
** (3) The last case is when there are no elements in the array part
** (limit == 0) or its last element (the new limit) is present.
** In this case, must check the hash part. If there is no hash part
** or 'limit+1' is absent, 'limit' is a boundary. Otherwise, call
** 'hash_search' to find a boundary in the hash part of the table.
** (In those cases, the boundary is not inside the array part, and
** therefore cannot be used as a new limit.)
**
** Note that, although this is a query, it may write to the table:
** cases (1) and (2) can store the boundary in 't->alimit', and case (1)
** then also marks 'alimit' as not being the real size of the array.
*/
lua_Unsigned luaH_getn (Table *t) {
unsigned int limit = t->alimit;
if (limit > 0 && arraykeyisempty(t, limit)) { /* (1)? */
/* there must be a boundary before 'limit' */
/* ('limit >= 2' ensures that 'limit - 1' still is a valid index) */
if (limit >= 2 && !arraykeyisempty(t, limit - 1)) {
/* 'limit - 1' is a boundary; can it be a new limit? */
if (ispow2realasize(t) && !ispow2(limit - 1)) {
t->alimit = limit - 1;
setnorealasize(t); /* now 'alimit' is not the real size */
}
return limit - 1;
}
else { /* must search for a boundary in [0, limit] */
unsigned int boundary = binsearch(t, 0, limit);
/* can this boundary represent the real size of the array? */
if (ispow2realasize(t) && boundary > luaH_realasize(t) / 2) {
t->alimit = boundary; /* use it as the new limit */
setnorealasize(t);
}
return boundary;
}
}
/* 'limit' is zero or present in table */
if (!limitequalsasize(t)) { /* (2)? */
/* 'limit' > 0 and array has more elements after 'limit' */
if (arraykeyisempty(t, limit + 1)) /* 'limit + 1' is empty? */
return limit; /* this is the boundary */
/* else, try last element in the array */
/* (from here on, 'limit' is the real size of the array, while
'​t->alimit' still holds the old limit, known to be present) */
limit = luaH_realasize(t);
if (arraykeyisempty(t, limit)) { /* empty? */
/* there must be a boundary in the array after old limit,
and it must be a valid new limit */
unsigned int boundary = binsearch(t, t->alimit, limit);
t->alimit = boundary;
return boundary;
}
/* else, new limit is present in the table; check the hash part */
}
/* (3) 'limit' is the last element and either is zero or present in table */
lua_assert(limit == luaH_realasize(t) &&
(limit == 0 || !arraykeyisempty(t, limit)));
/* a dummy hash part has nothing to search */
if (isdummy(t) || hashkeyisempty(t, limit + 1))
return limit; /* 'limit + 1' is absent */
else /* 'limit + 1' is also present */
return hash_search(t, limit);
}
#if defined(LUA_DEBUG)

/*
** Exported only for the test library: return the node that
** 'mainpositionTV' gives for 'key' in table 't'.
*/
Node *luaH_mainposition (const Table *t, const TValue *key) {
  Node *mp = mainpositionTV(t, key);
  return mp;
}

#endif