postgres/contrib/ltree/_ltree_gist.c

668 lines
14 KiB
C

/*
* GiST support for ltree[]
* Teodor Sigaev <teodor@stack.net>
*/
#include "ltree.h"
#include "access/gist.h"
#include "access/rtree.h"
#include "access/nbtree.h"
#include "utils/array.h"
#include "crc32.h"
PG_FUNCTION_INFO_V1(_ltree_compress);
Datum _ltree_compress(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_same);
Datum _ltree_same(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_union);
Datum _ltree_union(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_penalty);
Datum _ltree_penalty(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_picksplit);
Datum _ltree_picksplit(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(_ltree_consistent);
Datum _ltree_consistent(PG_FUNCTION_ARGS);
#define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key))
#define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) )
#define SUMBIT(val) ( \
GETBITBYTE(val,0) + \
GETBITBYTE(val,1) + \
GETBITBYTE(val,2) + \
GETBITBYTE(val,3) + \
GETBITBYTE(val,4) + \
GETBITBYTE(val,5) + \
GETBITBYTE(val,6) + \
GETBITBYTE(val,7) \
)
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
static void
hashing(BITVECP sign, ltree * t)
{
int tlen = t->numlevel;
ltree_level *cur = LTREE_FIRST(t);
int hash;
while (tlen > 0)
{
hash = ltree_crc32_sz(cur->name, cur->len);
AHASH(sign, hash);
cur = LEVEL_NEXT(cur);
tlen--;
}
}
Datum
_ltree_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{ /* ltree */
ltree_gist *key;
ArrayType *val = DatumGetArrayTypeP(entry->key);
int4 len = LTG_HDRSIZE + ASIGLEN;
int num = ArrayGetNItems(ARR_NDIM(val), ARR_DIMS(val));
ltree *item = (ltree *) ARR_DATA_PTR(val);
if (ARR_NDIM(val) != 1)
elog(ERROR, "Dimension of array != 1");
key = (ltree_gist *) palloc(len);
key->len = len;
key->flag = 0;
MemSet(LTG_SIGN(key), 0, sizeof(ASIGLEN));
while (num > 0)
{
hashing(LTG_SIGN(key), item);
num--;
item = NEXTVAL(item);
}
if (PointerGetDatum(val) != entry->key)
pfree(val);
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, key->len, FALSE);
}
else if ( !LTG_ISALLTRUE(entry->key) )
{
int4 i,
len;
ltree_gist *key;
BITVECP sign = LTG_SIGN(DatumGetPointer(entry->key));
ALOOPBYTE(
if ((sign[i]&0xff) != 0xff)
PG_RETURN_POINTER(retval);
);
len = LTG_HDRSIZE;
key = (ltree_gist *) palloc(len);
key->len = len;
key->flag = LTG_ALLTRUE;
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, key->len, FALSE);
}
PG_RETURN_POINTER(retval);
}
Datum
_ltree_same(PG_FUNCTION_ARGS)
{
ltree_gist *a = (ltree_gist *) PG_GETARG_POINTER(0);
ltree_gist *b = (ltree_gist *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
if (LTG_ISALLTRUE(a) && LTG_ISALLTRUE(b))
*result = true;
else if (LTG_ISALLTRUE(a))
*result = false;
else if (LTG_ISALLTRUE(b))
*result = false;
else
{
int4 i;
BITVECP sa = LTG_SIGN(a),
sb = LTG_SIGN(b);
*result = true;
ALOOPBYTE(
if (sa[i] != sb[i])
{
*result = false;
break;
}
);
}
PG_RETURN_POINTER(result);
}
static int4
unionkey(BITVECP sbase, ltree_gist * add)
{
int4 i;
BITVECP sadd = LTG_SIGN(add);
if (LTG_ISALLTRUE(add))
return 1;
ALOOPBYTE(
sbase[i] |= sadd[i];
);
return 0;
}
Datum
_ltree_union(PG_FUNCTION_ARGS)
{
bytea *entryvec = (bytea *) PG_GETARG_POINTER(0);
int *size = (int *) PG_GETARG_POINTER(1);
ABITVEC base;
int4 len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY);
int4 i;
int4 flag = 0;
ltree_gist *result;
MemSet((void *) base, 0, sizeof(ABITVEC));
for (i = 0; i < len; i++)
{
if (unionkey(base, GETENTRY(entryvec, i)))
{
flag = LTG_ALLTRUE;
break;
}
}
len = LTG_HDRSIZE + ((flag & LTG_ALLTRUE) ? 0 : ASIGLEN);
result = (ltree_gist *) palloc(len);
*size = result->len = len;
result->flag = flag;
if (!LTG_ISALLTRUE(result))
memcpy((void *) LTG_SIGN(result), (void *) base, sizeof(ABITVEC));
PG_RETURN_POINTER(result);
}
static int4
sizebitvec(BITVECP sign)
{
int4 size = 0,
i;
ALOOPBYTE(
size += SUMBIT(*(char *) sign);
sign = (BITVECP) (((char *) sign) + 1);
);
return size;
}
Datum
_ltree_penalty(PG_FUNCTION_ARGS)
{
ltree_gist *origval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
ltree_gist *newval = (ltree_gist *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
BITVECP orig = LTG_SIGN(origval);
if (LTG_ISALLTRUE(origval))
{
*penalty = 0.1;
PG_RETURN_POINTER(penalty);
}
if (LTG_ISALLTRUE(newval))
*penalty = (float) (ASIGLENBIT - sizebitvec(orig));
else
{
unsigned char valtmp;
BITVECP nval = LTG_SIGN(newval);
int4 i,
unionsize = 0;
ALOOPBYTE(
valtmp = nval[i] | orig[i];
unionsize += SUMBIT(valtmp) - SUMBIT(orig[i]);
);
*penalty = (float) unionsize;
}
PG_RETURN_POINTER(penalty);
}
typedef struct
{
OffsetNumber pos;
int4 cost;
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
return ((SPLITCOST *) a)->cost - ((SPLITCOST *) b)->cost;
}
Datum
_ltree_picksplit(PG_FUNCTION_ARGS)
{
bytea *entryvec = (bytea *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
OffsetNumber k,
j;
ltree_gist *datum_l,
*datum_r;
ABITVEC union_l,
union_r;
bool firsttime = true;
int4 size_alpha,
size_beta,
sizeu,
sizei;
int4 size_waste,
waste = 0.0;
int4 size_l,
size_r;
int4 nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
OffsetNumber maxoff;
BITVECP ptra,
ptrb,
ptrc;
int i;
unsigned char valtmp;
SPLITCOST *costvector;
ltree_gist *_k,
*_j;
maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
{
_k = GETENTRY(entryvec, k);
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
_j = GETENTRY(entryvec, j);
if (LTG_ISALLTRUE(_k) || LTG_ISALLTRUE(_j))
{
sizeu = ASIGLENBIT;
if (LTG_ISALLTRUE(_k) && LTG_ISALLTRUE(_j))
sizei = ASIGLENBIT;
else
sizei = (LTG_ISALLTRUE(_k)) ?
sizebitvec(LTG_SIGN(_j)) : sizebitvec(LTG_SIGN(_k));
}
else
{
sizeu = sizei = 0;
ptra = LTG_SIGN(_j);
ptrb = LTG_SIGN(_k);
/* critical section for bench !!! */
#define COUNT(pos) do { \
if ( GETBITBYTE(*(char*)ptra,pos) ) { \
sizeu++; \
if ( GETBITBYTE(*(char*)ptrb, pos) ) \
sizei++; \
} else if ( GETBITBYTE(*(char*)ptrb, pos) ) \
sizeu++; \
} while(0)
ALOOPBYTE(
COUNT(0);
COUNT(1);
COUNT(2);
COUNT(3);
COUNT(4);
COUNT(5);
COUNT(6);
COUNT(7);
ptra = (BITVECP) (((char *) ptra) + 1);
ptrb = (BITVECP) (((char *) ptrb) + 1);
);
}
size_waste = sizeu - sizei;
if (size_waste > waste || firsttime)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
firsttime = false;
}
}
}
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
/* form initial .. */
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_1)))
{
datum_l = (ltree_gist *) palloc(LTG_HDRSIZE);
datum_l->len = LTG_HDRSIZE;
datum_l->flag = LTG_ALLTRUE;
size_l = ASIGLENBIT;
}
else
{
datum_l = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
datum_l->len = LTG_HDRSIZE + ASIGLEN;
datum_l->flag = 0;
memcpy((void *) LTG_SIGN(datum_l), (void *) LTG_SIGN(GETENTRY(entryvec, seed_1)), sizeof(ABITVEC));
size_l = sizebitvec(LTG_SIGN(datum_l));
}
if (LTG_ISALLTRUE(GETENTRY(entryvec, seed_2)))
{
datum_r = (ltree_gist *) palloc(LTG_HDRSIZE);
datum_r->len = LTG_HDRSIZE;
datum_r->flag = LTG_ALLTRUE;
size_r = ASIGLENBIT;
}
else
{
datum_r = (ltree_gist *) palloc(LTG_HDRSIZE + ASIGLEN);
datum_r->len = LTG_HDRSIZE + ASIGLEN;
datum_r->flag = 0;
memcpy((void *) LTG_SIGN(datum_r), (void *) LTG_SIGN(GETENTRY(entryvec, seed_2)), sizeof(ABITVEC));
size_r = sizebitvec(LTG_SIGN(datum_r));
}
maxoff = OffsetNumberNext(maxoff);
/* sort before ... */
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
_j = GETENTRY(entryvec, j);
if (LTG_ISALLTRUE(_j))
{
size_alpha = ASIGLENBIT - size_l;
size_beta = ASIGLENBIT - size_r;
}
else
{
ptra = LTG_SIGN(datum_l);
ptrb = LTG_SIGN(datum_r);
ptrc = LTG_SIGN(_j);
size_beta = size_alpha = 0;
if (LTG_ISALLTRUE(datum_l))
{
if (!LTG_ISALLTRUE(datum_r))
{
ALOOPBIT(
if (GETBIT(ptrc, i) && !GETBIT(ptrb, i))
size_beta++;
);
}
}
else if (LTG_ISALLTRUE(datum_r))
{
if (!LTG_ISALLTRUE(datum_l))
{
ALOOPBIT(
if (GETBIT(ptrc, i) && !GETBIT(ptra, i))
size_alpha++;
);
}
}
else
{
ALOOPBIT(
if (GETBIT(ptrc, i) && !GETBIT(ptra, i))
size_alpha++;
if (GETBIT(ptrc, i) && !GETBIT(ptrb, i))
size_beta++;
);
}
}
costvector[j - 1].cost = abs(size_alpha - size_beta);
}
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
_j = GETENTRY(entryvec, j);
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
if (LTG_ISALLTRUE(datum_l) || LTG_ISALLTRUE(_j))
size_alpha = ASIGLENBIT;
else
{
ptra = LTG_SIGN(_j);
ptrb = LTG_SIGN(datum_l);
size_alpha = 0;
ALOOPBYTE(
valtmp = union_l[i] = ptra[i] | ptrb[i];
size_alpha += SUMBIT(valtmp);
);
}
if (LTG_ISALLTRUE(datum_r) || LTG_ISALLTRUE(_j))
size_beta = ASIGLENBIT;
else
{
ptra = LTG_SIGN(_j);
ptrb = LTG_SIGN(datum_r);
size_beta = 0;
ALOOPBYTE(
valtmp = union_r[i] = ptra[i] | ptrb[i];
size_beta += SUMBIT(valtmp);
);
}
if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.00001))
{
if (!LTG_ISALLTRUE(datum_l))
{
if (size_alpha == ASIGLENBIT)
{
if (size_alpha != size_l)
MemSet((void *) LTG_SIGN(datum_l), 0xff, sizeof(ABITVEC));
}
else
memcpy((void *) LTG_SIGN(datum_l), (void *) union_l, sizeof(ABITVEC));
}
size_l = size_alpha;
*left++ = j;
v->spl_nleft++;
}
else
{
if (!LTG_ISALLTRUE(datum_r))
{
if (size_beta == ASIGLENBIT)
{
if (size_beta != size_r)
MemSet((void *) LTG_SIGN(datum_r), 0xff, sizeof(ABITVEC));
}
else
memcpy((void *) LTG_SIGN(datum_r), (void *) union_r, sizeof(ABITVEC));
}
size_r = size_beta;
*right++ = j;
v->spl_nright++;
}
}
*right = *left = FirstOffsetNumber;
pfree(costvector);
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
PG_RETURN_POINTER(v);
}
static bool
gist_te(ltree_gist * key, ltree * query)
{
ltree_level *curq = LTREE_FIRST(query);
BITVECP sign = LTG_SIGN(key);
int qlen = query->numlevel;
unsigned int hv;
if (LTG_ISALLTRUE(key))
return true;
while (qlen > 0)
{
hv = ltree_crc32_sz(curq->name, curq->len);
if (!GETBIT(sign, AHASHVAL(hv)))
return false;
curq = LEVEL_NEXT(curq);
qlen--;
}
return true;
}
static bool
checkcondition_bit(void *checkval, ITEM * val)
{
return (FLG_CANLOOKSIGN(val->flag)) ? GETBIT(checkval, AHASHVAL(val->val)) : true;
}
static bool
gist_qtxt(ltree_gist * key, ltxtquery * query)
{
if (LTG_ISALLTRUE(key))
return true;
return ltree_execute(
GETQUERY(query),
(void *) LTG_SIGN(key), false,
checkcondition_bit
);
}
static bool
gist_qe(ltree_gist * key, lquery * query)
{
lquery_level *curq = LQUERY_FIRST(query);
BITVECP sign = LTG_SIGN(key);
int qlen = query->numlevel;
if (LTG_ISALLTRUE(key))
return true;
while (qlen > 0)
{
if (curq->numvar && LQL_CANLOOKSIGN(curq))
{
bool isexist = false;
int vlen = curq->numvar;
lquery_variant *curv = LQL_FIRST(curq);
while (vlen > 0)
{
if (GETBIT(sign, AHASHVAL(curv->val)))
{
isexist = true;
break;
}
curv = LVAR_NEXT(curv);
vlen--;
}
if (!isexist)
return false;
}
curq = LQL_NEXT(curq);
qlen--;
}
return true;
}
static bool
_arrq_cons(ltree_gist *key, ArrayType *_query) {
lquery *query = (lquery *) ARR_DATA_PTR(_query);
int num = ArrayGetNItems(ARR_NDIM(_query), ARR_DIMS(_query));
if (ARR_NDIM(_query) != 1)
elog(ERROR, "Dimension of array != 1");
while (num > 0) {
if ( gist_qe(key, query) )
return true;
num--;
query = (lquery*)NEXTVAL(query);
}
return false;
}
Datum
_ltree_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
char *query = (char *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
ltree_gist *key = (ltree_gist *) DatumGetPointer(entry->key);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
bool res = false;
#ifndef assert_enabled
#define assert_enabled 0
#endif
switch (strategy)
{
case 10:
case 11:
res = gist_te(key, (ltree *) query);
break;
case 12:
case 13:
res = gist_qe(key, (lquery *) query);
break;
case 14:
case 15:
res = gist_qtxt(key, (ltxtquery *) query);
break;
case 16:
case 17:
res = _arrq_cons(key, (ArrayType *) query);
break;
default:
elog(ERROR, "Unknown StrategyNumber: %d", strategy);
}
PG_RETURN_BOOL(res);
}