
Commit 6b80394781 introduced integer comparison functions designed to be as efficient as possible while avoiding overflow. This commit makes use of these functions in many of the in-tree qsort() comparators to help ensure transitivity. Many of these comparator functions should also see a small performance boost. Author: Mats Kindahl Reviewed-by: Andres Freund, Fabrízio de Royes Mello Discussion: https://postgr.es/m/CA%2B14426g2Wa9QuUpmakwPxXFWG_1FaY0AsApkvcTBy-YfS6uaw%40mail.gmail.com
277 lines
6.1 KiB
C
277 lines
6.1 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* tsquery_gist.c
|
|
* GiST index support for tsquery
|
|
*
|
|
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/adt/tsquery_gist.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/gist.h"
|
|
#include "access/stratnum.h"
|
|
#include "common/int.h"
|
|
#include "tsearch/ts_utils.h"
|
|
#include "utils/builtins.h"
|
|
|
|
#define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key)
|
|
|
|
|
|
Datum
|
|
gtsquery_compress(PG_FUNCTION_ARGS)
|
|
{
|
|
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
|
GISTENTRY *retval = entry;
|
|
|
|
if (entry->leafkey)
|
|
{
|
|
TSQuerySign sign;
|
|
|
|
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
|
sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
|
|
|
|
gistentryinit(*retval, TSQuerySignGetDatum(sign),
|
|
entry->rel, entry->page,
|
|
entry->offset, false);
|
|
}
|
|
|
|
PG_RETURN_POINTER(retval);
|
|
}
|
|
|
|
/*
|
|
* We do not need a decompress function, because the other gtsquery
|
|
* support functions work with the compressed representation.
|
|
*/
|
|
|
|
Datum
|
|
gtsquery_consistent(PG_FUNCTION_ARGS)
|
|
{
|
|
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
|
TSQuery query = PG_GETARG_TSQUERY(1);
|
|
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
|
|
|
/* Oid subtype = PG_GETARG_OID(3); */
|
|
bool *recheck = (bool *) PG_GETARG_POINTER(4);
|
|
TSQuerySign key = DatumGetTSQuerySign(entry->key);
|
|
TSQuerySign sq = makeTSQuerySign(query);
|
|
bool retval;
|
|
|
|
/* All cases served by this function are inexact */
|
|
*recheck = true;
|
|
|
|
switch (strategy)
|
|
{
|
|
case RTContainsStrategyNumber:
|
|
if (GIST_LEAF(entry))
|
|
retval = (key & sq) == sq;
|
|
else
|
|
retval = (key & sq) != 0;
|
|
break;
|
|
case RTContainedByStrategyNumber:
|
|
if (GIST_LEAF(entry))
|
|
retval = (key & sq) == key;
|
|
else
|
|
retval = (key & sq) != 0;
|
|
break;
|
|
default:
|
|
retval = false;
|
|
}
|
|
PG_RETURN_BOOL(retval);
|
|
}
|
|
|
|
Datum
|
|
gtsquery_union(PG_FUNCTION_ARGS)
|
|
{
|
|
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
|
int *size = (int *) PG_GETARG_POINTER(1);
|
|
TSQuerySign sign;
|
|
int i;
|
|
|
|
sign = 0;
|
|
|
|
for (i = 0; i < entryvec->n; i++)
|
|
sign |= GETENTRY(entryvec, i);
|
|
|
|
*size = sizeof(TSQuerySign);
|
|
|
|
PG_RETURN_TSQUERYSIGN(sign);
|
|
}
|
|
|
|
Datum
|
|
gtsquery_same(PG_FUNCTION_ARGS)
|
|
{
|
|
TSQuerySign a = PG_GETARG_TSQUERYSIGN(0);
|
|
TSQuerySign b = PG_GETARG_TSQUERYSIGN(1);
|
|
bool *result = (bool *) PG_GETARG_POINTER(2);
|
|
|
|
*result = (a == b);
|
|
|
|
PG_RETURN_POINTER(result);
|
|
}
|
|
|
|
static int
|
|
sizebitvec(TSQuerySign sign)
|
|
{
|
|
int size = 0,
|
|
i;
|
|
|
|
for (i = 0; i < TSQS_SIGLEN; i++)
|
|
size += 0x01 & (sign >> i);
|
|
|
|
return size;
|
|
}
|
|
|
|
static int
|
|
hemdist(TSQuerySign a, TSQuerySign b)
|
|
{
|
|
TSQuerySign res = a ^ b;
|
|
|
|
return sizebitvec(res);
|
|
}
|
|
|
|
Datum
|
|
gtsquery_penalty(PG_FUNCTION_ARGS)
|
|
{
|
|
TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
|
|
TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
|
|
float *penalty = (float *) PG_GETARG_POINTER(2);
|
|
|
|
*penalty = hemdist(origval, newval);
|
|
|
|
PG_RETURN_POINTER(penalty);
|
|
}
|
|
|
|
|
|
typedef struct
|
|
{
|
|
OffsetNumber pos;
|
|
int32 cost;
|
|
} SPLITCOST;
|
|
|
|
static int
|
|
comparecost(const void *a, const void *b)
|
|
{
|
|
return pg_cmp_s32(((const SPLITCOST *) a)->cost,
|
|
((const SPLITCOST *) b)->cost);
|
|
}
|
|
|
|
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
|
|
|
|
Datum
|
|
gtsquery_picksplit(PG_FUNCTION_ARGS)
|
|
{
|
|
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
|
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
|
|
OffsetNumber maxoff = entryvec->n - 2;
|
|
OffsetNumber k,
|
|
j;
|
|
TSQuerySign datum_l,
|
|
datum_r;
|
|
int32 size_alpha,
|
|
size_beta;
|
|
int32 size_waste,
|
|
waste = -1;
|
|
int32 nbytes;
|
|
OffsetNumber seed_1 = 0,
|
|
seed_2 = 0;
|
|
OffsetNumber *left,
|
|
*right;
|
|
|
|
SPLITCOST *costvector;
|
|
|
|
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
|
left = v->spl_left = (OffsetNumber *) palloc(nbytes);
|
|
right = v->spl_right = (OffsetNumber *) palloc(nbytes);
|
|
v->spl_nleft = v->spl_nright = 0;
|
|
|
|
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
|
|
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
|
|
{
|
|
size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
|
|
if (size_waste > waste)
|
|
{
|
|
waste = size_waste;
|
|
seed_1 = k;
|
|
seed_2 = j;
|
|
}
|
|
}
|
|
|
|
|
|
if (seed_1 == 0 || seed_2 == 0)
|
|
{
|
|
seed_1 = 1;
|
|
seed_2 = 2;
|
|
}
|
|
|
|
datum_l = GETENTRY(entryvec, seed_1);
|
|
datum_r = GETENTRY(entryvec, seed_2);
|
|
|
|
maxoff = OffsetNumberNext(maxoff);
|
|
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
|
|
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
|
|
{
|
|
costvector[j - 1].pos = j;
|
|
size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j));
|
|
size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j));
|
|
costvector[j - 1].cost = abs(size_alpha - size_beta);
|
|
}
|
|
qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
|
|
|
|
for (k = 0; k < maxoff; k++)
|
|
{
|
|
j = costvector[k].pos;
|
|
if (j == seed_1)
|
|
{
|
|
*left++ = j;
|
|
v->spl_nleft++;
|
|
continue;
|
|
}
|
|
else if (j == seed_2)
|
|
{
|
|
*right++ = j;
|
|
v->spl_nright++;
|
|
continue;
|
|
}
|
|
size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
|
|
size_beta = hemdist(datum_r, GETENTRY(entryvec, j));
|
|
|
|
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
|
|
{
|
|
datum_l |= GETENTRY(entryvec, j);
|
|
*left++ = j;
|
|
v->spl_nleft++;
|
|
}
|
|
else
|
|
{
|
|
datum_r |= GETENTRY(entryvec, j);
|
|
*right++ = j;
|
|
v->spl_nright++;
|
|
}
|
|
}
|
|
|
|
*right = *left = FirstOffsetNumber;
|
|
v->spl_ldatum = TSQuerySignGetDatum(datum_l);
|
|
v->spl_rdatum = TSQuerySignGetDatum(datum_r);
|
|
|
|
PG_RETURN_POINTER(v);
|
|
}
|
|
|
|
/*
|
|
* Formerly, gtsquery_consistent was declared in pg_proc.h with arguments
|
|
* that did not match the documented conventions for GiST support functions.
|
|
* We fixed that, but we still need a pg_proc entry with the old signature
|
|
* to support reloading pre-9.6 contrib/tsearch2 opclass declarations.
|
|
* This compatibility function should go away eventually.
|
|
*/
|
|
Datum
|
|
gtsquery_consistent_oldsig(PG_FUNCTION_ARGS)
|
|
{
|
|
return gtsquery_consistent(fcinfo);
|
|
}
|