postgres/src/backend/utils/adt/tsquery_gist.c
Nathan Bossart 3b42bdb471 Use new overflow-safe integer comparison functions.
Commit 6b80394781 introduced integer comparison functions designed
to be as efficient as possible while avoiding overflow.  This
commit makes use of these functions in many of the in-tree qsort()
comparators to help ensure transitivity.  Many of these comparator
functions should also see a small performance boost.

Author: Mats Kindahl
Reviewed-by: Andres Freund, Fabrízio de Royes Mello
Discussion: https://postgr.es/m/CA%2B14426g2Wa9QuUpmakwPxXFWG_1FaY0AsApkvcTBy-YfS6uaw%40mail.gmail.com
2024-02-16 14:05:36 -06:00

277 lines
6.1 KiB
C

/*-------------------------------------------------------------------------
*
* tsquery_gist.c
* GiST index support for tsquery
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* src/backend/utils/adt/tsquery_gist.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/stratnum.h"
#include "common/int.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
/*
 * GETENTRY(vec, pos): read the key stored at index "pos" of the entry
 * vector "vec" and convert it from a Datum to a TSQuerySign.
 */
#define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key)
Datum
gtsquery_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{
TSQuerySign sign;
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
gistentryinit(*retval, TSQuerySignGetDatum(sign),
entry->rel, entry->page,
entry->offset, false);
}
PG_RETURN_POINTER(retval);
}
/*
* We do not need a decompress function, because the other gtsquery
* support functions work with the compressed representation.
*/
Datum
gtsquery_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TSQuery query = PG_GETARG_TSQUERY(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
/* Oid subtype = PG_GETARG_OID(3); */
bool *recheck = (bool *) PG_GETARG_POINTER(4);
TSQuerySign key = DatumGetTSQuerySign(entry->key);
TSQuerySign sq = makeTSQuerySign(query);
bool retval;
/* All cases served by this function are inexact */
*recheck = true;
switch (strategy)
{
case RTContainsStrategyNumber:
if (GIST_LEAF(entry))
retval = (key & sq) == sq;
else
retval = (key & sq) != 0;
break;
case RTContainedByStrategyNumber:
if (GIST_LEAF(entry))
retval = (key & sq) == key;
else
retval = (key & sq) != 0;
break;
default:
retval = false;
}
PG_RETURN_BOOL(retval);
}
/*
 * GiST union support function for tsquery.
 *
 * Argument 0 is the entry vector, argument 1 receives the size of the
 * result.  The result is the bitwise OR of the signatures of entries
 * 0 .. n-1 of the vector.
 */
Datum
gtsquery_union(PG_FUNCTION_ARGS)
{
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	int		   *size = (int *) PG_GETARG_POINTER(1);
	TSQuerySign merged = 0;
	int			idx = 0;

	while (idx < entryvec->n)
	{
		merged |= GETENTRY(entryvec, idx);
		idx++;
	}

	*size = sizeof(TSQuerySign);

	PG_RETURN_TSQUERYSIGN(merged);
}
Datum
gtsquery_same(PG_FUNCTION_ARGS)
{
TSQuerySign a = PG_GETARG_TSQUERYSIGN(0);
TSQuerySign b = PG_GETARG_TSQUERYSIGN(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
*result = (a == b);
PG_RETURN_POINTER(result);
}
/*
 * Count how many of the low TSQS_SIGLEN bits of "sign" are set.
 */
static int
sizebitvec(TSQuerySign sign)
{
	int			nset = 0;
	int			bitno = 0;

	while (bitno < TSQS_SIGLEN)
	{
		if ((sign >> bitno) & 0x01)
			nset++;
		bitno++;
	}

	return nset;
}
/*
 * Distance between two signatures: the number of bit positions (as counted
 * by sizebitvec) in which they differ.
 */
static int
hemdist(TSQuerySign a, TSQuerySign b)
{
	/* XOR leaves a 1 exactly where the two signatures disagree. */
	return sizebitvec(a ^ b);
}
Datum
gtsquery_penalty(PG_FUNCTION_ARGS)
{
TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
*penalty = hemdist(origval, newval);
PG_RETURN_POINTER(penalty);
}
/*
 * Sort element used by gtsquery_picksplit: one per entry being split,
 * ordered by "cost" via comparecost().
 */
typedef struct
{
/* offset of the entry within the entry vector */
OffsetNumber pos;
/* absolute difference between the entry's distances to the two seeds */
int32 cost;
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
return pg_cmp_s32(((const SPLITCOST *) a)->cost,
((const SPLITCOST *) b)->cost);
}
/*
 * WISH_F(a, b, c) evaluates to -((a - b) cubed) * c, as a double.
 *
 * The cube is formed in the arithmetic type of (a) - (b) and only then cast
 * to double.  "a" and "b" are each expanded three times, so the arguments
 * should be free of side effects.
 */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
Datum
gtsquery_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
OffsetNumber maxoff = entryvec->n - 2;
OffsetNumber k,
j;
TSQuerySign datum_l,
datum_r;
int32 size_alpha,
size_beta;
int32 size_waste,
waste = -1;
int32 nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
SPLITCOST *costvector;
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
left = v->spl_left = (OffsetNumber *) palloc(nbytes);
right = v->spl_right = (OffsetNumber *) palloc(nbytes);
v->spl_nleft = v->spl_nright = 0;
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
if (size_waste > waste)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
datum_l = GETENTRY(entryvec, seed_1);
datum_r = GETENTRY(entryvec, seed_2);
maxoff = OffsetNumberNext(maxoff);
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j));
size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j));
costvector[j - 1].cost = abs(size_alpha - size_beta);
}
qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
size_beta = hemdist(datum_r, GETENTRY(entryvec, j));
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
{
datum_l |= GETENTRY(entryvec, j);
*left++ = j;
v->spl_nleft++;
}
else
{
datum_r |= GETENTRY(entryvec, j);
*right++ = j;
v->spl_nright++;
}
}
*right = *left = FirstOffsetNumber;
v->spl_ldatum = TSQuerySignGetDatum(datum_l);
v->spl_rdatum = TSQuerySignGetDatum(datum_r);
PG_RETURN_POINTER(v);
}
/*
* Formerly, gtsquery_consistent was declared in pg_proc.h with arguments
* that did not match the documented conventions for GiST support functions.
* We fixed that, but we still need a pg_proc entry with the old signature
* to support reloading pre-9.6 contrib/tsearch2 opclass declarations.
* This compatibility function should go away eventually.
*/
Datum
gtsquery_consistent_oldsig(PG_FUNCTION_ARGS)
{
	/* Pure pass-through: the call info is forwarded unchanged. */
	Datum		result = gtsquery_consistent(fcinfo);

	return result;
}