
A few places are not converted. Some because they are tackled in later commits (e.g. hio.c, xlogutils.c), some because they are more complicated (e.g. brin_pageops.c). Having a few users of ReadBuffer(P_NEW) is good anyway, to ensure the backward compat path stays working. Discussion: https://postgr.es/m/20221029025420.eplyow6k7tgu6he3@awork3.anarazel.de
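
For the record, a minimal sketch of the two allocation idioms this conversion moves between (illustrative only; "rel" and the variable names are hypothetical — SpGistNewBuffer below shows the real converted call site):

    /* old idiom: extend via ReadBuffer(P_NEW), then lock explicitly */
    Buffer      oldbuf = ReadBuffer(rel, P_NEW);
    LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);

    /* new idiom: ExtendBufferedRel returns the new page already locked */
    Buffer      newbuf = ExtendBufferedRel(EB_REL(rel), MAIN_FORKNUM, NULL,
                                           EB_LOCK_FIRST);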

/*-------------------------------------------------------------------------
 *
 * spgutils.c
 *	  various support functions for SP-GiST
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *			src/backend/access/spgist/spgutils.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/amvalidate.h"
#include "access/htup_details.h"
#include "access/reloptions.h"
#include "access/spgist_private.h"
#include "access/toast_compression.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/pg_amop.h"
#include "commands/vacuum.h"
#include "nodes/nodeFuncs.h"
#include "parser/parse_coerce.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"


/*
 * SP-GiST handler function: return IndexAmRoutine with access method
 * parameters and callbacks.
 */
Datum
spghandler(PG_FUNCTION_ARGS)
{
	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

	amroutine->amstrategies = 0;
	amroutine->amsupport = SPGISTNProc;
	amroutine->amoptsprocnum = SPGIST_OPTIONS_PROC;
	amroutine->amcanorder = false;
	amroutine->amcanorderbyop = true;
	amroutine->amcanbackward = false;
	amroutine->amcanunique = false;
	amroutine->amcanmulticol = false;
	amroutine->amoptionalkey = true;
	amroutine->amsearcharray = false;
	amroutine->amsearchnulls = true;
	amroutine->amstorage = true;
	amroutine->amclusterable = false;
	amroutine->ampredlocks = false;
	amroutine->amcanparallel = false;
	amroutine->amcaninclude = true;
	amroutine->amusemaintenanceworkmem = false;
	amroutine->amsummarizing = false;
	amroutine->amparallelvacuumoptions =
		VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
	amroutine->amkeytype = InvalidOid;

	amroutine->ambuild = spgbuild;
	amroutine->ambuildempty = spgbuildempty;
	amroutine->aminsert = spginsert;
	amroutine->ambulkdelete = spgbulkdelete;
	amroutine->amvacuumcleanup = spgvacuumcleanup;
	amroutine->amcanreturn = spgcanreturn;
	amroutine->amcostestimate = spgcostestimate;
	amroutine->amoptions = spgoptions;
	amroutine->amproperty = spgproperty;
	amroutine->ambuildphasename = NULL;
	amroutine->amvalidate = spgvalidate;
	amroutine->amadjustmembers = spgadjustmembers;
	amroutine->ambeginscan = spgbeginscan;
	amroutine->amrescan = spgrescan;
	amroutine->amgettuple = spggettuple;
	amroutine->amgetbitmap = spggetbitmap;
	amroutine->amendscan = spgendscan;
	amroutine->ammarkpos = NULL;
	amroutine->amrestrpos = NULL;
	amroutine->amestimateparallelscan = NULL;
	amroutine->aminitparallelscan = NULL;
	amroutine->amparallelrescan = NULL;

	PG_RETURN_POINTER(amroutine);
}
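
/*
 * Example: the handler above is what CREATE INDEX dispatches through, e.g.
 *
 *		CREATE INDEX ON tbl USING spgist (col);
 *
 * The core code looks up spghandler via the spgist row in pg_am and then
 * drives index operations through the callbacks filled in above (spgbuild,
 * spginsert, etc).
 */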

/*
 * GetIndexInputType
 *		Determine the nominal input data type for an index column
 *
 * We define the "nominal" input type as the associated opclass's opcintype,
 * or if that is a polymorphic type, the base type of the heap column or
 * expression that is the index's input.  The reason for preferring the
 * opcintype is that non-polymorphic opclasses probably don't want to hear
 * about binary-compatible input types.  For instance, if a text opclass
 * is being used with a varchar heap column, we want to report "text" not
 * "varchar".  Likewise, opclasses don't want to hear about domain types,
 * so if we do consult the actual input type, we make sure to flatten domains.
 *
 * At some point maybe this should go somewhere else, but it's not clear
 * if any other index AMs have a use for it.
 */
static Oid
GetIndexInputType(Relation index, AttrNumber indexcol)
{
	Oid			opcintype;
	AttrNumber	heapcol;
	List	   *indexprs;
	ListCell   *indexpr_item;

	Assert(index->rd_index != NULL);
	Assert(indexcol > 0 && indexcol <= index->rd_index->indnkeyatts);
	opcintype = index->rd_opcintype[indexcol - 1];
	if (!IsPolymorphicType(opcintype))
		return opcintype;
	heapcol = index->rd_index->indkey.values[indexcol - 1];
	if (heapcol != 0)			/* Simple index column? */
		return getBaseType(get_atttype(index->rd_index->indrelid, heapcol));

	/*
	 * If the index expressions are already cached, skip calling
	 * RelationGetIndexExpressions, as it will make a copy which is overkill.
	 * We're not going to modify the trees, and we're not going to do
	 * anything that would invalidate the relcache entry before we're done.
	 */
	if (index->rd_indexprs)
		indexprs = index->rd_indexprs;
	else
		indexprs = RelationGetIndexExpressions(index);
	indexpr_item = list_head(indexprs);
	for (int i = 1; i <= index->rd_index->indnkeyatts; i++)
	{
		if (index->rd_index->indkey.values[i - 1] == 0)
		{
			/* expression column */
			if (indexpr_item == NULL)
				elog(ERROR, "wrong number of index expressions");
			if (i == indexcol)
				return getBaseType(exprType((Node *) lfirst(indexpr_item)));
			indexpr_item = lnext(indexprs, indexpr_item);
		}
	}
	elog(ERROR, "wrong number of index expressions");
	return InvalidOid;			/* keep compiler quiet */
}

/* Fill in a SpGistTypeDesc struct with info about the specified data type */
static void
fillTypeDesc(SpGistTypeDesc *desc, Oid type)
{
	HeapTuple	tp;
	Form_pg_type typtup;

	desc->type = type;
	tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type));
	if (!HeapTupleIsValid(tp))
		elog(ERROR, "cache lookup failed for type %u", type);
	typtup = (Form_pg_type) GETSTRUCT(tp);
	desc->attlen = typtup->typlen;
	desc->attbyval = typtup->typbyval;
	desc->attalign = typtup->typalign;
	desc->attstorage = typtup->typstorage;
	ReleaseSysCache(tp);
}

/*
 * Fetch local cache of AM-specific info about the index, initializing it
 * if necessary
 */
SpGistCache *
spgGetCache(Relation index)
{
	SpGistCache *cache;

	if (index->rd_amcache == NULL)
	{
		Oid			atttype;
		spgConfigIn in;
		FmgrInfo   *procinfo;
		Buffer		metabuffer;
		SpGistMetaPageData *metadata;

		cache = MemoryContextAllocZero(index->rd_indexcxt,
									   sizeof(SpGistCache));

		/* SPGiST must have one key column and can also have INCLUDE columns */
		Assert(IndexRelationGetNumberOfKeyAttributes(index) == 1);
		Assert(IndexRelationGetNumberOfAttributes(index) <= INDEX_MAX_KEYS);

		/*
		 * Get the actual (well, nominal) data type of the key column.  We
		 * pass this to the opclass config function so that polymorphic
		 * opclasses are possible.
		 */
		atttype = GetIndexInputType(index, spgKeyColumn + 1);

		/* Call the config function to get config info for the opclass */
		in.attType = atttype;

		procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC);
		FunctionCall2Coll(procinfo,
						  index->rd_indcollation[spgKeyColumn],
						  PointerGetDatum(&in),
						  PointerGetDatum(&cache->config));

		/*
		 * If leafType isn't specified, use the declared index column type,
		 * which index.c will have derived from the opclass's opcintype.
		 * (Although we now make spgvalidate.c warn if these aren't the same,
		 * old user-defined opclasses may not set the STORAGE parameter
		 * correctly, so believe leafType if it's given.)
		 */
		if (!OidIsValid(cache->config.leafType))
		{
			cache->config.leafType =
				TupleDescAttr(RelationGetDescr(index), spgKeyColumn)->atttypid;

			/*
			 * If index column type is binary-coercible to atttype (for
			 * example, it's a domain over atttype), treat it as plain
			 * atttype to avoid thinking we need to compress.
			 */
			if (cache->config.leafType != atttype &&
				IsBinaryCoercible(cache->config.leafType, atttype))
				cache->config.leafType = atttype;
		}

		/* Get the information we need about each relevant datatype */
		fillTypeDesc(&cache->attType, atttype);

		if (cache->config.leafType != atttype)
		{
			if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						 errmsg("compress method must be defined when leaf type is different from input type")));

			fillTypeDesc(&cache->attLeafType, cache->config.leafType);
		}
		else
		{
			/* Save lookups in this common case */
			cache->attLeafType = cache->attType;
		}

		fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
		fillTypeDesc(&cache->attLabelType, cache->config.labelType);

		/* Last, get the lastUsedPages data from the metapage */
		metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
		LockBuffer(metabuffer, BUFFER_LOCK_SHARE);

		metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));

		if (metadata->magicNumber != SPGIST_MAGIC_NUMBER)
			elog(ERROR, "index \"%s\" is not an SP-GiST index",
				 RelationGetRelationName(index));

		cache->lastUsedPages = metadata->lastUsedPages;

		UnlockReleaseBuffer(metabuffer);

		index->rd_amcache = (void *) cache;
	}
	else
	{
		/* assume it's up to date */
		cache = (SpGistCache *) index->rd_amcache;
	}

	return cache;
}

/*
 * Compute a tuple descriptor for leaf tuples or index-only-scan result tuples.
 *
 * We can use the relcache's tupdesc as-is in many cases, and it's always
 * OK so far as any INCLUDE columns are concerned.  However, the entry for
 * the key column has to match leafType in the first case or attType in the
 * second case.  While the relcache's tupdesc *should* show leafType, this
 * might not hold for legacy user-defined opclasses, since before v14 they
 * were not allowed to declare their true storage type in CREATE OPCLASS.
 * Also, attType can be different from what is in the relcache.
 *
 * This function gives back either a pointer to the relcache's tupdesc
 * if that is suitable, or a palloc'd copy that's been adjusted to match
 * the specified key column type.  We can avoid doing any catalog lookups
 * here by insisting that the caller pass an SpGistTypeDesc not just an OID.
 */
TupleDesc
getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType)
{
	TupleDesc	outTupDesc;
	Form_pg_attribute att;

	if (keyType->type ==
		TupleDescAttr(RelationGetDescr(index), spgKeyColumn)->atttypid)
		outTupDesc = RelationGetDescr(index);
	else
	{
		outTupDesc = CreateTupleDescCopy(RelationGetDescr(index));
		att = TupleDescAttr(outTupDesc, spgKeyColumn);
		/* It's sufficient to update the type-dependent fields of the column */
		att->atttypid = keyType->type;
		att->atttypmod = -1;
		att->attlen = keyType->attlen;
		att->attbyval = keyType->attbyval;
		att->attalign = keyType->attalign;
		att->attstorage = keyType->attstorage;
		/* We shouldn't need to bother with making these valid: */
		att->attcompression = InvalidCompressionMethod;
		att->attcollation = InvalidOid;
		/* In case we changed typlen, we'd better reset following offsets */
		for (int i = spgFirstIncludeColumn; i < outTupDesc->natts; i++)
			TupleDescAttr(outTupDesc, i)->attcacheoff = -1;
	}
	return outTupDesc;
}

/* Initialize SpGistState for working with the given index */
void
initSpGistState(SpGistState *state, Relation index)
{
	SpGistCache *cache;

	state->index = index;

	/* Get cached static information about index */
	cache = spgGetCache(index);

	state->config = cache->config;
	state->attType = cache->attType;
	state->attLeafType = cache->attLeafType;
	state->attPrefixType = cache->attPrefixType;
	state->attLabelType = cache->attLabelType;

	/* Ensure we have a valid descriptor for leaf tuples */
	state->leafTupDesc = getSpGistTupleDesc(state->index, &state->attLeafType);

	/* Make workspace for constructing dead tuples */
	state->deadTupleStorage = palloc0(SGDTSIZE);

	/* Set XID to use in redirection tuples */
	state->myXid = GetTopTransactionIdIfAny();

	/* Assume we're not in an index build (spgbuild will override) */
	state->isBuild = false;
}

/*
 * Allocate a new page (either by recycling, or by extending the index file).
 *
 * The returned buffer is already pinned and exclusive-locked.
 * Caller is responsible for initializing the page by calling SpGistInitBuffer.
 */
Buffer
SpGistNewBuffer(Relation index)
{
	Buffer		buffer;

	/* First, try to get a page from FSM */
	for (;;)
	{
		BlockNumber blkno = GetFreeIndexPage(index);

		if (blkno == InvalidBlockNumber)
			break;				/* nothing known to FSM */

		/*
		 * The fixed pages shouldn't ever be listed in FSM, but just in case
		 * one is, ignore it.
		 */
		if (SpGistBlockIsFixed(blkno))
			continue;

		buffer = ReadBuffer(index, blkno);

		/*
		 * We have to guard against the possibility that someone else already
		 * recycled this page; the buffer may be locked if so.
		 */
		if (ConditionalLockBuffer(buffer))
		{
			Page		page = BufferGetPage(buffer);

			if (PageIsNew(page))
				return buffer;	/* OK to use, if never initialized */

			if (SpGistPageIsDeleted(page) || PageIsEmpty(page))
				return buffer;	/* OK to use */

			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		}

		/* Can't use it, so release buffer and try again */
		ReleaseBuffer(buffer);
	}

	buffer = ExtendBufferedRel(EB_REL(index), MAIN_FORKNUM, NULL,
							   EB_LOCK_FIRST);

	return buffer;
}
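
/*
 * Note: ExtendBufferedRel() with EB_LOCK_FIRST hands back the newly added
 * page already pinned and exclusive-locked, so no separate LockBuffer()
 * call is needed here, unlike the older ReadBuffer(index, P_NEW) idiom
 * this replaces.
 */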

/*
 * Update index metapage's lastUsedPages info from local cache, if possible
 *
 * Updating the metapage isn't critical for index operation, so we
 * 1. use ConditionalLockBuffer to improve concurrency
 * 2. don't WAL-log metabuffer changes to decrease WAL traffic
 */
void
SpGistUpdateMetaPage(Relation index)
{
	SpGistCache *cache = (SpGistCache *) index->rd_amcache;

	if (cache != NULL)
	{
		Buffer		metabuffer;

		metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);

		if (ConditionalLockBuffer(metabuffer))
		{
			Page		metapage = BufferGetPage(metabuffer);
			SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage);

			metadata->lastUsedPages = cache->lastUsedPages;

			/*
			 * Set pd_lower just past the end of the metadata.  This is
			 * essential, because without doing so, metadata will be lost if
			 * xlog.c compresses the page.  (We must do this here because
			 * pre-v11 versions of PG did not set the metapage's pd_lower
			 * correctly, so a pg_upgraded index might contain the wrong
			 * value.)
			 */
			((PageHeader) metapage)->pd_lower =
				((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) metapage;

			MarkBufferDirty(metabuffer);
			UnlockReleaseBuffer(metabuffer);
		}
		else
		{
			ReleaseBuffer(metabuffer);
		}
	}
}

/* Macro to select proper element of lastUsedPages cache depending on flags */
/* Masking flags with SPGIST_CACHED_PAGES is just for paranoia's sake */
#define GET_LUP(c, f) (&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES])
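
/*
 * Example of the resulting slot layout, assuming the flag-bit definitions
 * in spgist_private.h (GBUF_PARITY_MASK = 0x03, GBUF_NULLS = 0x04,
 * SPGIST_CACHED_PAGES = 8): slots 0-2 hold inner pages of the three block
 * number parity classes, slot 3 holds the leaf page, and slots 4-7 repeat
 * that pattern for pages of the nulls tree.  See SpGistSetLastUsedPage for
 * how flags are derived from an actual page.
 */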

/*
 * Allocate and initialize a new buffer of the type and parity specified by
 * flags.  The returned buffer is already pinned and exclusive-locked.
 *
 * When requesting an inner page, if we get one with the wrong parity,
 * we just release the buffer and try again.  We will get a different page
 * because GetFreeIndexPage will have marked the page used in FSM.  The page
 * is entered in our local lastUsedPages cache, so there's some hope of
 * making use of it later in this session, but otherwise we rely on VACUUM
 * to eventually re-enter the page in FSM, making it available for recycling.
 * Note that such a page does not get marked dirty here, so unless it's used
 * fairly soon, the buffer will just get discarded and the page will remain
 * as it was on disk.
 *
 * When we return a buffer to the caller, the page is *not* entered into
 * the lastUsedPages cache; we expect the caller will do so after it's taken
 * whatever space it will use.  This is because after the caller has used up
 * some space, the page might have less space than whatever was cached
 * already so we'd rather not trash the old cache entry.
 */
static Buffer
allocNewBuffer(Relation index, int flags)
{
	SpGistCache *cache = spgGetCache(index);
	uint16		pageflags = 0;

	if (GBUF_REQ_LEAF(flags))
		pageflags |= SPGIST_LEAF;
	if (GBUF_REQ_NULLS(flags))
		pageflags |= SPGIST_NULLS;

	for (;;)
	{
		Buffer		buffer;

		buffer = SpGistNewBuffer(index);
		SpGistInitBuffer(buffer, pageflags);

		if (pageflags & SPGIST_LEAF)
		{
			/* Leaf pages have no parity concerns, so just use it */
			return buffer;
		}
		else
		{
			BlockNumber blkno = BufferGetBlockNumber(buffer);
			int			blkFlags = GBUF_INNER_PARITY(blkno);

			if ((flags & GBUF_PARITY_MASK) == blkFlags)
			{
				/* Page has right parity, use it */
				return buffer;
			}
			else
			{
				/* Page has wrong parity, record it in cache and try again */
				if (pageflags & SPGIST_NULLS)
					blkFlags |= GBUF_NULLS;
				cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno;
				cache->lastUsedPages.cachedPage[blkFlags].freeSpace =
					PageGetExactFreeSpace(BufferGetPage(buffer));
				UnlockReleaseBuffer(buffer);
			}
		}
	}
}

/*
 * Get a buffer of the type and parity specified by flags, having at least
 * as much free space as indicated by needSpace.  We use the lastUsedPages
 * cache to assign the same buffer previously requested when possible.
 * The returned buffer is already pinned and exclusive-locked.
 *
 * *isNew is set true if the page was initialized here, false if it was
 * already valid.
 */
Buffer
SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
{
	SpGistCache *cache = spgGetCache(index);
	SpGistLastUsedPage *lup;

	/* Bail out if even an empty page wouldn't meet the demand */
	if (needSpace > SPGIST_PAGE_CAPACITY)
		elog(ERROR, "desired SPGiST tuple size is too big");

	/*
	 * If possible, increase the space request to include relation's
	 * fillfactor.  This ensures that when we add unrelated tuples to a page,
	 * we try to keep 100-fillfactor% available for adding tuples that are
	 * related to the ones already on it.  But fillfactor mustn't cause an
	 * error for requests that would otherwise be legal.
	 */
	needSpace += SpGistGetTargetPageFreeSpace(index);
	needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);

	/* Get the cache entry for this flags setting */
	lup = GET_LUP(cache, flags);

	/* If we have nothing cached, just turn it over to allocNewBuffer */
	if (lup->blkno == InvalidBlockNumber)
	{
		*isNew = true;
		return allocNewBuffer(index, flags);
	}

	/* fixed pages should never be in cache */
	Assert(!SpGistBlockIsFixed(lup->blkno));

	/* If cached freeSpace isn't enough, don't bother looking at the page */
	if (lup->freeSpace >= needSpace)
	{
		Buffer		buffer;
		Page		page;

		buffer = ReadBuffer(index, lup->blkno);

		if (!ConditionalLockBuffer(buffer))
		{
			/*
			 * buffer is locked by another process, so return a new buffer
			 */
			ReleaseBuffer(buffer);
			*isNew = true;
			return allocNewBuffer(index, flags);
		}

		page = BufferGetPage(buffer);

		if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page))
		{
			/* OK to initialize the page */
			uint16		pageflags = 0;

			if (GBUF_REQ_LEAF(flags))
				pageflags |= SPGIST_LEAF;
			if (GBUF_REQ_NULLS(flags))
				pageflags |= SPGIST_NULLS;
			SpGistInitBuffer(buffer, pageflags);
			lup->freeSpace = PageGetExactFreeSpace(page) - needSpace;
			*isNew = true;
			return buffer;
		}

		/*
		 * Check that page is of right type and has enough space.  We must
		 * recheck this since our cache isn't necessarily up to date.
		 */
		if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) &&
			(GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page)))
		{
			int			freeSpace = PageGetExactFreeSpace(page);

			if (freeSpace >= needSpace)
			{
				/* Success, update freespace info and return the buffer */
				lup->freeSpace = freeSpace - needSpace;
				*isNew = false;
				return buffer;
			}
		}

		/*
		 * Fall back to allocation of a new buffer.
		 */
		UnlockReleaseBuffer(buffer);
	}

	/* No success with cache, so return a new buffer */
	*isNew = true;
	return allocNewBuffer(index, flags);
}

/*
 * Update lastUsedPages cache when done modifying a page.
 *
 * We update the appropriate cache entry if it already contained this page
 * (its freeSpace is likely obsolete), or if this page has more space than
 * whatever we had cached.
 */
void
SpGistSetLastUsedPage(Relation index, Buffer buffer)
{
	SpGistCache *cache = spgGetCache(index);
	SpGistLastUsedPage *lup;
	int			freeSpace;
	Page		page = BufferGetPage(buffer);
	BlockNumber blkno = BufferGetBlockNumber(buffer);
	int			flags;

	/* Never enter fixed pages (root pages) in cache, though */
	if (SpGistBlockIsFixed(blkno))
		return;

	if (SpGistPageIsLeaf(page))
		flags = GBUF_LEAF;
	else
		flags = GBUF_INNER_PARITY(blkno);
	if (SpGistPageStoresNulls(page))
		flags |= GBUF_NULLS;

	lup = GET_LUP(cache, flags);

	freeSpace = PageGetExactFreeSpace(page);
	if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno ||
		lup->freeSpace < freeSpace)
	{
		lup->blkno = blkno;
		lup->freeSpace = freeSpace;
	}
}

/*
 * Initialize an SPGiST page to empty, with specified flags
 */
void
SpGistInitPage(Page page, uint16 f)
{
	SpGistPageOpaque opaque;

	PageInit(page, BLCKSZ, sizeof(SpGistPageOpaqueData));
	opaque = SpGistPageGetOpaque(page);
	opaque->flags = f;
	opaque->spgist_page_id = SPGIST_PAGE_ID;
}

/*
 * Initialize a buffer's page to empty, with specified flags
 */
void
SpGistInitBuffer(Buffer b, uint16 f)
{
	Assert(BufferGetPageSize(b) == BLCKSZ);
	SpGistInitPage(BufferGetPage(b), f);
}

/*
 * Initialize metadata page
 */
void
SpGistInitMetapage(Page page)
{
	SpGistMetaPageData *metadata;
	int			i;

	SpGistInitPage(page, SPGIST_META);
	metadata = SpGistPageGetMeta(page);
	memset(metadata, 0, sizeof(SpGistMetaPageData));
	metadata->magicNumber = SPGIST_MAGIC_NUMBER;

	/* initialize last-used-page cache to empty */
	for (i = 0; i < SPGIST_CACHED_PAGES; i++)
		metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber;

	/*
	 * Set pd_lower just past the end of the metadata.  This is essential,
	 * because without doing so, metadata will be lost if xlog.c compresses
	 * the page.
	 */
	((PageHeader) page)->pd_lower =
		((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) page;
}

/*
 * reloptions processing for SPGiST
 */
bytea *
spgoptions(Datum reloptions, bool validate)
{
	static const relopt_parse_elt tab[] = {
		{"fillfactor", RELOPT_TYPE_INT, offsetof(SpGistOptions, fillfactor)},
	};

	return (bytea *) build_reloptions(reloptions, validate,
									  RELOPT_KIND_SPGIST,
									  sizeof(SpGistOptions),
									  tab, lengthof(tab));
}
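
/*
 * Example: the single supported reloption can be set at index creation,
 * e.g.
 *		CREATE INDEX ON tbl USING spgist (col) WITH (fillfactor = 80);
 */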

/*
 * Get the space needed to store a non-null datum of the indicated type
 * in an inner tuple (that is, as a prefix or node label).
 * Note the result is already rounded up to a MAXALIGN boundary.
 * Here we follow the convention that pass-by-val types are just stored
 * in their Datum representation (compare memcpyInnerDatum).
 */
unsigned int
SpGistGetInnerTypeSize(SpGistTypeDesc *att, Datum datum)
{
	unsigned int size;

	if (att->attbyval)
		size = sizeof(Datum);
	else if (att->attlen > 0)
		size = att->attlen;
	else
		size = VARSIZE_ANY(datum);

	return MAXALIGN(size);
}
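
/*
 * Example: for a pass-by-value label type such as int4, this returns
 * MAXALIGN(sizeof(Datum)), since the value is stored as a full Datum
 * (see memcpyInnerDatum); a varlena datum is charged its VARSIZE_ANY,
 * maxaligned.
 */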

/*
 * Copy the given non-null datum to *target, in the inner-tuple case
 */
static void
memcpyInnerDatum(void *target, SpGistTypeDesc *att, Datum datum)
{
	unsigned int size;

	if (att->attbyval)
	{
		memcpy(target, &datum, sizeof(Datum));
	}
	else
	{
		size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum);
		memcpy(target, DatumGetPointer(datum), size);
	}
}

/*
 * Compute space required for a leaf tuple holding the given data.
 *
 * This must match the size-calculation portion of spgFormLeafTuple.
 */
Size
SpGistGetLeafTupleSize(TupleDesc tupleDescriptor,
					   Datum *datums, bool *isnulls)
{
	Size		size;
	Size		data_size;
	bool		needs_null_mask = false;
	int			natts = tupleDescriptor->natts;

	/*
	 * Decide whether we need a nulls bitmask.
	 *
	 * If there is only a key attribute (natts == 1), never use a bitmask,
	 * for compatibility with the pre-v14 layout of leaf tuples.  Otherwise,
	 * we need one if any attribute is null.
	 */
	if (natts > 1)
	{
		for (int i = 0; i < natts; i++)
		{
			if (isnulls[i])
			{
				needs_null_mask = true;
				break;
			}
		}
	}

	/*
	 * Calculate size of the data part; same as for heap tuples.
	 */
	data_size = heap_compute_data_size(tupleDescriptor, datums, isnulls);

	/*
	 * Compute total size.
	 */
	size = SGLTHDRSZ(needs_null_mask);
	size += data_size;
	size = MAXALIGN(size);

	/*
	 * Ensure that we can replace the tuple with a dead tuple later.  This
	 * test is unnecessary when there are any non-null attributes, but be
	 * safe.
	 */
	if (size < SGDTSIZE)
		size = SGDTSIZE;

	return size;
}

/*
 * Construct a leaf tuple containing the given heap TID and datum values
 */
SpGistLeafTuple
spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
				 Datum *datums, bool *isnulls)
{
	SpGistLeafTuple tup;
	TupleDesc	tupleDescriptor = state->leafTupDesc;
	Size		size;
	Size		hoff;
	Size		data_size;
	bool		needs_null_mask = false;
	int			natts = tupleDescriptor->natts;
	char	   *tp;				/* ptr to tuple data */
	uint16		tupmask = 0;	/* unused heap_fill_tuple output */

	/*
	 * Decide whether we need a nulls bitmask.
	 *
	 * If there is only a key attribute (natts == 1), never use a bitmask,
	 * for compatibility with the pre-v14 layout of leaf tuples.  Otherwise,
	 * we need one if any attribute is null.
	 */
	if (natts > 1)
	{
		for (int i = 0; i < natts; i++)
		{
			if (isnulls[i])
			{
				needs_null_mask = true;
				break;
			}
		}
	}

	/*
	 * Calculate size of the data part; same as for heap tuples.
	 */
	data_size = heap_compute_data_size(tupleDescriptor, datums, isnulls);

	/*
	 * Compute total size.
	 */
	hoff = SGLTHDRSZ(needs_null_mask);
	size = hoff + data_size;
	size = MAXALIGN(size);

	/*
	 * Ensure that we can replace the tuple with a dead tuple later.  This
	 * test is unnecessary when there are any non-null attributes, but be
	 * safe.
	 */
	if (size < SGDTSIZE)
		size = SGDTSIZE;

	/* OK, form the tuple */
	tup = (SpGistLeafTuple) palloc0(size);

	tup->size = size;
	SGLT_SET_NEXTOFFSET(tup, InvalidOffsetNumber);
	tup->heapPtr = *heapPtr;

	tp = (char *) tup + hoff;

	if (needs_null_mask)
	{
		bits8	   *bp;			/* ptr to null bitmap in tuple */

		/* Set nullmask presence bit in SpGistLeafTuple header */
		SGLT_SET_HASNULLMASK(tup, true);
		/* Fill the data area and null mask */
		bp = (bits8 *) ((char *) tup + sizeof(SpGistLeafTupleData));
		heap_fill_tuple(tupleDescriptor, datums, isnulls, tp, data_size,
						&tupmask, bp);
	}
	else if (natts > 1 || !isnulls[spgKeyColumn])
	{
		/* Fill data area only */
		heap_fill_tuple(tupleDescriptor, datums, isnulls, tp, data_size,
						&tupmask, (bits8 *) NULL);
	}
	/* otherwise we have no data, nor a bitmap, to fill */

	return tup;
}
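
/*
 * Resulting tuple layout, for illustration:
 *
 *		SpGistLeafTupleData header
 *		nulls bitmask (only if needs_null_mask)
 *		data area filled by heap_fill_tuple
 *
 * The total is maxaligned and never smaller than SGDTSIZE, so the tuple
 * can later be replaced in place by a dead tuple.
 */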

/*
 * Construct a node (to go into an inner tuple) containing the given label
 *
 * Note that the node's downlink is just set invalid here.  Caller will fill
 * it in later.
 */
SpGistNodeTuple
spgFormNodeTuple(SpGistState *state, Datum label, bool isnull)
{
	SpGistNodeTuple tup;
	unsigned int size;
	unsigned short infomask = 0;

	/* compute space needed (note result is already maxaligned) */
	size = SGNTHDRSZ;
	if (!isnull)
		size += SpGistGetInnerTypeSize(&state->attLabelType, label);

	/*
	 * Here we make sure that the size will fit in the field reserved for it
	 * in t_info.
	 */
	if ((size & INDEX_SIZE_MASK) != size)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("index row requires %zu bytes, maximum size is %zu",
						(Size) size, (Size) INDEX_SIZE_MASK)));

	tup = (SpGistNodeTuple) palloc0(size);

	if (isnull)
		infomask |= INDEX_NULL_MASK;
	/* we don't bother setting the INDEX_VAR_MASK bit */
	infomask |= size;
	tup->t_info = infomask;

	/* The TID field will be filled in later */
	ItemPointerSetInvalid(&tup->t_tid);

	if (!isnull)
		memcpyInnerDatum(SGNTDATAPTR(tup), &state->attLabelType, label);

	return tup;
}

/*
 * Construct an inner tuple containing the given prefix and node array
 */
SpGistInnerTuple
spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
				  int nNodes, SpGistNodeTuple *nodes)
{
	SpGistInnerTuple tup;
	unsigned int size;
	unsigned int prefixSize;
	int			i;
	char	   *ptr;

	/* Compute size needed */
	if (hasPrefix)
		prefixSize = SpGistGetInnerTypeSize(&state->attPrefixType, prefix);
	else
		prefixSize = 0;

	size = SGITHDRSZ + prefixSize;

	/* Note: we rely on node tuple sizes to be maxaligned already */
	for (i = 0; i < nNodes; i++)
		size += IndexTupleSize(nodes[i]);

	/*
	 * Ensure that we can replace the tuple with a dead tuple later.  This
	 * test is unnecessary given current tuple layouts, but let's be safe.
	 */
	if (size < SGDTSIZE)
		size = SGDTSIZE;

	/*
	 * Inner tuple should be small enough to fit on a page
	 */
	if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu",
						(Size) size,
						SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
				 errhint("Values larger than a buffer page cannot be indexed.")));

	/*
	 * Check for overflow of header fields --- probably can't fail if the
	 * above succeeded, but let's be paranoid
	 */
	if (size > SGITMAXSIZE ||
		prefixSize > SGITMAXPREFIXSIZE ||
		nNodes > SGITMAXNNODES)
		elog(ERROR, "SPGiST inner tuple header field is too small");

	/* OK, form the tuple */
	tup = (SpGistInnerTuple) palloc0(size);

	tup->nNodes = nNodes;
	tup->prefixSize = prefixSize;
	tup->size = size;

	if (hasPrefix)
		memcpyInnerDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);

	ptr = (char *) SGITNODEPTR(tup);

	for (i = 0; i < nNodes; i++)
	{
		SpGistNodeTuple node = nodes[i];

		memcpy(ptr, node, IndexTupleSize(node));
		ptr += IndexTupleSize(node);
	}

	return tup;
}

/*
 * Construct a "dead" tuple to replace a tuple being deleted.
 *
 * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER.
 * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and
 * the xid field is filled in automatically.
 *
 * This is called in critical sections, so we don't use palloc; the tuple
 * is built in preallocated storage.  It should be copied before another
 * call with different parameters can occur.
 */
SpGistDeadTuple
spgFormDeadTuple(SpGistState *state, int tupstate,
				 BlockNumber blkno, OffsetNumber offnum)
{
	SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage;

	tuple->tupstate = tupstate;
	tuple->size = SGDTSIZE;
	SGLT_SET_NEXTOFFSET(tuple, InvalidOffsetNumber);

	if (tupstate == SPGIST_REDIRECT)
	{
		ItemPointerSet(&tuple->pointer, blkno, offnum);
		Assert(TransactionIdIsValid(state->myXid));
		tuple->xid = state->myXid;
	}
	else
	{
		ItemPointerSetInvalid(&tuple->pointer);
		tuple->xid = InvalidTransactionId;
	}

	return tuple;
}
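
/*
 * Usage sketch (hypothetical caller): inside a critical section one might
 * do
 *		dt = spgFormDeadTuple(state, SPGIST_REDIRECT, blkno, offnum);
 * and must place or copy the result before calling again, since every
 * call reuses state->deadTupleStorage.
 */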

/*
 * Convert an SPGiST leaf tuple into Datum/isnull arrays.
 *
 * The caller must allocate sufficient storage for the output arrays.
 * (INDEX_MAX_KEYS entries should be enough.)
 */
void
spgDeformLeafTuple(SpGistLeafTuple tup, TupleDesc tupleDescriptor,
				   Datum *datums, bool *isnulls, bool keyColumnIsNull)
{
	bool		hasNullsMask = SGLT_GET_HASNULLMASK(tup);
	char	   *tp;				/* ptr to tuple data */
	bits8	   *bp;				/* ptr to null bitmap in tuple */

	if (keyColumnIsNull && tupleDescriptor->natts == 1)
	{
		/*
		 * Trivial case: there is only the key attribute and we're in a nulls
		 * tree.  The hasNullsMask bit in the tuple header should not be set
		 * (and thus we can't use index_deform_tuple_internal), but
		 * nonetheless the result is NULL.
		 *
		 * Note: currently this is dead code, because noplace calls this when
		 * there is only the key attribute.  But we should cover the case.
		 */
		Assert(!hasNullsMask);

		datums[spgKeyColumn] = (Datum) 0;
		isnulls[spgKeyColumn] = true;
		return;
	}

	tp = (char *) tup + SGLTHDRSZ(hasNullsMask);
	bp = (bits8 *) ((char *) tup + sizeof(SpGistLeafTupleData));

	index_deform_tuple_internal(tupleDescriptor,
								datums, isnulls,
								tp, bp, hasNullsMask);

	/*
	 * Key column isnull value from the tuple should be consistent with
	 * keyColumnIsNull flag from the caller.
	 */
	Assert(keyColumnIsNull == isnulls[spgKeyColumn]);
}

/*
 * Extract the label datums of the nodes within innerTuple
 *
 * Returns NULL if the label datums are all NULL
 */
Datum *
spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
{
	Datum	   *nodeLabels;
	int			i;
	SpGistNodeTuple node;

	/* Either all the labels must be NULL, or none. */
	node = SGITNODEPTR(innerTuple);
	if (IndexTupleHasNulls(node))
	{
		SGITITERATE(innerTuple, i, node)
		{
			if (!IndexTupleHasNulls(node))
				elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
		}
		/* They're all null, so just return NULL */
		return NULL;
	}
	else
	{
		nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
		SGITITERATE(innerTuple, i, node)
		{
			if (IndexTupleHasNulls(node))
				elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
			nodeLabels[i] = SGNTDATUM(node, state);
		}
		return nodeLabels;
	}
}

/*
 * Add a new item to the page, replacing a PLACEHOLDER item if possible.
 * Return the location it's inserted at, or InvalidOffsetNumber on failure.
 *
 * If startOffset isn't NULL, we start searching for placeholders at
 * *startOffset, and update that to the next place to search.  This is just
 * an optimization for repeated insertions.
 *
 * If errorOK is false, we throw error when there's not enough room,
 * rather than returning InvalidOffsetNumber.
 */
OffsetNumber
SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
					 OffsetNumber *startOffset, bool errorOK)
{
	SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
	OffsetNumber i,
				maxoff,
				offnum;

	if (opaque->nPlaceholder > 0 &&
		PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size))
	{
		/* Try to replace a placeholder */
		maxoff = PageGetMaxOffsetNumber(page);
		offnum = InvalidOffsetNumber;

		for (;;)
		{
			if (startOffset && *startOffset != InvalidOffsetNumber)
				i = *startOffset;
			else
				i = FirstOffsetNumber;
			for (; i <= maxoff; i++)
			{
				SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page,
																   PageGetItemId(page, i));

				if (it->tupstate == SPGIST_PLACEHOLDER)
				{
					offnum = i;
					break;
				}
			}

			/* Done if we found a placeholder */
			if (offnum != InvalidOffsetNumber)
				break;

			if (startOffset && *startOffset != InvalidOffsetNumber)
			{
				/* Hint was no good, re-search from beginning */
				*startOffset = InvalidOffsetNumber;
				continue;
			}

			/* Hmm, no placeholder found? */
			opaque->nPlaceholder = 0;
			break;
		}

		if (offnum != InvalidOffsetNumber)
		{
			/* Replace the placeholder tuple */
			PageIndexTupleDelete(page, offnum);

			offnum = PageAddItem(page, item, size, offnum, false, false);

			/*
			 * We should not have failed given the size check at the top of
			 * the function, but test anyway.  If we did fail, we must PANIC
			 * because we've already deleted the placeholder tuple, and
			 * there's no other way to keep the damage from getting to disk.
			 */
			if (offnum != InvalidOffsetNumber)
			{
				Assert(opaque->nPlaceholder > 0);
				opaque->nPlaceholder--;
				if (startOffset)
					*startOffset = offnum + 1;
			}
			else
				elog(PANIC, "failed to add item of size %zu to SPGiST index page",
					 size);

			return offnum;
		}
	}

	/* No luck in replacing a placeholder, so just add it to the page */
	offnum = PageAddItem(page, item, size,
						 InvalidOffsetNumber, false, false);

	if (offnum == InvalidOffsetNumber && !errorOK)
		elog(ERROR, "failed to add item of size %zu to SPGiST index page",
			 size);

	return offnum;
}

/*
 * spgproperty() -- Check boolean properties of indexes.
 *
 * This is optional for most AMs, but is required for SP-GiST because the
 * core property code doesn't support AMPROP_DISTANCE_ORDERABLE.
 */
bool
spgproperty(Oid index_oid, int attno,
			IndexAMProperty prop, const char *propname,
			bool *res, bool *isnull)
{
	Oid			opclass,
				opfamily,
				opcintype;
	CatCList   *catlist;
	int			i;

	/* Only answer column-level inquiries */
	if (attno == 0)
		return false;

	switch (prop)
	{
		case AMPROP_DISTANCE_ORDERABLE:
			break;
		default:
			return false;
	}

	/*
	 * Currently, SP-GiST distance-ordered scans require that there be a
	 * distance operator in the opclass with the default types.  So we assume
	 * that if such an operator exists, then there's a reason for it.
	 */

	/* First we need to know the column's opclass. */
	opclass = get_index_column_opclass(index_oid, attno);
	if (!OidIsValid(opclass))
	{
		*isnull = true;
		return true;
	}

	/* Now look up the opclass family and input datatype. */
	if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
	{
		*isnull = true;
		return true;
	}

	/* And now we can check whether the operator is provided. */
	catlist = SearchSysCacheList1(AMOPSTRATEGY,
								  ObjectIdGetDatum(opfamily));

	*res = false;

	for (i = 0; i < catlist->n_members; i++)
	{
		HeapTuple	amoptup = &catlist->members[i]->tuple;
		Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(amoptup);

		if (amopform->amoppurpose == AMOP_ORDER &&
			(amopform->amoplefttype == opcintype ||
			 amopform->amoprighttype == opcintype) &&
			opfamily_can_sort_type(amopform->amopsortfamily,
								   get_op_rettype(amopform->amopopr)))
		{
			*res = true;
			break;
		}
	}

	ReleaseSysCacheList(catlist);

	*isnull = false;

	return true;
}