
isn't being used anywhere anymore, and there seems no point in a generic index_keytest() routine when two out of three remaining access methods aren't using it. Also, add a comment documenting a convention for letting access methods define private flag bits in ScanKey sk_flags. There are no such flags at the moment but I'm thinking about changing btree's handling of "required keys" to use flag bits in the keys rather than a count of required key positions. Also, if some AM did still want SK_NEGATE then it would be reasonable to treat it as a private flag bit.
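To make the proposed sk_flags convention concrete, here is a rough sketch of how an access method might claim private bits in the high half of sk_flags and test them during a scan. All names below (SK_AM_REQFWD, SK_AM_REQBKWD, am_key_ends_scan) are hypothetical illustrations, not anything that exists in the tree as of this commit; the idea is simply that core code would agree never to inspect bits in that range, leaving each AM free to assign its own meanings.

#include "postgres.h"
#include "access/skey.h"
#include "access/sdir.h"

/*
 * Hypothetical AM-private flag bits, chosen above the range used by the
 * core SK_* flags so the two sets can never collide.
 */
#define SK_AM_REQFWD    0x00010000  /* key must hold to continue a forward scan */
#define SK_AM_REQBKWD   0x00020000  /* key must hold to continue a backward scan */

/*
 * Hypothetical helper: given a scan key that just failed, report whether
 * the AM may stop the scan in the current direction (a "required" key)
 * rather than merely skipping this tuple.
 */
static bool
am_key_ends_scan(ScanKey key, ScanDirection dir)
{
    int     reqflag = ScanDirectionIsForward(dir) ? SK_AM_REQFWD : SK_AM_REQBKWD;

    return (key->sk_flags & reqflag) != 0;
}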
/*-------------------------------------------------------------------------
 *
 * gistget.c
 *    fetch tuples from a GiST scan.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.55 2006/01/14 22:03:35 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/itup.h"
#include "access/gist_private.h"
#include "executor/execdebug.h"
#include "pgstat.h"
#include "utils/memutils.h"


static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n,
                                 ScanDirection dir);
static int  gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples);
static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan,
                              OffsetNumber offset);

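/*
 * killtuple() -- mark the index entry pointed to by iptr as killed
 * (LP_DELETE), so scans that ignore killed tuples can skip it.  If the
 * page has changed since we last visited it, search the page (and, if the
 * page was split, its right siblings) for the matching entry.
 */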
static void
killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr)
{
    Buffer      buffer = so->curbuf;

    for (;;)
    {
        Page        p;
        BlockNumber blkno;
        OffsetNumber offset,
                    maxoff;

        LockBuffer(buffer, GIST_SHARE);
        gistcheckpage(r, buffer);
        p = (Page) BufferGetPage(buffer);

        if (buffer == so->curbuf && XLByteEQ(so->stack->lsn, PageGetLSN(p)))
        {
            /* page unchanged, so all is simple */
            offset = ItemPointerGetOffsetNumber(iptr);
            PageGetItemId(p, offset)->lp_flags |= LP_DELETE;
            SetBufferCommitInfoNeedsSave(buffer);
            LockBuffer(buffer, GIST_UNLOCK);
            break;
        }

        maxoff = PageGetMaxOffsetNumber(p);

        for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
        {
            IndexTuple  ituple = (IndexTuple) PageGetItem(p, PageGetItemId(p, offset));

            if (ItemPointerEquals(&(ituple->t_tid), iptr))
            {
                /* found */
                PageGetItemId(p, offset)->lp_flags |= LP_DELETE;
                SetBufferCommitInfoNeedsSave(buffer);
                LockBuffer(buffer, GIST_UNLOCK);
                if (buffer != so->curbuf)
                    ReleaseBuffer(buffer);
                return;
            }
        }

        /* follow right link */

        /*
         * ??? is this good?  If the tuple was dropped by a concurrent
         * vacuum, we will end up reading all the leaf pages...
         */
        blkno = GistPageGetOpaque(p)->rightlink;
        LockBuffer(buffer, GIST_UNLOCK);
        if (buffer != so->curbuf)
            ReleaseBuffer(buffer);

        if (blkno == InvalidBlockNumber)
            /* not found; the tuple was dropped by somebody else */
            return;
        buffer = ReadBuffer(r, blkno);
    }
}

/*
 * gistgettuple() -- Get the next tuple in the scan
 */
Datum
gistgettuple(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
    GISTScanOpaque so;
    ItemPointerData tid;
    bool        res;

    so = (GISTScanOpaque) scan->opaque;

    /*
     * If we have produced an index tuple in the past and the executor has
     * informed us we need to mark it as "killed", do so now.
     */
    if (scan->kill_prior_tuple && ItemPointerIsValid(&(scan->currentItemData)))
        killtuple(scan->indexRelation, so, &(scan->currentItemData));

    /*
     * Get the next tuple that matches the search key.  If asked to skip
     * killed tuples, continue looping until we find a non-killed tuple that
     * matches the search key.
     */
    res = (gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples)) ? true : false;

    PG_RETURN_BOOL(res);
}

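/*
 * gistgetmulti() -- fetch multiple matching tuples' heap TIDs in one call.
 *
 * Fills tids[] with up to max_tids item pointers and reports the count via
 * *returned_tids.  Returns true if the entire array was filled, i.e. there
 * may be more matches to fetch.
 */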
Datum
gistgetmulti(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
    int32       max_tids = PG_GETARG_INT32(2);
    int32      *returned_tids = (int32 *) PG_GETARG_POINTER(3);

    *returned_tids = gistnext(scan, ForwardScanDirection, tids, max_tids, false);

    PG_RETURN_BOOL(*returned_tids == max_tids);
}

/*
 * Fetch tuples that match the search key; this can be invoked either to
 * fetch the first such tuple or subsequent matching tuples.  Stores the
 * heap TIDs of up to maxtids matches into tids[] and returns the number
 * actually stored.
 */
static int
gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, int maxtids, bool ignore_killed_tuples)
{
    Page        p;
    OffsetNumber n;
    GISTScanOpaque so;
    GISTSearchStack *stk;
    IndexTuple  it;
    GISTPageOpaque opaque;
    bool        resetoffset = false;
    int         ntids = 0;

    so = (GISTScanOpaque) scan->opaque;

    if (ItemPointerIsValid(&scan->currentItemData) == false)
    {
        /* Being asked to fetch the first entry, so start at the root */
        Assert(so->curbuf == InvalidBuffer);
        Assert(so->stack == NULL);

        so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO);

        stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack));

        stk->next = NULL;
        stk->block = GIST_ROOT_BLKNO;

        pgstat_count_index_scan(&scan->xs_pgstat_info);
    }
    else if (so->curbuf == InvalidBuffer)
    {
        return 0;
    }

    for (;;)
    {
        /* First of all, we need to lock the buffer */
        Assert(so->curbuf != InvalidBuffer);
        LockBuffer(so->curbuf, GIST_SHARE);
        gistcheckpage(scan->indexRelation, so->curbuf);
        p = BufferGetPage(so->curbuf);
        opaque = GistPageGetOpaque(p);
        resetoffset = false;

        if (XLogRecPtrIsInvalid(so->stack->lsn) || !XLByteEQ(so->stack->lsn, PageGetLSN(p)))
        {
            /* page changed since last visit, or this is the first visit; reset offset */
            so->stack->lsn = PageGetLSN(p);
            resetoffset = true;

            /* check for a page split that occurred since our last visit, or since we visited the parent */
            if (!XLogRecPtrIsInvalid(so->stack->parentlsn) &&
                XLByteLT(so->stack->parentlsn, opaque->nsn) &&
                opaque->rightlink != InvalidBlockNumber /* sanity check */ &&
                (so->stack->next == NULL || so->stack->next->block != opaque->rightlink) /* check if already added */ )
            {
                /* detected a page split; follow the right link to add pages */

                stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));
                stk->next = so->stack->next;
                stk->block = opaque->rightlink;
                stk->parentlsn = so->stack->parentlsn;
                memset(&(stk->lsn), 0, sizeof(GistNSN));
                so->stack->next = stk;
            }
        }

        /* if the page is empty, just skip it */
        if (PageIsEmpty(p))
        {
            LockBuffer(so->curbuf, GIST_UNLOCK);
            stk = so->stack->next;
            pfree(so->stack);
            so->stack = stk;

            if (so->stack == NULL)
            {
                ReleaseBuffer(so->curbuf);
                so->curbuf = InvalidBuffer;
                return ntids;
            }

            so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation,
                                              stk->block);
            continue;
        }

        if (!GistPageIsLeaf(p) || resetoffset ||
            !ItemPointerIsValid(&scan->currentItemData))
        {
            if (ScanDirectionIsBackward(dir))
                n = PageGetMaxOffsetNumber(p);
            else
                n = FirstOffsetNumber;
        }
        else
        {
            n = ItemPointerGetOffsetNumber(&(scan->currentItemData));

            if (ScanDirectionIsBackward(dir))
                n = OffsetNumberPrev(n);
            else
                n = OffsetNumberNext(n);
        }

        /* wonderful, we can look at the page */

        for (;;)
        {
            n = gistfindnext(scan, n, dir);

            if (!OffsetNumberIsValid(n))
            {
                /*
                 * We ran out of matching index entries on the current page,
                 * so pop the top stack entry and use it to continue the
                 * search.
                 */
                LockBuffer(so->curbuf, GIST_UNLOCK);
                stk = so->stack->next;
                pfree(so->stack);
                so->stack = stk;

                /* If we're out of stack entries, we're done */

                if (so->stack == NULL)
                {
                    ReleaseBuffer(so->curbuf);
                    so->curbuf = InvalidBuffer;
                    return ntids;
                }

                so->curbuf = ReleaseAndReadBuffer(so->curbuf,
                                                  scan->indexRelation,
                                                  stk->block);
                /* XXX go up */
                break;
            }

            if (GistPageIsLeaf(p))
            {
                /*
                 * We've found a matching index entry in a leaf page, so
                 * return success.  Note that we keep "curbuf" pinned so that
                 * we can efficiently resume the index scan later.
                 */

                ItemPointerSet(&(scan->currentItemData),
                               BufferGetBlockNumber(so->curbuf), n);

                if (!(ignore_killed_tuples && ItemIdDeleted(PageGetItemId(p, n))))
                {
                    it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
                    tids[ntids] = scan->xs_ctup.t_self = it->t_tid;
                    ntids++;

                    if (ntids == maxtids)
                    {
                        LockBuffer(so->curbuf, GIST_UNLOCK);
                        return ntids;
                    }
                }
            }
            else
            {
                /*
                 * We've found an entry in an internal node whose key is
                 * consistent with the search key, so push it onto the stack.
                 */

                stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack));

                it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
                stk->block = ItemPointerGetBlockNumber(&(it->t_tid));
                memset(&(stk->lsn), 0, sizeof(GistNSN));
                stk->parentlsn = so->stack->lsn;

                stk->next = so->stack->next;
                so->stack->next = stk;
            }

            if (ScanDirectionIsBackward(dir))
                n = OffsetNumberPrev(n);
            else
                n = OffsetNumberNext(n);
        }
    }

    return ntids;
}

/*
 * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
 *
 * We must decompress the key in the IndexTuple before passing it to the
 * sk_func (and we have previously overwritten the sk_func to use the
 * user-defined Consistent method, so we actually are invoking that).
 *
 * Note that this function is always invoked in a short-lived memory context,
 * so we don't need to worry about cleaning up allocated memory, either here
 * or in the implementation of any Consistent methods.
 */
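/*
 * For orientation only: a rough sketch of what the Consistent method
 * invoked through sk_func is expected to look like.  The function and
 * variable names here are purely illustrative (this is not an actual
 * opclass); the argument order simply mirrors the FunctionCall4 below.
 *
 *      Datum
 *      my_consistent(PG_FUNCTION_ARGS)
 *      {
 *          GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
 *          Datum       query = PG_GETARG_DATUM(1);
 *          StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
 *          Oid         subtype = PG_GETARG_OID(3);    (always zero today)
 *
 *          ... opclass-specific test of entry->key against query ...
 *
 *          PG_RETURN_BOOL(result);
 *      }
 */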
static bool
gistindex_keytest(IndexTuple tuple,
                  IndexScanDesc scan,
                  OffsetNumber offset)
{
    int         keySize = scan->numberOfKeys;
    ScanKey     key = scan->keyData;
    Relation    r = scan->indexRelation;
    GISTScanOpaque so;
    Page        p;
    GISTSTATE  *giststate;

    so = (GISTScanOpaque) scan->opaque;
    giststate = so->giststate;
    p = BufferGetPage(so->curbuf);

    IncrIndexProcessed();

    /*
     * The tuple was not restored after crash recovery because of an
     * incomplete insert, so we cannot test it; assume it matches.
     */
    if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
        return true;

    while (keySize > 0)
    {
        Datum       datum;
        bool        isNull;
        Datum       test;
        GISTENTRY   de;

        datum = index_getattr(tuple,
                              key->sk_attno,
                              giststate->tupdesc,
                              &isNull);
        /* is the index entry NULL? */
        if (isNull)
        {
            /* XXX eventually should check if SK_ISNULL */
            return false;
        }
        /* is the compared-to datum NULL? */
        if (key->sk_flags & SK_ISNULL)
            return false;

        gistdentryinit(giststate, key->sk_attno - 1, &de,
                       datum, r, p, offset,
                       IndexTupleSize(tuple) - sizeof(IndexTupleData),
                       FALSE, isNull);

        /*
         * Call the Consistent function to evaluate the test.  The arguments
         * are the index datum (as a GISTENTRY*), the comparison datum, and
         * the comparison operator's strategy number and subtype from pg_amop.
         *
         * (Presently there's no need to pass the subtype since it'll always
         * be zero, but might as well pass it for possible future use.)
         */
        test = FunctionCall4(&key->sk_func,
                             PointerGetDatum(&de),
                             key->sk_argument,
                             Int32GetDatum(key->sk_strategy),
                             ObjectIdGetDatum(key->sk_subtype));

        if (!DatumGetBool(test))
            return false;

        keySize--;
        key++;
    }

    return true;
}

/*
 * Return the offset of the first index entry at or beyond offset 'n' (in
 * the indicated scan direction) on the current page that is consistent
 * with the search key.  If there are no more consistent entries, return
 * InvalidOffsetNumber.  The caller should already have the page locked.
 */
static OffsetNumber
gistfindnext(IndexScanDesc scan, OffsetNumber n, ScanDirection dir)
{
    OffsetNumber maxoff;
    IndexTuple  it;
    GISTScanOpaque so;
    MemoryContext oldcxt;
    Page        p;

    so = (GISTScanOpaque) scan->opaque;
    p = BufferGetPage(so->curbuf);
    maxoff = PageGetMaxOffsetNumber(p);

    /*
     * Make sure we're in a short-lived memory context when we invoke a
     * user-supplied GiST method in gistindex_keytest(), so we don't leak
     * memory
     */
    oldcxt = MemoryContextSwitchTo(so->tempCxt);

    /*
     * If we modified the index during the scan, we may have a pointer to a
     * ghost tuple, before the scan.  If this is the case, back up one.
     */
    if (so->flags & GS_CURBEFORE)
    {
        so->flags &= ~GS_CURBEFORE;
        n = OffsetNumberPrev(n);
    }

    while (n >= FirstOffsetNumber && n <= maxoff)
    {
        it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n));
        if (gistindex_keytest(it, scan, n))
            break;

        if (ScanDirectionIsBackward(dir))
            n = OffsetNumberPrev(n);
        else
            n = OffsetNumberNext(n);
    }

    MemoryContextSwitchTo(oldcxt);
    MemoryContextReset(so->tempCxt);

    /*
     * If we found a matching entry, return its offset; otherwise return
     * InvalidOffsetNumber to inform the caller to go to the next page.
     */
    if (n >= FirstOffsetNumber && n <= maxoff)
        return n;
    else
        return InvalidOffsetNumber;
}