Tom Lane (commit c8076f09d2)

Restructure index AM interface for index building and index tuple deletion,
per previous discussion on pghackers.  Most of the duplicate code in
different AMs' ambuild routines has been moved out to a common routine
in index.c; this means that all index types now do the right things about
inserting recently-dead tuples, etc.  (I also removed support for EXTEND
INDEX in the ambuild routines, since that's about to go away anyway, and
it cluttered the code a lot.)  The retail indextuple deletion routines have
been replaced by a "bulk delete" routine in which the indexscan is inside
the access method.  I haven't pushed this change as far as it should go yet,
but it should allow considerable simplification of the internal bookkeeping
for deletions.  Also, add flag columns to pg_am to eliminate various
hardcoded tests on AM OIDs, and remove unused pg_am columns.

Fix rtree and gist index types to not attempt to store NULLs; before this,
gist usually crashed, while rtree managed not to crash but computed wacko
bounding boxes for NULL entries (which might have had something to do with
the performance problems we've heard about occasionally).

Add AtEOXact routines to hash, rtree, and gist, all of which have static
state that needs to be reset after an error.  We discovered this need long
ago for btree, but missed the other guys.

Oh, one more thing: concurrent VACUUM is now the default.
2001-07-15 22:48:19 +00:00
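
For orientation, the new "bulk delete" entry point described above takes roughly the shape sketched below. This is a reconstruction from the commit description, not a quotation of the committed code; in particular, the names IndexBulkDeleteCallback and IndexBulkDeleteResult, the argument order, and the result fields are assumptions.

#include "postgres.h"
#include "access/genam.h"

/*
 * Sketch of a bulk-delete AM entry point.  The access method runs the
 * index scan itself: it walks its own index and, for each entry, asks
 * the callback (typically supplied by VACUUM) whether the referenced
 * heap tuple is dead, removing the index entry if so.
 */
Datum
rtbulkdelete(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
    void       *callback_state = (void *) PG_GETARG_POINTER(2);
    IndexBulkDeleteResult *result;
    double      tuples_removed = 0;
    double      num_index_tuples = 0;

    /*
     * A real implementation scans every leaf entry here; each entry
     * whose heap pointer satisfies callback(&htup, callback_state) is
     * deleted and counted in tuples_removed, otherwise it is counted
     * in num_index_tuples.
     */

    result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
    result->num_pages = RelationGetNumberOfBlocks(rel);
    result->num_index_tuples = num_index_tuples;
    result->tuples_removed = tuples_removed;

    PG_RETURN_POINTER(result);
}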


/*-------------------------------------------------------------------------
 *
 * rtscan.c
 *    routines to manage scans on index relations
 *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.38 2001/07/15 22:48:16 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "access/genam.h"
#include "access/rtree.h"
/* routines defined and used here */
static void rtregscan(IndexScanDesc s);
static void rtdropscan(IndexScanDesc s);
static void rtadjone(IndexScanDesc s, int op, BlockNumber blkno,
OffsetNumber offnum);
static void adjuststack(RTSTACK *stk, BlockNumber blkno,
OffsetNumber offnum);
static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
int op, BlockNumber blkno, OffsetNumber offnum);
/*
* Whenever we start an rtree scan in a backend, we register it in private
* space. Then if the rtree index gets updated, we check all registered
* scans and adjust them if the tuple they point at got moved by the
* update. We only need to do this in private space, because when we update
* an rtree we have a write lock on the tree, so no other process can have
* any locks at all on it. A single transaction can have write and read
* locks on the same object, so that's why we need to handle this case.
*/
typedef struct RTScanListData
{
IndexScanDesc rtsl_scan;
struct RTScanListData *rtsl_next;
} RTScanListData;
typedef RTScanListData *RTScanList;
/* pointer to list of local scans on rtrees */
static RTScanList RTScans = (RTScanList) NULL;
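
/*
 * rtbeginscan() -- start a new scan on an rtree index.
 *
 * Sets up the generic scan descriptor, then registers the scan in this
 * backend's active-scan list so rtadjscans() can fix it up if the index
 * is modified while the scan is open.
 */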
Datum
rtbeginscan(PG_FUNCTION_ARGS)
{
    Relation    r = (Relation) PG_GETARG_POINTER(0);
    bool        fromEnd = PG_GETARG_BOOL(1);
    uint16      nkeys = PG_GETARG_UINT16(2);
    ScanKey     key = (ScanKey) PG_GETARG_POINTER(3);
    IndexScanDesc s;

    s = RelationGetIndexScan(r, fromEnd, nkeys, key);

    rtregscan(s);

    PG_RETURN_POINTER(s);
}
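
/*
 * rtrescan() -- (re)start a scan.
 *
 * Resets position state and installs the caller's scan keys.  On first
 * use the opaque state is allocated and the user-level operators are
 * mapped to the operators used on internal (non-leaf) pages.
 */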
Datum
rtrescan(PG_FUNCTION_ARGS)
{
    IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
    bool        fromEnd = PG_GETARG_BOOL(1);
    ScanKey     key = (ScanKey) PG_GETARG_POINTER(2);
    RTreeScanOpaque p;
    RegProcedure internal_proc;
    int         i;

    /*
     * Clear all the pointers.
     */
    ItemPointerSetInvalid(&s->currentItemData);
    ItemPointerSetInvalid(&s->currentMarkData);

    /*
     * Set flags.
     */
    if (RelationGetNumberOfBlocks(s->relation) == 0)
        s->flags = ScanUnmarked;
    else if (fromEnd)
        s->flags = ScanUnmarked | ScanUncheckedPrevious;
    else
        s->flags = ScanUnmarked | ScanUncheckedNext;

    s->scanFromEnd = fromEnd;

    if (s->numberOfKeys > 0)
    {
        memmove(s->keyData,
                key,
                s->numberOfKeys * sizeof(ScanKeyData));
    }

    p = (RTreeScanOpaque) s->opaque;
    if (p != (RTreeScanOpaque) NULL)
    {
        freestack(p->s_stack);
        freestack(p->s_markstk);
        p->s_stack = p->s_markstk = (RTSTACK *) NULL;
        p->s_flags = 0x0;
        for (i = 0; i < s->numberOfKeys; i++)
            p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument;
    }
    else
    {
        /* initialize opaque data */
        p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData));
        p->s_stack = p->s_markstk = (RTSTACK *) NULL;
        p->s_internalNKey = s->numberOfKeys;
        p->s_flags = 0x0;
        s->opaque = p;
        if (s->numberOfKeys > 0)
        {
            p->s_internalKey = (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys);

            /*
             * Scans on internal pages use different operators than they
             * do on leaf pages.  For example, if the user wants all boxes
             * that exactly match (x1,y1,x2,y2), then on internal pages we
             * need to find all boxes that contain (x1,y1,x2,y2).
             */
            for (i = 0; i < s->numberOfKeys; i++)
            {
                p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument;
                internal_proc = RTMapOperator(s->relation,
                                              s->keyData[i].sk_attno,
                                              s->keyData[i].sk_procedure);
                ScanKeyEntryInitialize(&(p->s_internalKey[i]),
                                       s->keyData[i].sk_flags,
                                       s->keyData[i].sk_attno,
                                       internal_proc,
                                       s->keyData[i].sk_argument);
            }
        }
    }

    PG_RETURN_VOID();
}
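
/*
 * rtmarkpos() -- remember the current scan position.
 *
 * Saves the current item pointer and a private copy of the parent-page
 * stack so rtrestrpos() can return here later.
 */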
Datum
rtmarkpos(PG_FUNCTION_ARGS)
{
    IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
    RTreeScanOpaque p;
    RTSTACK    *o,
               *n,
               *tmp;

    s->currentMarkData = s->currentItemData;
    p = (RTreeScanOpaque) s->opaque;
    if (p->s_flags & RTS_CURBEFORE)
        p->s_flags |= RTS_MRKBEFORE;
    else
        p->s_flags &= ~RTS_MRKBEFORE;

    o = (RTSTACK *) NULL;
    n = p->s_stack;

    /* copy the parent stack from the current item data */
    while (n != (RTSTACK *) NULL)
    {
        tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
        tmp->rts_child = n->rts_child;
        tmp->rts_blk = n->rts_blk;
        tmp->rts_parent = o;
        o = tmp;
        n = n->rts_parent;
    }

    freestack(p->s_markstk);
    p->s_markstk = o;

    PG_RETURN_VOID();
}
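
/*
 * rtrestrpos() -- restore the scan position saved by rtmarkpos().
 */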
Datum
rtrestrpos(PG_FUNCTION_ARGS)
{
    IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
    RTreeScanOpaque p;
    RTSTACK    *o,
               *n,
               *tmp;

    s->currentItemData = s->currentMarkData;
    p = (RTreeScanOpaque) s->opaque;
    if (p->s_flags & RTS_MRKBEFORE)
        p->s_flags |= RTS_CURBEFORE;
    else
        p->s_flags &= ~RTS_CURBEFORE;

    o = (RTSTACK *) NULL;
    n = p->s_markstk;

    /* copy the parent stack from the marked item data */
    while (n != (RTSTACK *) NULL)
    {
        tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
        tmp->rts_child = n->rts_child;
        tmp->rts_blk = n->rts_blk;
        tmp->rts_parent = o;
        o = tmp;
        n = n->rts_parent;
    }

    freestack(p->s_stack);
    p->s_stack = o;

    PG_RETURN_VOID();
}
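
/*
 * rtendscan() -- terminate a scan, freeing private state and removing
 * the scan from the active-scan list.
 */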
Datum
rtendscan(PG_FUNCTION_ARGS)
{
    IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
    RTreeScanOpaque p;

    p = (RTreeScanOpaque) s->opaque;

    if (p != (RTreeScanOpaque) NULL)
    {
        freestack(p->s_stack);
        freestack(p->s_markstk);
        pfree(s->opaque);
    }

    rtdropscan(s);

    /* XXX don't unset read lock -- two-phase locking */

    PG_RETURN_VOID();
}
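
/*
 * rtregscan() -- register a scan in this backend's active-scan list.
 */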
static void
rtregscan(IndexScanDesc s)
{
    RTScanList  l;

    l = (RTScanList) palloc(sizeof(RTScanListData));
    l->rtsl_scan = s;
    l->rtsl_next = RTScans;
    RTScans = l;
}
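
/*
 * rtdropscan() -- delete a scan from the active-scan list.
 */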
static void
rtdropscan(IndexScanDesc s)
{
    RTScanList  l;
    RTScanList  prev;

    prev = (RTScanList) NULL;

    for (l = RTScans;
         l != (RTScanList) NULL && l->rtsl_scan != s;
         l = l->rtsl_next)
        prev = l;

    if (l == (RTScanList) NULL)
        elog(ERROR, "rtree scan list corrupted -- cannot find %p", (void *) s);

    if (prev == (RTScanList) NULL)
        RTScans = l->rtsl_next;
    else
        prev->rtsl_next = l->rtsl_next;

    pfree(l);
}
/*
 * AtEOXact_rtree() --- clean up rtree subsystem at xact abort or commit.
 *
 * This is here because it needs to touch this module's static var RTScans.
 */
void
AtEOXact_rtree(void)
{
    /*
     * Note: these actions should only be necessary during xact abort;
     * but they can't hurt during a commit.
     */

    /*
     * Reset the active-scans list to empty.  We do not need to free the
     * list elements, because they're all palloc()'d, so they'll go away
     * at end of transaction anyway.
     */
    RTScans = NULL;
}
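
/*
 * rtadjscans() -- adjust all active scans on the given rtree to
 * compensate for an update at (blkno, offnum).  Op identifies the kind
 * of update (deletion or page split).
 */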
void
rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum)
{
    RTScanList  l;
    Oid         relid;

    relid = RelationGetRelid(r);
    for (l = RTScans; l != (RTScanList) NULL; l = l->rtsl_next)
    {
        if (RelationGetRelid(l->rtsl_scan->relation) == relid)
            rtadjone(l->rtsl_scan, op, blkno, offnum);
    }
}
/*
 * rtadjone() -- adjust one scan for update.
 *
 * By here, the scan passed in is on a modified relation.  Op tells
 * us what the modification is, and blkno and offnum tell us what
 * block and offset were affected.  This routine checks the
 * current and marked positions, and the current and marked stacks,
 * to see if any stored location needs to be changed because of the
 * update.  If so, we make the change here.
 */
static void
rtadjone(IndexScanDesc s,
         int op,
         BlockNumber blkno,
         OffsetNumber offnum)
{
    RTreeScanOpaque so;

    adjustiptr(s, &(s->currentItemData), op, blkno, offnum);
    adjustiptr(s, &(s->currentMarkData), op, blkno, offnum);

    so = (RTreeScanOpaque) s->opaque;
    if (op == RTOP_SPLIT)
    {
        adjuststack(so->s_stack, blkno, offnum);
        adjuststack(so->s_markstk, blkno, offnum);
    }
}
/*
 * adjustiptr() -- adjust current and marked item pointers in the scan
 *
 * Depending on the type of update and the place it happened, we
 * need to do nothing, to back up one record, or to start over on
 * the same page.
 */
static void
adjustiptr(IndexScanDesc s,
           ItemPointer iptr,
           int op,
           BlockNumber blkno,
           OffsetNumber offnum)
{
    OffsetNumber curoff;
    RTreeScanOpaque so;

    if (ItemPointerIsValid(iptr))
    {
        if (ItemPointerGetBlockNumber(iptr) == blkno)
        {
            curoff = ItemPointerGetOffsetNumber(iptr);
            so = (RTreeScanOpaque) s->opaque;

            switch (op)
            {
                case RTOP_DEL:
                    /* back up one if we need to */
                    if (curoff >= offnum)
                    {
                        if (curoff > FirstOffsetNumber)
                        {
                            /* just adjust the item pointer */
                            ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff));
                        }
                        else
                        {
                            /* remember that we're before the current tuple */
                            ItemPointerSet(iptr, blkno, FirstOffsetNumber);
                            if (iptr == &(s->currentItemData))
                                so->s_flags |= RTS_CURBEFORE;
                            else
                                so->s_flags |= RTS_MRKBEFORE;
                        }
                    }
                    break;
                case RTOP_SPLIT:
                    /* back to start of page on split */
                    ItemPointerSet(iptr, blkno, FirstOffsetNumber);
                    if (iptr == &(s->currentItemData))
                        so->s_flags &= ~RTS_CURBEFORE;
                    else
                        so->s_flags &= ~RTS_MRKBEFORE;
                    break;
                default:
                    elog(ERROR, "Bad operation in rtree scan adjust: %d", op);
            }
        }
    }
}
/*
 * adjuststack() -- adjust the supplied stack for a split on a page in
 *                  the index we're scanning.
 *
 * If a page on our parent stack has split, we need to back up to the
 * beginning of the page and rescan it.  The reason for this is that
 * the split algorithm for rtrees doesn't order tuples in any useful
 * way on a single page.  This means that on a split, we may wind up
 * looking at some heap tuples more than once.  This is handled in the
 * access method update code for heaps; if we've already modified the
 * tuple we are looking at in this transaction, we ignore the update
 * request.
 */
/*ARGSUSED*/
static void
adjuststack(RTSTACK *stk,
            BlockNumber blkno,
            OffsetNumber offnum)
{
    while (stk != (RTSTACK *) NULL)
    {
        if (stk->rts_blk == blkno)
            stk->rts_child = FirstOffsetNumber;

        stk = stk->rts_parent;
    }
}