
/*-------------------------------------------------------------------------
 *
 * rtree.c
 *    interface routines for the postgres rtree indexed access method.
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/access/rtree/rtree.c,v 1.82 2004/01/07 18:56:24 neilc Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/rtree.h"
#include "access/xlogutils.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "miscadmin.h"


/*
 * XXX We assume that all datatypes indexable in rtrees are pass-by-reference.
 * To fix this, you'd need to improve the IndexTupleGetDatum() macro, and
 * do something with the various datum-pfreeing code.  However, it's not that
 * unreasonable an assumption in practice.
 */
#define IndexTupleGetDatum(itup) \
    PointerGetDatum(((char *) (itup)) + sizeof(IndexTupleData))
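
/*
 * Illustrative sketch (added note, not part of the original code): under
 * the pass-by-reference assumption above, reading a key back out of an
 * index tuple is plain pointer arithmetic.  For a box opclass,
 * hypothetically:
 *
 *     Datum  d   = IndexTupleGetDatum(itup);
 *     BOX   *key = (BOX *) DatumGetPointer(d);
 *
 * No copy is made; the pointer aims at the key bytes stored immediately
 * after the IndexTupleData header on the page.
 */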

/*
 * Space-allocation macros.  Note we count the item's line pointer in its size.
 */
#define RTPageAvailSpace \
    (BLCKSZ - (sizeof(PageHeaderData) - sizeof(ItemIdData)) \
     - MAXALIGN(sizeof(RTreePageOpaqueData)))
#define IndexTupleTotalSize(itup) \
    (MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData))
#define IndexTupleAttSize(itup) \
    (IndexTupleSize(itup) - sizeof(IndexTupleData))
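
/*
 * Added note on the arithmetic above: sizeof(PageHeaderData) already
 * includes one ItemIdData slot in its declared size, which is why that
 * slot is subtracted back out.  RTPageAvailSpace is thus the whole block
 * minus the fixed page header and the MAXALIGN'd rtree special space;
 * IndexTupleTotalSize then charges each tuple for its own line pointer,
 * so "these tuples fit" can be tested as
 * sum(IndexTupleTotalSize) <= RTPageAvailSpace.
 */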

/* results of rtpicksplit() */
typedef struct SPLITVEC
{
    OffsetNumber *spl_left;
    int         spl_nleft;
    Datum       spl_ldatum;
    OffsetNumber *spl_right;
    int         spl_nright;
    Datum       spl_rdatum;
} SPLITVEC;

/* for sorting tuples by cost, for picking split */
typedef struct SPLITCOST
{
    OffsetNumber offset_number;
    float       cost_differential;
    bool        choose_left;
} SPLITCOST;

typedef struct RTSTATE
{
    FmgrInfo    unionFn;        /* union function */
    FmgrInfo    sizeFn;         /* size function */
    FmgrInfo    interFn;        /* intersection function */
} RTSTATE;
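
/*
 * Added note: these three FmgrInfo slots cache the opclass support
 * procedures (see initRtstate below).  The union and intersection procs
 * follow the usual fmgr convention of returning a palloc'd datum, but the
 * size proc is called for side effect only: it writes its float result
 * through a pointer passed as the second argument, e.g.
 *
 *     float size;
 *     FunctionCall2(&rtstate->sizeFn, datum, PointerGetDatum(&size));
 */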

/* Working state for rtbuild and its callback */
typedef struct
{
    RTSTATE     rtState;
    double      indtuples;
} RTBuildState;

/* non-export function prototypes */
static void rtbuildCallback(Relation index,
                            HeapTuple htup,
                            Datum *attdata,
                            char *nulls,
                            bool tupleIsAlive,
                            void *state);
static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup,
                                    RTSTATE *rtstate);
static void rttighten(Relation r, RTSTACK *stk, Datum datum, int att_size,
                      RTSTATE *rtstate);
static InsertIndexResult rtdosplit(Relation r, Buffer buffer, RTSTACK *stack,
                                   IndexTuple itup, RTSTATE *rtstate);
static void rtintinsert(Relation r, RTSTACK *stk, IndexTuple ltup,
                        IndexTuple rtup, RTSTATE *rtstate);
static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt);
static void rtpicksplit(Relation r, Page page, SPLITVEC *v, IndexTuple itup,
                        RTSTATE *rtstate);
static void RTInitBuffer(Buffer b, uint32 f);
static OffsetNumber choose(Relation r, Page p, IndexTuple it,
                           RTSTATE *rtstate);
static int  nospace(Page p, IndexTuple it);
static void initRtstate(RTSTATE *rtstate, Relation index);
static int  qsort_comp_splitcost(const void *a, const void *b);


/*
 * routine to build an index.  Basically calls insert over and over
 */
Datum
rtbuild(PG_FUNCTION_ARGS)
{
    Relation    heap = (Relation) PG_GETARG_POINTER(0);
    Relation    index = (Relation) PG_GETARG_POINTER(1);
    IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
    double      reltuples;
    RTBuildState buildstate;
    Buffer      buffer;

    /* no locking is needed */

    initRtstate(&buildstate.rtState, index);

    /*
     * We expect to be called exactly once for any index relation. If
     * that's not the case, big trouble's what we have.
     */
    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));

    /* initialize the root page */
    buffer = ReadBuffer(index, P_NEW);
    RTInitBuffer(buffer, F_LEAF);
    WriteBuffer(buffer);

    /* build the index */
    buildstate.indtuples = 0;

    /* do the heap scan */
    reltuples = IndexBuildHeapScan(heap, index, indexInfo,
                                   rtbuildCallback, (void *) &buildstate);

    /* okay, all heap tuples are indexed */

    /*
     * Since we just counted the tuples in the heap, we update its stats
     * in pg_class to guarantee that the planner takes advantage of the
     * index we just created.  But, only update statistics during normal
     * index definitions, not for indices on system catalogs created
     * during bootstrap processing.  We must close the relations before
     * updating statistics to guarantee that the relcache entries are
     * flushed when we increment the command counter in UpdateStats().  But
     * we do not release any locks on the relations; those will be held
     * until end of transaction.
     */
    if (IsNormalProcessingMode())
    {
        Oid         hrelid = RelationGetRelid(heap);
        Oid         irelid = RelationGetRelid(index);

        heap_close(heap, NoLock);
        index_close(index);
        UpdateStats(hrelid, reltuples);
        UpdateStats(irelid, buildstate.indtuples);
    }

    PG_RETURN_VOID();
}

/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
rtbuildCallback(Relation index,
                HeapTuple htup,
                Datum *attdata,
                char *nulls,
                bool tupleIsAlive,
                void *state)
{
    RTBuildState *buildstate = (RTBuildState *) state;
    IndexTuple  itup;
    InsertIndexResult res;

    /* form an index tuple and point it at the heap tuple */
    itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
    itup->t_tid = htup->t_self;

    /* rtree indexes don't index nulls, see notes in rtinsert */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        return;
    }

    /*
     * Since we already have the index relation locked, we call rtdoinsert
     * directly.  Normal access method calls dispatch through rtinsert,
     * which locks the relation for write.  This is the right thing to do
     * if you're inserting single tups, but not when you're initializing
     * the whole index at once.
     */
    res = rtdoinsert(index, itup, &buildstate->rtState);

    if (res)
        pfree(res);

    buildstate->indtuples += 1;

    pfree(itup);
}

/*
 * rtinsert -- wrapper for rtree tuple insertion.
 *
 * This is the public interface routine for tuple insertion in rtrees.
 * It doesn't do any work; just locks the relation and passes the buck.
 */
Datum
rtinsert(PG_FUNCTION_ARGS)
{
    Relation    r = (Relation) PG_GETARG_POINTER(0);
    Datum      *datum = (Datum *) PG_GETARG_POINTER(1);
    char       *nulls = (char *) PG_GETARG_POINTER(2);
    ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);

#ifdef NOT_USED
    Relation    heapRel = (Relation) PG_GETARG_POINTER(4);
    bool        checkUnique = PG_GETARG_BOOL(5);
#endif
    InsertIndexResult res;
    IndexTuple  itup;
    RTSTATE     rtState;

    /* generate an index tuple */
    itup = index_formtuple(RelationGetDescr(r), datum, nulls);
    itup->t_tid = *ht_ctid;

    /*
     * Currently, rtrees do not support indexing NULLs; considerable
     * infrastructure work would have to be done to do anything reasonable
     * with a NULL.
     */
    if (IndexTupleHasNulls(itup))
    {
        pfree(itup);
        PG_RETURN_POINTER(NULL);
    }

    initRtstate(&rtState, r);

    /*
     * Since rtree is not marked "amconcurrent" in pg_am, caller should
     * have acquired exclusive lock on index relation.  We need no locking
     * here.
     */

    res = rtdoinsert(r, itup, &rtState);

    PG_RETURN_POINTER(res);
}
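
/*
 * Added note: rtinsert's result convention is a palloc'd InsertIndexResult
 * carrying the location of the new index entry, or a NULL pointer when the
 * tuple contained a null and was silently not indexed.  Callers must pfree
 * a non-NULL result (rtbuildCallback above does exactly that).
 */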

static InsertIndexResult
rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate)
{
    Page        page;
    Buffer      buffer;
    BlockNumber blk;
    IndexTuple  which;
    OffsetNumber l;
    RTSTACK    *stack;
    InsertIndexResult res;
    RTreePageOpaque opaque;
    Datum       datum;

    blk = P_ROOT;
    buffer = InvalidBuffer;
    stack = NULL;

    do
    {
        /* let go of current buffer before getting next */
        if (buffer != InvalidBuffer)
            ReleaseBuffer(buffer);

        /* get next buffer */
        buffer = ReadBuffer(r, blk);
        page = (Page) BufferGetPage(buffer);

        opaque = (RTreePageOpaque) PageGetSpecialPointer(page);
        if (!(opaque->flags & F_LEAF))
        {
            RTSTACK    *n;
            ItemId      iid;

            n = (RTSTACK *) palloc(sizeof(RTSTACK));
            n->rts_parent = stack;
            n->rts_blk = blk;
            n->rts_child = choose(r, page, itup, rtstate);
            stack = n;

            iid = PageGetItemId(page, n->rts_child);
            which = (IndexTuple) PageGetItem(page, iid);
            blk = ItemPointerGetBlockNumber(&(which->t_tid));
        }
    } while (!(opaque->flags & F_LEAF));
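
    /*
     * Added note: at this point the descent loop has pinned the leaf page
     * that choose() steered us to, and 'stack' records the root-to-parent
     * path (child offsets included) so that rttighten/rtdosplit can walk
     * back up and enlarge or split the parent entries as needed.
     */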

    if (nospace(page, itup))
    {
        /* need to do a split */
        res = rtdosplit(r, buffer, stack, itup, rtstate);
        freestack(stack);
        WriteBuffer(buffer);    /* don't forget to release buffer! */
        return res;
    }

    /* add the item and write the buffer */
    if (PageIsEmpty(page))
    {
        l = PageAddItem(page, (Item) itup, IndexTupleSize(itup),
                        FirstOffsetNumber,
                        LP_USED);
    }
    else
    {
        l = PageAddItem(page, (Item) itup, IndexTupleSize(itup),
                        OffsetNumberNext(PageGetMaxOffsetNumber(page)),
                        LP_USED);
    }
    if (l == InvalidOffsetNumber)
        elog(ERROR, "failed to add index item to \"%s\"",
             RelationGetRelationName(r));

    WriteBuffer(buffer);

    datum = IndexTupleGetDatum(itup);

    /* now expand the page boundary in the parent to include the new child */
    rttighten(r, stack, datum, IndexTupleAttSize(itup), rtstate);
    freestack(stack);

    /* build and return an InsertIndexResult for this insertion */
    res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
    ItemPointerSet(&(res->pointerData), blk, l);

    return res;
}

static void
rttighten(Relation r,
          RTSTACK *stk,
          Datum datum,
          int att_size,
          RTSTATE *rtstate)
{
    Datum       oldud;
    Datum       tdatum;
    Page        p;
    float       old_size,
                newd_size;
    Buffer      b;

    if (stk == NULL)
        return;

    b = ReadBuffer(r, stk->rts_blk);
    p = BufferGetPage(b);

    oldud = IndexTupleGetDatum(PageGetItem(p,
                                           PageGetItemId(p, stk->rts_child)));

    FunctionCall2(&rtstate->sizeFn, oldud,
                  PointerGetDatum(&old_size));

    datum = FunctionCall2(&rtstate->unionFn, oldud, datum);

    FunctionCall2(&rtstate->sizeFn, datum,
                  PointerGetDatum(&newd_size));

    /*
     * If newd_size == 0 we have degenerate rectangles, so we don't know
     * whether there was any change, and we have to assume there was.
     */
    if ((newd_size == 0) || (newd_size != old_size))
    {
        TupleDesc   td = RelationGetDescr(r);

        if (td->attrs[0]->attlen < 0)
        {
            /*
             * This is an internal page, so 'oldud' had better be a union
             * (constant-length) key, too.  (See comment below.)
             */
            Assert(VARSIZE(DatumGetPointer(datum)) ==
                   VARSIZE(DatumGetPointer(oldud)));
            memmove(DatumGetPointer(oldud), DatumGetPointer(datum),
                    VARSIZE(DatumGetPointer(datum)));
        }
        else
        {
            memmove(DatumGetPointer(oldud), DatumGetPointer(datum),
                    att_size);
        }
        WriteBuffer(b);

        /*
         * The user may be defining an index on variable-sized data (like
         * polygons).  If so, we need to get a constant-sized datum for
         * insertion on the internal page.  We do this by calling the
         * union proc, which is required to return a rectangle.
         */
        tdatum = FunctionCall2(&rtstate->unionFn, datum, datum);

        rttighten(r, stk->rts_parent, tdatum, att_size, rtstate);
        pfree(DatumGetPointer(tdatum));
    }
    else
        ReleaseBuffer(b);
    pfree(DatumGetPointer(datum));
}
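
/*
 * Added note: rttighten is the classic R-tree "AdjustTree" step.  A
 * hypothetical example: inserting a box (10,10)-(12,12) under a parent
 * entry whose bounding box is (0,0)-(8,8) makes the union (0,0)-(12,12);
 * the sizes differ, so the parent key is overwritten in place and the
 * enlargement is propagated to the grandparent, stopping at the first
 * level whose bounding box already covers the new key.
 */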

/*
 * rtdosplit -- split a page in the tree.
 *
 * rtpicksplit does the interesting work of choosing the split.
 * This routine just does the bit-pushing.
 */
static InsertIndexResult
rtdosplit(Relation r,
          Buffer buffer,
          RTSTACK *stack,
          IndexTuple itup,
          RTSTATE *rtstate)
{
    Page        p;
    Buffer      leftbuf,
                rightbuf;
    Page        left,
                right;
    ItemId      itemid;
    IndexTuple  item;
    IndexTuple  ltup,
                rtup;
    OffsetNumber maxoff;
    OffsetNumber i;
    OffsetNumber leftoff,
                rightoff;
    BlockNumber lbknum,
                rbknum;
    BlockNumber bufblock;
    RTreePageOpaque opaque;
    int         blank;
    InsertIndexResult res;
    char       *isnull;
    SPLITVEC    v;
    OffsetNumber *spl_left,
               *spl_right;
    TupleDesc   tupDesc;
    int         n;
    OffsetNumber newitemoff;

    p = (Page) BufferGetPage(buffer);
    opaque = (RTreePageOpaque) PageGetSpecialPointer(p);

    rtpicksplit(r, p, &v, itup, rtstate);

    /*
     * The root of the tree is the first block in the relation.  If we're
     * about to split the root, we need to do some hocus-pocus to enforce
     * this guarantee.
     */
    if (BufferGetBlockNumber(buffer) == P_ROOT)
    {
        leftbuf = ReadBuffer(r, P_NEW);
        RTInitBuffer(leftbuf, opaque->flags);
        lbknum = BufferGetBlockNumber(leftbuf);
        left = (Page) BufferGetPage(leftbuf);
    }
    else
    {
        leftbuf = buffer;
        IncrBufferRefCount(buffer);
        lbknum = BufferGetBlockNumber(buffer);
        left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData));
    }

    rightbuf = ReadBuffer(r, P_NEW);
    RTInitBuffer(rightbuf, opaque->flags);
    rbknum = BufferGetBlockNumber(rightbuf);
    right = (Page) BufferGetPage(rightbuf);

    spl_left = v.spl_left;
    spl_right = v.spl_right;
    leftoff = rightoff = FirstOffsetNumber;
    maxoff = PageGetMaxOffsetNumber(p);
    newitemoff = OffsetNumberNext(maxoff);

    /* build an InsertIndexResult for this insertion */
    res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));

    /*
     * spl_left contains a list of the offset numbers of the tuples that
     * will go to the left page.  For each offset number, get the tuple
     * item, then add the item to the left page.  Similarly for the right
     * side.
     */

    /* fill left node */
    for (n = 0; n < v.spl_nleft; n++)
    {
        i = *spl_left;
        if (i == newitemoff)
            item = itup;
        else
        {
            itemid = PageGetItemId(p, i);
            item = (IndexTuple) PageGetItem(p, itemid);
        }

        if (PageAddItem(left, (Item) item, IndexTupleSize(item),
                        leftoff, LP_USED) == InvalidOffsetNumber)
            elog(ERROR, "failed to add index item to \"%s\"",
                 RelationGetRelationName(r));
        leftoff = OffsetNumberNext(leftoff);

        if (i == newitemoff)
            ItemPointerSet(&(res->pointerData), lbknum, leftoff);

        spl_left++;             /* advance in left split vector */
    }

    /* fill right node */
    for (n = 0; n < v.spl_nright; n++)
    {
        i = *spl_right;
        if (i == newitemoff)
            item = itup;
        else
        {
            itemid = PageGetItemId(p, i);
            item = (IndexTuple) PageGetItem(p, itemid);
        }

        if (PageAddItem(right, (Item) item, IndexTupleSize(item),
                        rightoff, LP_USED) == InvalidOffsetNumber)
            elog(ERROR, "failed to add index item to \"%s\"",
                 RelationGetRelationName(r));
        rightoff = OffsetNumberNext(rightoff);

        if (i == newitemoff)
            ItemPointerSet(&(res->pointerData), rbknum, rightoff);

        spl_right++;            /* advance in right split vector */
    }

    /* Make sure we consumed all of the split vectors, and release 'em */
    Assert(*spl_left == InvalidOffsetNumber);
    Assert(*spl_right == InvalidOffsetNumber);
    pfree(v.spl_left);
    pfree(v.spl_right);

    if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT)
        PageRestoreTempPage(left, p);
    WriteBuffer(leftbuf);
    WriteBuffer(rightbuf);

    /*
     * Okay, the page is split.  We have three things left to do:
     *
     * 1) Adjust any active scans on this index to cope with changes we
     * introduced in its structure by splitting this page.
     *
     * 2) "Tighten" the bounding box of the pointer to the left page in the
     * parent node in the tree, if any.  Since we moved a bunch of stuff
     * off the left page, we expect it to get smaller.  This happens in
     * the internal insertion routine.
     *
     * 3) Insert a pointer to the right page in the parent.  This may cause
     * the parent to split.  If it does, we need to repeat steps one and
     * two for each split node in the tree.
     */

    /* adjust active scans */
    rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber);

    tupDesc = r->rd_att;
    isnull = (char *) palloc(r->rd_rel->relnatts);
    for (blank = 0; blank < r->rd_rel->relnatts; blank++)
        isnull[blank] = ' ';

    ltup = (IndexTuple) index_formtuple(tupDesc,
                                        &(v.spl_ldatum), isnull);
    rtup = (IndexTuple) index_formtuple(tupDesc,
                                        &(v.spl_rdatum), isnull);
    pfree(isnull);

    /* set pointers to new child pages in the internal index tuples */
    ItemPointerSet(&(ltup->t_tid), lbknum, 1);
    ItemPointerSet(&(rtup->t_tid), rbknum, 1);

    rtintinsert(r, stack, ltup, rtup, rtstate);

    pfree(ltup);
    pfree(rtup);

    return res;
}

static void
rtintinsert(Relation r,
            RTSTACK *stk,
            IndexTuple ltup,
            IndexTuple rtup,
            RTSTATE *rtstate)
{
    IndexTuple  old;
    Buffer      b;
    Page        p;
    Datum       ldatum,
                rdatum,
                newdatum;
    InsertIndexResult res;

    if (stk == NULL)
    {
        rtnewroot(r, ltup, rtup);
        return;
    }

    b = ReadBuffer(r, stk->rts_blk);
    p = BufferGetPage(b);
    old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child));

    /*
     * This is a hack.  Right now, we force rtree internal keys to be
     * constant size.  To fix this, we would need to delete the old key
     * and add both the left and the right keys for the two new pages.
     * The insertion of the left key may force a split if the new left
     * key is bigger than the old key.
     */
    if (IndexTupleSize(old) != IndexTupleSize(ltup))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("variable-length rtree keys are not supported")));

    /* install pointer to left child */
    memmove(old, ltup, IndexTupleSize(ltup));

    if (nospace(p, rtup))
    {
        newdatum = IndexTupleGetDatum(ltup);
        rttighten(r, stk->rts_parent, newdatum,
                  IndexTupleAttSize(ltup), rtstate);
        res = rtdosplit(r, b, stk->rts_parent, rtup, rtstate);
        WriteBuffer(b);         /* don't forget to release buffer! -
                                 * 01/31/94 */
        pfree(res);
    }
    else
    {
        if (PageAddItem(p, (Item) rtup, IndexTupleSize(rtup),
                        PageGetMaxOffsetNumber(p),
                        LP_USED) == InvalidOffsetNumber)
            elog(ERROR, "failed to add index item to \"%s\"",
                 RelationGetRelationName(r));
        WriteBuffer(b);
        ldatum = IndexTupleGetDatum(ltup);
        rdatum = IndexTupleGetDatum(rtup);
        newdatum = FunctionCall2(&rtstate->unionFn, ldatum, rdatum);

        rttighten(r, stk->rts_parent, newdatum,
                  IndexTupleAttSize(rtup), rtstate);

        pfree(DatumGetPointer(newdatum));
    }
}

static void
rtnewroot(Relation r, IndexTuple lt, IndexTuple rt)
{
    Buffer      b;
    Page        p;

    b = ReadBuffer(r, P_ROOT);
    RTInitBuffer(b, 0);
    p = BufferGetPage(b);
    if (PageAddItem(p, (Item) lt, IndexTupleSize(lt),
                    FirstOffsetNumber,
                    LP_USED) == InvalidOffsetNumber)
        elog(ERROR, "failed to add index item to \"%s\"",
             RelationGetRelationName(r));
    if (PageAddItem(p, (Item) rt, IndexTupleSize(rt),
                    OffsetNumberNext(FirstOffsetNumber),
                    LP_USED) == InvalidOffsetNumber)
        elog(ERROR, "failed to add index item to \"%s\"",
             RelationGetRelationName(r));
    WriteBuffer(b);
}

/*
 * Choose how to split an rtree page into two pages.
 *
 * We return two vectors of index item numbers, one for the items to be
 * put on the left page, one for the items to be put on the right page.
 * In addition, the item to be added (itup) is listed in the appropriate
 * vector.  It is represented by item number N+1 (N = # of items on page).
 *
 * Both vectors have a terminating sentinel value of InvalidOffsetNumber,
 * but the sentinel value is no longer used, because the SPLITVEC
 * vector also contains the length of each vector, and that information
 * is now used to iterate over them in rtdosplit().  --kbb, 21 Sept 2001
 *
 * The bounding-box datums for the two new pages are also returned in *v.
 *
 * This is the quadratic-cost split algorithm Guttman describes in
 * his paper.  The reason we chose it is that you can implement this
 * with less information about the data types on which you're operating.
 *
 * We must also deal with a consideration not found in Guttman's algorithm:
 * variable-length data.  In particular, the incoming item might be
 * large enough that not just any split will work.  In the worst case,
 * our "split" may have to be the new item on one page and all the existing
 * items on the other.  Short of that, we have to take care that we do not
 * make a split that leaves both pages too full for the new item.
 */
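
/*
 * Added note: a minimal sketch of the quadratic seed selection as coded
 * below.  For each pair of entries, the "waste" of keeping them together
 * is estimated as size(union) - size(intersection); the pair that wastes
 * the most becomes the two seeds.  E.g. for boxes (0,0)-(1,1) and
 * (9,9)-(10,10), the union is (0,0)-(10,10) with area 100 and the
 * intersection is empty, so waste is 100 -- far-apart entries make the
 * best seeds.
 */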
static void
rtpicksplit(Relation r,
            Page page,
            SPLITVEC *v,
            IndexTuple itup,
            RTSTATE *rtstate)
{
    OffsetNumber maxoff,
                newitemoff;
    OffsetNumber i,
                j;
    IndexTuple  item_1,
                item_2;
    Datum       datum_alpha,
                datum_beta;
    Datum       datum_l,
                datum_r;
    Datum       union_d,
                union_dl,
                union_dr;
    Datum       inter_d;
    bool        firsttime;
    float       size_alpha,
                size_beta,
                size_union,
                size_inter;
    float       size_waste,
                waste;
    float       size_l,
                size_r;
    int         nbytes;
    OffsetNumber seed_1 = 0,
                seed_2 = 0;
    OffsetNumber *left,
               *right;
    Size        newitemsz,
                item_1_sz,
                item_2_sz,
                left_avail_space,
                right_avail_space;
    int         total_num_tuples,
                num_tuples_without_seeds,
                max_after_split;    /* in Guttman's lingo, (M - m) */
    float       diff;           /* diff between cost of putting tuple left
                                 * or right */
    SPLITCOST  *cost_vector;
    int         n;

    /*
     * First, make sure the new item is not so large that we can't
     * possibly fit it on a page, even by itself.  (It's sufficient to
     * make this test here, since any oversize tuple must lead to a page
     * split attempt.)
     */
    newitemsz = IndexTupleTotalSize(itup);
    if (newitemsz > RTPageAvailSpace)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("index row size %lu exceeds rtree maximum, %lu",
                        (unsigned long) newitemsz,
                        (unsigned long) RTPageAvailSpace)));

    maxoff = PageGetMaxOffsetNumber(page);
    newitemoff = OffsetNumberNext(maxoff);      /* phony index for new
                                                 * item */
    total_num_tuples = newitemoff;
    num_tuples_without_seeds = total_num_tuples - 2;
    max_after_split = total_num_tuples / 2;     /* works for m = M/2 */

    /* Make arrays big enough for worst case, including sentinel */
    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
    v->spl_left = (OffsetNumber *) palloc(nbytes);
    v->spl_right = (OffsetNumber *) palloc(nbytes);

    firsttime = true;
    waste = 0.0;

    for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
    {
        item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
        datum_alpha = IndexTupleGetDatum(item_1);
        item_1_sz = IndexTupleTotalSize(item_1);

        for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
        {
            item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j));
            datum_beta = IndexTupleGetDatum(item_2);
            item_2_sz = IndexTupleTotalSize(item_2);

            /*
             * Ignore seed pairs that don't leave room for the new item on
             * either split page.
             */
            if (newitemsz + item_1_sz > RTPageAvailSpace &&
                newitemsz + item_2_sz > RTPageAvailSpace)
                continue;

            /* compute the wasted space by unioning these guys */
            union_d = FunctionCall2(&rtstate->unionFn,
                                    datum_alpha, datum_beta);
            FunctionCall2(&rtstate->sizeFn, union_d,
                          PointerGetDatum(&size_union));
            inter_d = FunctionCall2(&rtstate->interFn,
                                    datum_alpha, datum_beta);

            /*
             * The interFn may return a NULL pointer (not an SQL null!) to
             * indicate no intersection.  sizeFn must cope with this.
             */
            FunctionCall2(&rtstate->sizeFn, inter_d,
                          PointerGetDatum(&size_inter));
            size_waste = size_union - size_inter;

            if (DatumGetPointer(union_d) != NULL)
                pfree(DatumGetPointer(union_d));
            if (DatumGetPointer(inter_d) != NULL)
                pfree(DatumGetPointer(inter_d));

            /*
             * are these a more promising split than what we've already
             * seen?
             */
            if (size_waste > waste || firsttime)
            {
                waste = size_waste;
                seed_1 = i;
                seed_2 = j;
                firsttime = false;
            }
        }
    }

    if (firsttime)
    {
        /*
         * There is no possible split except to put the new item on its
         * own page.  Since we still have to compute the union rectangles,
         * we play dumb and run through the split algorithm anyway,
         * setting seed_1 = first item on page and seed_2 = new item.
         */
        seed_1 = FirstOffsetNumber;
        seed_2 = newitemoff;
    }

    item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1));
    datum_alpha = IndexTupleGetDatum(item_1);
    datum_l = FunctionCall2(&rtstate->unionFn, datum_alpha, datum_alpha);
    FunctionCall2(&rtstate->sizeFn, datum_l, PointerGetDatum(&size_l));
    left_avail_space = RTPageAvailSpace - IndexTupleTotalSize(item_1);

    if (seed_2 == newitemoff)
    {
        item_2 = itup;
        /* Needn't leave room for new item in calculations below */
        newitemsz = 0;
    }
    else
        item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2));
    datum_beta = IndexTupleGetDatum(item_2);
    datum_r = FunctionCall2(&rtstate->unionFn, datum_beta, datum_beta);
    FunctionCall2(&rtstate->sizeFn, datum_r, PointerGetDatum(&size_r));
    right_avail_space = RTPageAvailSpace - IndexTupleTotalSize(item_2);

    /*
     * Now split up the regions between the two seeds.
     *
     * The cost_vector array will contain hints for determining where each
     * tuple should go.  Each record in the array will contain a boolean,
     * choose_left, that indicates which node the tuple prefers to be on,
     * and the absolute difference in cost between putting the tuple in
     * its favored node and in the other node.
     *
     * Later, we will sort the cost_vector in descending order by cost
     * difference, and consider the tuples in that order for placement.
     * That way, the tuples that *really* want to be in one node or the
     * other get to choose first, and the tuples that don't really care
     * choose last.
     *
     * First, build the cost_vector array.  The new index tuple will also be
     * handled in this loop, and represented in the array, with
     * i==newitemoff.
     *
     * In the case of variable size tuples it is possible that we only have
     * the two seeds and no other tuples, in which case we don't do any of
     * this cost_vector stuff.
     */
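
    /*
     * Added note, with a hypothetical example of the cost hint: if adding
     * a tuple enlarges the left union by 4.0 and the right union by 1.0,
     * then diff = 4.0 - 1.0 = 3.0, choose_left = false, and the recorded
     * cost differential is 3.0.  A tuple with differential 3.0 is placed
     * before one with 0.5, so the strongly-opinionated tuples claim their
     * preferred page while space remains.
     */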

    /* to keep compiler quiet */
    cost_vector = NULL;

    if (num_tuples_without_seeds > 0)
    {
        cost_vector =
            (SPLITCOST *) palloc(num_tuples_without_seeds * sizeof(SPLITCOST));
        n = 0;
        for (i = FirstOffsetNumber; i <= newitemoff; i = OffsetNumberNext(i))
        {
            /* Compute new union datums and sizes for both choices */

            if ((i == seed_1) || (i == seed_2))
                continue;
            else if (i == newitemoff)
                item_1 = itup;
            else
                item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));

            datum_alpha = IndexTupleGetDatum(item_1);
            union_dl = FunctionCall2(&rtstate->unionFn, datum_l, datum_alpha);
            union_dr = FunctionCall2(&rtstate->unionFn, datum_r, datum_alpha);
            FunctionCall2(&rtstate->sizeFn, union_dl,
                          PointerGetDatum(&size_alpha));
            FunctionCall2(&rtstate->sizeFn, union_dr,
                          PointerGetDatum(&size_beta));
            pfree(DatumGetPointer(union_dl));
            pfree(DatumGetPointer(union_dr));

            diff = (size_alpha - size_l) - (size_beta - size_r);

            cost_vector[n].offset_number = i;
            cost_vector[n].cost_differential = fabs(diff);
            cost_vector[n].choose_left = (diff < 0);

            n++;
        }

        /*
         * Sort the array.  The function qsort_comp_splitcost is set up
         * "backwards", to provide descending order.
         */
        qsort(cost_vector, num_tuples_without_seeds, sizeof(SPLITCOST),
              &qsort_comp_splitcost);
    }

    /*
     * Now make the final decisions about where each tuple will go, and
     * build the vectors to return in the SPLITVEC record.
     *
     * The cost_vector array contains (descriptions of) all the tuples, in
     * the order that we want to consider them, so we just iterate
     * through it and place each tuple in left or right nodes, according
     * to the criteria described below.
     */

    left = v->spl_left;
    v->spl_nleft = 0;
    right = v->spl_right;
    v->spl_nright = 0;

    /*
     * Place the seeds first.  left avail space, left union, right avail
     * space, and right union have already been adjusted for the seeds.
     */
    *left++ = seed_1;
    v->spl_nleft++;

    *right++ = seed_2;
    v->spl_nright++;

    for (n = 0; n < num_tuples_without_seeds; n++)
    {
        bool        left_feasible,
                    right_feasible,
                    choose_left;

        /*
         * We need to figure out which page needs the least enlargement in
         * order to store the item.
         */
        i = cost_vector[n].offset_number;

        /* Compute new union datums and sizes for both possible additions */
        if (i == newitemoff)
        {
            item_1 = itup;
            /* Needn't leave room for new item anymore */
            newitemsz = 0;
        }
        else
            item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
        item_1_sz = IndexTupleTotalSize(item_1);

        datum_alpha = IndexTupleGetDatum(item_1);
        union_dl = FunctionCall2(&rtstate->unionFn, datum_l, datum_alpha);
        union_dr = FunctionCall2(&rtstate->unionFn, datum_r, datum_alpha);
        FunctionCall2(&rtstate->sizeFn, union_dl,
                      PointerGetDatum(&size_alpha));
        FunctionCall2(&rtstate->sizeFn, union_dr,
                      PointerGetDatum(&size_beta));

        /*
         * We prefer the page that shows smaller enlargement of its union
         * area (Guttman's algorithm), but we must take care that at least
         * one page will still have room for the new item after this one
         * is added.
         *
         * (We know that all the old items together can fit on one page, so
         * we need not worry about any other problem than failing to fit
         * the new item.)
         *
         * Guttman's algorithm actually has two factors to consider (in
         * order): 1. if one node has so many tuples already assigned to
         * it that the other needs all the rest in order to satisfy the
         * condition that neither node has fewer than m tuples, then that
         * is decisive; 2. otherwise, choose the page that shows the
         * smaller enlargement of its union area.
         *
         * I have chosen m = M/2, where M is the maximum number of tuples on
         * a page.  (Actually, this is only strictly true for fixed size
         * tuples.  For variable size tuples, there still might have to be
         * only one tuple on a page, if it is really big.  But even with
         * variable size tuples we still try to get m as close as possible
         * to M/2.)
         *
         * The question of which page shows the smaller enlargement of its
         * union area has already been answered, and the answer stored in
         * the choose_left field of the SPLITCOST record.
         */
        left_feasible = (left_avail_space >= item_1_sz &&
                         ((left_avail_space - item_1_sz) >= newitemsz ||
                          right_avail_space >= newitemsz));
        right_feasible = (right_avail_space >= item_1_sz &&
                          ((right_avail_space - item_1_sz) >= newitemsz ||
                           left_avail_space >= newitemsz));
        if (left_feasible && right_feasible)
        {
            /*
             * Both feasible, use Guttman's algorithm.  First check the m
             * condition described above, and if that doesn't apply,
             * choose the page with the smaller enlargement of its union
             * area.
             */
            if (v->spl_nleft > max_after_split)
                choose_left = false;
            else if (v->spl_nright > max_after_split)
                choose_left = true;
            else
                choose_left = cost_vector[n].choose_left;
        }
        else if (left_feasible)
            choose_left = true;
        else if (right_feasible)
            choose_left = false;
        else
        {
            elog(ERROR, "failed to find a workable rtree page split");
            choose_left = false;    /* keep compiler quiet */
        }

        if (choose_left)
        {
            pfree(DatumGetPointer(datum_l));
            pfree(DatumGetPointer(union_dr));
            datum_l = union_dl;
            size_l = size_alpha;
            left_avail_space -= item_1_sz;
            *left++ = i;
            v->spl_nleft++;
        }
        else
        {
            pfree(DatumGetPointer(datum_r));
            pfree(DatumGetPointer(union_dl));
            datum_r = union_dr;
            size_r = size_beta;
            right_avail_space -= item_1_sz;
            *right++ = i;
            v->spl_nright++;
        }
    }

    if (num_tuples_without_seeds > 0)
        pfree(cost_vector);

    *left = *right = InvalidOffsetNumber;       /* add ending sentinels */

    v->spl_ldatum = datum_l;
    v->spl_rdatum = datum_r;
}

static void
RTInitBuffer(Buffer b, uint32 f)
{
    RTreePageOpaque opaque;
    Page        page;
    Size        pageSize;

    pageSize = BufferGetPageSize(b);

    page = BufferGetPage(b);

    PageInit(page, pageSize, sizeof(RTreePageOpaqueData));

    opaque = (RTreePageOpaque) PageGetSpecialPointer(page);
    opaque->flags = f;
}

static OffsetNumber
choose(Relation r, Page p, IndexTuple it, RTSTATE *rtstate)
{
    OffsetNumber maxoff;
    OffsetNumber i;
    Datum       ud,
                id;
    Datum       datum;
    float       usize,
                dsize;
    OffsetNumber which;
    float       which_grow;

    id = IndexTupleGetDatum(it);
    maxoff = PageGetMaxOffsetNumber(p);
    which_grow = -1.0;
    which = -1;

    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    {
        datum = IndexTupleGetDatum(PageGetItem(p, PageGetItemId(p, i)));
        FunctionCall2(&rtstate->sizeFn, datum,
                      PointerGetDatum(&dsize));
        ud = FunctionCall2(&rtstate->unionFn, datum, id);
        FunctionCall2(&rtstate->sizeFn, ud,
                      PointerGetDatum(&usize));
        pfree(DatumGetPointer(ud));
        if (which_grow < 0 || usize - dsize < which_grow)
        {
            which = i;
            which_grow = usize - dsize;
            if (which_grow == 0)
                break;
        }
    }

    return which;
}

static int
nospace(Page p, IndexTuple it)
{
    return PageGetFreeSpace(p) < IndexTupleSize(it);
}
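
/*
 * Added note (an assumption worth checking against bufpage.c):
 * PageGetFreeSpace() already deducts the space needed for one new line
 * pointer, so comparing against the bare IndexTupleSize() -- rather than
 * IndexTupleTotalSize() -- is the right test for "will one more tuple
 * fit on this page".
 */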

void
freestack(RTSTACK *s)
{
    RTSTACK    *p;

    while (s != NULL)
    {
        p = s->rts_parent;
        pfree(s);
        s = p;
    }
}

/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
rtbulkdelete(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
    void       *callback_state = (void *) PG_GETARG_POINTER(2);
    IndexBulkDeleteResult *result;
    BlockNumber num_pages;
    double      tuples_removed;
    double      num_index_tuples;
    IndexScanDesc iscan;

    tuples_removed = 0;
    num_index_tuples = 0;

    /*
     * Since rtree is not marked "amconcurrent" in pg_am, caller should
     * have acquired exclusive lock on index relation.  We need no locking
     * here.
     */

    /*
     * XXX generic implementation --- should be improved!
     */

    /* walk through the entire index */
    iscan = index_beginscan(NULL, rel, SnapshotAny, 0, NULL);
    /* including killed tuples */
    iscan->ignore_killed_tuples = false;

    while (index_getnext_indexitem(iscan, ForwardScanDirection))
    {
        if (callback(&iscan->xs_ctup.t_self, callback_state))
        {
            ItemPointerData indextup = iscan->currentItemData;
            BlockNumber blkno;
            OffsetNumber offnum;
            Buffer      buf;
            Page        page;

            blkno = ItemPointerGetBlockNumber(&indextup);
            offnum = ItemPointerGetOffsetNumber(&indextup);

            /* adjust any scans that will be affected by this deletion */
            /* (namely, my own scan) */
            rtadjscans(rel, RTOP_DEL, blkno, offnum);

            /* delete the index tuple */
            buf = ReadBuffer(rel, blkno);
            page = BufferGetPage(buf);

            PageIndexTupleDelete(page, offnum);

            WriteBuffer(buf);

            tuples_removed += 1;
        }
        else
            num_index_tuples += 1;
    }

    index_endscan(iscan);

    /* return statistics */
    num_pages = RelationGetNumberOfBlocks(rel);

    result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    result->num_pages = num_pages;
    result->num_index_tuples = num_index_tuples;
    result->tuples_removed = tuples_removed;

    PG_RETURN_POINTER(result);
}


static void
initRtstate(RTSTATE *rtstate, Relation index)
{
    fmgr_info_copy(&rtstate->unionFn,
                   index_getprocinfo(index, 1, RT_UNION_PROC),
                   CurrentMemoryContext);
    fmgr_info_copy(&rtstate->sizeFn,
                   index_getprocinfo(index, 1, RT_SIZE_PROC),
                   CurrentMemoryContext);
    fmgr_info_copy(&rtstate->interFn,
                   index_getprocinfo(index, 1, RT_INTER_PROC),
                   CurrentMemoryContext);
}

/* for sorting SPLITCOST records in descending order */
static int
qsort_comp_splitcost(const void *a, const void *b)
{
    float       diff =
        ((SPLITCOST *) a)->cost_differential -
        ((SPLITCOST *) b)->cost_differential;

    if (diff < 0)
        return 1;
    else if (diff > 0)
        return -1;
    else
        return 0;
}
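
/*
 * Added note: the comparator's return values are deliberately inverted
 * (positive when a sorts after b would normally mean a > b) so that
 * qsort() leaves the largest cost differentials at the front of the
 * array; rtpicksplit then walks the array front to back, letting the
 * tuples with the strongest page preference pick first.
 */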

#ifdef RTDEBUG

void
_rtdump(Relation r)
{
    Buffer      buf;
    Page        page;
    OffsetNumber offnum,
                maxoff;
    BlockNumber blkno;
    BlockNumber nblocks;
    RTreePageOpaque po;
    IndexTuple  itup;
    BlockNumber itblkno;
    OffsetNumber itoffno;
    Datum       datum;
    char       *itkey;

    nblocks = RelationGetNumberOfBlocks(r);
    for (blkno = 0; blkno < nblocks; blkno++)
    {
        buf = ReadBuffer(r, blkno);
        page = BufferGetPage(buf);
        po = (RTreePageOpaque) PageGetSpecialPointer(page);
        maxoff = PageGetMaxOffsetNumber(page);
        printf("Page %d maxoff %d <%s>\n", blkno, maxoff,
               (po->flags & F_LEAF ? "LEAF" : "INTERNAL"));

        if (PageIsEmpty(page))
        {
            ReleaseBuffer(buf);
            continue;
        }

        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
            itblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
            itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid));
            datum = IndexTupleGetDatum(itup);
            itkey = DatumGetCString(DirectFunctionCall1(box_out,
                                                        datum));
            printf("\t[%d] size %d heap <%d,%d> key:%s\n",
                   offnum, IndexTupleSize(itup), itblkno, itoffno, itkey);
            pfree(itkey);
        }

        ReleaseBuffer(buf);
    }
}

#endif   /* defined RTDEBUG */

void
rtree_redo(XLogRecPtr lsn, XLogRecord *record)
{
    elog(PANIC, "rtree_redo: unimplemented");
}

void
rtree_undo(XLogRecPtr lsn, XLogRecord *record)
{
    elog(PANIC, "rtree_undo: unimplemented");
}

void
rtree_desc(char *buf, uint8 xl_info, char *rec)
{
}