diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 9c3814820d..69e79c5fbd 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,6 +1,6 @@ @@ -1045,7 +1045,8 @@ indpred text - Query plan for partial index predicate (not functional) + Expression tree (in the form of a nodeToString representation) + for partial index predicate diff --git a/src/backend/access/common/indexvalid.c b/src/backend/access/common/indexvalid.c index 6a7c08b450..94e7efd522 100644 --- a/src/backend/access/common/indexvalid.c +++ b/src/backend/access/common/indexvalid.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.26 2001/01/24 19:42:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.27 2001/07/15 22:48:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,12 +24,9 @@ */ int NIndexTupleProcessed; + /* ---------------- - * index_keytest - * - * old comments - * May eventually combine with other tests (like timeranges)? - * Should have Buffer buffer; as an argument and pass it to amgetattr. + * index_keytest - does this index tuple satisfy the scan key(s)? 
* ---------------- */ bool @@ -38,16 +35,16 @@ index_keytest(IndexTuple tuple, int scanKeySize, ScanKey key) { - bool isNull; - Datum datum; - Datum test; - IncrIndexProcessed(); while (scanKeySize > 0) { + Datum datum; + bool isNull; + Datum test; + datum = index_getattr(tuple, - key[0].sk_attno, + key->sk_attno, tupdesc, &isNull); @@ -57,25 +54,19 @@ index_keytest(IndexTuple tuple, return false; } - if (key[0].sk_flags & SK_ISNULL) + if (key->sk_flags & SK_ISNULL) return false; - if (key[0].sk_flags & SK_COMMUTE) - { - test = FunctionCall2(&key[0].sk_func, - key[0].sk_argument, datum); - } + if (key->sk_flags & SK_COMMUTE) + test = FunctionCall2(&key->sk_func, key->sk_argument, datum); else - { - test = FunctionCall2(&key[0].sk_func, - datum, key[0].sk_argument); - } + test = FunctionCall2(&key->sk_func, datum, key->sk_argument); - if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE)) + if (DatumGetBool(test) == !!(key->sk_flags & SK_NEGATE)) return false; - scanKeySize -= 1; key++; + scanKeySize--; } return true; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 9d6e2040f6..c99c4a7e6e 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.79 2001/06/11 05:00:56 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.80 2001/07/15 22:48:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -43,7 +43,23 @@ #define RIGHT_ADDED 0x02 #define BOTH_ADDED ( LEFT_ADDED | RIGHT_ADDED ) + +/* Working state for gistbuild and its callback */ +typedef struct +{ + GISTSTATE giststate; + int numindexattrs; + double indtuples; +} GISTBuildState; + + /* non-export function prototypes */ +static void gistbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool 
tupleIsAlive, + void *state); static void gistdoinsert(Relation r, IndexTuple itup, InsertIndexResult *res, @@ -89,6 +105,7 @@ static void GISTInitBuffer(Buffer b, uint32 f); static OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate); +static void gistdelete(Relation r, ItemPointer tid); #ifdef GIST_PAGEADDITEM static IndexTuple gist_tuple_replacekey(Relation r, GISTENTRY entry, IndexTuple t); @@ -116,184 +133,36 @@ gistbuild(PG_FUNCTION_ARGS) Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); - Node *oldPred = (Node *) PG_GETARG_POINTER(3); - -#ifdef NOT_USED - IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4); - -#endif - HeapScanDesc hscan; - HeapTuple htup; - IndexTuple itup; - TupleDesc htupdesc, - itupdesc; - Datum attdata[INDEX_MAX_KEYS]; - char nulls[INDEX_MAX_KEYS]; - double nhtups, - nitups; - Node *pred = indexInfo->ii_Predicate; - -#ifndef OMIT_PARTIAL_INDEX - TupleTable tupleTable; - TupleTableSlot *slot; - -#endif - ExprContext *econtext; - GISTSTATE giststate; - GISTENTRY tmpcentry; - Buffer buffer = InvalidBuffer; - bool *compvec; - int i; + double reltuples; + GISTBuildState buildstate; + Buffer buffer; /* no locking is needed */ - initGISTstate(&giststate, index); + initGISTstate(&buildstate.giststate, index); /* * We expect to be called exactly once for any index relation. If * that's not the case, big trouble's what we have. 
*/ - if (oldPred == NULL && RelationGetNumberOfBlocks(index) != 0) - elog(ERROR, "%s already contains data", RelationGetRelationName(index)); + if (RelationGetNumberOfBlocks(index) != 0) + elog(ERROR, "%s already contains data", + RelationGetRelationName(index)); - /* initialize the root page (if this is a new index) */ - if (oldPred == NULL) - { - buffer = ReadBuffer(index, P_NEW); - GISTInitBuffer(buffer, F_LEAF); - WriteBuffer(buffer); - } - - /* get tuple descriptors for heap and index relations */ - htupdesc = RelationGetDescr(heap); - itupdesc = RelationGetDescr(index); - - /* - * If this is a predicate (partial) index, we will need to evaluate - * the predicate using ExecQual, which requires the current tuple to - * be in a slot of a TupleTable. In addition, ExecQual must have an - * ExprContext referring to that slot. Here, we initialize dummy - * TupleTable and ExprContext objects for this purpose. --Nels, Feb 92 - * - * We construct the ExprContext anyway since we need a per-tuple - * temporary memory context for function evaluation -- tgl July 00 - */ -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - ExecSetSlotDescriptor(slot, htupdesc, false); - } - else - { - tupleTable = NULL; - slot = NULL; - } - econtext = MakeExprContext(slot, TransactionCommandContext); -#else - econtext = MakeExprContext(NULL, TransactionCommandContext); -#endif /* OMIT_PARTIAL_INDEX */ + /* initialize the root page */ + buffer = ReadBuffer(index, P_NEW); + GISTInitBuffer(buffer, F_LEAF); + WriteBuffer(buffer); /* build the index */ - nhtups = nitups = 0.0; + buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs; + buildstate.indtuples = 0; - compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs); - - /* start a heap scan */ - hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL); - - while (HeapTupleIsValid(htup = heap_getnext(hscan, 0))) - { - 
MemoryContextReset(econtext->ecxt_per_tuple_memory); - - nhtups += 1.0; - -#ifndef OMIT_PARTIAL_INDEX - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) - { - slot->val = htup; - if (ExecQual((List *) oldPred, econtext, false)) - { - nitups += 1.0; - continue; - } - } - - /* - * Skip this tuple if it doesn't satisfy the partial-index - * predicate - */ - if (pred != NULL) - { - slot->val = htup; - if (!ExecQual((List *) pred, econtext, false)) - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ - - nitups += 1.0; - - /* - * For the current heap tuple, extract all the attributes we use - * in this index, and note which are null. - */ - FormIndexDatum(indexInfo, - htup, - htupdesc, - econtext->ecxt_per_tuple_memory, - attdata, - nulls); - - /* immediately compress keys to normalize */ - for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) - { - gistcentryinit(&giststate, i, &tmpcentry, attdata[i], - (Relation) NULL, (Page) NULL, (OffsetNumber) 0, - -1 /* size is currently bogus */ , TRUE); - if (attdata[i] != tmpcentry.key && - !(giststate.keytypbyval)) - compvec[i] = TRUE; - else - compvec[i] = FALSE; - attdata[i] = tmpcentry.key; - } - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(itupdesc, attdata, nulls); - itup->t_tid = htup->t_self; - - /* - * Since we already have the index relation locked, we call - * gistdoinsert directly. Normal access method calls dispatch - * through gistinsert, which locks the relation for write. This - * is the right thing to do if you're inserting single tups, but - * not when you're initializing the whole index at once. 
- */ - gistdoinsert(index, itup, NULL, &giststate); - - for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) - if (compvec[i]) - pfree(DatumGetPointer(attdata[i])); - - pfree(itup); - } + /* do the heap scan */ + reltuples = IndexBuildHeapScan(heap, index, indexInfo, + gistbuildCallback, (void *) &buildstate); /* okay, all heap tuples are indexed */ - heap_endscan(hscan); - - pfree(compvec); - -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - ExecDropTupleTable(tupleTable, true); -#endif /* OMIT_PARTIAL_INDEX */ - FreeExprContext(econtext); /* * Since we just counted the tuples in the heap, we update its stats @@ -313,14 +182,8 @@ gistbuild(PG_FUNCTION_ARGS) heap_close(heap, NoLock); index_close(index); - UpdateStats(hrelid, nhtups); - UpdateStats(irelid, nitups); - if (oldPred != NULL) - { - if (nitups == nhtups) - pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } + UpdateStats(hrelid, reltuples); + UpdateStats(irelid, buildstate.indtuples); } #ifdef GISTDEBUG @@ -330,6 +193,63 @@ gistbuild(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +/* + * Per-tuple callback from IndexBuildHeapScan + */ +static void +gistbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state) +{ + GISTBuildState *buildstate = (GISTBuildState *) state; + IndexTuple itup; + bool compvec[INDEX_MAX_KEYS]; + GISTENTRY tmpcentry; + int i; + + /* immediately compress keys to normalize */ + for (i = 0; i < buildstate->numindexattrs; i++) + { + gistcentryinit(&buildstate->giststate, i, &tmpcentry, attdata[i], + (Relation) NULL, (Page) NULL, (OffsetNumber) 0, + -1 /* size is currently bogus */ , TRUE); + if (attdata[i] != tmpcentry.key && + !(buildstate->giststate.keytypbyval)) + compvec[i] = TRUE; + else + compvec[i] = FALSE; + attdata[i] = tmpcentry.key; + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(RelationGetDescr(index), attdata, nulls); + itup->t_tid = htup->t_self; + + /* 
GIST indexes don't index nulls, see notes in gistinsert */ + if (! IndexTupleHasNulls(itup)) + { + /* + * Since we already have the index relation locked, we call + * gistdoinsert directly. Normal access method calls dispatch + * through gistinsert, which locks the relation for write. This + * is the right thing to do if you're inserting single tups, but + * not when you're initializing the whole index at once. + */ + gistdoinsert(index, itup, NULL, &buildstate->giststate); + + buildstate->indtuples += 1; + } + + for (i = 0; i < buildstate->numindexattrs; i++) + if (compvec[i]) + pfree(DatumGetPointer(attdata[i])); + + pfree(itup); +} + /* * gistinsert -- wrapper for GiST tuple insertion. * @@ -343,25 +263,28 @@ gistinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); - #endif InsertIndexResult res; IndexTuple itup; GISTSTATE giststate; GISTENTRY tmpentry; int i; - bool *compvec; + bool compvec[INDEX_MAX_KEYS]; + + /* + * Since GIST is not marked "amconcurrent" in pg_am, caller should + * have acquired exclusive lock on index relation. We need no locking + * here. 
+ */ initGISTstate(&giststate, r); /* immediately compress keys to normalize */ - compvec = (bool *) palloc(sizeof(bool) * r->rd_att->natts); for (i = 0; i < r->rd_att->natts; i++) { - gistcentryinit(&giststate, i,&tmpentry, datum[i], + gistcentryinit(&giststate, i, &tmpentry, datum[i], (Relation) NULL, (Page) NULL, (OffsetNumber) 0, -1 /* size is currently bogus */ , TRUE); if (datum[i] != tmpentry.key && !(giststate.keytypbyval)) @@ -374,18 +297,24 @@ gistinsert(PG_FUNCTION_ARGS) itup->t_tid = *ht_ctid; /* - * Notes in ExecUtils:ExecOpenIndices() - * - * RelationSetLockForWrite(r); + * Currently, GIST indexes do not support indexing NULLs; considerable + * infrastructure work would have to be done to do anything reasonable + * with a NULL. */ + if (IndexTupleHasNulls(itup)) + { + res = NULL; + } + else + { + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + gistdoinsert(r, itup, &res, &giststate); + } - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - gistdoinsert(r, itup, &res, &giststate); for (i = 0; i < r->rd_att->natts; i++) if (compvec[i] == TRUE) pfree(DatumGetPointer(datum[i])); pfree(itup); - pfree(compvec); PG_RETURN_POINTER(res); } @@ -527,9 +456,7 @@ gistlayerinsert(Relation r, BlockNumber blkno, /* key is modified, so old version must be deleted */ ItemPointerSet(&oldtid, blkno, child); - DirectFunctionCall2(gistdelete, - PointerGetDatum(r), - PointerGetDatum(&oldtid)); + gistdelete(r, &oldtid); } ret = INSERTED; @@ -1416,29 +1343,31 @@ gistfreestack(GISTSTACK *s) /* -** remove an entry from a page -*/ -Datum -gistdelete(PG_FUNCTION_ARGS) + * Retail deletion of a single tuple. + * + * NB: this is no longer called externally, but is still needed by + * gistlayerinsert(). That dependency will have to be fixed if GIST + * is ever going to allow concurrent insertions. 
+ */ +static void +gistdelete(Relation r, ItemPointer tid) { - Relation r = (Relation) PG_GETARG_POINTER(0); - ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1); BlockNumber blkno; OffsetNumber offnum; Buffer buf; Page page; /* - * Notes in ExecUtils:ExecOpenIndices() Also note that only vacuum - * deletes index tuples now... - * - * RelationSetLockForWrite(r); + * Since GIST is not marked "amconcurrent" in pg_am, caller should + * have acquired exclusive lock on index relation. We need no locking + * here. */ blkno = ItemPointerGetBlockNumber(tid); offnum = ItemPointerGetOffsetNumber(tid); /* adjust any scans that will be affected by this deletion */ + /* NB: this works only for scans in *this* backend! */ gistadjscans(r, GISTOP_DEL, blkno, offnum); /* delete the index tuple */ @@ -1448,10 +1377,93 @@ gistdelete(PG_FUNCTION_ARGS) PageIndexTupleDelete(page, offnum); WriteBuffer(buf); - - PG_RETURN_VOID(); } +/* + * Bulk deletion of all index entries pointing to a set of heap tuples. + * The set of target tuples is specified via a callback routine that tells + * whether any given heap tuple (identified by ItemPointer) is being deleted. + * + * Result: a palloc'd struct containing statistical info for VACUUM displays. + */ +Datum +gistbulkdelete(PG_FUNCTION_ARGS) +{ + Relation rel = (Relation) PG_GETARG_POINTER(0); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1); + void *callback_state = (void *) PG_GETARG_POINTER(2); + IndexBulkDeleteResult *result; + BlockNumber num_pages; + double tuples_removed; + double num_index_tuples; + RetrieveIndexResult res; + IndexScanDesc iscan; + + tuples_removed = 0; + num_index_tuples = 0; + + /* + * Since GIST is not marked "amconcurrent" in pg_am, caller should + * have acquired exclusive lock on index relation. We need no locking + * here. + */ + + /* + * XXX generic implementation --- should be improved! 
+ */ + + /* walk through the entire index */ + iscan = index_beginscan(rel, false, 0, (ScanKey) NULL); + + while ((res = index_getnext(iscan, ForwardScanDirection)) + != (RetrieveIndexResult) NULL) + { + ItemPointer heapptr = &res->heap_iptr; + + if (callback(heapptr, callback_state)) + { + ItemPointer indexptr = &res->index_iptr; + BlockNumber blkno; + OffsetNumber offnum; + Buffer buf; + Page page; + + blkno = ItemPointerGetBlockNumber(indexptr); + offnum = ItemPointerGetOffsetNumber(indexptr); + + /* adjust any scans that will be affected by this deletion */ + gistadjscans(rel, GISTOP_DEL, blkno, offnum); + + /* delete the index tuple */ + buf = ReadBuffer(rel, blkno); + page = BufferGetPage(buf); + + PageIndexTupleDelete(page, offnum); + + WriteBuffer(buf); + + tuples_removed += 1; + } + else + num_index_tuples += 1; + + pfree(res); + } + + index_endscan(iscan); + + /* return statistics */ + num_pages = RelationGetNumberOfBlocks(rel); + + result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); + result->num_pages = num_pages; + result->tuples_removed = tuples_removed; + result->num_index_tuples = num_index_tuples; + + PG_RETURN_POINTER(result); +} + + void initGISTstate(GISTSTATE *giststate, Relation index) { diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 672b121693..9358692a53 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gistscan.c,v 1.37 2001/06/28 16:00:07 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gistscan.c,v 1.38 2001/07/15 22:48:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -59,13 +59,8 @@ gistbeginscan(PG_FUNCTION_ARGS) ScanKey key = (ScanKey) PG_GETARG_POINTER(3); IndexScanDesc s; - /* - * Let index_beginscan does its work... 
- * - * RelationSetLockForRead(r); - */ - s = RelationGetIndexScan(r, fromEnd, nkeys, key); + gistregscan(s); PG_RETURN_POINTER(s); @@ -283,6 +278,27 @@ gistdropscan(IndexScanDesc s) pfree(l); } +/* + * AtEOXact_gist() --- clean up gist subsystem at xact abort or commit. + * + * This is here because it needs to touch this module's static var GISTScans. + */ +void +AtEOXact_gist(void) +{ + /* + * Note: these actions should only be necessary during xact abort; but + * they can't hurt during a commit. + */ + + /* + * Reset the active-scans list to empty. We do not need to free the + * list elements, because they're all palloc()'d, so they'll go away + * at end of transaction anyway. + */ + GISTScans = NULL; +} + void gistadjscans(Relation rel, int op, BlockNumber blkno, OffsetNumber offnum) { diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 9617fcc33a..9b0e6cf28e 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.52 2001/07/15 22:48:15 tgl Exp $ * * NOTES * This file contains only the public interface routines. 
@@ -21,13 +21,27 @@ #include "access/genam.h" #include "access/hash.h" #include "access/heapam.h" +#include "access/xlogutils.h" #include "catalog/index.h" #include "executor/executor.h" #include "miscadmin.h" + bool BuildingHash = false; -#include "access/xlogutils.h" + +/* Working state for hashbuild and its callback */ +typedef struct +{ + double indtuples; +} HashBuildState; + +static void hashbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state); /* @@ -44,161 +58,32 @@ hashbuild(PG_FUNCTION_ARGS) Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); - Node *oldPred = (Node *) PG_GETARG_POINTER(3); + double reltuples; + HashBuildState buildstate; -#ifdef NOT_USED - IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4); - -#endif - HeapScanDesc hscan; - HeapTuple htup; - IndexTuple itup; - TupleDesc htupdesc, - itupdesc; - Datum attdata[INDEX_MAX_KEYS]; - char nulls[INDEX_MAX_KEYS]; - double nhtups, - nitups; - HashItem hitem; - Node *pred = indexInfo->ii_Predicate; - -#ifndef OMIT_PARTIAL_INDEX - TupleTable tupleTable; - TupleTableSlot *slot; - -#endif - ExprContext *econtext; - InsertIndexResult res = NULL; - - /* note that this is a new hash */ + /* set flag to disable locking */ BuildingHash = true; - /* initialize the hash index metadata page (if this is a new index) */ - if (oldPred == NULL) - _hash_metapinit(index); - - /* get tuple descriptors for heap and index relations */ - htupdesc = RelationGetDescr(heap); - itupdesc = RelationGetDescr(index); - /* - * If this is a predicate (partial) index, we will need to evaluate - * the predicate using ExecQual, which requires the current tuple to - * be in a slot of a TupleTable. In addition, ExecQual must have an - * ExprContext referring to that slot. Here, we initialize dummy - * TupleTable and ExprContext objects for this purpose. 
--Nels, Feb 92 - * - * We construct the ExprContext anyway since we need a per-tuple - * temporary memory context for function evaluation -- tgl July 00 + * We expect to be called exactly once for any index relation. If + * that's not the case, big trouble's what we have. */ -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - ExecSetSlotDescriptor(slot, htupdesc, false); - } - else - { - tupleTable = NULL; - slot = NULL; - } - econtext = MakeExprContext(slot, TransactionCommandContext); -#else - econtext = MakeExprContext(NULL, TransactionCommandContext); -#endif /* OMIT_PARTIAL_INDEX */ + if (RelationGetNumberOfBlocks(index) != 0) + elog(ERROR, "%s already contains data", + RelationGetRelationName(index)); + + /* initialize the hash index metadata page */ + _hash_metapinit(index); /* build the index */ - nhtups = nitups = 0.0; + buildstate.indtuples = 0; - /* start a heap scan */ - hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL); + /* do the heap scan */ + reltuples = IndexBuildHeapScan(heap, index, indexInfo, + hashbuildCallback, (void *) &buildstate); - while (HeapTupleIsValid(htup = heap_getnext(hscan, 0))) - { - MemoryContextReset(econtext->ecxt_per_tuple_memory); - - nhtups += 1.0; - -#ifndef OMIT_PARTIAL_INDEX - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) - { - slot->val = htup; - if (ExecQual((List *) oldPred, econtext, false)) - { - nitups += 1.0; - continue; - } - } - - /* - * Skip this tuple if it doesn't satisfy the partial-index - * predicate - */ - if (pred != NULL) - { - slot->val = htup; - if (!ExecQual((List *) pred, econtext, false)) - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ - - nitups += 1.0; - - /* - * For the current heap tuple, extract all the attributes we use - * in this index, and note which are null. 
- */ - FormIndexDatum(indexInfo, - htup, - htupdesc, - econtext->ecxt_per_tuple_memory, - attdata, - nulls); - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(itupdesc, attdata, nulls); - - /* - * If the single index key is null, we don't insert it into the - * index. Hash tables support scans on '='. Relational algebra - * says that A = B returns null if either A or B is null. This - * means that no qualification used in an index scan could ever - * return true on a null attribute. It also means that indices - * can't be used by ISNULL or NOTNULL scans, but that's an - * artifact of the strategy map architecture chosen in 1986, not - * of the way nulls are handled here. - */ - - if (IndexTupleHasNulls(itup)) - { - pfree(itup); - continue; - } - - itup->t_tid = htup->t_self; - hitem = _hash_formitem(itup); - - res = _hash_doinsert(index, hitem); - - pfree(hitem); - pfree(itup); - pfree(res); - } - - /* okay, all heap tuples are indexed */ - heap_endscan(hscan); - -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - ExecDropTupleTable(tupleTable, true); -#endif /* OMIT_PARTIAL_INDEX */ - FreeExprContext(econtext); + /* all done */ + BuildingHash = false; /* * Since we just counted the tuples in the heap, we update its stats @@ -218,22 +103,53 @@ hashbuild(PG_FUNCTION_ARGS) heap_close(heap, NoLock); index_close(index); - UpdateStats(hrelid, nhtups); - UpdateStats(irelid, nitups); - if (oldPred != NULL) - { - if (nitups == nhtups) - pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } + UpdateStats(hrelid, reltuples); + UpdateStats(irelid, buildstate.indtuples); } - /* all done */ - BuildingHash = false; - PG_RETURN_VOID(); } +/* + * Per-tuple callback from IndexBuildHeapScan + */ +static void +hashbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state) +{ + HashBuildState *buildstate = (HashBuildState *) state; + IndexTuple itup; + HashItem 
hitem; + InsertIndexResult res; + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(RelationGetDescr(index), attdata, nulls); + itup->t_tid = htup->t_self; + + /* Hash indexes don't index nulls, see notes in hashinsert */ + if (IndexTupleHasNulls(itup)) + { + pfree(itup); + return; + } + + hitem = _hash_formitem(itup); + + res = _hash_doinsert(index, hitem); + + if (res) + pfree(res); + + buildstate->indtuples += 1; + + pfree(hitem); + pfree(itup); +} + /* * hashinsert() -- insert an index tuple into a hash table. * @@ -248,10 +164,8 @@ hashinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); - #endif InsertIndexResult res; HashItem hitem; @@ -261,8 +175,21 @@ hashinsert(PG_FUNCTION_ARGS) itup = index_formtuple(RelationGetDescr(rel), datum, nulls); itup->t_tid = *ht_ctid; + /* + * If the single index key is null, we don't insert it into the + * index. Hash tables support scans on '='. Relational algebra + * says that A = B returns null if either A or B is null. This + * means that no qualification used in an index scan could ever + * return true on a null attribute. It also means that indices + * can't be used by ISNULL or NOTNULL scans, but that's an + * artifact of the strategy map architecture chosen in 1986, not + * of the way nulls are handled here. + */ if (IndexTupleHasNulls(itup)) + { + pfree(itup); PG_RETURN_POINTER((InsertIndexResult) NULL); + } hitem = _hash_formitem(itup); @@ -471,22 +398,74 @@ hashrestrpos(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -/* stubs */ +/* + * Bulk deletion of all index entries pointing to a set of heap tuples. + * The set of target tuples is specified via a callback routine that tells + * whether any given heap tuple (identified by ItemPointer) is being deleted. 
+ * + * Result: a palloc'd struct containing statistical info for VACUUM displays. + */ Datum -hashdelete(PG_FUNCTION_ARGS) +hashbulkdelete(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1); + void *callback_state = (void *) PG_GETARG_POINTER(2); + IndexBulkDeleteResult *result; + BlockNumber num_pages; + double tuples_removed; + double num_index_tuples; + RetrieveIndexResult res; + IndexScanDesc iscan; - /* adjust any active scans that will be affected by this deletion */ - _hash_adjscans(rel, tid); + tuples_removed = 0; + num_index_tuples = 0; - /* delete the data from the page */ - _hash_pagedel(rel, tid); + /* + * XXX generic implementation --- should be improved! + */ - PG_RETURN_VOID(); + /* walk through the entire index */ + iscan = index_beginscan(rel, false, 0, (ScanKey) NULL); + + while ((res = index_getnext(iscan, ForwardScanDirection)) + != (RetrieveIndexResult) NULL) + { + ItemPointer heapptr = &res->heap_iptr; + + if (callback(heapptr, callback_state)) + { + ItemPointer indexptr = &res->index_iptr; + + /* adjust any active scans that will be affected by deletion */ + /* (namely, my own scan) */ + _hash_adjscans(rel, indexptr); + + /* delete the data from the page */ + _hash_pagedel(rel, indexptr); + + tuples_removed += 1; + } + else + num_index_tuples += 1; + + pfree(res); + } + + index_endscan(iscan); + + /* return statistics */ + num_pages = RelationGetNumberOfBlocks(rel); + + result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); + result->num_pages = num_pages; + result->tuples_removed = tuples_removed; + result->num_index_tuples = num_index_tuples; + + PG_RETURN_POINTER(result); } + void hash_redo(XLogRecPtr lsn, XLogRecord *record) { diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 8e2ed1bb8a..c9fb065dbd 100644 --- 
a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.29 2001/03/07 21:20:26 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.30 2001/07/15 22:48:15 tgl Exp $ * * NOTES * Overflow pages look like ordinary relation pages. @@ -112,14 +112,14 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp) metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE); - splitnum = metap->OVFL_POINT; - max_free = metap->SPARES[splitnum]; + splitnum = metap->hashm_ovflpoint; + max_free = metap->hashm_spares[splitnum]; free_page = (max_free - 1) >> (metap->hashm_bshift + BYTE_TO_BIT); free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1); /* Look through all the free maps to find the first free block */ - first_page = metap->LAST_FREED >> (metap->hashm_bshift + BYTE_TO_BIT); + first_page = metap->hashm_lastfreed >> (metap->hashm_bshift + BYTE_TO_BIT); for (i = first_page; i <= free_page; i++) { Page mappage; @@ -138,7 +138,7 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp) if (i == first_page) { - bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1); + bit = metap->hashm_lastfreed & (BMPGSZ_BIT(metap) - 1); j = bit / BITS_PER_MAP; bit = bit & ~(BITS_PER_MAP - 1); } @@ -153,10 +153,10 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp) } /* No Free Page Found - have to allocate a new page */ - metap->LAST_FREED = metap->SPARES[splitnum]; - metap->SPARES[splitnum]++; - offset = metap->SPARES[splitnum] - - (splitnum ? metap->SPARES[splitnum - 1] : 0); + metap->hashm_lastfreed = metap->hashm_spares[splitnum]; + metap->hashm_spares[splitnum]++; + offset = metap->hashm_spares[splitnum] - + (splitnum ? metap->hashm_spares[splitnum - 1] : 0); #define OVMSG "HASH: Out of overflow pages. 
Out of luck.\n" @@ -164,9 +164,9 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp) { if (++splitnum >= NCACHED) elog(ERROR, OVMSG); - metap->OVFL_POINT = splitnum; - metap->SPARES[splitnum] = metap->SPARES[splitnum - 1]; - metap->SPARES[splitnum - 1]--; + metap->hashm_ovflpoint = splitnum; + metap->hashm_spares[splitnum] = metap->hashm_spares[splitnum - 1]; + metap->hashm_spares[splitnum - 1]--; offset = 0; } @@ -194,15 +194,15 @@ _hash_getovfladdr(Relation rel, Buffer *metabufp) if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset), 1, free_page)) elog(ERROR, "overflow_page: problem with _hash_initbitmap."); - metap->SPARES[splitnum]++; + metap->hashm_spares[splitnum]++; offset++; if (offset > SPLITMASK) { if (++splitnum >= NCACHED) elog(ERROR, OVMSG); - metap->OVFL_POINT = splitnum; - metap->SPARES[splitnum] = metap->SPARES[splitnum - 1]; - metap->SPARES[splitnum - 1]--; + metap->hashm_ovflpoint = splitnum; + metap->hashm_spares[splitnum] = metap->hashm_spares[splitnum - 1]; + metap->hashm_spares[splitnum - 1]--; offset = 0; } } @@ -235,13 +235,13 @@ found: */ bit = 1 + bit + (i * BMPGSZ_BIT(metap)); - if (bit >= metap->LAST_FREED) - metap->LAST_FREED = bit - 1; + if (bit >= metap->hashm_lastfreed) + metap->hashm_lastfreed = bit - 1; /* Calculate the split number for this page */ - for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++) + for (i = 0; (i < splitnum) && (bit > metap->hashm_spares[i]); i++) ; - offset = (i ? bit - metap->SPARES[i - 1] : bit); + offset = (i ? bit - metap->hashm_spares[i - 1] : bit); if (offset >= SPLITMASK) elog(ERROR, OVMSG); @@ -355,10 +355,10 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf) * element hashm_mapp[bitmappage]. */ splitnum = (addr >> SPLITSHIFT); - ovflpgno = (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1; + ovflpgno = (splitnum ? 
metap->hashm_spares[splitnum - 1] : 0) + (addr & SPLITMASK) - 1; - if (ovflpgno < metap->LAST_FREED) - metap->LAST_FREED = ovflpgno; + if (ovflpgno < metap->hashm_lastfreed) + metap->hashm_lastfreed = ovflpgno; bitmappage = (ovflpgno >> (metap->hashm_bshift + BYTE_TO_BIT)); bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1); diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index d1b3aaa232..b8c520e3c0 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.31 2001/06/27 23:31:37 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.32 2001/07/15 22:48:15 tgl Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque @@ -18,7 +18,7 @@ * address of the page if it is an overflow page. * * The first page in a hash relation, page zero, is special -- it stores - * information describing the hash table; it is referred to as teh + * information describing the hash table; it is referred to as the * "meta page." Pages one and higher store the actual data. * *------------------------------------------------------------------------- @@ -48,6 +48,19 @@ static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket * before the lock table is fully initialized, so we can't use it. * Strictly speaking, this violates 2pl, but we don't do 2pl on the * system catalogs anyway. + * + * Note that our page locks are actual lockmanager locks, not buffer + * locks (as are used by btree, for example). This is a good idea because + * the algorithms are not deadlock-free, and we'd better be able to detect + * and recover from deadlocks. + * + * Another important difference from btree is that a hash indexscan + * retains both a lock and a buffer pin on the current index page + * between hashgettuple() calls (btree keeps only a buffer pin). 
+ * Because of this, it's safe to do item deletions with only a regular + * write lock on a hash page --- there cannot be an indexscan stopped on + * the page being deleted, other than an indexscan of our own backend, + * which will be taken care of by _hash_adjscans. */ @@ -350,6 +363,16 @@ _hash_unsetpagelock(Relation rel, } } +/* + * Delete a hash index item. + * + * It is safe to delete an item after acquiring a regular WRITE lock on + * the page, because no other backend can hold a READ lock on the page, + * and that means no other backend currently has an indexscan stopped on + * any item of the page containing the item being deleted. Our own backend might have such + * an indexscan (in fact *will*, since that's how VACUUM found the item + * in the first place), but _hash_adjscans will fix the scan position. + */ void _hash_pagedel(Relation rel, ItemPointer tid) { @@ -384,7 +407,7 @@ _hash_pagedel(Relation rel, ItemPointer tid) metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); metap = (HashMetaPage) BufferGetPage(metabuf); _hash_checkpage((Page) metap, LH_META_PAGE); - ++metap->hashm_nkeys; + metap->hashm_nkeys--; _hash_wrtbuf(rel, metabuf); } @@ -402,32 +425,32 @@ _hash_expandtable(Relation rel, Buffer metabuf) _hash_checkpage((Page) metap, LH_META_PAGE); metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - new_bucket = ++metap->MAX_BUCKET; + new_bucket = ++metap->hashm_maxbucket; metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); - old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK); + old_bucket = (metap->hashm_maxbucket & metap->hashm_lowmask); /* - * If the split point is increasing (MAX_BUCKET's log base 2 * + * If the split point is increasing (hashm_maxbucket's log base 2 * * increases), we need to copy the current contents of the spare split * bucket to the next bucket.
*/ - spare_ndx = _hash_log2(metap->MAX_BUCKET + 1); - if (spare_ndx > metap->OVFL_POINT) + spare_ndx = _hash_log2(metap->hashm_maxbucket + 1); + if (spare_ndx > metap->hashm_ovflpoint) { metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT]; - metap->OVFL_POINT = spare_ndx; + metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint]; + metap->hashm_ovflpoint = spare_ndx; metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); } - if (new_bucket > metap->HIGH_MASK) + if (new_bucket > metap->hashm_highmask) { /* Starting a new doubling */ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - metap->LOW_MASK = metap->HIGH_MASK; - metap->HIGH_MASK = new_bucket | metap->LOW_MASK; + metap->hashm_lowmask = metap->hashm_highmask; + metap->hashm_highmask = new_bucket | metap->hashm_lowmask; metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); } diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c index 649e42fbeb..f4a91b5710 100644 --- a/src/backend/access/hash/hashscan.c +++ b/src/backend/access/hash/hashscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.24 2001/01/24 19:42:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.25 2001/07/15 22:48:15 tgl Exp $ * * NOTES * Because we can be doing an index scan on a relation while we @@ -45,6 +45,31 @@ typedef HashScanListData *HashScanList; static HashScanList HashScans = (HashScanList) NULL; + +/* + * AtEOXact_hash() --- clean up hash subsystem at xact abort or commit. + * + * This is here because it needs to touch this module's static var HashScans. + */ +void +AtEOXact_hash(void) +{ + /* + * Note: these actions should only be necessary during xact abort; but + * they can't hurt during a commit. 
+ */ + + /* + * Reset the active-scans list to empty. We do not need to free the + * list elements, because they're all palloc()'d, so they'll go away + * at end of transaction anyway. + */ + HashScans = NULL; + + /* If we were building a hash, we ain't anymore. */ + BuildingHash = false; +} + /* * _Hash_regscan() -- register a new scan. */ diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index adeccf5cc8..2b6be06168 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.51 2001/06/22 19:16:21 wieck Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.52 2001/07/15 22:48:15 tgl Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relationId @@ -18,23 +18,17 @@ * index_rescan - restart a scan of an index * index_endscan - end a scan * index_insert - insert an index tuple into a relation - * index_delete - delete an item from an index relation * index_markpos - mark a scan position * index_restrpos - restore a scan position * index_getnext - get the next tuple from a scan - * ** index_fetch - retrieve tuple with tid - * ** index_replace - replace a tuple - * ** index_getattr - get an attribute from an index tuple - * index_getprocid - get a support procedure id from the rel tuple - * - * IndexScanIsValid - check index scan + * index_bulk_delete - bulk deletion of index tuples + * index_cost_estimator - fetch amcostestimate procedure OID + * index_getprocid - get a support procedure OID * * NOTES * This file contains the index_ routines which used * to be a scattered collection of stuff in access/genam. * - * The ** routines: index_fetch, index_replace, and index_getattr - * have not yet been implemented. They may not be needed. 
* * old comments * Scans are implemented as follows: @@ -210,23 +204,6 @@ index_insert(Relation relation, return specificResult; } -/* ---------------- - * index_delete - delete an item from an index relation - * ---------------- - */ -void -index_delete(Relation relation, ItemPointer indexItem) -{ - RegProcedure procedure; - - RELATION_CHECKS; - GET_REL_PROCEDURE(delete, amdelete); - - OidFunctionCall2(procedure, - PointerGetDatum(relation), - PointerGetDatum(indexItem)); -} - /* ---------------- * index_beginscan - start a scan of an index * ---------------- @@ -378,6 +355,35 @@ index_getnext(IndexScanDesc scan, return result; } +/* ---------------- + * index_bulk_delete - do mass deletion of index entries + * + * callback routine tells whether a given main-heap tuple is + * to be deleted + * + * return value is an optional palloc'd struct of statistics + * ---------------- + */ +IndexBulkDeleteResult * +index_bulk_delete(Relation relation, + IndexBulkDeleteCallback callback, + void *callback_state) +{ + RegProcedure procedure; + IndexBulkDeleteResult *result; + + RELATION_CHECKS; + GET_REL_PROCEDURE(bulk_delete, ambulkdelete); + + result = (IndexBulkDeleteResult *) + DatumGetPointer(OidFunctionCall3(procedure, + PointerGetDatum(relation), + PointerGetDatum((Pointer) callback), + PointerGetDatum(callback_state))); + + return result; +} + /* ---------------- * index_cost_estimator * diff --git a/src/backend/access/nbtree/Makefile b/src/backend/access/nbtree/Makefile index eba9bd4eef..bdc366dd0a 100644 --- a/src/backend/access/nbtree/Makefile +++ b/src/backend/access/nbtree/Makefile @@ -4,7 +4,7 @@ # Makefile for access/nbtree # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/access/nbtree/Makefile,v 1.10 2000/08/31 16:09:41 petere Exp $ +# $Header: /cvsroot/pgsql/src/backend/access/nbtree/Makefile,v 1.11 2001/07/15 22:48:16 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = 
src/backend/access/nbtree top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = nbtcompare.o nbtinsert.o nbtpage.o nbtree.o nbtscan.o nbtsearch.o \ +OBJS = nbtcompare.o nbtinsert.o nbtpage.o nbtree.o nbtsearch.o \ nbtstrat.o nbtutils.o nbtsort.o all: SUBSYS.o diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index cff7ff0d65..d8ec739b2a 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -1,4 +1,4 @@ -$Header: /cvsroot/pgsql/src/backend/access/nbtree/README,v 1.4 2000/07/25 05:26:40 tgl Exp $ +$Header: /cvsroot/pgsql/src/backend/access/nbtree/README,v 1.5 2001/07/15 22:48:16 tgl Exp $ This directory contains a correct implementation of Lehman and Yao's high-concurrency B-tree management algorithm (P. Lehman and S. Yao, @@ -109,15 +109,11 @@ In addition, the following things are handy to know: is too high a price). Rebuilding corrupted indexes during restart seems more attractive. -+ On deletions, we need to adjust the position of active scans on - the index. The code in nbtscan.c handles this. We don't need to - do this for insertions or splits because _bt_restscan can find the - new position of the previously-found item. NOTE that nbtscan.c - only copes with deletions issued by the current backend. This - essentially means that concurrent deletions are not supported, but - that's true already in the Lehman and Yao algorithm. nbtscan.c - exists only to support VACUUM and allow it to delete items while - it's scanning the index. ++ Deletions are handled by getting a super-exclusive lock on the target + page, so that no other backend has a pin on the page when the deletion + starts. This means no scan is pointing at the page. This is OK for + deleting leaf items, probably not OK for deleting internal nodes; + will need to think harder when it's time to support index compaction. + "ScanKey" data structures are used in two fundamentally different ways in this code. 
Searches for the initial position for a scan, as well as diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 8ffb9b9043..c91c568ed2 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.83 2001/06/22 19:16:21 wieck Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.84 2001/07/15 22:48:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -126,7 +126,7 @@ top: if (TransactionIdIsValid(xwait)) { /* Have to wait for the other guy ... */ - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); XactLockTableWait(xwait); /* start over... */ _bt_freestack(stack); @@ -234,7 +234,7 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, if (TransactionIdIsValid(xwait)) { if (nbuf != InvalidBuffer) - _bt_relbuf(rel, nbuf, BT_READ); + _bt_relbuf(rel, nbuf); /* Tell _bt_doinsert to wait... 
*/ return xwait; } @@ -263,7 +263,7 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, break; nblkno = opaque->btpo_next; if (nbuf != InvalidBuffer) - _bt_relbuf(rel, nbuf, BT_READ); + _bt_relbuf(rel, nbuf); nbuf = _bt_getbuf(rel, nblkno, BT_READ); page = BufferGetPage(nbuf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -273,7 +273,7 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel, } if (nbuf != InvalidBuffer) - _bt_relbuf(rel, nbuf, BT_READ); + _bt_relbuf(rel, nbuf); return NullTransactionId; } @@ -397,7 +397,7 @@ _bt_insertonpg(Relation rel, /* step right one page */ BlockNumber rblkno = lpageop->btpo_next; - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, rblkno, BT_WRITE); page = BufferGetPage(buf); lpageop = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1175,12 +1175,12 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access) */ if (P_RIGHTMOST(opaque)) { - _bt_relbuf(rel, buf, access); + _bt_relbuf(rel, buf); return (InvalidBuffer); } blkno = opaque->btpo_next; - _bt_relbuf(rel, buf, access); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, blkno, access); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1449,7 +1449,7 @@ _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release) &itup_off, &itup_blkno); /* Keep lock on new "root" buffer ! 
*/ if (buf != rootbuf) - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); buf = newbuf; page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1525,7 +1525,7 @@ _bt_fixtree(Relation rel, BlockNumber blkno) if (P_ISROOT(opaque)) { /* Tree is Ok now */ - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); return; } /* Call _bt_fixroot() if there is no upper level */ @@ -1533,12 +1533,12 @@ _bt_fixtree(Relation rel, BlockNumber blkno) { elog(NOTICE, "bt_fixtree[%s]: fixing root page", RelationGetRelationName(rel)); buf = _bt_fixroot(rel, buf, true); - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); return; } /* Have to go up one level */ pblkno = opaque->btpo_parent; - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); } blkno = pblkno; } @@ -1571,7 +1571,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit) page = BufferGetPage(buf); /* copy page to temp storage */ memmove(tbuf, page, PageGetPageSize(page)); - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); page = (Page) tbuf; opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1682,7 +1682,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit) { if (coff[i] != P_FIRSTDATAKEY(newopaque)) elog(ERROR, "bt_fixlevel[%s]: invalid item order(3) (need to recreate index)", RelationGetRelationName(rel)); - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); buf = newbuf; page = newpage; opaque = newopaque; @@ -1691,7 +1691,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit) continue; } /* unfound - need to insert on current page */ - _bt_relbuf(rel, newbuf, BT_WRITE); + _bt_relbuf(rel, newbuf); } /* insert pointer */ ritem = (BTItem) PageGetItem(cpage[i - 1], @@ -1718,10 +1718,10 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit) &itup_off, &itup_blkno); /* what buffer we need in ? 
*/ if (newitemonleft) - _bt_relbuf(rel, newbuf, BT_WRITE); + _bt_relbuf(rel, newbuf); else { - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); buf = newbuf; page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1741,7 +1741,7 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit) /* copy page with pointer to cblkno[cidx] to temp storage */ memmove(tbuf, page, PageGetPageSize(page)); - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); page = (Page) tbuf; opaque = (BTPageOpaque) PageGetSpecialPointer(page); } @@ -1751,13 +1751,13 @@ _bt_fixlevel(Relation rel, Buffer buf, BlockNumber limit) goodbye = false; /* Pointers to child pages are Ok - right end of child level ? */ - _bt_relbuf(rel, cbuf[0], BT_READ); - _bt_relbuf(rel, cbuf[1], BT_READ); + _bt_relbuf(rel, cbuf[0]); + _bt_relbuf(rel, cbuf[1]); if (cidx == 1 || (cidx == 2 && (P_RIGHTMOST(copaque[2]) || goodbye))) { if (cidx == 2) - _bt_relbuf(rel, cbuf[2], BT_READ); + _bt_relbuf(rel, cbuf[2]); return; } if (cblkno[0] == limit || cblkno[1] == limit) @@ -1819,7 +1819,7 @@ _bt_fixbranch(Relation rel, BlockNumber lblkno, { if (offnum <= stack.bts_offset) elog(ERROR, "bt_fixbranch[%s]: invalid item order (need to recreate index)", RelationGetRelationName(rel)); - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); return; } @@ -1837,7 +1837,7 @@ _bt_fixbranch(Relation rel, BlockNumber lblkno, if (rbuf == InvalidBuffer) elog(ERROR, "bt_fixbranch[%s]: right pointer unfound(2) (need to recreate index)", RelationGetRelationName(rel)); rblkno = BufferGetBlockNumber(rbuf); - _bt_relbuf(rel, rbuf, BT_READ); + _bt_relbuf(rel, rbuf); /* * If we have parent item in true_stack then go up one level and @@ -1845,7 +1845,7 @@ _bt_fixbranch(Relation rel, BlockNumber lblkno, */ if (true_stack) { - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); blkno = true_stack->bts_blkno; true_stack = true_stack->bts_parent; continue; @@ -1860,19 +1860,19 @@ _bt_fixbranch(Relation rel, 
BlockNumber lblkno, if (!BTreeInvalidParent(opaque)) { blkno = opaque->btpo_parent; - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); continue; } /* Have to switch to excl buf lock and re-check btpo_parent */ - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, blkno, BT_WRITE); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!BTreeInvalidParent(opaque)) { blkno = opaque->btpo_parent; - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); continue; } @@ -1913,7 +1913,7 @@ _bt_fixup(Relation rel, Buffer buf) if (!BTreeInvalidParent(opaque)) { blkno = opaque->btpo_parent; - _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, buf); elog(NOTICE, "bt_fixup[%s]: checking/fixing upper levels", RelationGetRelationName(rel)); _bt_fixtree(rel, blkno); return; @@ -1921,8 +1921,7 @@ _bt_fixup(Relation rel, Buffer buf) if (P_LEFTMOST(opaque)) break; blkno = opaque->btpo_prev; - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - ReleaseBuffer(buf); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, blkno, BT_WRITE); } @@ -1932,9 +1931,7 @@ _bt_fixup(Relation rel, Buffer buf) */ elog(NOTICE, "bt_fixup[%s]: fixing root page", RelationGetRelationName(rel)); buf = _bt_fixroot(rel, buf, true); - _bt_relbuf(rel, buf, BT_WRITE); - - return; + _bt_relbuf(rel, buf); } static OffsetNumber diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 67e1407b22..376274c562 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.52 2001/06/27 23:31:38 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.53 2001/07/15 22:48:16 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. 
The opaque @@ -138,7 +138,7 @@ _bt_getroot(Relation rel, int access) /* If access = BT_READ, caller doesn't want us to create root yet */ if (access == BT_READ) { - _bt_relbuf(rel, metabuf, BT_READ); + _bt_relbuf(rel, metabuf); return InvalidBuffer; } @@ -215,14 +215,14 @@ _bt_getroot(Relation rel, int access) * guarantee no deadlocks, we have to release the metadata * page and start all over again. */ - _bt_relbuf(rel, metabuf, BT_WRITE); + _bt_relbuf(rel, metabuf); return _bt_getroot(rel, access); } } else { rootblkno = metad->btm_root; - _bt_relbuf(rel, metabuf, BT_READ); /* done with the meta page */ + _bt_relbuf(rel, metabuf); /* done with the meta page */ rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); } @@ -270,8 +270,8 @@ _bt_getroot(Relation rel, int access) goto check_parent; } else -/* someone else already fixed root */ { + /* someone else already fixed root */ LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK); LockBuffer(rootbuf, BT_READ); } @@ -283,7 +283,7 @@ _bt_getroot(Relation rel, int access) * chance that parent is root page. */ newrootbuf = _bt_getbuf(rel, rootopaque->btpo_parent, BT_READ); - _bt_relbuf(rel, rootbuf, BT_READ); + _bt_relbuf(rel, rootbuf); rootbuf = newrootbuf; rootpage = BufferGetPage(rootbuf); rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); @@ -293,7 +293,7 @@ _bt_getroot(Relation rel, int access) } /* try again */ - _bt_relbuf(rel, rootbuf, BT_READ); + _bt_relbuf(rel, rootbuf); return _bt_getroot(rel, access); } @@ -350,10 +350,12 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) /* * _bt_relbuf() -- release a locked buffer. * - * Lock and pin (refcount) are both dropped. + * Lock and pin (refcount) are both dropped. Note that either read or + * write lock can be dropped this way, but if we modified the buffer, + * this is NOT the right way to release a write lock. 
*/ void -_bt_relbuf(Relation rel, Buffer buf, int access) +_bt_relbuf(Relation rel, Buffer buf) { LockBuffer(buf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buf); @@ -449,24 +451,23 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, int level) } /* - * Delete an item from a btree. It had better be a leaf item... + * Delete an item from a btree page. + * + * This routine assumes that the caller has pinned and locked the buffer, + * and will write the buffer afterwards. */ void -_bt_pagedel(Relation rel, ItemPointer tid) +_bt_itemdel(Relation rel, Buffer buf, ItemPointer tid) { - Buffer buf; - Page page; - BlockNumber blkno; + Page page = BufferGetPage(buf); OffsetNumber offno; - blkno = ItemPointerGetBlockNumber(tid); offno = ItemPointerGetOffsetNumber(tid); - buf = _bt_getbuf(rel, blkno, BT_WRITE); - page = BufferGetPage(buf); - START_CRIT_SECTION(); + PageIndexTupleDelete(page, offno); + /* XLOG stuff */ { xl_btree_delete xlrec; @@ -490,8 +491,6 @@ _bt_pagedel(Relation rel, ItemPointer tid) PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } - END_CRIT_SECTION(); - /* write the buffer and release the lock */ - _bt_wrtbuf(rel, buf); + END_CRIT_SECTION(); } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index b714296c8f..b142645624 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.81 2001/05/18 21:24:17 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.82 2001/07/15 22:48:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,11 +28,27 @@ #include "storage/sinval.h" #include "access/xlogutils.h" + +/* Working state for btbuild and its callback */ +typedef struct +{ + bool usefast; + bool isUnique; + bool haveDead; + Relation heapRel; + 
BTSpool *spool; + /* + * spool2 is needed only when the index is a unique index. Dead + * tuples are put into spool2 instead of spool in order to avoid + * uniqueness check. + */ + BTSpool *spool2; + double indtuples; +} BTBuildState; + + bool BuildingBtree = false; /* see comment in btbuild() */ -bool FastBuild = true; /* use sort/build instead */ - - /* of insertion build */ - +bool FastBuild = true; /* use SORT instead of insertion build */ /* * TEMPORARY FLAG FOR TESTING NEW FIX TREE @@ -41,6 +57,29 @@ bool FastBuild = true; /* use sort/build instead */ bool FixBTree = true; static void _bt_restscan(IndexScanDesc scan); +static void btbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state); + + +/* + * AtEOXact_nbtree() --- clean up nbtree subsystem at xact abort or commit. + */ +void +AtEOXact_nbtree(void) +{ + /* + * Note: these actions should only be necessary during xact abort; but + * they can't hurt during a commit. + */ + + /* If we were building a btree, we ain't anymore. */ + BuildingBtree = false; +} + /* * btbuild() -- build a new btree index.
@@ -56,42 +95,10 @@ btbuild(PG_FUNCTION_ARGS) Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); - Node *oldPred = (Node *) PG_GETARG_POINTER(3); -#ifdef NOT_USED - IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4); -#endif - HeapScanDesc hscan; - HeapTuple htup; - IndexTuple itup; - TupleDesc htupdesc, - itupdesc; - Datum attdata[INDEX_MAX_KEYS]; - char nulls[INDEX_MAX_KEYS]; - double nhtups, - nitups; - Node *pred = indexInfo->ii_Predicate; -#ifndef OMIT_PARTIAL_INDEX - TupleTable tupleTable; - TupleTableSlot *slot; -#endif - ExprContext *econtext; - InsertIndexResult res = NULL; - BTSpool *spool = NULL; - BTItem btitem; - bool usefast; - Snapshot snapshot; - TransactionId XmaxRecent; + double reltuples; + BTBuildState buildstate; - /* - * spool2 is needed only when the index is an unique index. Dead - * tuples are put into spool2 instead of spool in order to avoid - * uniqueness check. - */ - BTSpool *spool2 = NULL; - bool tupleIsAlive; - int dead_count; - - /* note that this is a new btree */ + /* set flag to disable locking */ BuildingBtree = true; /* @@ -100,220 +107,63 @@ btbuild(PG_FUNCTION_ARGS) * look harder at this. (there is some kind of incremental processing * going on there.) 
-- pma 08/29/95 */ - usefast = (FastBuild && IsNormalProcessingMode()); + buildstate.usefast = (FastBuild && IsNormalProcessingMode()); + buildstate.isUnique = indexInfo->ii_Unique; + buildstate.haveDead = false; + buildstate.heapRel = heap; + buildstate.spool = NULL; + buildstate.spool2 = NULL; + buildstate.indtuples = 0; #ifdef BTREE_BUILD_STATS if (Show_btree_build_stats) ResetUsage(); #endif /* BTREE_BUILD_STATS */ - /* initialize the btree index metadata page (if this is a new index) */ - if (oldPred == NULL) - _bt_metapinit(index); - - /* get tuple descriptors for heap and index relations */ - htupdesc = RelationGetDescr(heap); - itupdesc = RelationGetDescr(index); - /* - * If this is a predicate (partial) index, we will need to evaluate - * the predicate using ExecQual, which requires the current tuple to - * be in a slot of a TupleTable. In addition, ExecQual must have an - * ExprContext referring to that slot. Here, we initialize dummy - * TupleTable and ExprContext objects for this purpose. --Nels, Feb 92 - * - * We construct the ExprContext anyway since we need a per-tuple - * temporary memory context for function evaluation -- tgl July 00 + * We expect to be called exactly once for any index relation. If + * that's not the case, big trouble's what we have. */ -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - ExecSetSlotDescriptor(slot, htupdesc, false); + if (RelationGetNumberOfBlocks(index) != 0) + elog(ERROR, "%s already contains data", + RelationGetRelationName(index)); + /* initialize the btree index metadata page */ + _bt_metapinit(index); + + if (buildstate.usefast) + { + buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique); /* - * we never want to use sort/build if we are extending an existing - * partial index -- it works by inserting the newly-qualifying - * tuples into the existing index. 
(sort/build would overwrite the - * existing index with one consisting of the newly-qualifying - * tuples.) - */ - usefast = false; - } - else - { - tupleTable = NULL; - slot = NULL; - } - econtext = MakeExprContext(slot, TransactionCommandContext); -#else - econtext = MakeExprContext(NULL, TransactionCommandContext); -#endif /* OMIT_PARTIAL_INDEX */ - - /* build the index */ - nhtups = nitups = 0.0; - - if (usefast) - { - spool = _bt_spoolinit(index, indexInfo->ii_Unique); - - /* - * Different from spool,the uniqueness isn't checked for spool2. + * Different from spool, the uniqueness isn't checked for spool2. */ if (indexInfo->ii_Unique) - spool2 = _bt_spoolinit(index, false); + buildstate.spool2 = _bt_spoolinit(index, false); } - /* start a heap scan */ - dead_count = 0; - snapshot = (IsBootstrapProcessingMode() ? SnapshotNow : SnapshotAny); - hscan = heap_beginscan(heap, 0, snapshot, 0, (ScanKey) NULL); - XmaxRecent = 0; - if (snapshot == SnapshotAny) - GetXmaxRecent(&XmaxRecent); - - while (HeapTupleIsValid(htup = heap_getnext(hscan, 0))) - { - if (snapshot == SnapshotAny) - { - tupleIsAlive = HeapTupleSatisfiesNow(htup->t_data); - if (!tupleIsAlive) - { - if ((htup->t_data->t_infomask & HEAP_XMIN_INVALID) != 0) - continue; - if (htup->t_data->t_infomask & HEAP_XMAX_COMMITTED && - htup->t_data->t_xmax < XmaxRecent) - continue; - } - } - else - tupleIsAlive = true; - - MemoryContextReset(econtext->ecxt_per_tuple_memory); - - nhtups += 1.0; - -#ifndef OMIT_PARTIAL_INDEX - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) - { - slot->val = htup; - if (ExecQual((List *) oldPred, econtext, false)) - { - nitups += 1.0; - continue; - } - } - - /* - * Skip this tuple if it doesn't satisfy the partial-index - * predicate - */ - if (pred != NULL) - { - slot->val = htup; - if (!ExecQual((List *) pred, econtext, false)) - continue; - } -#endif /* 
OMIT_PARTIAL_INDEX */ - - nitups += 1.0; - - /* - * For the current heap tuple, extract all the attributes we use - * in this index, and note which are null. - */ - FormIndexDatum(indexInfo, - htup, - htupdesc, - econtext->ecxt_per_tuple_memory, - attdata, - nulls); - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(itupdesc, attdata, nulls); - - /* - * If the single index key is null, we don't insert it into the - * index. Btrees support scans on <, <=, =, >=, and >. Relational - * algebra says that A op B (where op is one of the operators - * above) returns null if either A or B is null. This means that - * no qualification used in an index scan could ever return true - * on a null attribute. It also means that indices can't be used - * by ISNULL or NOTNULL scans, but that's an artifact of the - * strategy map architecture chosen in 1986, not of the way nulls - * are handled here. - */ - - /* - * New comments: NULLs handling. While we can't do NULL - * comparison, we can follow simple rule for ordering items on - * btree pages - NULLs greater NOT_NULLs and NULL = NULL is TRUE. - * Sure, it's just rule for placing/finding items and no more - - * keytest'll return FALSE for a = 5 for items having 'a' isNULL. - * Look at _bt_compare for how it works. - vadim 03/23/97 - * - * if (itup->t_info & INDEX_NULL_MASK) { pfree(itup); continue; } - */ - - itup->t_tid = htup->t_self; - btitem = _bt_formitem(itup); - - /* - * if we are doing bottom-up btree build, we insert the index into - * a spool file for subsequent processing. otherwise, we insert - * into the btree. 
- */ - if (usefast) - { - if (tupleIsAlive || !spool2) - _bt_spool(btitem, spool); - else -/* dead tuples are put into spool2 */ - { - dead_count++; - _bt_spool(btitem, spool2); - } - } - else - res = _bt_doinsert(index, btitem, indexInfo->ii_Unique, heap); - - pfree(btitem); - pfree(itup); - if (res) - pfree(res); - } + /* do the heap scan */ + reltuples = IndexBuildHeapScan(heap, index, indexInfo, + btbuildCallback, (void *) &buildstate); /* okay, all heap tuples are indexed */ - heap_endscan(hscan); - if (spool2 && !dead_count) /* spool2 was found to be unnecessary */ + if (buildstate.spool2 && !buildstate.haveDead) { - _bt_spooldestroy(spool2); - spool2 = NULL; + /* spool2 turns out to be unnecessary */ + _bt_spooldestroy(buildstate.spool2); + buildstate.spool2 = NULL; } -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - ExecDropTupleTable(tupleTable, true); -#endif /* OMIT_PARTIAL_INDEX */ - FreeExprContext(econtext); - /* * if we are doing bottom-up btree build, finish the build by (1) * completing the sort of the spool file, (2) inserting the sorted * tuples into btree pages and (3) building the upper levels. 
*/ - if (usefast) + if (buildstate.usefast) { - _bt_leafbuild(spool, spool2); - _bt_spooldestroy(spool); - if (spool2) - _bt_spooldestroy(spool2); + _bt_leafbuild(buildstate.spool, buildstate.spool2); + _bt_spooldestroy(buildstate.spool); + if (buildstate.spool2) + _bt_spooldestroy(buildstate.spool2); } #ifdef BTREE_BUILD_STATS @@ -325,6 +175,9 @@ btbuild(PG_FUNCTION_ARGS) } #endif /* BTREE_BUILD_STATS */ + /* all done */ + BuildingBtree = false; + /* * Since we just counted the tuples in the heap, we update its stats * in pg_class to guarantee that the planner takes advantage of the @@ -343,22 +196,65 @@ btbuild(PG_FUNCTION_ARGS) heap_close(heap, NoLock); index_close(index); - UpdateStats(hrelid, nhtups); - UpdateStats(irelid, nitups); - if (oldPred != NULL) - { - if (nitups == nhtups) - pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } + UpdateStats(hrelid, reltuples); + UpdateStats(irelid, buildstate.indtuples); } - /* all done */ - BuildingBtree = false; - PG_RETURN_VOID(); } +/* + * Per-tuple callback from IndexBuildHeapScan + */ +static void +btbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state) +{ + BTBuildState *buildstate = (BTBuildState *) state; + IndexTuple itup; + BTItem btitem; + InsertIndexResult res; + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(RelationGetDescr(index), attdata, nulls); + itup->t_tid = htup->t_self; + + btitem = _bt_formitem(itup); + + /* + * if we are doing bottom-up btree build, we insert the index into + * a spool file for subsequent processing. otherwise, we insert + * into the btree. 
+ */ + if (buildstate->usefast) + { + if (tupleIsAlive || buildstate->spool2 == NULL) + _bt_spool(btitem, buildstate->spool); + else + { + /* dead tuples are put into spool2 */ + buildstate->haveDead = true; + _bt_spool(btitem, buildstate->spool2); + } + } + else + { + res = _bt_doinsert(index, btitem, + buildstate->isUnique, buildstate->heapRel); + if (res) + pfree(res); + } + + buildstate->indtuples += 1; + + pfree(btitem); + pfree(itup); +} + /* * btinsert() -- insert an index tuple into a btree. * @@ -423,8 +319,10 @@ btgettuple(PG_FUNCTION_ARGS) /* * Save heap TID to use it in _bt_restscan. Then release the read - * lock on the buffer so that we aren't blocking other backends. NOTE: - * we do keep the pin on the buffer! + * lock on the buffer so that we aren't blocking other backends. + * + * NOTE: we do keep the pin on the buffer! This is essential to ensure + * that someone else doesn't delete the index entry we are stopped on. */ if (res) { @@ -451,9 +349,6 @@ btbeginscan(PG_FUNCTION_ARGS) /* get the scan */ scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); - /* register scan in case we change pages it's using */ - _bt_regscan(scan); - PG_RETURN_POINTER(scan); } @@ -571,8 +466,6 @@ btendscan(PG_FUNCTION_ARGS) pfree(so->keyData); pfree(so); - _bt_dropscan(scan); - PG_RETURN_VOID(); } @@ -640,20 +533,127 @@ btrestrpos(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -/* stubs */ +/* + * Bulk deletion of all index entries pointing to a set of heap tuples. + * The set of target tuples is specified via a callback routine that tells + * whether any given heap tuple (identified by ItemPointer) is being deleted. + * + * Result: a palloc'd struct containing statistical info for VACUUM displays. 
+ */ Datum -btdelete(PG_FUNCTION_ARGS) +btbulkdelete(PG_FUNCTION_ARGS) { Relation rel = (Relation) PG_GETARG_POINTER(0); - ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1); + void *callback_state = (void *) PG_GETARG_POINTER(2); + IndexBulkDeleteResult *result; + BlockNumber num_pages; + double tuples_removed; + double num_index_tuples; + RetrieveIndexResult res; + IndexScanDesc scan; + BTScanOpaque so; + ItemPointer current; - /* adjust any active scans that will be affected by this deletion */ - _bt_adjscans(rel, tid); + tuples_removed = 0; + num_index_tuples = 0; - /* delete the data from the page */ - _bt_pagedel(rel, tid); + /* + * We use a standard IndexScanDesc scan object, but to speed up the loop, + * we skip most of the wrapper layers of index_getnext and instead call + * _bt_step directly. This implies holding buffer lock on a target page + * throughout the loop over the page's tuples. Initially, we have a read + * lock acquired by _bt_step when we stepped onto the page. If we find + * a tuple we need to delete, we trade in the read lock for an exclusive + * write lock; after that, we hold the write lock until we step off the + * page (fortunately, _bt_relbuf doesn't care which kind of lock it's + * releasing). This should minimize the amount of work needed per page. 
+ */ + scan = index_beginscan(rel, false, 0, (ScanKey) NULL); + so = (BTScanOpaque) scan->opaque; + current = &(scan->currentItemData); - PG_RETURN_VOID(); + /* Use _bt_first to get started, then _bt_step to advance to the remaining tuples */ + res = _bt_first(scan, ForwardScanDirection); + + if (res != NULL) + { + Buffer buf; + BlockNumber lockedBlock = InvalidBlockNumber; + + pfree(res); + /* we have the buffer pinned and locked */ + buf = so->btso_curbuf; + Assert(BufferIsValid(buf)); + + do + { + Page page; + BlockNumber blkno; + OffsetNumber offnum; + BTItem btitem; + IndexTuple itup; + ItemPointer htup; + + /* current is the next index tuple */ + blkno = ItemPointerGetBlockNumber(current); + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &btitem->bti_itup; + htup = &(itup->t_tid); + + if (callback(htup, callback_state)) + { + /* + * If this is first deletion on this page, trade in read + * lock for a really-exclusive write lock. Then, step back + * one and re-examine the item, because someone else might + * have inserted an item while we weren't holding the lock! + */ + if (blkno != lockedBlock) + { + LockBuffer(buf, BUFFER_LOCK_UNLOCK); + LockBufferForCleanup(buf); + lockedBlock = blkno; + } + else + { + /* Delete the item from the page */ + _bt_itemdel(rel, buf, current); + + /* Mark buffer dirty, but keep the lock and pin */ + WriteNoReleaseBuffer(buf); + + tuples_removed += 1; + } + + /* + * We need to back up the scan one item so that the next + * cycle will re-examine the same offnum on this page. + * + * For now, just hack the current-item index. Will need + * to be smarter when deletion includes removal of empty + * index pages. 
+ */ + current->ip_posid--; + } + else + num_index_tuples += 1; + } while (_bt_step(scan, &buf, ForwardScanDirection)); + } + + index_endscan(scan); + + /* return statistics */ + num_pages = RelationGetNumberOfBlocks(rel); + + result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); + result->num_pages = num_pages; + result->tuples_removed = tuples_removed; + result->num_index_tuples = num_index_tuples; + + PG_RETURN_POINTER(result); } /* @@ -676,7 +676,7 @@ _bt_restscan(IndexScanDesc scan) /* * Get back the read lock we were holding on the buffer. (We still - * have a reference-count pin on it, though.) + * have a reference-count pin on it, so need not get that.) */ LockBuffer(buf, BT_READ); @@ -729,7 +729,7 @@ _bt_restscan(IndexScanDesc scan) "\n\tRecreate index %s.", RelationGetRelationName(rel)); blkno = opaque->btpo_next; - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(buf); maxoff = PageGetMaxOffsetNumber(page); diff --git a/src/backend/access/nbtree/nbtscan.c b/src/backend/access/nbtree/nbtscan.c deleted file mode 100644 index e07914b344..0000000000 --- a/src/backend/access/nbtree/nbtscan.c +++ /dev/null @@ -1,224 +0,0 @@ -/*------------------------------------------------------------------------- - * - * btscan.c - * manage scans on btrees. - * - * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.33 2001/01/24 19:42:48 momjian Exp $ - * - * - * NOTES - * Because we can be doing an index scan on a relation while we update - * it, we need to avoid missing data that moves around in the index. 
- * Insertions and page splits are no problem because _bt_restscan() - * can figure out where the current item moved to, but if a deletion - * happens at or before the current scan position, we'd better do - * something to stay in sync. - * - * The routines in this file handle the problem for deletions issued - * by the current backend. Currently, that's all we need, since - * deletions are only done by VACUUM and it gets an exclusive lock. - * - * The scheme is to manage a list of active scans in the current backend. - * Whenever we remove a record from an index, we check the list of active - * scans to see if any has been affected. A scan is affected only if it - * is on the same relation, and the same page, as the update. - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "access/nbtree.h" - -typedef struct BTScanListData -{ - IndexScanDesc btsl_scan; - struct BTScanListData *btsl_next; -} BTScanListData; - -typedef BTScanListData *BTScanList; - -static BTScanList BTScans = (BTScanList) NULL; - -static void _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); - -/* - * AtEOXact_nbtree() --- clean up nbtree subsystem at xact abort or commit. - * - * This is here because it needs to touch this module's static var BTScans. - */ -void -AtEOXact_nbtree(void) -{ - - /* - * Note: these actions should only be necessary during xact abort; but - * they can't hurt during a commit. - */ - - /* - * Reset the active-scans list to empty. We do not need to free the - * list elements, because they're all palloc()'d, so they'll go away - * at end of transaction anyway. - */ - BTScans = NULL; - - /* If we were building a btree, we ain't anymore. */ - BuildingBtree = false; -} - -/* - * _bt_regscan() -- register a new scan. 
- */ -void -_bt_regscan(IndexScanDesc scan) -{ - BTScanList new_el; - - new_el = (BTScanList) palloc(sizeof(BTScanListData)); - new_el->btsl_scan = scan; - new_el->btsl_next = BTScans; - BTScans = new_el; -} - -/* - * _bt_dropscan() -- drop a scan from the scan list - */ -void -_bt_dropscan(IndexScanDesc scan) -{ - BTScanList chk, - last; - - last = (BTScanList) NULL; - for (chk = BTScans; - chk != (BTScanList) NULL && chk->btsl_scan != scan; - chk = chk->btsl_next) - last = chk; - - if (chk == (BTScanList) NULL) - elog(ERROR, "btree scan list trashed; can't find 0x%p", (void *) scan); - - if (last == (BTScanList) NULL) - BTScans = chk->btsl_next; - else - last->btsl_next = chk->btsl_next; - - pfree(chk); -} - -/* - * _bt_adjscans() -- adjust all scans in the scan list to compensate - * for a given deletion - */ -void -_bt_adjscans(Relation rel, ItemPointer tid) -{ - BTScanList l; - Oid relid; - - relid = RelationGetRelid(rel); - for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next) - { - if (relid == RelationGetRelid(l->btsl_scan->relation)) - _bt_scandel(l->btsl_scan, - ItemPointerGetBlockNumber(tid), - ItemPointerGetOffsetNumber(tid)); - } -} - -/* - * _bt_scandel() -- adjust a single scan on deletion - * - */ -static void -_bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno) -{ - ItemPointer current; - Buffer buf; - BTScanOpaque so; - OffsetNumber start; - Page page; - BTPageOpaque opaque; - - so = (BTScanOpaque) scan->opaque; - buf = so->btso_curbuf; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - { - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - start = P_FIRSTDATAKEY(opaque); - if (ItemPointerGetOffsetNumber(current) == start) - ItemPointerSetInvalid(&(so->curHeapIptr)); - else - { - - /* - * We have to lock buffer before _bt_step and unlock it after - * that. 
- */ - LockBuffer(buf, BT_READ); - _bt_step(scan, &buf, BackwardScanDirection); - if (ItemPointerIsValid(current)) - { - Page pg = BufferGetPage(buf); - BTItem btitem = (BTItem) PageGetItem(pg, - PageGetItemId(pg, ItemPointerGetOffsetNumber(current))); - - so->curHeapIptr = btitem->bti_itup.t_tid; - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - } - } - } - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - { - page = BufferGetPage(so->btso_mrkbuf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - start = P_FIRSTDATAKEY(opaque); - - if (ItemPointerGetOffsetNumber(current) == start) - ItemPointerSetInvalid(&(so->mrkHeapIptr)); - else - { - ItemPointerData tmp; - - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - so->btso_curbuf = so->btso_mrkbuf; - so->btso_mrkbuf = buf; - buf = so->btso_curbuf; - LockBuffer(buf, BT_READ); /* as above */ - - _bt_step(scan, &buf, BackwardScanDirection); - - so->btso_curbuf = so->btso_mrkbuf; - so->btso_mrkbuf = buf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - if (ItemPointerIsValid(current)) - { - Page pg = BufferGetPage(buf); - BTItem btitem = (BTItem) PageGetItem(pg, - PageGetItemId(pg, ItemPointerGetOffsetNumber(current))); - - so->mrkHeapIptr = btitem->bti_itup.t_tid; - LockBuffer(buf, BUFFER_LOCK_UNLOCK); /* as above */ - } - } - } -} diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 59bf5358e4..295387ed51 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.66 2001/03/23 04:49:51 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.67 
2001/07/15 22:48:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -94,7 +94,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey, new_stack->bts_parent = stack_in; /* drop the read lock on the parent page, acquire one on the child */ - _bt_relbuf(rel, *bufP, BT_READ); + _bt_relbuf(rel, *bufP); *bufP = _bt_getbuf(rel, blkno, BT_READ); /* @@ -155,7 +155,7 @@ _bt_moveright(Relation rel, /* step right one page */ BlockNumber rblkno = opaque->btpo_next; - _bt_relbuf(rel, buf, access); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, rblkno, access); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -406,7 +406,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) /* No more items, so close down the current-item info */ ItemPointerSetInvalid(current); so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); return (RetrieveIndexResult) NULL; } @@ -760,7 +760,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) nomatches: ItemPointerSetInvalid(current); so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); res = (RetrieveIndexResult) NULL; } @@ -815,14 +815,14 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) /* if we're at end of scan, release the buffer and return */ if (P_RIGHTMOST(opaque)) { - _bt_relbuf(rel, *bufP, BT_READ); + _bt_relbuf(rel, *bufP); ItemPointerSetInvalid(current); *bufP = so->btso_curbuf = InvalidBuffer; return false; } /* step right one page */ blkno = opaque->btpo_next; - _bt_relbuf(rel, *bufP, BT_READ); + _bt_relbuf(rel, *bufP); *bufP = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(*bufP); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -846,7 +846,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) /* if we're at end of scan, release the buffer and return */ if (P_LEFTMOST(opaque)) { - _bt_relbuf(rel, *bufP, BT_READ); + _bt_relbuf(rel, *bufP); 
ItemPointerSetInvalid(current); *bufP = so->btso_curbuf = InvalidBuffer; return false; @@ -854,7 +854,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) /* step left */ obknum = BufferGetBlockNumber(*bufP); blkno = opaque->btpo_prev; - _bt_relbuf(rel, *bufP, BT_READ); + _bt_relbuf(rel, *bufP); *bufP = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(*bufP); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -868,7 +868,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) while (opaque->btpo_next != obknum) { blkno = opaque->btpo_next; - _bt_relbuf(rel, *bufP, BT_READ); + _bt_relbuf(rel, *bufP); *bufP = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(*bufP); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -952,7 +952,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) itup = &(btitem->bti_itup); blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(buf); @@ -968,7 +968,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) do { blkno = opaque->btpo_next; - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); buf = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -1035,7 +1035,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) /* no tuples in the index match this scan key */ ItemPointerSetInvalid(current); so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); + _bt_relbuf(rel, buf); res = (RetrieveIndexResult) NULL; } diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index a8c6a13ea3..21831ef5d6 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.63 2001/07/15 22:48:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,7 +62,20 @@ typedef struct RTSTATE FmgrInfo interFn; /* intersection function */ } RTSTATE; +/* Working state for rtbuild and its callback */ +typedef struct +{ + RTSTATE rtState; + double indtuples; +} RTBuildState; + /* non-export function prototypes */ +static void rtbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state); static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate); static void rttighten(Relation r, RTSTACK *stk, Datum datum, int att_size, @@ -81,165 +94,44 @@ static int nospace(Page p, IndexTuple it); static void initRtstate(RTSTATE *rtstate, Relation index); +/* + * routine to build an index. Basically calls insert over and over + */ Datum rtbuild(PG_FUNCTION_ARGS) { Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); - Node *oldPred = (Node *) PG_GETARG_POINTER(3); + double reltuples; + RTBuildState buildstate; + Buffer buffer; -#ifdef NOT_USED - IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4); + /* no locking is needed */ -#endif - HeapScanDesc hscan; - HeapTuple htup; - IndexTuple itup; - TupleDesc htupdesc, - itupdesc; - Datum attdata[INDEX_MAX_KEYS]; - char nulls[INDEX_MAX_KEYS]; - double nhtups, - nitups; - Node *pred = indexInfo->ii_Predicate; - -#ifndef OMIT_PARTIAL_INDEX - TupleTable tupleTable; - TupleTableSlot *slot; - -#endif - ExprContext *econtext; - InsertIndexResult res = NULL; - Buffer buffer = InvalidBuffer; - RTSTATE rtState; - - initRtstate(&rtState, index); + initRtstate(&buildstate.rtState, index); /* * We expect to be called exactly once for any index relation. If * that's not the case, big trouble's what we have. 
*/ - if (oldPred == NULL && RelationGetNumberOfBlocks(index) != 0) - elog(ERROR, "%s already contains data", RelationGetRelationName(index)); + if (RelationGetNumberOfBlocks(index) != 0) + elog(ERROR, "%s already contains data", + RelationGetRelationName(index)); - /* initialize the root page (if this is a new index) */ - if (oldPred == NULL) - { - buffer = ReadBuffer(index, P_NEW); - RTInitBuffer(buffer, F_LEAF); - WriteBuffer(buffer); - } + /* initialize the root page */ + buffer = ReadBuffer(index, P_NEW); + RTInitBuffer(buffer, F_LEAF); + WriteBuffer(buffer); - /* get tuple descriptors for heap and index relations */ - htupdesc = RelationGetDescr(heap); - itupdesc = RelationGetDescr(index); + /* build the index */ + buildstate.indtuples = 0; - /* - * If this is a predicate (partial) index, we will need to evaluate - * the predicate using ExecQual, which requires the current tuple to - * be in a slot of a TupleTable. In addition, ExecQual must have an - * ExprContext referring to that slot. Here, we initialize dummy - * TupleTable and ExprContext objects for this purpose. 
--Nels, Feb 92 - * - * We construct the ExprContext anyway since we need a per-tuple - * temporary memory context for function evaluation -- tgl July 00 - */ -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - ExecSetSlotDescriptor(slot, htupdesc, false); - } - else - { - tupleTable = NULL; - slot = NULL; - } - econtext = MakeExprContext(slot, TransactionCommandContext); -#else - econtext = MakeExprContext(NULL, TransactionCommandContext); -#endif /* OMIT_PARTIAL_INDEX */ - - /* count the tuples as we insert them */ - nhtups = nitups = 0.0; - - /* start a heap scan */ - hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL); - - while (HeapTupleIsValid(htup = heap_getnext(hscan, 0))) - { - MemoryContextReset(econtext->ecxt_per_tuple_memory); - - nhtups += 1.0; - -#ifndef OMIT_PARTIAL_INDEX - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) - { - slot->val = htup; - if (ExecQual((List *) oldPred, econtext, false)) - { - nitups += 1.0; - continue; - } - } - - /* - * Skip this tuple if it doesn't satisfy the partial-index - * predicate - */ - if (pred != NULL) - { - slot->val = htup; - if (!ExecQual((List *) pred, econtext, false)) - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ - - nitups += 1.0; - - /* - * For the current heap tuple, extract all the attributes we use - * in this index, and note which are null. - */ - FormIndexDatum(indexInfo, - htup, - htupdesc, - econtext->ecxt_per_tuple_memory, - attdata, - nulls); - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(itupdesc, attdata, nulls); - itup->t_tid = htup->t_self; - - /* - * Since we already have the index relation locked, we call - * rtdoinsert directly. Normal access method calls dispatch - * through rtinsert, which locks the relation for write. 
This is - * the right thing to do if you're inserting single tups, but not - * when you're initializing the whole index at once. - */ - - res = rtdoinsert(index, itup, &rtState); - pfree(itup); - pfree(res); - } + /* do the heap scan */ + reltuples = IndexBuildHeapScan(heap, index, indexInfo, + rtbuildCallback, (void *) &buildstate); /* okay, all heap tuples are indexed */ - heap_endscan(hscan); - -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) - ExecDropTupleTable(tupleTable, true); -#endif /* OMIT_PARTIAL_INDEX */ - FreeExprContext(econtext); /* * Since we just counted the tuples in the heap, we update its stats @@ -259,19 +151,56 @@ rtbuild(PG_FUNCTION_ARGS) heap_close(heap, NoLock); index_close(index); - UpdateStats(hrelid, nhtups); - UpdateStats(irelid, nitups); - if (oldPred != NULL) - { - if (nitups == nhtups) - pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } + UpdateStats(hrelid, reltuples); + UpdateStats(irelid, buildstate.indtuples); } PG_RETURN_VOID(); } +/* + * Per-tuple callback from IndexBuildHeapScan + */ +static void +rtbuildCallback(Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state) +{ + RTBuildState *buildstate = (RTBuildState *) state; + IndexTuple itup; + InsertIndexResult res; + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(RelationGetDescr(index), attdata, nulls); + itup->t_tid = htup->t_self; + + /* rtree indexes don't index nulls, see notes in rtinsert */ + if (IndexTupleHasNulls(itup)) + { + pfree(itup); + return; + } + + /* + * Since we already have the index relation locked, we call + * rtdoinsert directly. Normal access method calls dispatch + * through rtinsert, which locks the relation for write. This is + * the right thing to do if you're inserting single tups, but not + * when you're initializing the whole index at once. 
+ */ + res = rtdoinsert(index, itup, &buildstate->rtState); + + if (res) + pfree(res); + + buildstate->indtuples += 1; + + pfree(itup); +} + /* * rtinsert -- wrapper for rtree tuple insertion. * @@ -285,10 +214,8 @@ rtinsert(PG_FUNCTION_ARGS) Datum *datum = (Datum *) PG_GETARG_POINTER(1); char *nulls = (char *) PG_GETARG_POINTER(2); ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3); - #ifdef NOT_USED Relation heapRel = (Relation) PG_GETARG_POINTER(4); - #endif InsertIndexResult res; IndexTuple itup; @@ -297,12 +224,24 @@ rtinsert(PG_FUNCTION_ARGS) /* generate an index tuple */ itup = index_formtuple(RelationGetDescr(r), datum, nulls); itup->t_tid = *ht_ctid; + + /* + * Currently, rtrees do not support indexing NULLs; considerable + * infrastructure work would have to be done to do anything reasonable + * with a NULL. + */ + if (IndexTupleHasNulls(itup)) + { + pfree(itup); + PG_RETURN_POINTER((InsertIndexResult) NULL); + } + initRtstate(&rtState, r); /* - * Notes in ExecUtils:ExecOpenIndices() - * - * RelationSetLockForWrite(r); + * Since rtree is not marked "amconcurrent" in pg_am, caller should + * have acquired exclusive lock on index relation. We need no locking + * here. */ res = rtdoinsert(r, itup, &rtState); @@ -1104,40 +1043,92 @@ freestack(RTSTACK *s) } } +/* + * Bulk deletion of all index entries pointing to a set of heap tuples. + * The set of target tuples is specified via a callback routine that tells + * whether any given heap tuple (identified by ItemPointer) is being deleted. + * + * Result: a palloc'd struct containing statistical info for VACUUM displays. 
+ */ Datum -rtdelete(PG_FUNCTION_ARGS) +rtbulkdelete(PG_FUNCTION_ARGS) { - Relation r = (Relation) PG_GETARG_POINTER(0); - ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1); - BlockNumber blkno; - OffsetNumber offnum; - Buffer buf; - Page page; + Relation rel = (Relation) PG_GETARG_POINTER(0); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1); + void *callback_state = (void *) PG_GETARG_POINTER(2); + IndexBulkDeleteResult *result; + BlockNumber num_pages; + double tuples_removed; + double num_index_tuples; + RetrieveIndexResult res; + IndexScanDesc iscan; + + tuples_removed = 0; + num_index_tuples = 0; /* - * Notes in ExecUtils:ExecOpenIndices() Also note that only vacuum - * deletes index tuples now... - * - * RelationSetLockForWrite(r); + * Since rtree is not marked "amconcurrent" in pg_am, caller should + * have acquired exclusive lock on index relation. We need no locking + * here. */ - blkno = ItemPointerGetBlockNumber(tid); - offnum = ItemPointerGetOffsetNumber(tid); + /* + * XXX generic implementation --- should be improved! 
+ */ - /* adjust any scans that will be affected by this deletion */ - rtadjscans(r, RTOP_DEL, blkno, offnum); + /* walk through the entire index */ + iscan = index_beginscan(rel, false, 0, (ScanKey) NULL); - /* delete the index tuple */ - buf = ReadBuffer(r, blkno); - page = BufferGetPage(buf); + while ((res = index_getnext(iscan, ForwardScanDirection)) + != (RetrieveIndexResult) NULL) + { + ItemPointer heapptr = &res->heap_iptr; - PageIndexTupleDelete(page, offnum); + if (callback(heapptr, callback_state)) + { + ItemPointer indexptr = &res->index_iptr; + BlockNumber blkno; + OffsetNumber offnum; + Buffer buf; + Page page; - WriteBuffer(buf); + blkno = ItemPointerGetBlockNumber(indexptr); + offnum = ItemPointerGetOffsetNumber(indexptr); - PG_RETURN_VOID(); + /* adjust any scans that will be affected by this deletion */ + /* (namely, my own scan) */ + rtadjscans(rel, RTOP_DEL, blkno, offnum); + + /* delete the index tuple */ + buf = ReadBuffer(rel, blkno); + page = BufferGetPage(buf); + + PageIndexTupleDelete(page, offnum); + + WriteBuffer(buf); + + tuples_removed += 1; + } + else + num_index_tuples += 1; + + pfree(res); + } + + index_endscan(iscan); + + /* return statistics */ + num_pages = RelationGetNumberOfBlocks(rel); + + result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult)); + result->num_pages = num_pages; + result->tuples_removed = tuples_removed; + result->num_index_tuples = num_index_tuples; + + PG_RETURN_POINTER(result); } + static void initRtstate(RTSTATE *rtstate, Relation index) { diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c index c9f1ab7b89..1311cfdc29 100644 --- a/src/backend/access/rtree/rtscan.c +++ b/src/backend/access/rtree/rtscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.37 2001/06/09 18:16:56 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.38 2001/07/15 22:48:16 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -60,13 +60,8 @@ rtbeginscan(PG_FUNCTION_ARGS) ScanKey key = (ScanKey) PG_GETARG_POINTER(3); IndexScanDesc s; - /* - * Let index_beginscan does its work... - * - * RelationSetLockForRead(r); - */ - s = RelationGetIndexScan(r, fromEnd, nkeys, key); + rtregscan(s); PG_RETURN_POINTER(s); @@ -282,6 +277,27 @@ rtdropscan(IndexScanDesc s) pfree(l); } +/* + * AtEOXact_rtree() --- clean up rtree subsystem at xact abort or commit. + * + * This is here because it needs to touch this module's static var RTScans. + */ +void +AtEOXact_rtree(void) +{ + /* + * Note: these actions should only be necessary during xact abort; but + * they can't hurt during a commit. + */ + + /* + * Reset the active-scans list to empty. We do not need to free the + * list elements, because they're all palloc()'d, so they'll go away + * at end of transaction anyway. + */ + RTScans = NULL; +} + void rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum) { diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 6467179231..d32a6dda97 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.106 2001/07/12 04:11:13 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.107 2001/07/15 22:48:16 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -156,7 +156,10 @@ #include +#include "access/gistscan.h" +#include "access/hash.h" #include "access/nbtree.h" +#include "access/rtree.h" #include "access/xact.h" #include "catalog/heap.h" #include "catalog/index.h" @@ -1040,7 +1043,10 @@ CommitTransaction(void) smgrDoPendingDeletes(true); AtEOXact_SPI(); + AtEOXact_gist(); + AtEOXact_hash(); AtEOXact_nbtree(); + AtEOXact_rtree(); AtCommit_Cache(); AtCommit_Locks(); AtEOXact_CatCache(true); @@ -1147,7 +1153,10 @@ 
AbortTransaction(void) smgrDoPendingDeletes(false); AtEOXact_SPI(); + AtEOXact_gist(); + AtEOXact_hash(); AtEOXact_nbtree(); + AtEOXact_rtree(); AtAbort_Cache(); AtEOXact_CatCache(false); AtAbort_Memory(); diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 23bcc42036..ee1a4b7c31 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.110 2001/06/25 23:03:03 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/bootstrap/bootstrap.c,v 1.111 2001/07/15 22:48:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1119,7 +1119,7 @@ build_indices() heap = heap_openr(ILHead->il_heap, NoLock); ind = index_openr(ILHead->il_ind); - index_build(heap, ind, ILHead->il_info, NULL); + index_build(heap, ind, ILHead->il_info); /* * In normal processing mode, index_build would close the heap and diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 1171376bb2..02e29441da 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.170 2001/06/29 21:08:24 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.171 2001/07/15 22:48:17 tgl Exp $ * * * INTERFACE ROUTINES @@ -1031,7 +1031,7 @@ RelationTruncateIndexes(Oid heapId) /* Initialize the index and rebuild */ InitIndexStrategy(indexInfo->ii_NumIndexAttrs, currentIndex, accessMethodId); - index_build(heapRelation, currentIndex, indexInfo, NULL); + index_build(heapRelation, currentIndex, indexInfo); /* * index_build will close both the heap and index relations (but diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 34989055b6..f0fa73e83d 100644 --- a/src/backend/catalog/index.c +++ 
b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.155 2001/06/27 23:31:38 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.156 2001/07/15 22:48:17 tgl Exp $ * * * INTERFACE ROUTINES @@ -41,6 +41,7 @@ #include "optimizer/clauses.h" #include "optimizer/prep.h" #include "parser/parse_func.h" +#include "storage/sinval.h" #include "storage/smgr.h" #include "utils/builtins.h" #include "utils/catcache.h" @@ -73,9 +74,6 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid, IndexInfo *indexInfo, Oid *classOids, bool islossy, bool primary); -static void DefaultBuild(Relation heapRelation, Relation indexRelation, - IndexInfo *indexInfo, Node *oldPred, - IndexStrategy indexStrategy); static Oid IndexGetRelation(Oid indexId); static bool activate_index(Oid indexId, bool activate, bool inplace); @@ -656,7 +654,7 @@ UpdateIndexPredicate(Oid indexoid, Node *oldPred, Node *predicate) } replace[Anum_pg_index_indpred - 1] = 'r'; - values[Anum_pg_index_indpred - 1] = (Datum) predText; + values[Anum_pg_index_indpred - 1] = PointerGetDatum(predText); newtup = heap_modifytuple(tuple, pg_index, values, nulls, replace); @@ -885,7 +883,7 @@ index_create(char *heapRelationName, /* XXX shouldn't we close the heap and index rels here? */ } else - index_build(heapRelation, indexRelation, indexInfo, NULL); + index_build(heapRelation, indexRelation, indexInfo); } /* ---------------------------------------------------------------- @@ -912,12 +910,13 @@ index_drop(Oid indexId) /* * To drop an index safely, we must grab exclusive lock on its parent * table; otherwise there could be other backends using the index! - * Exclusive lock on the index alone is insufficient because the index - * access routines are a little slipshod about obtaining adequate - * locking (see ExecOpenIndices()). We do grab exclusive lock on the - * index too, just to be safe. 
Both locks must be held till end of - * transaction, else other backends will still see this index in - * pg_index. + * Exclusive lock on the index alone is insufficient because another + * backend might be in the midst of devising a query plan that will use + * the index. The parser and planner take care to hold an appropriate + * lock on the parent table while working, but having them hold locks on + * all the indexes too seems overly complex. We do grab exclusive lock + * on the index too, just to be safe. Both locks must be held till end of + * transaction, else other backends will still see this index in pg_index. */ heapId = IndexGetRelation(indexId); userHeapRelation = heap_open(heapId, AccessExclusiveLock); @@ -1075,7 +1074,7 @@ BuildIndexInfo(HeapTuple indexTuple) /* * If partial index, convert predicate into expression nodetree */ - if (VARSIZE(&indexStruct->indpred) != 0) + if (VARSIZE(&indexStruct->indpred) > VARHDRSZ) { char *predString; @@ -1625,43 +1624,77 @@ UpdateStats(Oid relid, double reltuples) } -/* ---------------- - * DefaultBuild - * - * NB: this routine is dead code, and likely always has been, because - * there are no access methods that don't supply their own ambuild procedure. - * - * Anyone want to wager whether it would actually work if executed? 
- * ---------------- +/* + * index_build - invoke access-method-specific index build procedure */ -static void -DefaultBuild(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - Node *oldPred, - IndexStrategy indexStrategy) /* not used */ +void +index_build(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo) +{ + RegProcedure procedure; + + /* + * sanity checks + */ + Assert(RelationIsValid(indexRelation)); + Assert(PointerIsValid(indexRelation->rd_am)); + + procedure = indexRelation->rd_am->ambuild; + Assert(RegProcedureIsValid(procedure)); + + /* + * Call the access method's build procedure + */ + OidFunctionCall3(procedure, + PointerGetDatum(heapRelation), + PointerGetDatum(indexRelation), + PointerGetDatum(indexInfo)); +} + + +/* + * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed + * + * This is called back from an access-method-specific index build procedure + * after the AM has done whatever setup it needs. The parent heap relation + * is scanned to find tuples that should be entered into the index. Each + * such tuple is passed to the AM's callback routine, which does the right + * things to add it to the new index. After we return, the AM's index + * build procedure does whatever cleanup is needed; in particular, it should + * close the heap and index relations. + * + * The total count of heap tuples is returned. This is for updating pg_class + * statistics. (It's annoying not to be able to do that here, but we can't + * do it until after the relation is closed.) Note that the index AM itself + * must keep track of the number of index tuples; we don't do so here because + * the AM might reject some of the tuples for its own reasons, such as being + * unable to store NULLs. 
+ */ +double +IndexBuildHeapScan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + IndexBuildCallback callback, + void *callback_state) { HeapScanDesc scan; HeapTuple heapTuple; TupleDesc heapDescriptor; - Datum datum[INDEX_MAX_KEYS]; - char nullv[INDEX_MAX_KEYS]; - double reltuples, - indtuples; + Datum attdata[INDEX_MAX_KEYS]; + char nulls[INDEX_MAX_KEYS]; + double reltuples; Node *predicate = indexInfo->ii_Predicate; - -#ifndef OMIT_PARTIAL_INDEX TupleTable tupleTable; TupleTableSlot *slot; - -#endif ExprContext *econtext; - InsertIndexResult insertResult; + Snapshot snapshot; + TransactionId XmaxRecent; /* - * more & better checking is needed + * sanity checks */ - Assert(OidIsValid(indexRelation->rd_rel->relam)); /* XXX */ + Assert(OidIsValid(indexRelation->rd_rel->relam)); heapDescriptor = RelationGetDescr(heapRelation); @@ -1675,8 +1708,7 @@ DefaultBuild(Relation heapRelation, * We construct the ExprContext anyway since we need a per-tuple * temporary memory context for function evaluation -- tgl July 00 */ -#ifndef OMIT_PARTIAL_INDEX - if (predicate != NULL || oldPred != NULL) + if (predicate != NULL) { tupleTable = ExecCreateTupleTable(1); slot = ExecAllocTableSlot(tupleTable); @@ -1688,155 +1720,158 @@ DefaultBuild(Relation heapRelation, slot = NULL; } econtext = MakeExprContext(slot, TransactionCommandContext); -#else - econtext = MakeExprContext(NULL, TransactionCommandContext); -#endif /* OMIT_PARTIAL_INDEX */ /* - * Ok, begin our scan of the base relation. + * Ok, begin our scan of the base relation. We use SnapshotAny + * because we must retrieve all tuples and do our own time qual checks. 
*/ + if (IsBootstrapProcessingMode()) + { + snapshot = SnapshotNow; + XmaxRecent = InvalidTransactionId; + } + else + { + snapshot = SnapshotAny; + GetXmaxRecent(&XmaxRecent); + } + scan = heap_beginscan(heapRelation, /* relation */ 0, /* start at end */ - SnapshotNow, /* seeself */ + snapshot, /* seeself */ 0, /* number of keys */ (ScanKey) NULL); /* scan key */ - reltuples = indtuples = 0.0; + reltuples = 0; /* - * for each tuple in the base relation, we create an index tuple and - * add it to the index relation. We keep a running count of the - * number of tuples so that we can update pg_class with correct - * statistics when we're done building the index. + * Scan all tuples in the base relation. */ while (HeapTupleIsValid(heapTuple = heap_getnext(scan, 0))) { - MemoryContextReset(econtext->ecxt_per_tuple_memory); + bool tupleIsAlive; - reltuples += 1.0; - -#ifndef OMIT_PARTIAL_INDEX - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) + if (snapshot == SnapshotAny) { - slot->val = heapTuple; - if (ExecQual((List *) oldPred, econtext, false)) + /* do our own time qual check */ + bool indexIt; + uint16 sv_infomask; + + /* + * HeapTupleSatisfiesVacuum may update tuple's hint status bits. + * We could possibly get away with not locking the buffer here, + * since caller should hold ShareLock on the relation, but let's + * be conservative about it. + */ + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + sv_infomask = heapTuple->t_data->t_infomask; + + switch (HeapTupleSatisfiesVacuum(heapTuple->t_data, XmaxRecent)) { - indtuples += 1.0; - continue; + case HEAPTUPLE_DEAD: + indexIt = false; + tupleIsAlive = false; + break; + case HEAPTUPLE_LIVE: + indexIt = true; + tupleIsAlive = true; + break; + case HEAPTUPLE_RECENTLY_DEAD: + /* + * If tuple is recently deleted then we must index it + * anyway to keep VACUUM from complaining. 
+ */ + indexIt = true; + tupleIsAlive = false; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + /* + * This should not happen, if caller holds ShareLock on + * the parent relation. + */ + elog(ERROR, "IndexBuildHeapScan: concurrent insert in progress"); + indexIt = tupleIsAlive = false; /* keep compiler quiet */ + break; + case HEAPTUPLE_DELETE_IN_PROGRESS: + /* + * This should not happen, if caller holds ShareLock on + * the parent relation. + */ + elog(ERROR, "IndexBuildHeapScan: concurrent delete in progress"); + indexIt = tupleIsAlive = false; /* keep compiler quiet */ + break; + default: + elog(ERROR, "Unexpected HeapTupleSatisfiesVacuum result"); + indexIt = tupleIsAlive = false; /* keep compiler quiet */ + break; } + + /* check for hint-bit update by HeapTupleSatisfiesVacuum */ + if (sv_infomask != heapTuple->t_data->t_infomask) + SetBufferCommitInfoNeedsSave(scan->rs_cbuf); + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + if (! indexIt) + continue; + } + else + { + /* heap_getnext did the time qual check */ + tupleIsAlive = true; } + reltuples += 1; + + MemoryContextReset(econtext->ecxt_per_tuple_memory); + /* - * Skip this tuple if it doesn't satisfy the partial-index - * predicate + * In a partial index, discard tuples that don't satisfy the + * predicate. We can also discard recently-dead tuples, since + * VACUUM doesn't complain about tuple count mismatch for partial + * indexes. */ if (predicate != NULL) { - slot->val = heapTuple; + if (! tupleIsAlive) + continue; + ExecStoreTuple(heapTuple, slot, InvalidBuffer, false); if (!ExecQual((List *) predicate, econtext, false)) continue; } -#endif /* OMIT_PARTIAL_INDEX */ - - indtuples += 1.0; /* - * FormIndexDatum fills in its datum and null parameters with - * attribute information taken from the given heap tuple. + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. This also performs + * evaluation of the function, if this is a functional index. 
*/ FormIndexDatum(indexInfo, heapTuple, heapDescriptor, econtext->ecxt_per_tuple_memory, - datum, - nullv); + attdata, + nulls); - insertResult = index_insert(indexRelation, datum, nullv, - &(heapTuple->t_self), heapRelation); + /* + * You'd think we should go ahead and build the index tuple here, + * but some index AMs want to do further processing on the + * data first. So pass the attdata and nulls arrays, instead. + */ - if (insertResult) - pfree(insertResult); + /* Call the AM's callback routine to process the tuple */ + callback(indexRelation, heapTuple, attdata, nulls, tupleIsAlive, + callback_state); } heap_endscan(scan); -#ifndef OMIT_PARTIAL_INDEX - if (predicate != NULL || oldPred != NULL) + if (predicate != NULL) ExecDropTupleTable(tupleTable, true); -#endif /* OMIT_PARTIAL_INDEX */ FreeExprContext(econtext); - /* - * Since we just counted the tuples in the heap, we update its stats - * in pg_class to guarantee that the planner takes advantage of the - * index we just created. But, only update statistics during normal - * index definitions, not for indices on system catalogs created - * during bootstrap processing. We must close the relations before - * updating statistics to guarantee that the relcache entries are - * flushed when we increment the command counter in UpdateStats(). But - * we do not release any locks on the relations; those will be held - * until end of transaction. 
- */ - if (IsNormalProcessingMode()) - { - Oid hrelid = RelationGetRelid(heapRelation); - Oid irelid = RelationGetRelid(indexRelation); - - heap_close(heapRelation, NoLock); - index_close(indexRelation); - UpdateStats(hrelid, reltuples); - UpdateStats(irelid, indtuples); - if (oldPred != NULL) - { - if (indtuples == reltuples) - predicate = NULL; - UpdateIndexPredicate(irelid, oldPred, predicate); - } - } + return reltuples; } -/* ---------------- - * index_build - * ---------------- - */ -void -index_build(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - Node *oldPred) -{ - RegProcedure procedure; - - /* - * sanity checks - */ - Assert(RelationIsValid(indexRelation)); - Assert(PointerIsValid(indexRelation->rd_am)); - - procedure = indexRelation->rd_am->ambuild; - - /* - * use the access method build procedure if supplied, else default. - */ - if (RegProcedureIsValid(procedure)) - OidFunctionCall5(procedure, - PointerGetDatum(heapRelation), - PointerGetDatum(indexRelation), - PointerGetDatum(indexInfo), - PointerGetDatum(oldPred), - PointerGetDatum(RelationGetIndexStrategy(indexRelation))); - else - DefaultBuild(heapRelation, - indexRelation, - indexInfo, - oldPred, - RelationGetIndexStrategy(indexRelation)); -} /* * IndexGetRelation: given an index's relation OID, get the OID of the @@ -1967,7 +2002,7 @@ reindex_index(Oid indexId, bool force, bool inplace) /* Initialize the index and rebuild */ InitIndexStrategy(indexInfo->ii_NumIndexAttrs, iRel, accessMethodId); - index_build(heapRelation, iRel, indexInfo, NULL); + index_build(heapRelation, iRel, indexInfo); /* * index_build will close both the heap and index relations (but not diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c index 40ee84c018..d96d17752a 100644 --- a/src/backend/catalog/pg_operator.c +++ b/src/backend/catalog/pg_operator.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.59 2001/06/01 
02:41:35 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.60 2001/07/15 22:48:17 tgl Exp $ * * NOTES * these routines moved here from commands/define.c and somewhat cleaned up. @@ -402,7 +402,7 @@ OperatorShellMake(char *operatorName, * rightSortObjectId -- same as for commutatorObjectId * operatorProcedure -- must access the pg_procedure catalog to get the * ObjectId of the procedure that actually does the operator - * actions this is required. Do an amgetattr to find out the + * actions this is required. Do a lookup to find out the * return type of the procedure * restrictionProcedure -- must access the pg_procedure catalog to get * the ObjectId but this is optional diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c index 6a2bd7dc93..4fcbeeceb6 100644 --- a/src/backend/commands/command.c +++ b/src/backend/commands/command.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.134 2001/06/14 01:09:22 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.135 2001/07/15 22:48:17 tgl Exp $ * * NOTES * The PerformAddAttribute() code, like most of the relation @@ -269,7 +269,7 @@ PerformPortalClose(char *name, CommandDest dest) * Initial idea of ordering the tuple attributes so that all * the variable length domains occured last was scratched. Doing * so would not speed access too much (in general) and would create - * many complications in formtuple, amgetattr, and addattribute. + * many complications in formtuple, heap_getattr, and addattribute. 
* * scan attribute catalog for name conflict (within rel) * scan type catalog for absence of data type (if not arg) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 108c4ea378..7398b0b0ce 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/indexcmds.c,v 1.50 2001/06/13 21:44:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/indexcmds.c,v 1.51 2001/07/15 22:48:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -83,6 +83,8 @@ DefineIndex(char *heapRelationName, Oid *classObjectId; Oid accessMethodId; Oid relationId; + HeapTuple tuple; + Form_pg_am accessMethodForm; IndexInfo *indexInfo; int numberOfAttributes; List *cnfPred = NIL; @@ -107,27 +109,25 @@ DefineIndex(char *heapRelationName, heapRelationName); /* - * compute access method id + * look up the access method, verify it can handle the requested features */ - accessMethodId = GetSysCacheOid(AMNAME, - PointerGetDatum(accessMethodName), - 0, 0, 0); - if (!OidIsValid(accessMethodId)) + tuple = SearchSysCache(AMNAME, + PointerGetDatum(accessMethodName), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) elog(ERROR, "DefineIndex: access method \"%s\" not found", accessMethodName); + accessMethodId = tuple->t_data->t_oid; + accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); - /* - * XXX Hardwired hacks to check for limitations on supported index - * types. We really ought to be learning this info from entries in the - * pg_am table, instead of having it wired-in here! - */ - if (unique && accessMethodId != BTREE_AM_OID) - elog(ERROR, "DefineIndex: unique indices are only available with the btree access method"); + if (unique && ! accessMethodForm->amcanunique) + elog(ERROR, "DefineIndex: access method \"%s\" does not support UNIQUE indexes", + accessMethodName); + if (numberOfAttributes > 1 && ! 
accessMethodForm->amcanmulticol) + elog(ERROR, "DefineIndex: access method \"%s\" does not support multi-column indexes", + accessMethodName); - if (numberOfAttributes > 1 && - !( accessMethodId == BTREE_AM_OID || - accessMethodId == GIST_AM_OID)) - elog(ERROR, "DefineIndex: multi-column indices are only available with the btree or GiST access methods"); + ReleaseSysCache(tuple); /* * WITH clause reinstated to handle lossy indices. -- JMH, 7/22/96 @@ -298,7 +298,15 @@ ExtendIndex(char *indexRelationName, Expr *predicate, List *rangetable) InitIndexStrategy(indexInfo->ii_NumIndexAttrs, indexRelation, accessMethodId); - index_build(heapRelation, indexRelation, indexInfo, oldPred); + /* + * XXX currently BROKEN: if we want to support EXTEND INDEX, oldPred + * needs to be passed through to IndexBuildHeapScan. We could do this + * without help from the index AMs if we added an oldPred field to the + * IndexInfo struct. Currently I'm expecting that EXTEND INDEX will + * get removed, so I'm not going to do that --- tgl 7/14/01 + */ + + index_build(heapRelation, indexRelation, indexInfo); /* heap and index rels are closed as a side-effect of index_build */ } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index f41bb664a2..c53fa05812 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.204 2001/07/13 22:55:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.205 2001/07/15 22:48:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -128,7 +128,7 @@ static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage); static void vacuum_index(VacPageList vacpagelist, Relation indrel, double num_tuples, int keep_tuples); static void scan_index(Relation indrel, double num_tuples); -static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist); 
+static bool tid_reaped(ItemPointer itemptr, void *state); static void vac_update_fsm(Relation onerel, VacPageList fraged_pages, BlockNumber rel_pages); static VacPage copy_vac_page(VacPage vacpage); @@ -542,17 +542,11 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt) /* * Do the actual work --- either FULL or "lazy" vacuum - * - * XXX for the moment, lazy vac not supported unless CONCURRENT_VACUUM */ -#ifdef CONCURRENT_VACUUM if (vacstmt->full) full_vacuum_rel(onerel); else lazy_vacuum_rel(onerel, vacstmt); -#else - full_vacuum_rel(onerel); -#endif /* all done with this class, but hold lock until commit */ heap_close(onerel, NoLock); @@ -1049,7 +1043,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel, elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \ Tup %.0f: Vac %.0f, Keep/VTL %.0f/%u, UnUsed %.0f, MinLen %lu, MaxLen %lu; \ -Re-using: Free/Avail. Space %.0f/%.0f; EndEmpty/Avail. Pages %u/%u. %s", +Re-using: Free/Avail. Space %.0f/%.0f; EndEmpty/Avail. Pages %u/%u.\n\t%s", nblocks, changed_pages, vacuum_pages->num_pages, empty_pages, new_pages, num_tuples, tups_vacuumed, nkeep, vacrelstats->num_vtlinks, @@ -1965,7 +1959,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, } Assert(num_moved == checked_moved); - elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u. %s", + elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u; Tuple(s) moved: %u.\n\t%s", RelationGetRelationName(onerel), nblocks, blkno, num_moved, vac_show_rusage(&ru0)); @@ -2213,7 +2207,7 @@ scan_index(Relation indrel, double num_tuples) nipages = RelationGetNumberOfBlocks(indrel); vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false); - elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f. 
%s", + elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f.\n\t%s", RelationGetRelationName(indrel), nipages, nitups, vac_show_rusage(&ru0)); @@ -2247,85 +2241,55 @@ static void vacuum_index(VacPageList vacpagelist, Relation indrel, double num_tuples, int keep_tuples) { - RetrieveIndexResult res; - IndexScanDesc iscan; - ItemPointer heapptr; - int tups_vacuumed; - BlockNumber num_pages; - double num_index_tuples; - VacPage vp; + IndexBulkDeleteResult *stats; VacRUsage ru0; vac_init_rusage(&ru0); - /* walk through the entire index */ - iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL); - tups_vacuumed = 0; - num_index_tuples = 0; + /* Do bulk deletion */ + stats = index_bulk_delete(indrel, tid_reaped, (void *) vacpagelist); - while ((res = index_getnext(iscan, ForwardScanDirection)) - != (RetrieveIndexResult) NULL) - { - heapptr = &res->heap_iptr; - - if ((vp = tid_reaped(heapptr, vacpagelist)) != (VacPage) NULL) - { -#ifdef NOT_USED - elog(DEBUG, "<%x,%x> -> <%x,%x>", - ItemPointerGetBlockNumber(&(res->index_iptr)), - ItemPointerGetOffsetNumber(&(res->index_iptr)), - ItemPointerGetBlockNumber(&(res->heap_iptr)), - ItemPointerGetOffsetNumber(&(res->heap_iptr))); -#endif - if (vp->offsets_free == 0) - { - elog(NOTICE, "Index %s: pointer to EmptyPage (blk %u off %u) - fixing", - RelationGetRelationName(indrel), - vp->blkno, ItemPointerGetOffsetNumber(heapptr)); - } - ++tups_vacuumed; - index_delete(indrel, &res->index_iptr); - } - else - num_index_tuples += 1; - - pfree(res); - } - - index_endscan(iscan); + if (!stats) + return; /* now update statistics in pg_class */ - num_pages = RelationGetNumberOfBlocks(indrel); vac_update_relstats(RelationGetRelid(indrel), - num_pages, num_index_tuples, false); + stats->num_pages, stats->num_index_tuples, + false); - elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %u. 
%s", - RelationGetRelationName(indrel), num_pages, - num_index_tuples - keep_tuples, tups_vacuumed, + elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s", + RelationGetRelationName(indrel), stats->num_pages, + stats->num_index_tuples - keep_tuples, stats->tuples_removed, vac_show_rusage(&ru0)); /* * Check for tuple count mismatch. If the index is partial, then * it's OK for it to have fewer tuples than the heap; else we got trouble. */ - if (num_index_tuples != num_tuples + keep_tuples) + if (stats->num_index_tuples != num_tuples + keep_tuples) { - if (num_index_tuples > num_tuples + keep_tuples || + if (stats->num_index_tuples > num_tuples + keep_tuples || ! is_partial_index(indrel)) elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%.0f) IS NOT THE SAME AS HEAP' (%.0f).\ \n\tRecreate the index.", - RelationGetRelationName(indrel), num_index_tuples, num_tuples); + RelationGetRelationName(indrel), + stats->num_index_tuples, num_tuples); } + + pfree(stats); } /* * tid_reaped() -- is a particular tid reaped? * + * This has the right signature to be an IndexBulkDeleteCallback. + * * vacpagelist->VacPage_array is sorted in right order. */ -static VacPage -tid_reaped(ItemPointer itemptr, VacPageList vacpagelist) +static bool +tid_reaped(ItemPointer itemptr, void *state) { + VacPageList vacpagelist = (VacPageList) state; OffsetNumber ioffno; OffsetNumber *voff; VacPage vp, @@ -2342,8 +2306,8 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist) sizeof(VacPage), vac_cmp_blk); - if (vpp == (VacPage *) NULL) - return (VacPage) NULL; + if (vpp == NULL) + return false; /* ok - we are on a partially or fully reaped page */ vp = *vpp; @@ -2351,7 +2315,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist) if (vp->offsets_free == 0) { /* this is EmptyPage, so claim all tuples on it are reaped!!! 
*/ - return vp; + return true; } voff = (OffsetNumber *) vac_bsearch((void *) &ioffno, @@ -2360,11 +2324,11 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist) sizeof(OffsetNumber), vac_cmp_offno); - if (voff == (OffsetNumber *) NULL) - return (VacPage) NULL; + if (voff == NULL) + return false; /* tid is reaped */ - return vp; + return true; } /* @@ -2595,6 +2559,13 @@ is_partial_index(Relation indrel) HeapTuple cachetuple; Form_pg_index indexStruct; + /* + * If the index's AM doesn't support nulls, it's partial for our purposes + */ + if (! indrel->rd_am->amindexnulls) + return true; + + /* Otherwise, look to see if there's a partial-index predicate */ cachetuple = SearchSysCache(INDEXRELID, ObjectIdGetDatum(RelationGetRelid(indrel)), 0, 0, 0); @@ -2603,7 +2574,7 @@ is_partial_index(Relation indrel) RelationGetRelid(indrel)); indexStruct = (Form_pg_index) GETSTRUCT(cachetuple); - result = (VARSIZE(&indexStruct->indpred) != 0); + result = (VARSIZE(&indexStruct->indpred) > VARHDRSZ); ReleaseSysCache(cachetuple); return result; diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 07529fe265..b78f933f0c 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.1 2001/07/13 22:55:59 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.2 2001/07/15 22:48:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -112,7 +112,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats, ItemPointer itemptr); static void lazy_record_free_space(LVRelStats *vacrelstats, BlockNumber page, Size avail); -static bool lazy_tid_reaped(ItemPointer itemptr, LVRelStats *vacrelstats); +static bool lazy_tid_reaped(ItemPointer itemptr, void *state); static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats); static int 
vac_cmp_itemptr(const void *left, const void *right); @@ -371,11 +371,11 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, if (pgchanged) { - WriteBuffer(buf); + SetBufferCommitInfoNeedsSave(buf); changed_pages++; } - else - ReleaseBuffer(buf); + + ReleaseBuffer(buf); } /* If any tuples need to be deleted, perform final vacuum cycle */ @@ -507,64 +507,40 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, static void lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats) { - RetrieveIndexResult res; - IndexScanDesc iscan; - int tups_vacuumed; - BlockNumber num_pages; - double num_index_tuples; + IndexBulkDeleteResult *stats; VacRUsage ru0; vac_init_rusage(&ru0); /* - * Only btree and hash indexes are currently safe for concurrent access; - * see notes in ExecOpenIndices(). XXX should rely on index AM for this + * If index is unsafe for concurrent access, must lock it. */ - if (indrel->rd_rel->relam != BTREE_AM_OID && - indrel->rd_rel->relam != HASH_AM_OID) + if (! indrel->rd_am->amconcurrent) LockRelation(indrel, AccessExclusiveLock); - /* XXX should use a bulk-delete call here */ - - /* walk through the entire index */ - iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL); - tups_vacuumed = 0; - num_index_tuples = 0; - - while ((res = index_getnext(iscan, ForwardScanDirection)) - != (RetrieveIndexResult) NULL) - { - ItemPointer heapptr = &res->heap_iptr; - - if (lazy_tid_reaped(heapptr, vacrelstats)) - { - index_delete(indrel, &res->index_iptr); - ++tups_vacuumed; - } - else - num_index_tuples += 1; - - pfree(res); - } - - index_endscan(iscan); - - /* now update statistics in pg_class */ - num_pages = RelationGetNumberOfBlocks(indrel); - vac_update_relstats(RelationGetRelid(indrel), - num_pages, num_index_tuples, false); + /* Do bulk deletion */ + stats = index_bulk_delete(indrel, lazy_tid_reaped, (void *) vacrelstats); /* * Release lock acquired above. 
*/ - if (indrel->rd_rel->relam != BTREE_AM_OID && - indrel->rd_rel->relam != HASH_AM_OID) + if (! indrel->rd_am->amconcurrent) UnlockRelation(indrel, AccessExclusiveLock); - elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %u.\n\t%s", - RelationGetRelationName(indrel), num_pages, - num_index_tuples, tups_vacuumed, - vac_show_rusage(&ru0)); + /* now update statistics in pg_class */ + if (stats) + { + vac_update_relstats(RelationGetRelid(indrel), + stats->num_pages, stats->num_index_tuples, + false); + + elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s", + RelationGetRelationName(indrel), stats->num_pages, + stats->num_index_tuples, stats->tuples_removed, + vac_show_rusage(&ru0)); + + pfree(stats); + } } /* @@ -960,11 +936,14 @@ lazy_record_free_space(LVRelStats *vacrelstats, /* * lazy_tid_reaped() -- is a particular tid deletable? * + * This has the right signature to be an IndexBulkDeleteCallback. + * * Assumes dead_tuples array is in sorted order. */ static bool -lazy_tid_reaped(ItemPointer itemptr, LVRelStats *vacrelstats) +lazy_tid_reaped(ItemPointer itemptr, void *state) { + LVRelStats *vacrelstats = (LVRelStats *) state; ItemPointer res; res = (ItemPointer) bsearch((void *) itemptr, diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 72aceb35f0..9465604b58 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.75 2001/03/22 06:16:12 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execUtils.c,v 1.76 2001/07/15 22:48:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -504,25 +504,26 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo) /* * Open (and lock, if necessary) the index relation * - * Hack for not btree and hash indices: they use relation level - * exclusive locking on update (i.e. 
- they are not ready for - * MVCC) and so we have to exclusively lock indices here to - * prevent deadlocks if we will scan them - index_beginscan places - * AccessShareLock, indices update methods don't use locks at all. - * We release this lock in ExecCloseIndices. Note, that hashes use - * page level locking - i.e. are not deadlock-free - let's them be - * on their way -:)) vadim 03-12-1998 + * If the index AM is not safe for concurrent updates, obtain + * an exclusive lock on the index to lock out other updaters as + * well as readers (index_beginscan places AccessShareLock). + * We will release this lock in ExecCloseIndices. * - * If there are multiple not-btree-or-hash indices, all backends must - * lock the indices in the same order or we will get deadlocks - * here during concurrent updates. This is now guaranteed by + * If the index AM supports concurrent updates, we obtain no lock + * here at all, which is a tad weird, but safe since any critical + * operation on the index (like deleting it) will acquire exclusive + * lock on the parent table. Perhaps someday we should acquire + * RowExclusiveLock on the index here? + * + * If there are multiple not-concurrent-safe indexes, all backends + * must lock the indexes in the same order or we will get deadlocks + * here during concurrent updates. This is guaranteed by * RelationGetIndexList(), which promises to return the index list - * in OID order. tgl 06-19-2000 + * in OID order. */ indexDesc = index_open(indexOid); - if (indexDesc->rd_rel->relam != BTREE_AM_OID && - indexDesc->rd_rel->relam != HASH_AM_OID) + if (! 
indexDesc->rd_am->amconcurrent) LockRelation(indexDesc, AccessExclusiveLock); /* @@ -560,24 +561,21 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) { int i; int numIndices; - RelationPtr relationDescs; + RelationPtr indexDescs; numIndices = resultRelInfo->ri_NumIndices; - relationDescs = resultRelInfo->ri_IndexRelationDescs; + indexDescs = resultRelInfo->ri_IndexRelationDescs; for (i = 0; i < numIndices; i++) { - if (relationDescs[i] == NULL) + if (indexDescs[i] == NULL) continue; - /* - * See notes in ExecOpenIndices. - */ - if (relationDescs[i]->rd_rel->relam != BTREE_AM_OID && - relationDescs[i]->rd_rel->relam != HASH_AM_OID) - UnlockRelation(relationDescs[i], AccessExclusiveLock); + /* Drop lock, if one was acquired by ExecOpenIndices */ + if (! indexDescs[i]->rd_am->amconcurrent) + UnlockRelation(indexDescs[i], AccessExclusiveLock); - index_close(relationDescs[i]); + index_close(indexDescs[i]); } /* diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index bd118b876b..ec26ed05cc 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.61 2001/06/22 19:16:22 wieck Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.62 2001/07/15 22:48:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -993,7 +993,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, Plan *parent) ExecOpenScanR(reloid, /* relation */ 0, /* nkeys */ (ScanKey) NULL, /* scan key */ - 0, /* is index */ + false, /* is index */ direction, /* scan direction */ estate->es_snapshot, /* */ &currentRelation, /* return: rel desc */ @@ -1023,7 +1023,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, Plan *parent) ExecOpenScanR(indexOid, /* relation */ numScanKeys[i], /* nkeys */ scanKeys[i], /* scan key */ - true, /* is index */ + true, /* is index */ direction, 
/* scan direction */ estate->es_snapshot, &(relationDescs[i]), /* return: rel desc */ diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 5a16b77085..48beffd792 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.30 2001/05/27 20:42:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.31 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -166,7 +166,7 @@ InitScanRelation(SeqScan *node, EState *estate, ExecOpenScanR(reloid, /* relation */ 0, /* nkeys */ NULL, /* scan key */ - 0, /* is index */ + false, /* is index */ direction, /* scan direction */ estate->es_snapshot, &currentRelation, /* return: rel desc */ diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 749390a4d2..3f537fb0d9 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.66 2001/05/20 20:28:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.67 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -126,7 +126,7 @@ find_secondary_indexes(Oid relationObjectId) /* Extract info from the pg_index tuple */ info->indexoid = index->indexrelid; info->indproc = index->indproc; /* functional index ?? */ - if (VARSIZE(&index->indpred) != 0) /* partial index ?? */ + if (VARSIZE(&index->indpred) > VARHDRSZ) /* partial index ?? 
*/ { char *predString; diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index d0766d15d7..4e278c0489 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/datum.c,v 1.20 2001/03/22 03:59:50 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/datum.c,v 1.21 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,7 +118,7 @@ datumCopy(Datum value, bool typByVal, int typLen) * * Free the space occupied by a datum CREATED BY "datumCopy" * - * NOTE: DO NOT USE THIS ROUTINE with datums returned by amgetattr() etc. + * NOTE: DO NOT USE THIS ROUTINE with datums returned by heap_getattr() etc. * ONLY datums created by "datumCopy" can be freed! *------------------------------------------------------------------------- */ diff --git a/src/bin/initdb/initdb.sh b/src/bin/initdb/initdb.sh index 42dea03a24..38de46269a 100644 --- a/src/bin/initdb/initdb.sh +++ b/src/bin/initdb/initdb.sh @@ -27,7 +27,7 @@ # Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # -# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.129 2001/06/23 23:29:48 petere Exp $ +# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.130 2001/07/15 22:48:18 tgl Exp $ # #------------------------------------------------------------------------- @@ -813,7 +813,7 @@ echo "UPDATE pg_database SET \ | "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely echo "Vacuuming database." -echo "VACUUM ANALYZE" \ +echo "VACUUM FULL ANALYZE" \ | "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely echo "Copying template1 to template0." 
@@ -824,7 +824,7 @@ echo "UPDATE pg_database SET \ datallowconn = 'f' \ WHERE datname = 'template0'" \ | "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely -echo "VACUUM pg_database" \ +echo "VACUUM FULL pg_database" \ | "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 0102d8c7e4..db6795c093 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: genam.h,v 1.25 2001/01/24 19:43:19 momjian Exp $ + * $Id: genam.h,v 1.26 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,8 +18,21 @@ #include "access/relscan.h" #include "access/sdir.h" + +/* Struct for statistics returned by bulk-delete operation */ +typedef struct IndexBulkDeleteResult +{ + BlockNumber num_pages; /* pages remaining in index */ + double tuples_removed; /* # removed by bulk-delete operation */ + double num_index_tuples; /* # remaining */ +} IndexBulkDeleteResult; + +/* Typedef for callback function to determine if a tuple is bulk-deletable */ +typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state); + + /* ---------------- - * generalized index_ interface routines + * generalized index_ interface routines (in indexam.c) * ---------------- */ extern Relation index_open(Oid relationId); @@ -29,7 +42,6 @@ extern InsertIndexResult index_insert(Relation relation, Datum *datum, char *nulls, ItemPointer heap_t_ctid, Relation heapRel); -extern void index_delete(Relation relation, ItemPointer indexItem); extern IndexScanDesc index_beginscan(Relation relation, bool scanFromEnd, uint16 numberOfKeys, ScanKey key); extern void index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key); @@ -38,6 +50,9 @@ extern void 
index_markpos(IndexScanDesc scan); extern void index_restrpos(IndexScanDesc scan); extern RetrieveIndexResult index_getnext(IndexScanDesc scan, ScanDirection direction); +extern IndexBulkDeleteResult *index_bulk_delete(Relation relation, + IndexBulkDeleteCallback callback, + void *callback_state); extern RegProcedure index_cost_estimator(Relation relation); extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum); diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 9e8091a8a0..b555a195db 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: gist.h,v 1.28 2001/05/31 18:16:55 tgl Exp $ + * $Id: gist.h,v 1.29 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -162,7 +162,7 @@ typedef struct GISTENTRY /* gist.c */ extern Datum gistbuild(PG_FUNCTION_ARGS); extern Datum gistinsert(PG_FUNCTION_ARGS); -extern Datum gistdelete(PG_FUNCTION_ARGS); +extern Datum gistbulkdelete(PG_FUNCTION_ARGS); extern void _gistdump(Relation r); extern void gistfreestack(GISTSTACK *s); extern void initGISTstate(GISTSTATE *giststate, Relation index); diff --git a/src/include/access/gistscan.h b/src/include/access/gistscan.h index d4f9403c10..f7955bce9e 100644 --- a/src/include/access/gistscan.h +++ b/src/include/access/gistscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: gistscan.h,v 1.15 2001/05/30 19:53:39 tgl Exp $ + * $Id: gistscan.h,v 1.16 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,5 +22,6 @@ extern Datum gistmarkpos(PG_FUNCTION_ARGS); extern Datum gistrestrpos(PG_FUNCTION_ARGS); extern Datum 
gistendscan(PG_FUNCTION_ARGS); extern void gistadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum); +extern void AtEOXact_gist(void); #endif /* GISTSCAN_H */ diff --git a/src/include/access/hash.h b/src/include/access/hash.h index 871629a122..e973b81a7c 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: hash.h,v 1.38 2001/03/22 04:00:27 momjian Exp $ + * $Id: hash.h,v 1.39 2001/07/15 22:48:18 tgl Exp $ * * NOTES * modeled after Margo Seltzer's hash implementation for unix. @@ -55,7 +55,7 @@ typedef uint32 PageOffset; #define OADDR_OF(S,O) ((OverflowPageAddress)((uint32)((uint32)(S) << SPLITSHIFT) + (O))) #define BUCKET_TO_BLKNO(B) \ - ((Bucket) ((B) + ((B) ? metap->SPARES[_hash_log2((B)+1)-1] : 0)) + 1) + ((Bucket) ((B) + ((B) ? metap->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1) #define OADDR_TO_BLKNO(B) \ ((BlockNumber) \ (BUCKET_TO_BLKNO ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B)))); @@ -165,16 +165,6 @@ typedef struct HashMetaPageData typedef HashMetaPageData *HashMetaPage; -/* Short hands for accessing structure */ -#define OVFL_POINT hashm_ovflpoint -#define LAST_FREED hashm_lastfreed -#define MAX_BUCKET hashm_maxbucket -#define FFACTOR hashm_ffactor -#define HIGH_MASK hashm_highmask -#define LOW_MASK hashm_lowmask -#define NKEYS hashm_nkeys -#define SPARES hashm_spares - extern bool BuildingHash; typedef struct HashItemData @@ -256,7 +246,7 @@ extern Datum hashrescan(PG_FUNCTION_ARGS); extern Datum hashendscan(PG_FUNCTION_ARGS); extern Datum hashmarkpos(PG_FUNCTION_ARGS); extern Datum hashrestrpos(PG_FUNCTION_ARGS); -extern Datum hashdelete(PG_FUNCTION_ARGS); +extern Datum hashbulkdelete(PG_FUNCTION_ARGS); /* * Datatype-specific hash functions in hashfunc.c. 
@@ -310,6 +300,7 @@ extern void _hash_expandtable(Relation rel, Buffer metabuf); extern void _hash_regscan(IndexScanDesc scan); extern void _hash_dropscan(IndexScanDesc scan); extern void _hash_adjscans(Relation rel, ItemPointer tid); +extern void AtEOXact_hash(void); /* hashsearch.c */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 1ba7f96330..789dd02742 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nbtree.h,v 1.55 2001/03/22 04:00:29 momjian Exp $ + * $Id: nbtree.h,v 1.56 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -319,6 +319,8 @@ typedef struct xl_btree_newroot */ extern bool BuildingBtree; /* in nbtree.c */ +extern void AtEOXact_nbtree(void); + extern Datum btbuild(PG_FUNCTION_ARGS); extern Datum btinsert(PG_FUNCTION_ARGS); extern Datum btgettuple(PG_FUNCTION_ARGS); @@ -328,7 +330,7 @@ extern void btmovescan(IndexScanDesc scan, Datum v); extern Datum btendscan(PG_FUNCTION_ARGS); extern Datum btmarkpos(PG_FUNCTION_ARGS); extern Datum btrestrpos(PG_FUNCTION_ARGS); -extern Datum btdelete(PG_FUNCTION_ARGS); +extern Datum btbulkdelete(PG_FUNCTION_ARGS); extern void btree_redo(XLogRecPtr lsn, XLogRecord *record); extern void btree_undo(XLogRecPtr lsn, XLogRecord *record); @@ -346,20 +348,12 @@ extern InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem, extern void _bt_metapinit(Relation rel); extern Buffer _bt_getroot(Relation rel, int access); extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access); -extern void _bt_relbuf(Relation rel, Buffer buf, int access); +extern void _bt_relbuf(Relation rel, Buffer buf); extern void _bt_wrtbuf(Relation rel, Buffer buf); extern void _bt_wrtnorelbuf(Relation rel, Buffer buf); extern void _bt_pageinit(Page page, 
Size size); extern void _bt_metaproot(Relation rel, BlockNumber rootbknum, int level); -extern void _bt_pagedel(Relation rel, ItemPointer tid); - -/* - * prototypes for functions in nbtscan.c - */ -extern void _bt_regscan(IndexScanDesc scan); -extern void _bt_dropscan(IndexScanDesc scan); -extern void _bt_adjscans(Relation rel, ItemPointer tid); -extern void AtEOXact_nbtree(void); +extern void _bt_itemdel(Relation rel, Buffer buf, ItemPointer tid); /* * prototypes for functions in nbtsearch.c diff --git a/src/include/access/rtree.h b/src/include/access/rtree.h index 210e873981..237937fe46 100644 --- a/src/include/access/rtree.h +++ b/src/include/access/rtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: rtree.h,v 1.23 2001/05/30 19:53:39 tgl Exp $ + * $Id: rtree.h,v 1.24 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -110,7 +110,7 @@ extern void freestack(RTSTACK *s); * Defined in access/rtree/ */ extern Datum rtinsert(PG_FUNCTION_ARGS); -extern Datum rtdelete(PG_FUNCTION_ARGS); +extern Datum rtbulkdelete(PG_FUNCTION_ARGS); extern Datum rtgettuple(PG_FUNCTION_ARGS); extern Datum rtbeginscan(PG_FUNCTION_ARGS); @@ -129,6 +129,7 @@ extern void rtree_desc(char *buf, uint8 xl_info, char *rec); /* rtscan.c */ extern void rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum); +extern void AtEOXact_rtree(void); /* rtstrat.c */ extern RegProcedure RTMapOperator(Relation r, AttrNumber attnum, diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 162bf4fe5f..a4a132bc41 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: catversion.h,v 
1.85 2001/06/22 19:16:24 wieck Exp $ + * $Id: catversion.h,v 1.86 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200106221 +#define CATALOG_VERSION_NO 200107151 #endif diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index e139dde2cc..f93de9c2e9 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: index.h,v 1.35 2001/05/30 20:52:34 momjian Exp $ + * $Id: index.h,v 1.36 2001/07/15 22:48:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,6 +17,16 @@ #include "access/itup.h" #include "nodes/execnodes.h" + +/* Typedef for callback function for IndexBuildHeapScan */ +typedef void (*IndexBuildCallback) (Relation index, + HeapTuple htup, + Datum *attdata, + char *nulls, + bool tupleIsAlive, + void *state); + + extern Form_pg_am AccessMethodObjectIdGetForm(Oid accessMethodObjectId, MemoryContext resultCxt); @@ -56,7 +66,13 @@ extern bool SetReindexProcessing(bool processing); extern bool IsReindexProcessing(void); extern void index_build(Relation heapRelation, Relation indexRelation, - IndexInfo *indexInfo, Node *oldPred); + IndexInfo *indexInfo); + +extern double IndexBuildHeapScan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + IndexBuildCallback callback, + void *callback_state); extern bool reindex_index(Oid indexId, bool force, bool inplace); extern bool activate_indexes_of_a_table(Oid relid, bool activate); diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index f2de6fb6c0..3bf79404d8 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global 
Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_am.h,v 1.17 2001/05/30 19:55:08 tgl Exp $ + * $Id: pg_am.h,v 1.18 2001/07/15 22:48:18 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -38,30 +38,26 @@ CATALOG(pg_am) { NameData amname; /* access method name */ int4 amowner; /* usesysid of creator */ - int2 amstrategies; /* total NUMBER of strategies by which we - * can traverse/search this AM */ + int2 amstrategies; /* total NUMBER of strategies (operators) by + * which we can traverse/search this AM */ int2 amsupport; /* total NUMBER of support functions that * this AM uses */ int2 amorderstrategy;/* if this AM has a sort order, the * strategy number of the sort operator. * Zero if AM is not ordered. */ + bool amcanunique; /* does AM support UNIQUE indexes? */ + bool amcanmulticol; /* does AM support multi-column indexes? */ + bool amindexnulls; /* does AM support NULL index entries? */ + bool amconcurrent; /* does AM support concurrent updates? 
*/ regproc amgettuple; /* "next valid tuple" function */ regproc aminsert; /* "insert this tuple" function */ - regproc amdelete; /* "delete this tuple" function */ - regproc amgetattr; /* - deprecated */ - regproc amsetlock; /* - deprecated */ - regproc amsettid; /* - deprecated */ - regproc amfreetuple; /* - deprecated */ regproc ambeginscan; /* "start new scan" function */ regproc amrescan; /* "restart this scan" function */ regproc amendscan; /* "end this scan" function */ regproc ammarkpos; /* "mark current scan position" function */ regproc amrestrpos; /* "restore marked scan position" function */ - regproc amopen; /* - deprecated */ - regproc amclose; /* - deprecated */ regproc ambuild; /* "build new index" function */ - regproc amcreate; /* - deprecated */ - regproc amdestroy; /* - deprecated */ + regproc ambulkdelete; /* bulk-delete function */ regproc amcostestimate; /* estimate cost of an indexscan */ } FormData_pg_am; @@ -76,46 +72,40 @@ typedef FormData_pg_am *Form_pg_am; * compiler constants for pg_am * ---------------- */ -#define Natts_pg_am 23 +#define Natts_pg_am 19 #define Anum_pg_am_amname 1 #define Anum_pg_am_amowner 2 #define Anum_pg_am_amstrategies 3 #define Anum_pg_am_amsupport 4 #define Anum_pg_am_amorderstrategy 5 -#define Anum_pg_am_amgettuple 6 -#define Anum_pg_am_aminsert 7 -#define Anum_pg_am_amdelete 8 -#define Anum_pg_am_amgetattr 9 -#define Anum_pg_am_amsetlock 10 -#define Anum_pg_am_amsettid 11 -#define Anum_pg_am_amfreetuple 12 -#define Anum_pg_am_ambeginscan 13 -#define Anum_pg_am_amrescan 14 -#define Anum_pg_am_amendscan 15 -#define Anum_pg_am_ammarkpos 16 -#define Anum_pg_am_amrestrpos 17 -#define Anum_pg_am_amopen 18 -#define Anum_pg_am_amclose 19 -#define Anum_pg_am_ambuild 20 -#define Anum_pg_am_amcreate 21 -#define Anum_pg_am_amdestroy 22 -#define Anum_pg_am_amcostestimate 23 +#define Anum_pg_am_amcanunique 6 +#define Anum_pg_am_amcanmulticol 7 +#define Anum_pg_am_amindexnulls 8 +#define Anum_pg_am_amconcurrent 9 +#define 
Anum_pg_am_amgettuple 10 +#define Anum_pg_am_aminsert 11 +#define Anum_pg_am_ambeginscan 12 +#define Anum_pg_am_amrescan 13 +#define Anum_pg_am_amendscan 14 +#define Anum_pg_am_ammarkpos 15 +#define Anum_pg_am_amrestrpos 16 +#define Anum_pg_am_ambuild 17 +#define Anum_pg_am_ambulkdelete 18 +#define Anum_pg_am_amcostestimate 19 /* ---------------- * initial contents of pg_am * ---------------- */ -DATA(insert OID = 402 ( rtree PGUID 8 3 0 rtgettuple rtinsert rtdelete - - - - rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos - - rtbuild - - rtcostestimate )); -DESCR(""); -DATA(insert OID = 403 ( btree PGUID 5 1 1 btgettuple btinsert btdelete - - - - btbeginscan btrescan btendscan btmarkpos btrestrpos - - btbuild - - btcostestimate )); -DESCR(""); +DATA(insert OID = 402 ( rtree PGUID 8 3 0 f f f f rtgettuple rtinsert rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete rtcostestimate )); +DESCR("r-tree index access method"); +DATA(insert OID = 403 ( btree PGUID 5 1 1 t t t t btgettuple btinsert btbeginscan btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btcostestimate )); +DESCR("b-tree index access method"); #define BTREE_AM_OID 403 -DATA(insert OID = 405 ( hash PGUID 1 1 0 hashgettuple hashinsert hashdelete - - - - hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos - - hashbuild - - hashcostestimate )); -DESCR(""); -#define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist PGUID 100 7 0 gistgettuple gistinsert gistdelete - - - - gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos - - gistbuild - - gistcostestimate )); -DESCR(""); -#define GIST_AM_OID 783 +DATA(insert OID = 405 ( hash PGUID 1 1 0 f f f t hashgettuple hashinsert hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashcostestimate )); +DESCR("hash index access method"); +DATA(insert OID = 783 ( gist PGUID 100 7 0 f t f f gistgettuple gistinsert gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos gistbuild 
gistbulkdelete gistcostestimate )); +DESCR("GiST index access method"); #endif /* PG_AM_H */ diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index a490c8b582..521a3ec3b6 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_index.h,v 1.21 2001/07/09 18:35:52 momjian Exp $ + * $Id: pg_index.h,v 1.22 2001/07/15 22:48:18 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -58,7 +58,9 @@ CATALOG(pg_index) bool indisprimary; /* is this index for primary key */ Oid indreference; /* oid of index of referenced relation (ie * - this index for foreign key */ - text indpred; /* query plan for partial index predicate */ + /* VARIABLE LENGTH FIELD: */ + text indpred; /* expression tree for predicate, + * if a partial index */ } FormData_pg_index; /* ---------------- diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 506e2b2800..f249fcf2d9 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.196 2001/07/11 22:14:02 momjian Exp $ + * $Id: pg_proc.h,v 1.197 2001/07/15 22:48:18 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -210,11 +210,6 @@ DESCR("not equal"); DATA(insert OID = 89 ( version PGUID 12 f t f t 0 f 25 "" 100 0 0 100 pgsql_version - )); DESCR("PostgreSQL version string"); -DATA(insert OID = 1265 ( rtcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 rtcostestimate - )); -DESCR("r-tree cost estimator"); -DATA(insert OID = 1268 ( btcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 btcostestimate - )); 
-DESCR("btree cost estimator"); - /* OIDS 100 - 199 */ DATA(insert OID = 100 ( int8fac PGUID 12 f t t t 1 f 20 "20" 100 0 0 100 int8fac - )); @@ -671,11 +666,9 @@ DESCR("convert float4 to int4"); DATA(insert OID = 320 ( rtinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 rtinsert - )); DESCR("r-tree(internal)"); -DATA(insert OID = 321 ( rtdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 rtdelete - )); -DESCR("r-tree(internal)"); DATA(insert OID = 322 ( rtgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 rtgettuple - )); DESCR("r-tree(internal)"); -DATA(insert OID = 323 ( rtbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 rtbuild - )); +DATA(insert OID = 323 ( rtbuild PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 rtbuild - )); DESCR("r-tree(internal)"); DATA(insert OID = 324 ( rtbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 rtbeginscan - )); DESCR("r-tree(internal)"); @@ -687,13 +680,15 @@ DATA(insert OID = 327 ( rtrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 DESCR("r-tree(internal)"); DATA(insert OID = 328 ( rtrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 rtrescan - )); DESCR("r-tree(internal)"); +DATA(insert OID = 321 ( rtbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 rtbulkdelete - )); +DESCR("r-tree(internal)"); +DATA(insert OID = 1265 ( rtcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 rtcostestimate - )); +DESCR("r-tree(internal)"); DATA(insert OID = 330 ( btgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 btgettuple - )); DESCR("btree(internal)"); DATA(insert OID = 331 ( btinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 btinsert - )); DESCR("btree(internal)"); -DATA(insert OID = 332 ( btdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 btdelete - )); -DESCR("btree(internal)"); DATA(insert OID = 333 ( btbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 btbeginscan - )); DESCR("btree(internal)"); DATA(insert OID = 334 ( btrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 btrescan - )); @@ 
-704,7 +699,11 @@ DATA(insert OID = 336 ( btmarkpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 b DESCR("btree(internal)"); DATA(insert OID = 337 ( btrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 btrestrpos - )); DESCR("btree(internal)"); -DATA(insert OID = 338 ( btbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 btbuild - )); +DATA(insert OID = 338 ( btbuild PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 btbuild - )); +DESCR("btree(internal)"); +DATA(insert OID = 332 ( btbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 btbulkdelete - )); +DESCR("btree(internal)"); +DATA(insert OID = 1268 ( btcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 btcostestimate - )); DESCR("btree(internal)"); DATA(insert OID = 339 ( poly_same PGUID 12 f t t t 2 f 16 "604 604" 100 0 0 100 poly_same - )); @@ -789,15 +788,10 @@ DESCR("convert name to char()"); DATA(insert OID = 409 ( name PGUID 12 f t t t 1 f 19 "1042" 100 0 0 100 bpchar_name - )); DESCR("convert char() to name"); -DATA(insert OID = 438 ( hashcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 hashcostestimate - )); -DESCR("hash index cost estimator"); - DATA(insert OID = 440 ( hashgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 hashgettuple - )); DESCR("hash(internal)"); DATA(insert OID = 441 ( hashinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 hashinsert - )); DESCR("hash(internal)"); -DATA(insert OID = 442 ( hashdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 hashdelete - )); -DESCR("hash(internal)"); DATA(insert OID = 443 ( hashbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 hashbeginscan - )); DESCR("hash(internal)"); DATA(insert OID = 444 ( hashrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 hashrescan - )); @@ -808,8 +802,13 @@ DATA(insert OID = 446 ( hashmarkpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 DESCR("hash(internal)"); DATA(insert OID = 447 ( hashrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 hashrestrpos - )); DESCR("hash(internal)"); 
-DATA(insert OID = 448 ( hashbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 hashbuild - )); +DATA(insert OID = 448 ( hashbuild PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 hashbuild - )); DESCR("hash(internal)"); +DATA(insert OID = 442 ( hashbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 hashbulkdelete - )); +DESCR("hash(internal)"); +DATA(insert OID = 438 ( hashcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 hashcostestimate - )); +DESCR("hash(internal)"); + DATA(insert OID = 449 ( hashint2 PGUID 12 f t t t 1 f 23 "21" 100 0 0 100 hashint2 - )); DESCR("hash"); DATA(insert OID = 450 ( hashint4 PGUID 12 f t t t 1 f 23 "23" 100 0 0 100 hashint4 - )); @@ -1014,14 +1013,10 @@ DESCR("larger of two"); DATA(insert OID = 771 ( int2smaller PGUID 12 f t t t 2 f 21 "21 21" 100 0 0 100 int2smaller - )); DESCR("smaller of two"); -DATA(insert OID = 772 ( gistcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 gistcostestimate - )); -DESCR("gist cost estimator"); DATA(insert OID = 774 ( gistgettuple PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 gistgettuple - )); DESCR("gist(internal)"); DATA(insert OID = 775 ( gistinsert PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 gistinsert - )); DESCR("gist(internal)"); -DATA(insert OID = 776 ( gistdelete PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100 gistdelete - )); -DESCR("gist(internal)"); DATA(insert OID = 777 ( gistbeginscan PGUID 12 f t f t 4 f 23 "0 0 0 0" 100 0 0 100 gistbeginscan - )); DESCR("gist(internal)"); DATA(insert OID = 778 ( gistrescan PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 gistrescan - )); @@ -1032,7 +1027,11 @@ DATA(insert OID = 780 ( gistmarkpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 DESCR("gist(internal)"); DATA(insert OID = 781 ( gistrestrpos PGUID 12 f t f t 1 f 23 "0" 100 0 0 100 gistrestrpos - )); DESCR("gist(internal)"); -DATA(insert OID = 782 ( gistbuild PGUID 12 f t f t 5 f 23 "0 0 0 0 0" 100 0 0 100 gistbuild - )); +DATA(insert OID = 782 ( gistbuild PGUID 12 f t f t 
3 f 23 "0 0 0" 100 0 0 100 gistbuild - )); +DESCR("gist(internal)"); +DATA(insert OID = 776 ( gistbulkdelete PGUID 12 f t f t 3 f 23 "0 0 0" 100 0 0 100 gistbulkdelete - )); +DESCR("gist(internal)"); +DATA(insert OID = 772 ( gistcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100 gistcostestimate - )); DESCR("gist(internal)"); DATA(insert OID = 784 ( tintervaleq PGUID 12 f t f t 2 f 16 "704 704" 100 0 0 100 tintervaleq - )); diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out index 46bc60f695..95c2487985 100644 --- a/src/test/regress/expected/oidjoins.out +++ b/src/test/regress/expected/oidjoins.out @@ -57,14 +57,6 @@ WHERE pg_am.aminsert != 0 AND -----+---------- (0 rows) -SELECT oid, pg_am.amdelete -FROM pg_am -WHERE pg_am.amdelete != 0 AND - NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.amdelete); - oid | amdelete ------+---------- -(0 rows) - SELECT oid, pg_am.ambeginscan FROM pg_am WHERE pg_am.ambeginscan != 0 AND @@ -113,6 +105,14 @@ WHERE pg_am.ambuild != 0 AND -----+--------- (0 rows) +SELECT oid, pg_am.ambulkdelete +FROM pg_am +WHERE pg_am.ambulkdelete != 0 AND + NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.ambulkdelete); + oid | ambulkdelete +-----+-------------- +(0 rows) + SELECT oid, pg_am.amcostestimate FROM pg_am WHERE pg_am.amcostestimate != 0 AND diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 41328c53b1..533655c52e 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -480,8 +480,8 @@ WHERE p1.aggtransfn = p2.oid AND (p2.pronargs = 1 AND p1.aggbasetype = 0))); oid | aggname | oid | proname -------+---------+-----+------------- - 16963 | max | 768 | int4larger - 16977 | min | 769 | int4smaller + 16959 | max | 768 | int4larger + 16973 | min | 769 | int4smaller (2 rows) -- Cross-check finalfn (if present) against its entry in pg_proc. 
diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql index 88727a6c76..34352128f4 100644 --- a/src/test/regress/sql/oidjoins.sql +++ b/src/test/regress/sql/oidjoins.sql @@ -29,10 +29,6 @@ SELECT oid, pg_am.aminsert FROM pg_am WHERE pg_am.aminsert != 0 AND NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.aminsert); -SELECT oid, pg_am.amdelete -FROM pg_am -WHERE pg_am.amdelete != 0 AND - NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.amdelete); SELECT oid, pg_am.ambeginscan FROM pg_am WHERE pg_am.ambeginscan != 0 AND @@ -57,6 +53,10 @@ SELECT oid, pg_am.ambuild FROM pg_am WHERE pg_am.ambuild != 0 AND NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.ambuild); +SELECT oid, pg_am.ambulkdelete +FROM pg_am +WHERE pg_am.ambulkdelete != 0 AND + NOT EXISTS(SELECT * FROM pg_proc AS t1 WHERE t1.oid = pg_am.ambulkdelete); SELECT oid, pg_am.amcostestimate FROM pg_am WHERE pg_am.amcostestimate != 0 AND