/*-------------------------------------------------------------------------
 *
 * gistvacuum.c
 *	  interface routines for the postgres GiST index access method.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.7 2005/09/02 19:02:19 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/gist_private.h"
#include "access/gistscan.h"
#include "access/heapam.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "utils/memutils.h"
#include "storage/freespace.h"
#include "storage/smgr.h"

/* filled by gistbulkdelete, cleared by gistvacuumpcleanup */
static bool needFullVacuum = false;

/*
 * State handed down unchanged through the recursive gistVacuumUpdate() walk.
 *
 *	giststate	passed to gistSplit() and gistunion()
 *	index		relation whose pages are read and rewritten
 *	opCtx		memory context switched to around the extract/split/union
 *				helpers and reset once their results have been copied out
 *	result		statistics struct; pages_deleted is bumped here
 */
typedef struct {
	GISTSTATE giststate;
	Relation index;
	MemoryContext opCtx;
	IndexBulkDeleteResult *result;
} GistVacuum;

/*
 * What gistVacuumUpdate() reports to its caller about one page.
 *
 *	itup/ituplen	tuples the parent should insert in place of its old
 *					downlink (ituplen == 0 means "nothing to insert")
 *	emptypage		true when the page was found empty and flagged deleted
 */
typedef struct {
	IndexTuple *itup;
	int ituplen;
	bool emptypage;
} ArrayTuple;

/*
 * gistVacuumUpdate
 *		Recursively process the page at blkno and everything below it.
 *
 * gv			shared walk state (see GistVacuum)
 * blkno		block number of the page to process
 * needunion	caller's request to recompute the union key of this page; the
 *				caller sets it when its downlink to blkno is an invalid
 *				tuple, and this function also sets it itself when the page
 *				changes
 *
 * On an inner page every downlink is followed recursively.  Whenever the
 * child reports replacement tuples or reports that it became empty, the
 * downlink is deleted from this page; replacement tuples are collected in
 * "addon" and inserted afterwards, splitting this page if they do not fit.
 *
 * The caller tests "ituplen || emptypage" on the result to decide whether
 * its own page has to change.
 *
 * NOTE(review): several spans of this function are damaged in this copy of
 * the file (text between a '<' and a later '>' appears to be missing).  The
 * damaged spots are marked below; the end of the function, including its
 * return statement, is among the missing text.  Restore from the upstream
 * source before building.
 */
static ArrayTuple
gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
	ArrayTuple res = {NULL, 0, false};
	Buffer buffer;
	Page page;
	OffsetNumber i, maxoff;
	ItemId iid;
	/* addon: replacement tuples gathered from children (capacity lenaddon) */
	int lenaddon=4, curlenaddon=0, ntodelete=0;
	IndexTuple idxtuple, *addon=NULL;
	bool needwrite=false;
	/* offsets deleted from this page, handed to formUpdateRdata() below */
	OffsetNumber todelete[MaxOffsetNumber];
	/* item pointers passed to gistxlogInsertCompletion() (capacity lencompleted) */
	ItemPointerData *completed=NULL;
	int ncompleted=0, lencompleted=16;

	buffer = ReadBuffer(gv->index, blkno);
	page = (Page) BufferGetPage(buffer);
	maxoff = PageGetMaxOffsetNumber(page);

	if ( GistPageIsLeaf(page) ) {
		/*
		 * Leaf: nothing to descend into.  If the page carries the
		 * "tuples deleted" flag, clear it and ask for a fresh union.
		 */
		if ( GistTuplesDeleted(page) ) {
			needunion = needwrite = true;
			GistClearTuplesDeleted(page);
		}
	} else {
		completed = (ItemPointerData*)palloc( sizeof(ItemPointerData)*lencompleted );
		addon=(IndexTuple*)palloc(sizeof(IndexTuple)*lenaddon);

		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
			ArrayTuple chldtuple;
			bool needchildunion;

			iid = PageGetItemId(page, i);
			idxtuple = (IndexTuple) PageGetItem(page, iid);
			/* an invalid downlink forces the child to rebuild its union */
			needchildunion = (GistTupleIsInvalid(idxtuple)) ? true : false;

			if ( needchildunion )
				elog(DEBUG2,"gistVacuumUpdate: Need union for block %u", ItemPointerGetBlockNumber(&(idxtuple->t_tid)));

			chldtuple = gistVacuumUpdate( gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)), needchildunion );
			if ( chldtuple.ituplen || chldtuple.emptypage ) {
				/*
				 * The downlink is stale.  Deleting it shifts the following
				 * items down by one, so step i and maxoff back to revisit
				 * the same offset on the next iteration.
				 */
				PageIndexTupleDelete(page, i);
				todelete[ ntodelete++ ] = i;
				i--; maxoff--;
				needwrite=needunion=true;

				if ( chldtuple.ituplen ) {
					/* grow addon by doubling until the child's tuples fit */
					while( curlenaddon + chldtuple.ituplen >= lenaddon ) {
						lenaddon*=2;
						addon=(IndexTuple*)repalloc( addon, sizeof(IndexTuple)*lenaddon );
					}

					memcpy( addon + curlenaddon, chldtuple.itup, chldtuple.ituplen * sizeof(IndexTuple) );

					curlenaddon += chldtuple.ituplen;

					if ( chldtuple.ituplen > 1 ) {
						/*
						 * The child was split, so its insert (split) has to
						 * be marked as completed.
						 */
						int j;

						while( ncompleted + chldtuple.ituplen > lencompleted ) {
							lencompleted*=2;
							completed = (ItemPointerData*)repalloc(completed, sizeof(ItemPointerData) * lencompleted);
						}
						/*
						 * NOTE(review): the loop header and the start of its
						 * body are damaged here; presumably each returned
						 * tuple's t_tid is copied into completed[] — confirm
						 * against the upstream source.
						 */
						for(j=0;jt_tid), completed + ncompleted );
							ncompleted++;
						}
					}
					/* only the pointer array is freed; the tuples now live in addon */
					pfree( chldtuple.itup );
				}
			}
		}

		if ( curlenaddon ) {
			/* insert updated tuples */
			if (gistnospace(page, addon, curlenaddon)) {
				/* there is no space on page to insert tuples */
				IndexTuple *vec;
				SplitedPageLayout *dist=NULL,*ptr;
				int i;
				MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);

				/* remaining page tuples + addon are split; the helpers run in opCtx */
				vec = gistextractbuffer(buffer, &(res.ituplen));
				vec = gistjoinvector(vec, &(res.ituplen), addon, curlenaddon);
				res.itup = gistSplit(gv->index, buffer, vec, &(res.ituplen), &dist, &(gv->giststate));
				MemoryContextSwitchTo(oldCtx);

				vec = (IndexTuple*)palloc( sizeof(IndexTuple) * res.ituplen );
				/*
				 * NOTE(review): text is missing here — the body of this loop
				 * and the head of the following "if" have been fused.
				 * Presumably the branch below is the non-temp-relation (WAL
				 * logged) case; confirm against the upstream source.
				 */
				for(i=0;iindex->rd_istemp ) {
					XLogRecPtr recptr;
					XLogRecData *rdata;
					ItemPointerData key; /* set key for incomplete insert */
					char *xlinfo;

					ItemPointerSet(&key, blkno, TUPLE_IS_VALID);

					rdata = formSplitRdata(gv->index->rd_node, blkno, &key, dist);
					xlinfo = rdata->data;

					START_CRIT_SECTION();

					recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
					/* stamp every page in the dist list with the record's LSN/TLI */
					ptr = dist;
					while(ptr) {
						PageSetLSN(BufferGetPage(ptr->buffer), recptr);
						PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
						ptr=ptr->next;
					}

					END_CRIT_SECTION();
					pfree( xlinfo );
					pfree( rdata );
				} else {
					/* no WAL record on this path: pages get the placeholder LSN */
					ptr = dist;
					while(ptr) {
						PageSetLSN(BufferGetPage(ptr->buffer), XLogRecPtrForTemp);
						ptr=ptr->next;
					}
				}

				/*
				 * Write out every page in the dist list; pages other than
				 * blkno are unlocked first.
				 */
				ptr = dist;
				while(ptr) {
					if ( BufferGetBlockNumber(ptr->buffer) != blkno )
						LockBuffer( ptr->buffer, GIST_UNLOCK );
					WriteBuffer(ptr->buffer);
					ptr=ptr->next;
				}

				if ( blkno == GIST_ROOT_BLKNO ) {
					/* the root itself was split: build a new root over the pieces */
					ItemPointerData key; /* set key for incomplete insert */

					ItemPointerSet(&key, blkno, TUPLE_IS_VALID);

					oldCtx = MemoryContextSwitchTo(gv->opCtx);
					gistnewroot(gv->index, buffer, res.itup, res.ituplen, &key);
					MemoryContextSwitchTo(oldCtx);
					WriteNoReleaseBuffer(buffer);
				}

				/* everything was written above; skip the generic write path below */
				needwrite=false;

				MemoryContextReset(gv->opCtx);

				needunion = false; /* gistSplit already forms unions */
			} else {
				/* enough free space */
				gistfillbuffer(gv->index, page, addon, curlenaddon, InvalidOffsetNumber);
			}
		}
	}

	if ( needunion ) {
		/* form the union for the page, or detect that it is empty */
		if ( PageIsEmpty(page) ) {
			if ( blkno == GIST_ROOT_BLKNO ) {
				/* an empty root is turned back into a leaf page */
				needwrite=true;
				GistPageSetLeaf( page );
			} else {
				/* any other empty page is flagged deleted and reported upward */
				needwrite=true;
				res.emptypage=true;
				GistPageSetDeleted( page );
				gv->result->pages_deleted++;
			}
		} else {
			IndexTuple *vec, tmp;
			int veclen=0;
			MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);

			vec = gistextractbuffer(buffer, &veclen);
			tmp = gistunion(gv->index, vec, veclen, &(gv->giststate));
			MemoryContextSwitchTo(oldCtx);

			/*
			 * Copy the union out of opCtx (which is reset just below) and
			 * point it at this page so the parent can use it as a downlink.
			 */
			res.itup=(IndexTuple*)palloc( sizeof(IndexTuple) );
			res.ituplen = 1;
			res.itup[0] = (IndexTuple)palloc( IndexTupleSize(tmp) );
			memcpy( res.itup[0], tmp, IndexTupleSize(tmp) );

			ItemPointerSetBlockNumber(&(res.itup[0]->t_tid), blkno);
			GistTupleSetValid( res.itup[0] );

			MemoryContextReset(gv->opCtx);
		}
	}

	if ( needwrite ) {
		if ( !gv->index->rd_istemp ) {
			/* WAL-log the deletions and insertions made to this page */
			XLogRecData *rdata;
			XLogRecPtr recptr;
			char *xlinfo;

			rdata = formUpdateRdata(gv->index->rd_node, blkno, todelete, ntodelete, res.emptypage, addon, curlenaddon, NULL );
			xlinfo = rdata->data;

			START_CRIT_SECTION();
			recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
			PageSetLSN(page, recptr);
			PageSetTLI(page, ThisTimeLineID);
			END_CRIT_SECTION();

			pfree( xlinfo );
			pfree( rdata );
		} else
			PageSetLSN(page, XLogRecPtrForTemp);
		WriteBuffer( buffer );
	} else
		ReleaseBuffer( buffer );

	if ( ncompleted && !gv->index->rd_istemp )
		gistxlogInsertCompletion( gv->index->rd_node, completed, ncompleted );

	/*
	 * NOTE(review): a large span is missing here.  The rest of
	 * gistVacuumUpdate() and the header and opening of the following
	 * vacuum-cleanup routine have been fused into the single damaged line
	 * below.  What follows belongs to that later routine (it uses rel, info
	 * and stats, none of which are declared in the text visible here).
	 */
	for(i=0;ivacuum_full ) {
		GistVacuum gv;
		ArrayTuple res;

		LockRelation(rel, AccessExclusiveLock);

		gv.index = rel;
		initGISTstate(&(gv.giststate), rel);
		gv.opCtx = createTempGistContext();
		gv.result = stats;

		/* walk through the entire index for update tuples */
		res = gistVacuumUpdate( &gv, GIST_ROOT_BLKNO, false );
		/* cleanup */
		if (res.itup) {
			int i;
			/*
			 * NOTE(review): damaged span — the loop, the end of this block
			 * and the start of the next statement are missing.
			 */
			for(i=0;ivacuum_full )
		needLock = false; /* relation locked with AccessExclusiveLock */

	/* try to find deleted pages */
	if (needLock)
		LockRelationForExtension(rel, ExclusiveLock);
	npages = RelationGetNumberOfBlocks(rel);
	if (needLock)
		UnlockRelationForExtension(rel, ExclusiveLock);

	/* the free-page array is capped at MaxFSMPages entries */
	maxFreePages = npages;
	if ( maxFreePages > MaxFSMPages )
		maxFreePages = MaxFSMPages;

	nFreePages = 0;
	freePages = (BlockNumber*) palloc (sizeof(BlockNumber) * maxFreePages);
	/*
	 * NOTE(review): damaged span — the page scan that fills freePages and
	 * the head of the following "if" are missing.
	 */
	for(blkno=GIST_ROOT_BLKNO+1;blknovacuum_full && nFreePages>0 ) {
		/* try to truncate index */
		int i;
		/* NOTE(review): damaged span — loop header and condition are missing */
		for(i=0;i= lastFilledBlock ) {
				nFreePages = i;
				break;
			}

		if ( lastBlock > lastFilledBlock )
			RelationTruncate( rel, lastFilledBlock+1 );
		stats->pages_removed = lastBlock - lastFilledBlock;
	}

	RecordIndexFreeSpace( &rel->rd_node, nFreePages, freePages );
	pfree( freePages );

	/* return statistics */
	stats->pages_free = nFreePages;
	if (needLock)
		LockRelationForExtension(rel, ExclusiveLock);
	stats->num_pages = RelationGetNumberOfBlocks(rel);
	if (needLock)
		UnlockRelationForExtension(rel, ExclusiveLock);

	/* release the lock taken above for the vacuum_full case */
	if (info->vacuum_full)
		UnlockRelation(rel,
AccessExclusiveLock); PG_RETURN_POINTER(stats); } typedef struct GistBDItem { GistNSN parentlsn; BlockNumber blkno; struct GistBDItem *next; } GistBDItem; static void pushStackIfSplited(Page page, GistBDItem *stack) { GISTPageOpaque opaque = GistPageGetOpaque(page); if ( stack->blkno!=GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid( stack->parentlsn ) && XLByteLT( stack->parentlsn, opaque->nsn) && opaque->rightlink != InvalidBlockNumber /* sanity check */ ) { /* split page detected, install right link to the stack */ GistBDItem *ptr = (GistBDItem*) palloc(sizeof(GistBDItem)); ptr->blkno = opaque->rightlink; ptr->parentlsn = stack->parentlsn; ptr->next = stack->next; stack->next = ptr; } } /* * Bulk deletion of all index entries pointing to a set of heap tuples and * check invalid tuples after crash recovery. * The set of target tuples is specified via a callback routine that tells * whether any given heap tuple (identified by ItemPointer) is being deleted. * * Result: a palloc'd struct containing statistical info for VACUUM displays. 
*/
/*
 * Implementation notes:
 *	arg 0 is the index relation, arg 1 the deletion callback, arg 2 the
 *	opaque state passed back to that callback.
 *
 *	Pages are visited from an explicit linked list seeded with the root
 *	block.  Inner pages push one entry per downlink; leaf pages are
 *	re-locked exclusively and have every tuple accepted by the callback
 *	removed.  needFullVacuum is reset on entry and raised when an inner page
 *	holds an invalid tuple.
 */
Datum
gistbulkdelete(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
	void	   *callback_state = (void *) PG_GETARG_POINTER(2);
	IndexBulkDeleteResult *result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
	GistBDItem *stack;
	bool		needLock;

	/* start from the root; palloc0 leaves parentlsn and next zeroed */
	stack = (GistBDItem *) palloc0(sizeof(GistBDItem));
	stack->blkno = GIST_ROOT_BLKNO;
	needFullVacuum = false;

	while (stack != NULL)
	{
		GistBDItem *rest;
		Buffer		buffer = ReadBuffer(rel, stack->blkno);
		Page		page;

		LockBuffer(buffer, GIST_SHARE);
		page = (Page) BufferGetPage(buffer);

		if (!GistPageIsLeaf(page))
		{
			OffsetNumber off;
			OffsetNumber lastoff;

			/* check for a split that happened after we looked at the parent */
			pushStackIfSplited(page, stack);

			/* queue every child, remembering this page's LSN for split detection */
			lastoff = PageGetMaxOffsetNumber(page);
			for (off = FirstOffsetNumber; off <= lastoff; off = OffsetNumberNext(off))
			{
				ItemId		itemid = PageGetItemId(page, off);
				IndexTuple	downlink = (IndexTuple) PageGetItem(page, itemid);
				GistBDItem *child = (GistBDItem *) palloc(sizeof(GistBDItem));

				child->blkno = ItemPointerGetBlockNumber(&(downlink->t_tid));
				child->parentlsn = PageGetLSN(page);
				child->next = stack->next;
				stack->next = child;

				if (GistTupleIsInvalid(downlink))
					needFullVacuum = true;
			}
		}
		else
		{
			OffsetNumber todelete[MaxOffsetNumber];
			int			ntodelete = 0;
			OffsetNumber off;
			OffsetNumber lastoff;

			/* swap the share lock for an exclusive one (drop, then re-take) */
			LockBuffer(buffer, GIST_UNLOCK);
			LockBuffer(buffer, GIST_EXCLUSIVE);

			page = (Page) BufferGetPage(buffer);
			if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page))
			{
				/*
				 * Only the root can turn into a non-leaf while unlocked.
				 * Let go of it and process the same stack entry again.
				 */
				LockBuffer(buffer, GIST_UNLOCK);
				ReleaseBuffer(buffer);
				continue;
			}

			/*
			 * check for a split that happened after we looked at the parent;
			 * this has to be done after the relock
			 */
			pushStackIfSplited(page, stack);

			lastoff = PageGetMaxOffsetNumber(page);
			off = FirstOffsetNumber;
			while (off <= lastoff)
			{
				ItemId		itemid = PageGetItemId(page, off);
				IndexTuple	tuple = (IndexTuple) PageGetItem(page, itemid);

				if (!callback(&(tuple->t_tid), callback_state))
				{
					/* tuple stays: count it and move on */
					result->num_index_tuples += 1;
					off = OffsetNumberNext(off);
					continue;
				}

				/*
				 * Deleting shifts the following items down, so the same
				 * offset is examined again and the upper bound shrinks.
				 */
				PageIndexTupleDelete(page, off);
				todelete[ntodelete++] = off;
				lastoff--;
				result->tuples_removed += 1;
				Assert(lastoff == PageGetMaxOffsetNumber(page));
			}

			if (ntodelete > 0)
			{
				GistMarkTuplesDeleted(page);

				if (rel->rd_istemp)
					PageSetLSN(page, XLogRecPtrForTemp);
				else
				{
					XLogRecData *rdata;
					XLogRecPtr	recptr;
					gistxlogEntryUpdate *xlinfo;

					rdata = formUpdateRdata(rel->rd_node, stack->blkno, todelete, ntodelete, false, NULL, 0, NULL);
					xlinfo = (gistxlogEntryUpdate *) rdata->data;

					START_CRIT_SECTION();
					recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
					PageSetLSN(page, recptr);
					PageSetTLI(page, ThisTimeLineID);
					END_CRIT_SECTION();

					pfree(xlinfo);
					pfree(rdata);
				}

				WriteNoReleaseBuffer(buffer);
			}
		}

		LockBuffer(buffer, GIST_UNLOCK);
		ReleaseBuffer(buffer);

		/* pop the entry just processed */
		rest = stack->next;
		pfree(stack);
		stack = rest;

		vacuum_delay_point();
	}

	/* the extension lock is taken around the size read unless the relation is local */
	needLock = !RELATION_IS_LOCAL(rel);

	if (needLock)
		LockRelationForExtension(rel, ExclusiveLock);
	result->num_pages = RelationGetNumberOfBlocks(rel);
	if (needLock)
		UnlockRelationForExtension(rel, ExclusiveLock);

	PG_RETURN_POINTER(result);
}