Create a routine PageIndexMultiDelete() that replaces a loop around PageIndexTupleDelete() with a single pass of compactification; the logic is mostly lifted from PageRepairFragmentation().
While profiling, I noticed that a VACUUM that is cleaning up a whole lot of deleted tuples would spend as much as a third of its CPU time in PageIndexTupleDelete(). That is not too surprising, considering the loop method was roughly O(N^2) in the number of tuples involved.
This commit is contained in:
parent
775d28302c
commit
94e03330cb
@ -9,7 +9,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.81 2004/12/31 21:59:22 pgsql Exp $
|
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.82 2005/03/22 06:17:03 tgl Exp $
|
||||||
*
|
*
|
||||||
* NOTES
|
* NOTES
|
||||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||||
@ -639,17 +639,12 @@ _bt_delitems(Relation rel, Buffer buf,
|
|||||||
OffsetNumber *itemnos, int nitems)
|
OffsetNumber *itemnos, int nitems)
|
||||||
{
|
{
|
||||||
Page page = BufferGetPage(buf);
|
Page page = BufferGetPage(buf);
|
||||||
int i;
|
|
||||||
|
|
||||||
/* No ereport(ERROR) until changes are logged */
|
/* No ereport(ERROR) until changes are logged */
|
||||||
START_CRIT_SECTION();
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
/*
|
/* Fix the page */
|
||||||
* Delete the items in reverse order so we don't have to think about
|
PageIndexMultiDelete(page, itemnos, nitems);
|
||||||
* adjusting item numbers for previous deletions.
|
|
||||||
*/
|
|
||||||
for (i = nitems - 1; i >= 0; i--)
|
|
||||||
PageIndexTupleDelete(page, itemnos[i]);
|
|
||||||
|
|
||||||
/* XLOG stuff */
|
/* XLOG stuff */
|
||||||
if (!rel->rd_istemp)
|
if (!rel->rd_istemp)
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.19 2004/12/31 21:59:22 pgsql Exp $
|
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -411,12 +411,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
|
|||||||
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
|
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
|
||||||
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
|
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
|
||||||
|
|
||||||
/* be careful to delete from back to front */
|
PageIndexMultiDelete(page, unused, unend - unused);
|
||||||
while (unused < unend)
|
|
||||||
{
|
|
||||||
unend--;
|
|
||||||
PageIndexTupleDelete(page, *unend);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PageSetLSN(page, lsn);
|
PageSetLSN(page, lsn);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.62 2004/12/31 22:01:10 pgsql Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -274,13 +274,14 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* sorting support for PageRepairFragmentation
|
* sorting support for PageRepairFragmentation and PageIndexMultiDelete
|
||||||
*/
|
*/
|
||||||
typedef struct itemIdSortData
|
typedef struct itemIdSortData
|
||||||
{
|
{
|
||||||
int offsetindex; /* linp array index */
|
int offsetindex; /* linp array index */
|
||||||
int itemoff; /* page offset of item data */
|
int itemoff; /* page offset of item data */
|
||||||
Size alignedlen; /* MAXALIGN(item data len) */
|
Size alignedlen; /* MAXALIGN(item data len) */
|
||||||
|
ItemIdData olditemid; /* used only in PageIndexMultiDelete */
|
||||||
} itemIdSortData;
|
} itemIdSortData;
|
||||||
typedef itemIdSortData *itemIdSort;
|
typedef itemIdSortData *itemIdSort;
|
||||||
|
|
||||||
@ -297,7 +298,8 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
|
|||||||
*
|
*
|
||||||
* Frees fragmented space on a page.
|
* Frees fragmented space on a page.
|
||||||
* It doesn't remove unused line pointers! Please don't change this.
|
* It doesn't remove unused line pointers! Please don't change this.
|
||||||
* This routine is usable for heap pages only.
|
*
|
||||||
|
* This routine is usable for heap pages only, but see PageIndexMultiDelete.
|
||||||
*
|
*
|
||||||
* Returns number of unused line pointers on page. If "unused" is not NULL
|
* Returns number of unused line pointers on page. If "unused" is not NULL
|
||||||
* then the unused[] array is filled with indexes of unused line pointers.
|
* then the unused[] array is filled with indexes of unused line pointers.
|
||||||
@ -543,3 +545,135 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PageIndexMultiDelete
|
||||||
|
*
|
||||||
|
* This routine handles the case of deleting multiple tuples from an
|
||||||
|
* index page at once. It is considerably faster than a loop around
|
||||||
|
* PageIndexTupleDelete ... however, the caller *must* supply the array
|
||||||
|
* of item numbers to be deleted in item number order!
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
|
||||||
|
{
|
||||||
|
PageHeader phdr = (PageHeader) page;
|
||||||
|
Offset pd_lower = phdr->pd_lower;
|
||||||
|
Offset pd_upper = phdr->pd_upper;
|
||||||
|
Offset pd_special = phdr->pd_special;
|
||||||
|
itemIdSort itemidbase,
|
||||||
|
itemidptr;
|
||||||
|
ItemId lp;
|
||||||
|
int nline,
|
||||||
|
nused;
|
||||||
|
int i;
|
||||||
|
Size totallen;
|
||||||
|
Offset upper;
|
||||||
|
Size size;
|
||||||
|
unsigned offset;
|
||||||
|
int nextitm;
|
||||||
|
OffsetNumber offnum;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there aren't very many items to delete, then retail
|
||||||
|
* PageIndexTupleDelete is the best way. Delete the items in reverse
|
||||||
|
* order so we don't have to think about adjusting item numbers for
|
||||||
|
* previous deletions.
|
||||||
|
*
|
||||||
|
* TODO: tune the magic number here
|
||||||
|
*/
|
||||||
|
if (nitems <= 2)
|
||||||
|
{
|
||||||
|
while (--nitems >= 0)
|
||||||
|
PageIndexTupleDelete(page, itemnos[nitems]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As with PageRepairFragmentation, paranoia seems justified.
|
||||||
|
*/
|
||||||
|
if (pd_lower < SizeOfPageHeaderData ||
|
||||||
|
pd_lower > pd_upper ||
|
||||||
|
pd_upper > pd_special ||
|
||||||
|
pd_special > BLCKSZ ||
|
||||||
|
pd_special != MAXALIGN(pd_special))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
|
errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
|
||||||
|
pd_lower, pd_upper, pd_special)));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Scan the item pointer array and build a list of just the ones we
|
||||||
|
* are going to keep. Notice we do not modify the page yet, since
|
||||||
|
* we are still validity-checking.
|
||||||
|
*/
|
||||||
|
nline = PageGetMaxOffsetNumber(page);
|
||||||
|
itemidbase = (itemIdSort) palloc(sizeof(itemIdSortData) * nline);
|
||||||
|
itemidptr = itemidbase;
|
||||||
|
totallen = 0;
|
||||||
|
nused = 0;
|
||||||
|
nextitm = 0;
|
||||||
|
for (offnum = 1; offnum <= nline; offnum++)
|
||||||
|
{
|
||||||
|
lp = PageGetItemId(page, offnum);
|
||||||
|
size = ItemIdGetLength(lp);
|
||||||
|
offset = ItemIdGetOffset(lp);
|
||||||
|
if (offset < pd_upper ||
|
||||||
|
(offset + size) > pd_special ||
|
||||||
|
offset != MAXALIGN(offset))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
|
errmsg("corrupted item pointer: offset = %u, size = %u",
|
||||||
|
offset, (unsigned int) size)));
|
||||||
|
|
||||||
|
if (nextitm < nitems && offnum == itemnos[nextitm])
|
||||||
|
{
|
||||||
|
/* skip item to be deleted */
|
||||||
|
nextitm++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
itemidptr->offsetindex = nused; /* where it will go */
|
||||||
|
itemidptr->itemoff = offset;
|
||||||
|
itemidptr->olditemid = *lp;
|
||||||
|
itemidptr->alignedlen = MAXALIGN(size);
|
||||||
|
totallen += itemidptr->alignedlen;
|
||||||
|
itemidptr++;
|
||||||
|
nused++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* this will catch invalid or out-of-order itemnos[] */
|
||||||
|
if (nextitm != nitems)
|
||||||
|
elog(ERROR, "incorrect index offsets supplied");
|
||||||
|
|
||||||
|
if (totallen > (Size) (pd_special - pd_lower))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||||
|
errmsg("corrupted item lengths: total %u, available space %u",
|
||||||
|
(unsigned int) totallen, pd_special - pd_lower)));
|
||||||
|
|
||||||
|
/* sort itemIdSortData array into decreasing itemoff order */
|
||||||
|
qsort((char *) itemidbase, nused, sizeof(itemIdSortData),
|
||||||
|
itemoffcompare);
|
||||||
|
|
||||||
|
/* compactify page and install new itemids */
|
||||||
|
upper = pd_special;
|
||||||
|
|
||||||
|
for (i = 0, itemidptr = itemidbase; i < nused; i++, itemidptr++)
|
||||||
|
{
|
||||||
|
lp = PageGetItemId(page, itemidptr->offsetindex + 1);
|
||||||
|
upper -= itemidptr->alignedlen;
|
||||||
|
memmove((char *) page + upper,
|
||||||
|
(char *) page + itemidptr->itemoff,
|
||||||
|
itemidptr->alignedlen);
|
||||||
|
*lp = itemidptr->olditemid;
|
||||||
|
lp->lp_off = upper;
|
||||||
|
}
|
||||||
|
|
||||||
|
phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
|
||||||
|
phdr->pd_upper = upper;
|
||||||
|
|
||||||
|
pfree(itemidbase);
|
||||||
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.63 2004/12/31 22:03:42 pgsql Exp $
|
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.64 2005/03/22 06:17:03 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -323,5 +323,6 @@ extern void PageRestoreTempPage(Page tempPage, Page oldPage);
|
|||||||
extern int PageRepairFragmentation(Page page, OffsetNumber *unused);
|
extern int PageRepairFragmentation(Page page, OffsetNumber *unused);
|
||||||
extern Size PageGetFreeSpace(Page page);
|
extern Size PageGetFreeSpace(Page page);
|
||||||
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
|
extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
|
||||||
|
extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
|
||||||
|
|
||||||
#endif /* BUFPAGE_H */
|
#endif /* BUFPAGE_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user