Refactor tidstore.c iterator buffering.

Previously, TidStoreIterateNext() would expand the set of offsets for
each block into an internal buffer that it overwrote each time.  In
order to be able to collect the offsets for multiple blocks before
working with them, change the contract.  Now, the offsets are obtained
by a separate call to TidStoreGetBlockOffsets(), which can be called at
a later time.  TidStoreIterResult objects are safe to copy and store
in a queue.

Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/CAAKRu_bbkmwAzSBgnezancgJeXrQZXy4G4kBTd+5=cr86H5yew@mail.gmail.com
This commit is contained in:
Thomas Munro 2024-07-24 17:24:59 +12:00
parent 1462aad2e4
commit f6bef362ca
4 changed files with 51 additions and 42 deletions

View File

@ -147,9 +147,6 @@ struct TidStoreIter
TidStoreIterResult output; TidStoreIterResult output;
}; };
static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
BlocktableEntry *page);
/* /*
* Create a TidStore. The TidStore will live in the memory context that is * Create a TidStore. The TidStore will live in the memory context that is
* CurrentMemoryContext at the time of this call. The TID storage, backed * CurrentMemoryContext at the time of this call. The TID storage, backed
@ -486,13 +483,6 @@ TidStoreBeginIterate(TidStore *ts)
iter = palloc0(sizeof(TidStoreIter)); iter = palloc0(sizeof(TidStoreIter));
iter->ts = ts; iter->ts = ts;
/*
* We start with an array large enough to contain at least the offsets
* from one completely full bitmap element.
*/
iter->output.max_offset = 2 * BITS_PER_BITMAPWORD;
iter->output.offsets = palloc(sizeof(OffsetNumber) * iter->output.max_offset);
if (TidStoreIsShared(ts)) if (TidStoreIsShared(ts))
iter->tree_iter.shared = shared_ts_begin_iterate(ts->tree.shared); iter->tree_iter.shared = shared_ts_begin_iterate(ts->tree.shared);
else else
@ -503,9 +493,9 @@ TidStoreBeginIterate(TidStore *ts)
/* /*
* Scan the TidStore and return the TIDs of the next block. The offsets in * Return a result that contains the next block number and that can be used to
* each iteration result are ordered, as are the block numbers over all * obtain the set of offsets by calling TidStoreGetBlockOffsets(). The result
* iterations. * is copyable.
*/ */
TidStoreIterResult * TidStoreIterResult *
TidStoreIterateNext(TidStoreIter *iter) TidStoreIterateNext(TidStoreIter *iter)
@ -521,8 +511,8 @@ TidStoreIterateNext(TidStoreIter *iter)
if (page == NULL) if (page == NULL)
return NULL; return NULL;
/* Collect TIDs from the key-value pair */ iter->output.blkno = key;
tidstore_iter_extract_tids(iter, (BlockNumber) key, page); iter->output.internal_page = page;
return &(iter->output); return &(iter->output);
} }
@ -540,7 +530,6 @@ TidStoreEndIterate(TidStoreIter *iter)
else else
local_ts_end_iterate(iter->tree_iter.local); local_ts_end_iterate(iter->tree_iter.local);
pfree(iter->output.offsets);
pfree(iter); pfree(iter);
} }
@ -575,24 +564,32 @@ TidStoreGetHandle(TidStore *ts)
return (dsa_pointer) shared_ts_get_handle(ts->tree.shared); return (dsa_pointer) shared_ts_get_handle(ts->tree.shared);
} }
/* Extract TIDs from the given key-value pair */ /*
static void * Given a TidStoreIterResult returned by TidStoreIterateNext(), extract the
tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno, * offset numbers. Returns the number of offsets filled in, if <=
BlocktableEntry *page) * max_offsets. Otherwise, fills in as much as it can in the given space, and
* returns the size of the buffer that would be needed.
*/
int
TidStoreGetBlockOffsets(TidStoreIterResult *result,
OffsetNumber *offsets,
int max_offsets)
{ {
TidStoreIterResult *result = (&iter->output); BlocktableEntry *page = result->internal_page;
int num_offsets = 0;
int wordnum; int wordnum;
result->num_offsets = 0;
result->blkno = blkno;
if (page->header.nwords == 0) if (page->header.nwords == 0)
{ {
/* we have offsets in the header */ /* we have offsets in the header */
for (int i = 0; i < NUM_FULL_OFFSETS; i++) for (int i = 0; i < NUM_FULL_OFFSETS; i++)
{ {
if (page->header.full_offsets[i] != InvalidOffsetNumber) if (page->header.full_offsets[i] != InvalidOffsetNumber)
result->offsets[result->num_offsets++] = page->header.full_offsets[i]; {
if (num_offsets < max_offsets)
offsets[num_offsets] = page->header.full_offsets[i];
num_offsets++;
}
} }
} }
else else
@ -602,21 +599,19 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
bitmapword w = page->words[wordnum]; bitmapword w = page->words[wordnum];
int off = wordnum * BITS_PER_BITMAPWORD; int off = wordnum * BITS_PER_BITMAPWORD;
/* Make sure there is enough space to add offsets */
if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset)
{
result->max_offset *= 2;
result->offsets = repalloc(result->offsets,
sizeof(OffsetNumber) * result->max_offset);
}
while (w != 0) while (w != 0)
{ {
if (w & 1) if (w & 1)
result->offsets[result->num_offsets++] = (OffsetNumber) off; {
if (num_offsets < max_offsets)
offsets[num_offsets] = (OffsetNumber) off;
num_offsets++;
}
off++; off++;
w >>= 1; w >>= 1;
} }
} }
} }
return num_offsets;
} }

View File

@ -2126,12 +2126,17 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
Buffer buf; Buffer buf;
Page page; Page page;
Size freespace; Size freespace;
OffsetNumber offsets[MaxOffsetNumber];
int num_offsets;
vacuum_delay_point(); vacuum_delay_point();
blkno = iter_result->blkno; blkno = iter_result->blkno;
vacrel->blkno = blkno; vacrel->blkno = blkno;
num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
Assert(num_offsets <= lengthof(offsets));
/* /*
* Pin the visibility map page in case we need to mark the page * Pin the visibility map page in case we need to mark the page
* all-visible. In most cases this will be very cheap, because we'll * all-visible. In most cases this will be very cheap, because we'll
@ -2143,8 +2148,8 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
vacrel->bstrategy); vacrel->bstrategy);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
lazy_vacuum_heap_page(vacrel, blkno, buf, iter_result->offsets, lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
iter_result->num_offsets, vmbuffer); num_offsets, vmbuffer);
/* Now that we've vacuumed the page, record its available space */ /* Now that we've vacuumed the page, record its available space */
page = BufferGetPage(buf); page = BufferGetPage(buf);

View File

@ -20,13 +20,14 @@
typedef struct TidStore TidStore; typedef struct TidStore TidStore;
typedef struct TidStoreIter TidStoreIter; typedef struct TidStoreIter TidStoreIter;
/* Result struct for TidStoreIterateNext */ /*
* Result struct for TidStoreIterateNext. This is copyable, but should be
* treated as opaque. Call TidStoreGetBlockOffsets() to obtain the offsets.
*/
typedef struct TidStoreIterResult typedef struct TidStoreIterResult
{ {
BlockNumber blkno; BlockNumber blkno;
int max_offset; void *internal_page;
int num_offsets;
OffsetNumber *offsets;
} TidStoreIterResult; } TidStoreIterResult;
extern TidStore *TidStoreCreateLocal(size_t max_bytes, bool insert_only); extern TidStore *TidStoreCreateLocal(size_t max_bytes, bool insert_only);
@ -42,6 +43,9 @@ extern void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumbe
extern bool TidStoreIsMember(TidStore *ts, ItemPointer tid); extern bool TidStoreIsMember(TidStore *ts, ItemPointer tid);
extern TidStoreIter *TidStoreBeginIterate(TidStore *ts); extern TidStoreIter *TidStoreBeginIterate(TidStore *ts);
extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter); extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter);
extern int TidStoreGetBlockOffsets(TidStoreIterResult *result,
OffsetNumber *offsets,
int max_offsets);
extern void TidStoreEndIterate(TidStoreIter *iter); extern void TidStoreEndIterate(TidStoreIter *iter);
extern size_t TidStoreMemoryUsage(TidStore *ts); extern size_t TidStoreMemoryUsage(TidStore *ts);
extern dsa_pointer TidStoreGetHandle(TidStore *ts); extern dsa_pointer TidStoreGetHandle(TidStore *ts);

View File

@ -267,9 +267,14 @@ check_set_block_offsets(PG_FUNCTION_ARGS)
iter = TidStoreBeginIterate(tidstore); iter = TidStoreBeginIterate(tidstore);
while ((iter_result = TidStoreIterateNext(iter)) != NULL) while ((iter_result = TidStoreIterateNext(iter)) != NULL)
{ {
for (int i = 0; i < iter_result->num_offsets; i++) OffsetNumber offsets[MaxOffsetNumber];
int num_offsets;
num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
Assert(num_offsets <= lengthof(offsets));
for (int i = 0; i < num_offsets; i++)
ItemPointerSet(&(items.iter_tids[num_iter_tids++]), iter_result->blkno, ItemPointerSet(&(items.iter_tids[num_iter_tids++]), iter_result->blkno,
iter_result->offsets[i]); offsets[i]);
} }
TidStoreEndIterate(iter); TidStoreEndIterate(iter);
TidStoreUnlock(tidstore); TidStoreUnlock(tidstore);