bufmgr/smgr: Don't cross segment boundaries in StartReadBuffers()

With real AIO it doesn't make sense to cross segment boundaries with one
IO. Add smgrmaxcombine() to allow upper layers to query which buffers can be
merged.

We could continue to cross segment boundaries when not using AIO, but it
doesn't really make sense, because md.c will never be able to perform the read
across the segment boundary in one system call. Which means we'll mark more
buffers as undergoing IO than really makes sense - if another backend desires
to read the same blocks, it'll be blocked longer than necessary. So it seems
better to just never cross the boundary.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/1f6b50a7-38ef-4d87-8246-786d39f46ab9@iki.fi
This commit is contained in:
Andres Freund 2024-10-08 11:37:45 -04:00
parent 488f826c72
commit 755a4c10d1
5 changed files with 65 additions and 0 deletions

View File

@ -1259,6 +1259,7 @@ StartReadBuffersImpl(ReadBuffersOperation *operation,
{
int actual_nblocks = *nblocks;
int io_buffers_len = 0;
int maxcombine = 0;
Assert(*nblocks > 0);
Assert(*nblocks <= MAX_IO_COMBINE_LIMIT);
@ -1290,6 +1291,23 @@ StartReadBuffersImpl(ReadBuffersOperation *operation,
{
/* Extend the readable range to cover this block. */
io_buffers_len++;
/*
* Check how many blocks we can cover with the same IO. The smgr
* implementation might e.g. be limited due to a segment boundary.
*/
if (i == 0 && actual_nblocks > 1)
{
maxcombine = smgrmaxcombine(operation->smgr,
operation->forknum,
blockNum);
if (unlikely(maxcombine < actual_nblocks))
{
elog(DEBUG2, "limiting nblocks at %u from %u to %u",
blockNum, actual_nblocks, maxcombine);
actual_nblocks = maxcombine;
}
}
}
}
*nblocks = actual_nblocks;

View File

@ -803,6 +803,21 @@ buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks)
return iovcnt;
}
/*
 * mdmaxcombine() -- Report how many blocks, counting blocknum itself, a
 *		single IO beginning at blocknum may cover.
 *
 * The answer is the number of blocks from blocknum up to the end of the
 * RELSEG_SIZE-block segment that contains it.  reln and forknum are accepted
 * to match the smgr callback signature but are not consulted here.
 */
uint32
mdmaxcombine(SMgrRelation reln, ForkNumber forknum,
			 BlockNumber blocknum)
{
	const BlockNumber blocks_per_segment = (BlockNumber) RELSEG_SIZE;

	/* Blocks remaining in this segment, starting at (and including) blocknum. */
	return blocks_per_segment - (blocknum % blocks_per_segment);
}
/*
* mdreadv() -- Read the specified blocks from a relation.
*/
@ -833,6 +848,9 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
if (nblocks_this_segment != nblocks)
elog(ERROR, "read crosses segment boundary");
iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
size_this_segment = nblocks_this_segment * BLCKSZ;
transferred_this_segment = 0;
@ -956,6 +974,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
if (nblocks_this_segment != nblocks)
elog(ERROR, "write crosses segment boundary");
iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
size_this_segment = nblocks_this_segment * BLCKSZ;
transferred_this_segment = 0;

View File

@ -88,6 +88,8 @@ typedef struct f_smgr
BlockNumber blocknum, int nblocks, bool skipFsync);
bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum,
void **buffers, BlockNumber nblocks);
@ -117,6 +119,7 @@ static const f_smgr smgrsw[] = {
.smgr_extend = mdextend,
.smgr_zeroextend = mdzeroextend,
.smgr_prefetch = mdprefetch,
.smgr_maxcombine = mdmaxcombine,
.smgr_readv = mdreadv,
.smgr_writev = mdwritev,
.smgr_writeback = mdwriteback,
@ -588,6 +591,19 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
}
/*
 * smgrmaxcombine() - Ask the relation's storage manager how many blocks can
 *		be covered by one IO that starts at blocknum.
 *
 * The count that comes back already includes blocknum itself.  The call is
 * forwarded unchanged to the smgr_maxcombine callback selected by
 * reln->smgr_which.
 */
uint32
smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
			   BlockNumber blocknum)
{
	const f_smgr *impl = &smgrsw[reln->smgr_which];

	return impl->smgr_maxcombine(reln, forknum, blocknum);
}
/*
* smgrreadv() -- read a particular block range from a relation into the
* supplied buffers.
@ -595,6 +611,9 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
* This routine is called from the buffer manager in order to
* instantiate pages in the shared buffer cache. All storage managers
* return pages in the format that POSTGRES expects.
*
* If more than one block is intended to be read, callers need to use
* smgrmaxcombine() to check how many blocks can be combined into one IO.
*/
void
smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
@ -626,6 +645,9 @@ smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
* skipFsync indicates that the caller will make other provisions to
* fsync the relation, so we needn't bother. Temporary relations also
* do not require fsync.
*
* If more than one block is intended to be written, callers need to use
* smgrmaxcombine() to check how many blocks can be combined into one IO.
*/
void
smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,

View File

@ -32,6 +32,8 @@ extern void mdzeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks, bool skipFsync);
extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
extern uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void **buffers, BlockNumber nblocks);
extern void mdwritev(SMgrRelation reln, ForkNumber forknum,

View File

@ -92,6 +92,8 @@ extern void smgrzeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks, bool skipFsync);
extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
extern uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void smgrreadv(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum,
void **buffers, BlockNumber nblocks);