mirror of https://github.com/postgres/postgres
Optimize pg_visibility with read streams.
We've measured 5% performance improvement, and this arranges to benefit automatically from future optimizations to the read_stream subsystem. The area lacked test coverage, so close that gap. Nazir Bilal Yavuz Discussion: https://postgr.es/m/CAN55FZ1_Ru3XpMgTwsU67FTH2fs_FrRROmb7x6zs+F44QBEiww@mail.gmail.com Discussion: https://postgr.es/m/CAEudQAozv3wTY5TV2t29JcwPydbmKbiWQkZD42S2OgzdixPMDQ@mail.gmail.com
This commit is contained in:
parent
52c707483c
commit
65c310b310
|
@ -36,6 +36,7 @@ tests += {
|
|||
'tap': {
|
||||
'tests': [
|
||||
't/001_concurrent_transaction.pl',
|
||||
't/002_corrupt_vm.pl',
|
||||
],
|
||||
},
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "storage/bufmgr.h"
|
||||
#include "storage/proc.h"
|
||||
#include "storage/procarray.h"
|
||||
#include "storage/read_stream.h"
|
||||
#include "storage/smgr.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/snapmgr.h"
|
||||
|
@ -41,6 +42,17 @@ typedef struct corrupt_items
|
|||
ItemPointer tids;
|
||||
} corrupt_items;
|
||||
|
||||
/* for collect_corrupt_items_read_stream_next_block */
|
||||
struct collect_corrupt_items_read_stream_private
|
||||
{
|
||||
bool all_frozen;
|
||||
bool all_visible;
|
||||
BlockNumber current_blocknum;
|
||||
BlockNumber last_exclusive;
|
||||
Relation rel;
|
||||
Buffer vmbuffer;
|
||||
};
|
||||
|
||||
PG_FUNCTION_INFO_V1(pg_visibility_map);
|
||||
PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
|
||||
PG_FUNCTION_INFO_V1(pg_visibility);
|
||||
|
@ -478,6 +490,8 @@ collect_visibility_data(Oid relid, bool include_pd)
|
|||
BlockNumber blkno;
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
|
||||
BlockRangeReadStreamPrivate p;
|
||||
ReadStream *stream = NULL;
|
||||
|
||||
rel = relation_open(relid, AccessShareLock);
|
||||
|
||||
|
@ -489,6 +503,20 @@ collect_visibility_data(Oid relid, bool include_pd)
|
|||
info->next = 0;
|
||||
info->count = nblocks;
|
||||
|
||||
/* Create a stream if reading main fork. */
|
||||
if (include_pd)
|
||||
{
|
||||
p.current_blocknum = 0;
|
||||
p.last_exclusive = nblocks;
|
||||
stream = read_stream_begin_relation(READ_STREAM_FULL,
|
||||
bstrategy,
|
||||
rel,
|
||||
MAIN_FORKNUM,
|
||||
block_range_read_stream_cb,
|
||||
&p,
|
||||
0);
|
||||
}
|
||||
|
||||
for (blkno = 0; blkno < nblocks; ++blkno)
|
||||
{
|
||||
int32 mapbits;
|
||||
|
@ -513,8 +541,7 @@ collect_visibility_data(Oid relid, bool include_pd)
|
|||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
|
||||
bstrategy);
|
||||
buffer = read_stream_next_buffer(stream, NULL);
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
|
||||
page = BufferGetPage(buffer);
|
||||
|
@ -525,6 +552,12 @@ collect_visibility_data(Oid relid, bool include_pd)
|
|||
}
|
||||
}
|
||||
|
||||
if (include_pd)
|
||||
{
|
||||
Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
|
||||
read_stream_end(stream);
|
||||
}
|
||||
|
||||
/* Clean up. */
|
||||
if (vmbuffer != InvalidBuffer)
|
||||
ReleaseBuffer(vmbuffer);
|
||||
|
@ -610,6 +643,38 @@ GetStrictOldestNonRemovableTransactionId(Relation rel)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback function to get next block for read stream object used in
|
||||
* collect_corrupt_items() function.
|
||||
*/
|
||||
static BlockNumber
|
||||
collect_corrupt_items_read_stream_next_block(ReadStream *stream,
|
||||
void *callback_private_data,
|
||||
void *per_buffer_data)
|
||||
{
|
||||
struct collect_corrupt_items_read_stream_private *p = callback_private_data;
|
||||
|
||||
for (; p->current_blocknum < p->last_exclusive; p->current_blocknum++)
|
||||
{
|
||||
bool check_frozen = false;
|
||||
bool check_visible = false;
|
||||
|
||||
/* Make sure we are interruptible. */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->current_blocknum, &p->vmbuffer))
|
||||
check_frozen = true;
|
||||
if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->current_blocknum, &p->vmbuffer))
|
||||
check_visible = true;
|
||||
if (!check_visible && !check_frozen)
|
||||
continue;
|
||||
|
||||
return p->current_blocknum++;
|
||||
}
|
||||
|
||||
return InvalidBlockNumber;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns a list of items whose visibility map information does not match
|
||||
* the status of the tuples on the page.
|
||||
|
@ -628,12 +693,13 @@ static corrupt_items *
|
|||
collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
|
||||
{
|
||||
Relation rel;
|
||||
BlockNumber nblocks;
|
||||
corrupt_items *items;
|
||||
BlockNumber blkno;
|
||||
Buffer vmbuffer = InvalidBuffer;
|
||||
BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
|
||||
TransactionId OldestXmin = InvalidTransactionId;
|
||||
struct collect_corrupt_items_read_stream_private p;
|
||||
ReadStream *stream;
|
||||
Buffer buffer;
|
||||
|
||||
rel = relation_open(relid, AccessShareLock);
|
||||
|
||||
|
@ -643,8 +709,6 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
|
|||
if (all_visible)
|
||||
OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
|
||||
|
||||
nblocks = RelationGetNumberOfBlocks(rel);
|
||||
|
||||
/*
|
||||
* Guess an initial array size. We don't expect many corrupted tuples, so
|
||||
* start with a small array. This function uses the "next" field to track
|
||||
|
@ -658,34 +722,38 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
|
|||
items->count = 64;
|
||||
items->tids = palloc(items->count * sizeof(ItemPointerData));
|
||||
|
||||
p.current_blocknum = 0;
|
||||
p.last_exclusive = RelationGetNumberOfBlocks(rel);
|
||||
p.rel = rel;
|
||||
p.vmbuffer = InvalidBuffer;
|
||||
p.all_frozen = all_frozen;
|
||||
p.all_visible = all_visible;
|
||||
stream = read_stream_begin_relation(READ_STREAM_FULL,
|
||||
bstrategy,
|
||||
rel,
|
||||
MAIN_FORKNUM,
|
||||
collect_corrupt_items_read_stream_next_block,
|
||||
&p,
|
||||
0);
|
||||
|
||||
/* Loop over every block in the relation. */
|
||||
for (blkno = 0; blkno < nblocks; ++blkno)
|
||||
while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
|
||||
{
|
||||
bool check_frozen = false;
|
||||
bool check_visible = false;
|
||||
Buffer buffer;
|
||||
bool check_frozen = all_frozen;
|
||||
bool check_visible = all_visible;
|
||||
Page page;
|
||||
OffsetNumber offnum,
|
||||
maxoff;
|
||||
BlockNumber blkno;
|
||||
|
||||
/* Make sure we are interruptible. */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
/* Use the visibility map to decide whether to check this page. */
|
||||
if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
|
||||
check_frozen = true;
|
||||
if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
|
||||
check_visible = true;
|
||||
if (!check_visible && !check_frozen)
|
||||
continue;
|
||||
|
||||
/* Read and lock the page. */
|
||||
buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
|
||||
bstrategy);
|
||||
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
||||
|
||||
page = BufferGetPage(buffer);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
blkno = BufferGetBlockNumber(buffer);
|
||||
|
||||
/*
|
||||
* The visibility map bits might have changed while we were acquiring
|
||||
|
@ -778,10 +846,13 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
|
|||
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
read_stream_end(stream);
|
||||
|
||||
/* Clean up. */
|
||||
if (vmbuffer != InvalidBuffer)
|
||||
ReleaseBuffer(vmbuffer);
|
||||
if (p.vmbuffer != InvalidBuffer)
|
||||
ReleaseBuffer(p.vmbuffer);
|
||||
relation_close(rel, AccessShareLock);
|
||||
|
||||
/*
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
|
||||
|
||||
# Check that pg_check_visible() and pg_check_frozen() report correct TIDs for
|
||||
# corruption.
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use File::Copy;
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
my $node = PostgreSQL::Test::Cluster->new('main');
|
||||
$node->init;
|
||||
# Anything holding a snapshot, including auto-analyze of pg_proc, could stop
|
||||
# VACUUM from updating the visibility map.
|
||||
$node->append_conf('postgresql.conf', 'autovacuum=off');
|
||||
$node->start;
|
||||
|
||||
my $blck_size = $node->safe_psql("postgres", "SHOW block_size;");
|
||||
|
||||
# Create a sample table with at least 10 pages and then run VACUUM. 10 is
|
||||
# selected manually as it is big enough to select 5 random tuples from the
|
||||
# relation.
|
||||
$node->safe_psql(
|
||||
'postgres', qq(
|
||||
CREATE EXTENSION pg_visibility;
|
||||
CREATE TABLE corruption_test
|
||||
WITH (autovacuum_enabled = false) AS
|
||||
SELECT
|
||||
i,
|
||||
repeat('a', 10) AS data
|
||||
FROM
|
||||
generate_series(1, $blck_size) i;
|
||||
VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) corruption_test;
|
||||
));
|
||||
|
||||
# VACUUM is run, it is safe to get the number of pages.
|
||||
my $npages = $node->safe_psql(
|
||||
"postgres",
|
||||
"SELECT relpages FROM pg_class
|
||||
WHERE relname = 'corruption_test';"
|
||||
);
|
||||
ok($npages >= 10, 'table has at least 10 pages');
|
||||
|
||||
my $file = $node->safe_psql("postgres",
|
||||
"SELECT pg_relation_filepath('corruption_test');");
|
||||
|
||||
# Delete the first block to make sure that it will be skipped as it is
|
||||
# not visible nor frozen.
|
||||
$node->safe_psql(
|
||||
"postgres",
|
||||
"DELETE FROM corruption_test
|
||||
WHERE (ctid::text::point)[0] = 0;"
|
||||
);
|
||||
|
||||
# Copy visibility map.
|
||||
$node->stop;
|
||||
my $vm_file = $node->data_dir . '/' . $file . '_vm';
|
||||
copy("$vm_file", "${vm_file}_temp");
|
||||
$node->start;
|
||||
|
||||
# Select 5 random tuples that are starting from the second block of the
|
||||
# relation. The first block is skipped because it is deleted above.
|
||||
my $tuples = $node->safe_psql(
|
||||
"postgres",
|
||||
"SELECT ctid FROM (
|
||||
SELECT ctid FROM corruption_test
|
||||
WHERE (ctid::text::point)[0] != 0
|
||||
ORDER BY random() LIMIT 5)
|
||||
ORDER BY ctid ASC;"
|
||||
);
|
||||
|
||||
# Do the changes below to use tuples in the query.
|
||||
# "\n" -> ","
|
||||
# "(" -> "'("
|
||||
# ")" -> ")'"
|
||||
(my $tuples_query = $tuples) =~ s/\n/,/g;
|
||||
$tuples_query =~ s/\(/\'\(/g;
|
||||
$tuples_query =~ s/\)/\)\'/g;
|
||||
|
||||
$node->safe_psql(
|
||||
"postgres",
|
||||
"DELETE FROM corruption_test
|
||||
WHERE ctid in ($tuples_query);"
|
||||
);
|
||||
|
||||
# Overwrite visibility map with the old one.
|
||||
$node->stop;
|
||||
move("${vm_file}_temp", "$vm_file");
|
||||
$node->start;
|
||||
|
||||
my $result = $node->safe_psql(
|
||||
"postgres",
|
||||
"SELECT DISTINCT t_ctid
|
||||
FROM pg_check_visible('corruption_test')
|
||||
ORDER BY t_ctid ASC;"
|
||||
);
|
||||
is($result, $tuples, 'pg_check_visible must report tuples as corrupted');
|
||||
|
||||
$result = $node->safe_psql(
|
||||
"postgres",
|
||||
"SELECT DISTINCT t_ctid
|
||||
FROM pg_check_frozen('corruption_test')
|
||||
ORDER BY t_ctid ASC;"
|
||||
);
|
||||
is($result, $tuples, 'pg_check_frozen must report tuples as corrupted');
|
||||
|
||||
$node->stop;
|
||||
done_testing();
|
Loading…
Reference in New Issue