diff --git a/contrib/pg_visibility/meson.build b/contrib/pg_visibility/meson.build index f3c1263313..609fc5f9f3 100644 --- a/contrib/pg_visibility/meson.build +++ b/contrib/pg_visibility/meson.build @@ -36,6 +36,7 @@ tests += { 'tap': { 'tests': [ 't/001_concurrent_transaction.pl', + 't/002_corrupt_vm.pl', ], }, } diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 773ba92e45..724122b1bc 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -21,6 +21,7 @@ #include "storage/bufmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/read_stream.h" #include "storage/smgr.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -41,6 +42,17 @@ typedef struct corrupt_items ItemPointer tids; } corrupt_items; +/* for collect_corrupt_items_read_stream_next_block */ +struct collect_corrupt_items_read_stream_private +{ + bool all_frozen; + bool all_visible; + BlockNumber current_blocknum; + BlockNumber last_exclusive; + Relation rel; + Buffer vmbuffer; +}; + PG_FUNCTION_INFO_V1(pg_visibility_map); PG_FUNCTION_INFO_V1(pg_visibility_map_rel); PG_FUNCTION_INFO_V1(pg_visibility); @@ -478,6 +490,8 @@ collect_visibility_data(Oid relid, bool include_pd) BlockNumber blkno; Buffer vmbuffer = InvalidBuffer; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); + BlockRangeReadStreamPrivate p; + ReadStream *stream = NULL; rel = relation_open(relid, AccessShareLock); @@ -489,6 +503,20 @@ collect_visibility_data(Oid relid, bool include_pd) info->next = 0; info->count = nblocks; + /* Create a stream if reading main fork. */ + if (include_pd) + { + p.current_blocknum = 0; + p.last_exclusive = nblocks; + stream = read_stream_begin_relation(READ_STREAM_FULL, + bstrategy, + rel, + MAIN_FORKNUM, + block_range_read_stream_cb, + &p, + 0); + } + for (blkno = 0; blkno < nblocks; ++blkno) { int32 mapbits; @@ -513,8 +541,7 @@ collect_visibility_data(Oid relid, bool include_pd) Buffer buffer; Page page; - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - bstrategy); + buffer = read_stream_next_buffer(stream, NULL); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); @@ -525,6 +552,12 @@ collect_visibility_data(Oid relid, bool include_pd) } } + if (include_pd) + { + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + read_stream_end(stream); + } + /* Clean up. */ if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); @@ -610,6 +643,38 @@ GetStrictOldestNonRemovableTransactionId(Relation rel) } } +/* + * Callback function to get next block for read stream object used in + * collect_corrupt_items() function. + */ +static BlockNumber +collect_corrupt_items_read_stream_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + struct collect_corrupt_items_read_stream_private *p = callback_private_data; + + for (; p->current_blocknum < p->last_exclusive; p->current_blocknum++) + { + bool check_frozen = false; + bool check_visible = false; + + /* Make sure we are interruptible. */ + CHECK_FOR_INTERRUPTS(); + + if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->current_blocknum, &p->vmbuffer)) + check_frozen = true; + if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->current_blocknum, &p->vmbuffer)) + check_visible = true; + if (!check_visible && !check_frozen) + continue; + + return p->current_blocknum++; + } + + return InvalidBlockNumber; +} + /* * Returns a list of items whose visibility map information does not match * the status of the tuples on the page. @@ -628,12 +693,13 @@ static corrupt_items * collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) { Relation rel; - BlockNumber nblocks; corrupt_items *items; - BlockNumber blkno; Buffer vmbuffer = InvalidBuffer; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); TransactionId OldestXmin = InvalidTransactionId; + struct collect_corrupt_items_read_stream_private p; + ReadStream *stream; + Buffer buffer; rel = relation_open(relid, AccessShareLock); @@ -643,8 +709,6 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) if (all_visible) OldestXmin = GetStrictOldestNonRemovableTransactionId(rel); - nblocks = RelationGetNumberOfBlocks(rel); - /* * Guess an initial array size. We don't expect many corrupted tuples, so * start with a small array. This function uses the "next" field to track @@ -658,34 +722,38 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) items->count = 64; items->tids = palloc(items->count * sizeof(ItemPointerData)); + p.current_blocknum = 0; + p.last_exclusive = RelationGetNumberOfBlocks(rel); + p.rel = rel; + p.vmbuffer = InvalidBuffer; + p.all_frozen = all_frozen; + p.all_visible = all_visible; + stream = read_stream_begin_relation(READ_STREAM_FULL, + bstrategy, + rel, + MAIN_FORKNUM, + collect_corrupt_items_read_stream_next_block, + &p, + 0); + /* Loop over every block in the relation. */ - for (blkno = 0; blkno < nblocks; ++blkno) + while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer) { - bool check_frozen = false; - bool check_visible = false; - Buffer buffer; + bool check_frozen = all_frozen; + bool check_visible = all_visible; Page page; OffsetNumber offnum, maxoff; + BlockNumber blkno; /* Make sure we are interruptible. */ CHECK_FOR_INTERRUPTS(); - /* Use the visibility map to decide whether to check this page. */ - if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer)) - check_frozen = true; - if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) - check_visible = true; - if (!check_visible && !check_frozen) - continue; - - /* Read and lock the page. */ - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); maxoff = PageGetMaxOffsetNumber(page); + blkno = BufferGetBlockNumber(buffer); /* * The visibility map bits might have changed while we were acquiring @@ -778,10 +846,13 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) UnlockReleaseBuffer(buffer); } + read_stream_end(stream); /* Clean up. */ if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); + if (p.vmbuffer != InvalidBuffer) + ReleaseBuffer(p.vmbuffer); relation_close(rel, AccessShareLock); /* diff --git a/contrib/pg_visibility/t/002_corrupt_vm.pl b/contrib/pg_visibility/t/002_corrupt_vm.pl new file mode 100644 index 0000000000..a30d7c7486 --- /dev/null +++ b/contrib/pg_visibility/t/002_corrupt_vm.pl @@ -0,0 +1,109 @@ +# Copyright (c) 2021-2024, PostgreSQL Global Development Group + +# Check that pg_check_visible() and pg_check_frozen() report correct TIDs for +# corruption. +use strict; +use warnings FATAL => 'all'; +use File::Copy; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init; +# Anything holding a snapshot, including auto-analyze of pg_proc, could stop +# VACUUM from updating the visibility map. +$node->append_conf('postgresql.conf', 'autovacuum=off'); +$node->start; + +my $blck_size = $node->safe_psql("postgres", "SHOW block_size;"); + +# Create a sample table with at least 10 pages and then run VACUUM. 10 is +# selected manually as it is big enough to select 5 random tuples from the +# relation. +$node->safe_psql( + 'postgres', qq( + CREATE EXTENSION pg_visibility; + CREATE TABLE corruption_test + WITH (autovacuum_enabled = false) AS + SELECT + i, + repeat('a', 10) AS data + FROM + generate_series(1, $blck_size) i; + VACUUM (FREEZE, DISABLE_PAGE_SKIPPING) corruption_test; +)); + +# VACUUM is run, it is safe to get the number of pages. +my $npages = $node->safe_psql( + "postgres", + "SELECT relpages FROM pg_class + WHERE relname = 'corruption_test';" +); +ok($npages >= 10, 'table has at least 10 pages'); + +my $file = $node->safe_psql("postgres", + "SELECT pg_relation_filepath('corruption_test');"); + +# Delete the first block to make sure that it will be skipped as it is +# not visible nor frozen. +$node->safe_psql( + "postgres", + "DELETE FROM corruption_test + WHERE (ctid::text::point)[0] = 0;" +); + +# Copy visibility map. +$node->stop; +my $vm_file = $node->data_dir . '/' . $file . '_vm'; +copy("$vm_file", "${vm_file}_temp"); +$node->start; + +# Select 5 random tuples that are starting from the second block of the +# relation. The first block is skipped because it is deleted above. +my $tuples = $node->safe_psql( + "postgres", + "SELECT ctid FROM ( + SELECT ctid FROM corruption_test + WHERE (ctid::text::point)[0] != 0 + ORDER BY random() LIMIT 5) + ORDER BY ctid ASC;" +); + +# Do the changes below to use tuples in the query. +# "\n" -> "," +# "(" -> "'(" +# ")" -> ")'" +(my $tuples_query = $tuples) =~ s/\n/,/g; +$tuples_query =~ s/\(/\'\(/g; +$tuples_query =~ s/\)/\)\'/g; + +$node->safe_psql( + "postgres", + "DELETE FROM corruption_test + WHERE ctid in ($tuples_query);" +); + +# Overwrite visibility map with the old one. +$node->stop; +move("${vm_file}_temp", "$vm_file"); +$node->start; + +my $result = $node->safe_psql( + "postgres", + "SELECT DISTINCT t_ctid + FROM pg_check_visible('corruption_test') + ORDER BY t_ctid ASC;" +); +is($result, $tuples, 'pg_check_visible must report tuples as corrupted'); + +$result = $node->safe_psql( + "postgres", + "SELECT DISTINCT t_ctid + FROM pg_check_frozen('corruption_test') + ORDER BY t_ctid ASC;" +); +is($result, $tuples, 'pg_check_frozen must report tuples as corrupted'); + +$node->stop; +done_testing();