diff --git a/doc/src/sgml/brin.sgml b/doc/src/sgml/brin.sgml
index 5140a38baa..ad11109775 100644
--- a/doc/src/sgml/brin.sgml
+++ b/doc/src/sgml/brin.sgml
@@ -80,6 +80,10 @@
    or by automatic summarization executed by autovacuum, as insertions
    occur.  (This last trigger is disabled by default and can be enabled
    with the autosummarize parameter.)
+   Conversely, a range can be de-summarized using the
+   brin_desummarize_range(regclass, bigint) function,
+   which is useful when the index tuple is no longer a very good
+   representation because the existing values have changed.
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 25c18d107c..19329dd103 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19660,6 +19660,14 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
     gin_clean_pending_list
+
+    brin_summarize_range
+
+
+
+    brin_desummarize_range
+
+
     shows the functions available for index maintenance tasks.
@@ -19690,6 +19698,13 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
        integer
        summarize the page range covering the given block, if not already summarized
+
+
+        brin_desummarize_range(index regclass, blockNumber bigint)
+
+       void
+       de-summarize the page range covering the given block, if summarized
+
        gin_clean_pending_list(index regclass)
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 86e73b6242..649f3488c2 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -908,6 +908,80 @@ brin_summarize_range(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32((int32) numSummarized);
 }
 
+/*
+ * SQL-callable interface to mark a range as no longer summarized
+ */
+Datum
+brin_desummarize_range(PG_FUNCTION_ARGS)
+{
+	Oid			indexoid = PG_GETARG_OID(0);
+	int64		heapBlk64 = PG_GETARG_INT64(1);
+	BlockNumber heapBlk;
+	Oid			heapoid;
+	Relation	heapRel;
+	Relation	indexRel;
+	bool		done;
+
+	if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
+	{
+		char	   *blk = psprintf(INT64_FORMAT, heapBlk64);
+
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("block number out of range: %s", blk)));
+	}
+	heapBlk = (BlockNumber) heapBlk64;
+
+	/*
+	 * We must lock table before index to avoid deadlocks.  However, if the
+	 * passed indexoid isn't an index then IndexGetRelation() will fail.
+	 * Rather than emitting a not-very-helpful error message, postpone
+	 * complaining, expecting that the is-it-an-index test below will fail.
+	 */
+	heapoid = IndexGetRelation(indexoid, true);
+	if (OidIsValid(heapoid))
+		heapRel = heap_open(heapoid, ShareUpdateExclusiveLock);
+	else
+		heapRel = NULL;
+
+	indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
+
+	/* Must be a BRIN index */
+	if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
+		indexRel->rd_rel->relam != BRIN_AM_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a BRIN index",
+						RelationGetRelationName(indexRel))));
+
+	/* User must own the index (comparable to privileges needed for VACUUM) */
+	if (!pg_class_ownercheck(indexoid, GetUserId()))
+		aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
+					   RelationGetRelationName(indexRel));
+
+	/*
+	 * Since we did the IndexGetRelation call above without any lock, it's
+	 * barely possible that a race against an index drop/recreation could have
+	 * netted us the wrong table.  Recheck.
+	 */
+	if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_TABLE),
+				 errmsg("could not open parent table of index %s",
+						RelationGetRelationName(indexRel))));
+
+	/* the revmap does the hard work */
+	do {
+		done = brinRevmapDesummarizeRange(indexRel, heapBlk);
+	}
+	while (!done);
+
+	relation_close(indexRel, ShareUpdateExclusiveLock);
+	relation_close(heapRel, ShareUpdateExclusiveLock);
+
+	PG_RETURN_VOID();
+}
+
 /*
  * Build a BrinDesc used to create or scan a BRIN index
  */
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
index 5d45b48fd9..35e53a2bac 100644
--- a/src/backend/access/brin/brin_revmap.c
+++ b/src/backend/access/brin/brin_revmap.c
@@ -168,9 +168,12 @@ brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
 	iptr = (ItemPointerData *) contents->rm_tids;
 	iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
 
-	ItemPointerSet(iptr,
-				   ItemPointerGetBlockNumber(&tid),
-				   ItemPointerGetOffsetNumber(&tid));
+	if (ItemPointerIsValid(&tid))
+		ItemPointerSet(iptr,
+					   ItemPointerGetBlockNumber(&tid),
+					   ItemPointerGetOffsetNumber(&tid));
+	else
+		ItemPointerSetInvalid(iptr);
 }
 
 /*
@@ -304,6 +307,140 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
 	return NULL;
 }
 
+/*
+ * Delete an index tuple, marking a page range as unsummarized.
+ *
+ * Index must be locked in ShareUpdateExclusiveLock mode.
+ *
+ * Return FALSE if caller should retry.
+ */
+bool
+brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
+{
+	BrinRevmap *revmap;
+	BlockNumber pagesPerRange;
+	RevmapContents *contents;
+	ItemPointerData *iptr;
+	ItemPointerData invalidIptr;
+	BlockNumber revmapBlk;
+	Buffer		revmapBuf;
+	Buffer		regBuf;
+	Page		revmapPg;
+	Page		regPg;
+	OffsetNumber revmapOffset;
+	OffsetNumber regOffset;
+	ItemId		lp;
+	BrinTuple  *tup;
+
+	revmap = brinRevmapInitialize(idxrel, &pagesPerRange, NULL);
+
+	revmapBlk = revmap_get_blkno(revmap, heapBlk);
+	if (!BlockNumberIsValid(revmapBlk))
+	{
+		/* revmap page doesn't exist: range not summarized, we're done */
+		brinRevmapTerminate(revmap);
+		return true;
+	}
+
+	/* Lock the revmap page, obtain the index tuple pointer from it */
+	revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
+	revmapPg = BufferGetPage(revmapBuf);
+	revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
+
+	contents = (RevmapContents *) PageGetContents(revmapPg);
+	iptr = contents->rm_tids;
+	iptr += revmapOffset;
+
+	if (!ItemPointerIsValid(iptr))
+	{
+		/* no index tuple: range not summarized, we're done */
+		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+		brinRevmapTerminate(revmap);
+		return true;
+	}
+
+	regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
+	LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
+	regPg = BufferGetPage(regBuf);
+
+	/* if this is no longer a regular page, tell caller to start over */
+	if (!BRIN_IS_REGULAR_PAGE(regPg))
+	{
+		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+		/* drop our pin as well as the lock, else the pin leaks on retry */
+		UnlockReleaseBuffer(regBuf);
+		brinRevmapTerminate(revmap);
+		return false;
+	}
+
+	regOffset = ItemPointerGetOffsetNumber(iptr);
+	if (regOffset > PageGetMaxOffsetNumber(regPg))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("corrupted BRIN index: inconsistent range map")));
+
+	lp = PageGetItemId(regPg, regOffset);
+	if (!ItemIdIsUsed(lp))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("corrupted BRIN index: inconsistent range map")));
+	tup = (BrinTuple *) PageGetItem(regPg, lp);
+	/* XXX apply sanity checks?  Might as well delete a bogus tuple ... */
+
+	/*
+	 * We're only removing data, not reading it, so there's no need to
+	 * TestForOldSnapshot here.
+	 */
+
+	/*
+	 * Because of SUE lock, this function shouldn't run concurrently with
+	 * summarization.  Placeholder tuples can only exist as leftovers from
+	 * crashed summarization, so if we detect any, we complain but proceed.
+	 */
+	if (BrinTupleIsPlaceholder(tup))
+		ereport(WARNING,
+				(errmsg("leftover placeholder tuple detected in BRIN index \"%s\", deleting",
+						RelationGetRelationName(idxrel))));
+
+	START_CRIT_SECTION();
+
+	ItemPointerSetInvalid(&invalidIptr);
+	brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
+							invalidIptr);
+	PageIndexTupleDeleteNoCompact(regPg, regOffset);
+	/* XXX record free space in FSM? */
+
+	MarkBufferDirty(regBuf);
+	MarkBufferDirty(revmapBuf);
+
+	if (RelationNeedsWAL(idxrel))
+	{
+		xl_brin_desummarize xlrec;
+		XLogRecPtr	recptr;
+
+		/* redo and rmgrdesc read pagesPerRange, so it must be filled in */
+		xlrec.pagesPerRange = revmap->rm_pagesPerRange;
+		xlrec.heapBlk = heapBlk;
+		xlrec.regOffset = regOffset;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfBrinDesummarize);
+		XLogRegisterBuffer(0, revmapBuf, 0);
+		XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
+		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
+		PageSetLSN(revmapPg, recptr);
+		PageSetLSN(regPg, recptr);
+	}
+
+	END_CRIT_SECTION();
+
+	UnlockReleaseBuffer(regBuf);
+	LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+	brinRevmapTerminate(revmap);
+
+	return true;
+}
+
 /*
  * Given a heap block number, find the corresponding physical revmap block
  * number and return it.  If the revmap page hasn't been allocated yet, return
diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c
index f416bacc3f..8f5b5ceb3f 100644
--- a/src/backend/access/brin/brin_xlog.c
+++ b/src/backend/access/brin/brin_xlog.c
@@ -254,6 +254,50 @@ brin_xlog_revmap_extend(XLogReaderState *record)
 	UnlockReleaseBuffer(metabuf);
 }
 
+/*
+ * Replay a range de-summarization: clear the revmap entry (block 0), then
+ * delete the index tuple from the regular page (block 1).
+ */
+static void
+brin_xlog_desummarize_page(XLogReaderState *record)
+{
+	XLogRecPtr	lsn = record->EndRecPtr;
+	xl_brin_desummarize *xlrec;
+	Buffer		buffer;
+	XLogRedoAction action;
+
+	xlrec = (xl_brin_desummarize *) XLogRecGetData(record);
+
+	/* Update the revmap */
+	action = XLogReadBufferForRedo(record, 0, &buffer);
+	if (action == BLK_NEEDS_REDO)
+	{
+		ItemPointerData iptr;
+
+		ItemPointerSetInvalid(&iptr);
+		brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, iptr);
+
+		PageSetLSN(BufferGetPage(buffer), lsn);
+		MarkBufferDirty(buffer);
+	}
+	if (BufferIsValid(buffer))
+		UnlockReleaseBuffer(buffer);
+
+	/* remove the leftover entry from the regular page */
+	action = XLogReadBufferForRedo(record, 1, &buffer);
+	if (action == BLK_NEEDS_REDO)
+	{
+		Page		regPg = BufferGetPage(buffer);
+
+		PageIndexTupleDeleteNoCompact(regPg, xlrec->regOffset);
+
+		PageSetLSN(regPg, lsn);
+		MarkBufferDirty(buffer);
+	}
+	if (BufferIsValid(buffer))
+		UnlockReleaseBuffer(buffer);
+}
+
 void
 brin_redo(XLogReaderState *record)
 {
@@ -276,6 +320,9 @@ brin_redo(XLogReaderState *record)
 		case XLOG_BRIN_REVMAP_EXTEND:
 			brin_xlog_revmap_extend(record);
 			break;
+		case XLOG_BRIN_DESUMMARIZE:
+			brin_xlog_desummarize_page(record);
+			break;
 		default:
 			elog(PANIC, "brin_redo: unknown op code %u", info);
 	}
diff --git a/src/backend/access/rmgrdesc/brindesc.c b/src/backend/access/rmgrdesc/brindesc.c
index b58cb5bde9..8eb5275a8b 100644
--- a/src/backend/access/rmgrdesc/brindesc.c
+++ b/src/backend/access/rmgrdesc/brindesc.c
@@ -61,6 +61,13 @@ brin_desc(StringInfo buf, XLogReaderState *record)
 
 		appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
 	}
+	else if (info == XLOG_BRIN_DESUMMARIZE)
+	{
+		xl_brin_desummarize *xlrec = (xl_brin_desummarize *) rec;
+
+		appendStringInfo(buf, "pagesPerRange %u, heapBlk %u, page offset %u",
+						 xlrec->pagesPerRange, xlrec->heapBlk, xlrec->regOffset);
+	}
 }
 
 const char *
@@ -91,6 +98,9 @@ brin_identify(uint8 info)
 		case XLOG_BRIN_REVMAP_EXTEND:
 			id = "REVMAP_EXTEND";
 			break;
+		case XLOG_BRIN_DESUMMARIZE:
+			id = "DESUMMARIZE";
+			break;
 	}
 
 	return id;
diff --git a/src/include/access/brin_revmap.h b/src/include/access/brin_revmap.h
index 2ec4169f6d..7fdcf877f4 100644
--- a/src/include/access/brin_revmap.h
+++ b/src/include/access/brin_revmap.h
@@ -36,5 +36,6 @@ extern void brinSetHeapBlockItemptr(Buffer rmbuf, BlockNumber pagesPerRange,
 extern BrinTuple *brinGetTupleForHeapBlock(BrinRevmap *revmap,
 						 BlockNumber heapBlk, Buffer *buf, OffsetNumber *off,
 						 Size *size, int mode, Snapshot snapshot);
+extern bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk);
 
 #endif   /* BRIN_REVMAP_H */
diff --git a/src/include/access/brin_xlog.h b/src/include/access/brin_xlog.h
index 33ceb34ea5..89ed334a01 100644
--- a/src/include/access/brin_xlog.h
+++ b/src/include/access/brin_xlog.h
@@ -33,7 +33,7 @@
 #define XLOG_BRIN_UPDATE			0x20
 #define XLOG_BRIN_SAMEPAGE_UPDATE	0x30
 #define XLOG_BRIN_REVMAP_EXTEND		0x40
-#define XLOG_BRIN_REVMAP_VACUUM		0x50
+#define XLOG_BRIN_DESUMMARIZE		0x50
 
 #define XLOG_BRIN_OPMASK			0x70
 /*
@@ -124,6 +124,24 @@ typedef struct xl_brin_revmap_extend
 #define SizeOfBrinRevmapExtend	(offsetof(xl_brin_revmap_extend, targetBlk) + \
 								 sizeof(BlockNumber))
 
+/*
+ * This is what we need to know about a range de-summarization
+ *
+ * Backup block 0: revmap page
+ * Backup block 1: regular page
+ */
+typedef struct xl_brin_desummarize
+{
+	BlockNumber pagesPerRange;
+	/* page number location to set to invalid */
+	BlockNumber heapBlk;
+	/* offset of item to delete in regular index page */
+	OffsetNumber regOffset;
+} xl_brin_desummarize;
+
+#define SizeOfBrinDesummarize	(offsetof(xl_brin_desummarize, regOffset) + \
+								 sizeof(OffsetNumber))
+
 
 extern void brin_redo(XLogReaderState *record);
 extern void brin_desc(StringInfo buf, XLogReaderState *record);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index fa3dcacd32..1db7a4d715 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201704011
+#define CATALOG_VERSION_NO	201704012
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 1b7ab2a997..711211d2e6 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -566,6 +566,8 @@ DATA(insert OID = 3952 ( brin_summarize_new_values PGNSP PGUID 12 1 0 0 0 f f f
 DESCR("brin: standalone scan new table pages");
 DATA(insert OID = 3999 ( brin_summarize_range PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 23 "2205 20" _null_ _null_ _null_ _null_ _null_ brin_summarize_range _null_ _null_ _null_ ));
 DESCR("brin: standalone scan new table pages");
+DATA(insert OID = 4014 ( brin_desummarize_range PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 2278 "2205 20" _null_ _null_ _null_ _null_ _null_ brin_desummarize_range _null_ _null_ _null_ ));
+DESCR("brin: desummarize page range");
 DATA(insert OID = 338 (  amvalidate		PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 16 "26" _null_ _null_ _null_ _null_ _null_	amvalidate _null_ _null_ _null_ ));
 DESCR("validate an operator class");
 
diff --git a/src/test/regress/expected/brin.out b/src/test/regress/expected/brin.out
index 3b9c0db833..a40f87aea0 100644
--- a/src/test/regress/expected/brin.out
+++ b/src/test/regress/expected/brin.out
@@ -392,6 +392,12 @@ INSERT INTO brintest SELECT
 	format('%s/%s%s', odd, even, tenthous)::pg_lsn,
 	box(point(odd, even), point(thousand, twothousand))
 FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
+SELECT brin_desummarize_range('brinidx', 0);
+ brin_desummarize_range 
+------------------------
+ 
+(1 row)
+
 VACUUM brintest;  -- force a summarization cycle in brinidx
 UPDATE brintest SET int8col = int8col * int4col;
 UPDATE brintest SET textcol = '' WHERE textcol IS NOT NULL;
@@ -406,6 +412,27 @@ SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
                          0
 (1 row)
 
+-- Tests for brin_desummarize_range
+SELECT brin_desummarize_range('brinidx', -1); -- error, invalid range
+ERROR:  block number out of range: -1
+SELECT brin_desummarize_range('brinidx', 0);
+ brin_desummarize_range 
+------------------------
+ 
+(1 row)
+
+SELECT brin_desummarize_range('brinidx', 0);
+ brin_desummarize_range 
+------------------------
+ 
+(1 row)
+
+SELECT brin_desummarize_range('brinidx', 100000000);
+ brin_desummarize_range 
+------------------------
+ 
+(1 row)
+
 -- Test brin_summarize_range
 CREATE TABLE brin_summarize (
     value int
diff --git a/src/test/regress/sql/brin.sql b/src/test/regress/sql/brin.sql
index da73df3659..521b22fe56 100644
--- a/src/test/regress/sql/brin.sql
+++ b/src/test/regress/sql/brin.sql
@@ -400,6 +400,7 @@ INSERT INTO brintest SELECT
 	box(point(odd, even), point(thousand, twothousand))
 FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
 
+SELECT brin_desummarize_range('brinidx', 0);
 VACUUM brintest;  -- force a summarization cycle in brinidx
 
 UPDATE brintest SET int8col = int8col * int4col;
@@ -410,6 +411,12 @@ SELECT brin_summarize_new_values('brintest'); -- error, not an index
 SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index
 SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
 
+-- Tests for brin_desummarize_range
+SELECT brin_desummarize_range('brinidx', -1); -- error, invalid range
+SELECT brin_desummarize_range('brinidx', 0);
+SELECT brin_desummarize_range('brinidx', 0);
+SELECT brin_desummarize_range('brinidx', 100000000);
+
 -- Test brin_summarize_range
 CREATE TABLE brin_summarize (
     value int