diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 69a81f3246..410d02a394 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -694,6 +694,7 @@ CLOGShmemInit(void) SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER, SYNC_HANDLER_CLOG); + SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE); } /* @@ -912,13 +913,22 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) /* - * Decide which of two CLOG page numbers is "older" for truncation purposes. + * Decide whether a CLOG page number is "older" for truncation purposes. * * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * thing with wraparound XID arithmetic. However, TransactionIdPrecedes() + * would get weird about permanent xact IDs. So, offset both such that xid1, + * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset + * is relevant to page 0 and to the page preceding page 0. + * + * The page containing oldestXact-2^31 is the important edge case. The + * portion of that page equaling or following oldestXact-2^31 is expendable, + * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is + * the first XID of a page and segment, the entire page and segment is + * expendable, and we could truncate the segment. Recognizing that case would + * require making oldestXact, not just the page containing oldestXact, + * available to this callback. The benefit would be rare and small, so we + * don't optimize that edge case. */ static bool CLOGPagePrecedes(int page1, int page2) @@ -927,11 +937,12 @@ CLOGPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + CLOG_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index b786eeff7a..9f42461e12 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -557,6 +557,7 @@ CommitTsShmemInit(void) CommitTsSLRULock, "pg_commit_ts", LWTRANCHE_COMMITTS_BUFFER, SYNC_HANDLER_COMMIT_TS); + SlruPagePrecedesUnitTests(CommitTsCtl, COMMIT_TS_XACTS_PER_PAGE); commitTsShared = ShmemInitStruct("CommitTs shared", sizeof(CommitTimestampShared), @@ -927,14 +928,27 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) /* - * Decide which of two commitTS page numbers is "older" for truncation - * purposes. + * Decide whether a commitTS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This + * introduces differences compared to CLOG and the other SLRUs having (1 << + * 31) % per_page == 0. This function never tests exactly + * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit, + * there are two possible counts of page boundaries between oldestXact and the + * latest XID assigned, depending on whether oldestXact is within the first + * 128 entries of its page. Since this function doesn't know the location of + * oldestXact within page2, it returns false for one page that actually is + * expendable. This is a wider (yet still negligible) version of the + * truncation opportunity that CLOGPagePrecedes() cannot recognize. + * + * For the sake of a worked example, number entries with decimal values such + * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of + * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1, + * then the final safe XID assignment leaves newestXact=1.95. We keep page 2, + * because entry=2.85 is the border that toggles whether entries precede the + * last entry of the oldestXact page. While page 2 is expendable at + * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9. */ static bool CommitTsPagePrecedes(int page1, int page2) @@ -943,11 +957,12 @@ CommitTsPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * COMMIT_TS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * COMMIT_TS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + COMMIT_TS_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 1233448481..7dcfa02323 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -1852,11 +1852,13 @@ MultiXactShmemInit(void) MultiXactOffsetSLRULock, "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER, SYNC_HANDLER_MULTIXACT_OFFSET); + SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE); SimpleLruInit(MultiXactMemberCtl, "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0, MultiXactMemberSLRULock, "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER, SYNC_HANDLER_MULTIXACT_MEMBER); + /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */ /* Initialize our shared state struct */ MultiXactState = ShmemInitStruct("Shared MultiXact State", @@ -2982,6 +2984,14 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) * truncate the members SLRU. So we first scan the directory to determine * the earliest offsets page number that we can read without error. * + * When nextMXact is less than one segment away from multiWrapLimit, + * SlruScanDirCbFindEarliest can find some early segment other than the + * actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST) + * returns false, because not all pairs of entries have the same answer.) + * That can also arise when an earlier truncation attempt failed unlink() + * or returned early from this function. The only consequence is + * returning early, which wastes space that we could have liberated. + * * NB: It's also possible that the page that oldestMulti is on has already * been truncated away, and we crashed before updating oldestMulti. */ @@ -3096,15 +3106,11 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) } /* - * Decide which of two MultiXactOffset page numbers is "older" for truncation - * purposes. + * Decide whether a MultiXactOffset page number is "older" for truncation + * purposes. Analogous to CLOGPagePrecedes(). * - * We need to use comparison of MultiXactId here in order to do the right - * thing with wraparound. However, if we are asked about page number zero, we - * don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get - * weird. So, offset both multis by FirstMultiXactId to avoid that. - * (Actually, the current implementation doesn't do anything weird with - * InvalidMultiXactId, but there's no harm in leaving this code like this.) + * Offsetting the values is optional, because MultiXactIdPrecedes() has + * translational symmetry. */ static bool MultiXactOffsetPagePrecedes(int page1, int page2) @@ -3113,15 +3119,17 @@ MultiXactOffsetPagePrecedes(int page1, int page2) MultiXactId multi2; multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE; - multi1 += FirstMultiXactId; + multi1 += FirstMultiXactId + 1; multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE; - multi2 += FirstMultiXactId; + multi2 += FirstMultiXactId + 1; - return MultiXactIdPrecedes(multi1, multi2); + return (MultiXactIdPrecedes(multi1, multi2) && + MultiXactIdPrecedes(multi1, + multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1)); } /* - * Decide which of two MultiXactMember page numbers is "older" for truncation + * Decide whether a MultiXactMember page number is "older" for truncation * purposes. There is no "invalid offset number" so use the numbers verbatim. */ static bool @@ -3133,7 +3141,9 @@ MultiXactMemberPagePrecedes(int page1, int page2) offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE; offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE; - return MultiXactOffsetPrecedes(offset1, offset2); + return (MultiXactOffsetPrecedes(offset1, offset2) && + MultiXactOffsetPrecedes(offset1, + offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1)); } /* diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 244e518e46..e49e06e896 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1230,11 +1230,6 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage) /* update the stats counter of truncates */ pgstat_count_slru_truncate(shared->slru_stats_idx); - /* - * The cutoff point is the start of the segment containing cutoffPage. - */ - cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - /* * Scan shared memory and remove any pages preceding the cutoff page, to * ensure we won't rewrite them later. (Since this is normally called in @@ -1247,9 +1242,7 @@ restart:; /* * While we are holding the lock, make an important safety check: the - * planned cutoff point must be <= the current endpoint page. Otherwise we - * have already wrapped around, and proceeding with the truncation would - * risk removing the current segment. + * current endpoint page must not be eligible for removal. */ if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage)) { @@ -1281,8 +1274,11 @@ restart:; * Hmm, we have (or may have) I/O operations acting on the page, so * we've got to wait for them to finish and then start again. This is * the same logic as in SlruSelectLRUPage. (XXX if page is dirty, - * wouldn't it be OK to just discard it without writing it? For now, - * keep the logic the same as it was.) + * wouldn't it be OK to just discard it without writing it? + * SlruMayDeleteSegment() uses a stricter qualification, so we might + * not delete this page in the end; even if we don't delete it, we + * won't have cause to read its data again. For now, keep the logic + * the same as it was.) */ if (shared->page_status[slotno] == SLRU_PAGE_VALID) SlruInternalWritePage(ctl, slotno, NULL); @@ -1377,19 +1373,134 @@ restart: LWLockRelease(shared->ControlLock); } +/* + * Determine whether a segment is okay to delete. + * + * segpage is the first page of the segment, and cutoffPage is the oldest (in + * PagePrecedes order) page in the SLRU containing still-useful data. Since + * every core PagePrecedes callback implements "wrap around", check the + * segment's first and last pages: + * + * first=cutoff: no; cutoff falls inside this segment + * first>=cutoff && last=cutoff && last>=cutoff: no; every page of this segment is too young + */ +static bool +SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage) +{ + int seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1; + + Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0); + + return (ctl->PagePrecedes(segpage, cutoffPage) && + ctl->PagePrecedes(seg_last_page, cutoffPage)); +} + +#ifdef USE_ASSERT_CHECKING +static void +SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) +{ + TransactionId lhs, + rhs; + int newestPage, + oldestPage; + TransactionId newestXact, + oldestXact; + + /* + * Compare an XID pair having undefined order (see RFC 1982), a pair at + * "opposite ends" of the XID space. TransactionIdPrecedes() treats each + * as preceding the other. If RHS is oldestXact, LHS is the first XID we + * must not assign. + */ + lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */ + rhs = lhs + (1U << 31); + Assert(TransactionIdPrecedes(lhs, rhs)); + Assert(TransactionIdPrecedes(rhs, lhs)); + Assert(!TransactionIdPrecedes(lhs - 1, rhs)); + Assert(TransactionIdPrecedes(rhs, lhs - 1)); + Assert(TransactionIdPrecedes(lhs + 1, rhs)); + Assert(!TransactionIdPrecedes(rhs, lhs + 1)); + Assert(!TransactionIdFollowsOrEquals(lhs, rhs)); + Assert(!TransactionIdFollowsOrEquals(rhs, lhs)); + Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page)); + Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page)); + Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page)); + Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page) + || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */ + Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page) + || (1U << 31) % per_page != 0); + Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page)); + Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page)); + Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page)); + + /* + * GetNewTransactionId() has assigned the last XID it can safely use, and + * that XID is in the *LAST* page of the second segment. We must not + * delete that segment. + */ + newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; + Assert(!SlruMayDeleteSegment(ctl, + (newestPage - + newestPage % SLRU_PAGES_PER_SEGMENT), + oldestPage)); + + /* + * GetNewTransactionId() has assigned the last XID it can safely use, and + * that XID is in the *FIRST* page of the second segment. We must not + * delete that segment. + */ + newestPage = SLRU_PAGES_PER_SEGMENT; + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; + Assert(!SlruMayDeleteSegment(ctl, + (newestPage - + newestPage % SLRU_PAGES_PER_SEGMENT), + oldestPage)); +} + +/* + * Unit-test a PagePrecedes function. + * + * This assumes every uint32 >= FirstNormalTransactionId is a valid key. It + * assumes each value occupies a contiguous, fixed-size region of SLRU bytes. + * (MultiXactMemberCtl separates flags from XIDs. AsyncCtl has + * variable-length entries, no keys, and no random access. These unit tests + * do not apply to them.) + */ +void +SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page) +{ + /* Test first, middle and last entries of a page. */ + SlruPagePrecedesTestOffset(ctl, per_page, 0); + SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2); + SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1); +} +#endif + /* * SlruScanDirectory callback - * This callback reports true if there's any segment prior to the one - * containing the page passed as "data". + * This callback reports true if there's any segment wholly prior to the + * one containing the page passed as "data". */ bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data) { int cutoffPage = *(int *) data; - cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - - if (ctl->PagePrecedes(segpage, cutoffPage)) + if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) return true; /* found one; don't iterate any more */ return false; /* keep going */ @@ -1404,7 +1515,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) { int cutoffPage = *(int *) data; - if (ctl->PagePrecedes(segpage, cutoffPage)) + if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT); return false; /* keep going */ diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 8cdc9e0ab7..6a8e521f89 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -194,6 +194,7 @@ SUBTRANSShmemInit(void) SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0, SubtransSLRULock, "pg_subtrans", LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE); + SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE); } /* @@ -354,13 +355,8 @@ TruncateSUBTRANS(TransactionId oldestXact) /* - * Decide which of two SUBTRANS page numbers is "older" for truncation purposes. - * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * Decide whether a SUBTRANS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). */ static bool SubTransPagePrecedes(int page1, int page2) @@ -369,9 +365,10 @@ SubTransPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 7c133ec8d3..42b232d98b 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -490,7 +490,12 @@ asyncQueuePageDiff(int p, int q) return diff; } -/* Is p < q, accounting for wraparound? */ +/* + * Is p < q, accounting for wraparound? + * + * Since asyncQueueIsFull() blocks creation of a page that could precede any + * extant page, we need not assess entries within a page. + */ static bool asyncQueuePagePrecedes(int p, int q) { @@ -1352,8 +1357,8 @@ asyncQueueIsFull(void) * logically precedes the current global tail pointer, ie, the head * pointer would wrap around compared to the tail. We cannot create such * a head page for fear of confusing slru.c. For safety we round the tail - * pointer back to a segment boundary (compare the truncation logic in - * asyncQueueAdvanceTail). + * pointer back to a segment boundary (truncation logic in + * asyncQueueAdvanceTail does not do this, so doing it here is optional). * * Note that this test is *not* dependent on how much space there is on * the current head page. This is necessary because asyncQueueAddEntries diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 1b262d645b..074df5b38c 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -438,7 +438,7 @@ static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT static void ReleaseRWConflict(RWConflict conflict); static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); -static bool SerialPagePrecedesLogically(int p, int q); +static bool SerialPagePrecedesLogically(int page1, int page2); static void SerialInit(void); static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo); static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid); @@ -784,28 +784,80 @@ FlagSxactUnsafe(SERIALIZABLEXACT *sxact) /*------------------------------------------------------------------------*/ /* - * We will work on the page range of 0..SERIAL_MAX_PAGE. - * Compares using wraparound logic, as is required by slru.c. + * Decide whether a Serial page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). */ static bool -SerialPagePrecedesLogically(int p, int q) +SerialPagePrecedesLogically(int page1, int page2) { - int diff; + TransactionId xid1; + TransactionId xid2; + + xid1 = ((TransactionId) page1) * SERIAL_ENTRIESPERPAGE; + xid1 += FirstNormalTransactionId + 1; + xid2 = ((TransactionId) page2) * SERIAL_ENTRIESPERPAGE; + xid2 += FirstNormalTransactionId + 1; + + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + SERIAL_ENTRIESPERPAGE - 1)); +} + +#ifdef USE_ASSERT_CHECKING +static void +SerialPagePrecedesLogicallyUnitTests(void) +{ + int per_page = SERIAL_ENTRIESPERPAGE, + offset = per_page / 2; + int newestPage, + oldestPage, + headPage, + targetPage; + TransactionId newestXact, + oldestXact; + + /* GetNewTransactionId() has assigned the last XID it can safely use. */ + newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; /* nothing special */ + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; /* - * We have to compare modulo (SERIAL_MAX_PAGE+1)/2. Both inputs should be - * in the range 0..SERIAL_MAX_PAGE. + * In this scenario, the SLRU headPage pertains to the last ~1000 XIDs + * assigned. oldestXact finishes, ~2B XIDs having elapsed since it + * started. Further transactions cause us to summarize oldestXact to + * tailPage. Function must return false so SerialAdd() doesn't zero + * tailPage (which may contain entries for other old, recently-finished + * XIDs) and half the SLRU. Reaching this requires burning ~2B XIDs in + * single-user mode, a negligible possibility. */ - Assert(p >= 0 && p <= SERIAL_MAX_PAGE); - Assert(q >= 0 && q <= SERIAL_MAX_PAGE); + headPage = newestPage; + targetPage = oldestPage; + Assert(!SerialPagePrecedesLogically(headPage, targetPage)); - diff = p - q; - if (diff >= ((SERIAL_MAX_PAGE + 1) / 2)) - diff -= SERIAL_MAX_PAGE + 1; - else if (diff < -((int) (SERIAL_MAX_PAGE + 1) / 2)) - diff += SERIAL_MAX_PAGE + 1; - return diff < 0; + /* + * In this scenario, the SLRU headPage pertains to oldestXact. We're + * summarizing an XID near newestXact. (Assume few other XIDs used + * SERIALIZABLE, hence the minimal headPage advancement. Assume + * oldestXact was long-running and only recently reached the SLRU.) + * Function must return true to make SerialAdd() create targetPage. + * + * Today's implementation mishandles this case, but it doesn't matter + * enough to fix. Verify that the defect affects just one page by + * asserting correct treatment of its prior page. Reaching this case + * requires burning ~2B XIDs in single-user mode, a negligible + * possibility. Moreover, if it does happen, the consequence would be + * mild, namely a new transaction failing in SimpleLruReadPage(). + */ + headPage = oldestPage; + targetPage = newestPage; + Assert(SerialPagePrecedesLogically(headPage, targetPage - 1)); +#if 0 + Assert(SerialPagePrecedesLogically(headPage, targetPage)); +#endif } +#endif /* * Initialize for the tracking of old serializable committed xids. @@ -822,6 +874,10 @@ SerialInit(void) SimpleLruInit(SerialSlruCtl, "Serial", NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial", LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE); +#ifdef USE_ASSERT_CHECKING + SerialPagePrecedesLogicallyUnitTests(); +#endif + SlruPagePrecedesUnitTests(SerialSlruCtl, SERIAL_ENTRIESPERPAGE); /* * Create or attach to the SerialControl structure. @@ -1030,7 +1086,7 @@ CheckPointPredicate(void) } else { - /* + /*---------- * The SLRU is no longer needed. Truncate to head before we set head * invalid. * @@ -1039,6 +1095,25 @@ CheckPointPredicate(void) * that we leave behind will appear to be new again. In that case it * won't be removed until XID horizon advances enough to make it * current again. + * + * XXX: This should happen in vac_truncate_clog(), not in checkpoints. + * Consider this scenario, starting from a system with no in-progress + * transactions and VACUUM FREEZE having maximized oldestXact: + * - Start a SERIALIZABLE transaction. + * - Start, finish, and summarize a SERIALIZABLE transaction, creating + * one SLRU page. + * - Consume XIDs to reach xidStopLimit. + * - Finish all transactions. Due to the long-running SERIALIZABLE + * transaction, earlier checkpoints did not touch headPage. The + * next checkpoint will change it, but that checkpoint happens after + * the end of the scenario. + * - VACUUM to advance XID limits. + * - Consume ~2M XIDs, crossing the former xidWrapLimit. + * - Start, finish, and summarize a SERIALIZABLE transaction. + * SerialAdd() declines to create the targetPage, because headPage + * is not regarded as in the past relative to that targetPage. The + * transaction instigating the summarize fails in + * SimpleLruReadPage(). */ tailPage = serialControl->headPage; serialControl->headPage = -1; diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 34964675d2..dd52e8cec7 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -118,9 +118,14 @@ typedef struct SlruCtlData SyncRequestHandler sync_handler; /* - * Decide which of two page numbers is "older" for truncation purposes. We - * need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. + * Decide whether a page is "older" for truncation and as a hint for + * evicting pages in LRU order. Return true if every entry of the first + * argument is older than every entry of the second argument. Note that + * !PagePrecedes(a,b) && !PagePrecedes(b,a) need not imply a==b; it also + * arises when some entries are older and some are not. For SLRUs using + * SimpleLruTruncate(), this must use modular arithmetic. (For others, + * the behavior of this callback has no functional implications.) Use + * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria. */ bool (*PagePrecedes) (int, int); @@ -145,6 +150,11 @@ extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno); extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied); +#ifdef USE_ASSERT_CHECKING +extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); +#else +#define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0) +#endif extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);