diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 73a7f07588..a1408ae4cb 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -457,6 +457,7 @@ CLOGShmemInit(void) ClogCtl->PagePrecedes = CLOGPagePrecedes; SimpleLruInit(ClogCtl, "clog", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, CLogControlLock, "pg_clog", LWTRANCHE_CLOG_BUFFERS); + SlruPagePrecedesUnitTests(ClogCtl, CLOG_XACTS_PER_PAGE); } /* @@ -669,13 +670,22 @@ TruncateCLOG(TransactionId oldestXact) /* - * Decide which of two CLOG page numbers is "older" for truncation purposes. + * Decide whether a CLOG page number is "older" for truncation purposes. * * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * thing with wraparound XID arithmetic. However, TransactionIdPrecedes() + * would get weird about permanent xact IDs. So, offset both such that xid1, + * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset + * is relevant to page 0 and to the page preceding page 0. + * + * The page containing oldestXact-2^31 is the important edge case. The + * portion of that page equaling or following oldestXact-2^31 is expendable, + * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is + * the first XID of a page and segment, the entire page and segment is + * expendable, and we could truncate the segment. Recognizing that case would + * require making oldestXact, not just the page containing oldestXact, + * available to this callback. The benefit would be rare and small, so we + * don't optimize that edge case. */ static bool CLOGPagePrecedes(int page1, int page2) @@ -684,11 +694,12 @@ CLOGPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + CLOG_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index e460efd8fb..8eea32c6ef 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -494,6 +494,7 @@ CommitTsShmemInit(void) SimpleLruInit(CommitTsCtl, "commit_timestamp", CommitTsShmemBuffers(), 0, CommitTsControlLock, "pg_commit_ts", LWTRANCHE_COMMITTS_BUFFERS); + SlruPagePrecedesUnitTests(CommitTsCtl, COMMIT_TS_XACTS_PER_PAGE); commitTsShared = ShmemInitStruct("CommitTs shared", sizeof(CommitTimestampShared), @@ -870,13 +871,27 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) /* - * Decide which of two CLOG page numbers is "older" for truncation purposes. + * Decide whether a commitTS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This + * introduces differences compared to CLOG and the other SLRUs having (1 << + * 31) % per_page == 0. This function never tests exactly + * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit, + * there are two possible counts of page boundaries between oldestXact and the + * latest XID assigned, depending on whether oldestXact is within the first + * 128 entries of its page. Since this function doesn't know the location of + * oldestXact within page2, it returns false for one page that actually is + * expendable. This is a wider (yet still negligible) version of the + * truncation opportunity that CLOGPagePrecedes() cannot recognize. + * + * For the sake of a worked example, number entries with decimal values such + * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of + * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1, + * then the final safe XID assignment leaves newestXact=1.95. We keep page 2, + * because entry=2.85 is the border that toggles whether entries precede the + * last entry of the oldestXact page. While page 2 is expendable at + * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9. */ static bool CommitTsPagePrecedes(int page1, int page2) @@ -885,11 +900,12 @@ CommitTsPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * COMMIT_TS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * COMMIT_TS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + COMMIT_TS_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 6c9cc1c538..7da993eac0 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -1830,10 +1830,12 @@ MultiXactShmemInit(void) "multixact_offset", NUM_MXACTOFFSET_BUFFERS, 0, MultiXactOffsetControlLock, "pg_multixact/offsets", LWTRANCHE_MXACTOFFSET_BUFFERS); + SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE); SimpleLruInit(MultiXactMemberCtl, "multixact_member", NUM_MXACTMEMBER_BUFFERS, 0, MultiXactMemberControlLock, "pg_multixact/members", LWTRANCHE_MXACTMEMBER_BUFFERS); + /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */ /* Initialize our shared state struct */ MultiXactState = ShmemInitStruct("Shared MultiXact State", @@ -2971,6 +2973,14 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) * truncate the members SLRU. So we first scan the directory to determine * the earliest offsets page number that we can read without error. * + * When nextMXact is less than one segment away from multiWrapLimit, + * SlruScanDirCbFindEarliest can find some early segment other than the + * actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST) + * returns false, because not all pairs of entries have the same answer.) + * That can also arise when an earlier truncation attempt failed unlink() + * or returned early from this function. The only consequence is + * returning early, which wastes space that we could have liberated. + * * NB: It's also possible that the page that oldestMulti is on has already * been truncated away, and we crashed before updating oldestMulti. */ @@ -3085,15 +3095,11 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) } /* - * Decide which of two MultiXactOffset page numbers is "older" for truncation - * purposes. + * Decide whether a MultiXactOffset page number is "older" for truncation + * purposes. Analogous to CLOGPagePrecedes(). * - * We need to use comparison of MultiXactId here in order to do the right - * thing with wraparound. However, if we are asked about page number zero, we - * don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get - * weird. So, offset both multis by FirstMultiXactId to avoid that. - * (Actually, the current implementation doesn't do anything weird with - * InvalidMultiXactId, but there's no harm in leaving this code like this.) + * Offsetting the values is optional, because MultiXactIdPrecedes() has + * translational symmetry. */ static bool MultiXactOffsetPagePrecedes(int page1, int page2) @@ -3102,15 +3108,17 @@ MultiXactOffsetPagePrecedes(int page1, int page2) MultiXactId multi2; multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE; - multi1 += FirstMultiXactId; + multi1 += FirstMultiXactId + 1; multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE; - multi2 += FirstMultiXactId; + multi2 += FirstMultiXactId + 1; - return MultiXactIdPrecedes(multi1, multi2); + return (MultiXactIdPrecedes(multi1, multi2) && + MultiXactIdPrecedes(multi1, + multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1)); } /* - * Decide which of two MultiXactMember page numbers is "older" for truncation + * Decide whether a MultiXactMember page number is "older" for truncation * purposes. There is no "invalid offset number" so use the numbers verbatim. */ static bool @@ -3122,7 +3130,9 @@ MultiXactMemberPagePrecedes(int page1, int page2) offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE; offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE; - return MultiXactOffsetPrecedes(offset1, offset2); + return (MultiXactOffsetPrecedes(offset1, offset2) && + MultiXactOffsetPrecedes(offset1, + offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1)); } /* diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 68b4fb7172..66b55b8a2f 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1173,11 +1173,6 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage) SlruShared shared = ctl->shared; int slotno; - /* - * The cutoff point is the start of the segment containing cutoffPage. - */ - cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - /* * Scan shared memory and remove any pages preceding the cutoff page, to * ensure we won't rewrite them later. (Since this is normally called in @@ -1190,9 +1185,7 @@ restart:; /* * While we are holding the lock, make an important safety check: the - * planned cutoff point must be <= the current endpoint page. Otherwise we - * have already wrapped around, and proceeding with the truncation would - * risk removing the current segment. + * current endpoint page must not be eligible for removal. */ if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage)) { @@ -1224,8 +1217,11 @@ restart:; * Hmm, we have (or may have) I/O operations acting on the page, so * we've got to wait for them to finish and then start again. This is * the same logic as in SlruSelectLRUPage. (XXX if page is dirty, - * wouldn't it be OK to just discard it without writing it? For now, - * keep the logic the same as it was.) + * wouldn't it be OK to just discard it without writing it? + * SlruMayDeleteSegment() uses a stricter qualification, so we might + * not delete this page in the end; even if we don't delete it, we + * won't have cause to read its data again. For now, keep the logic + * the same as it was.) */ if (shared->page_status[slotno] == SLRU_PAGE_VALID) SlruInternalWritePage(ctl, slotno, NULL); @@ -1315,19 +1311,134 @@ restart: LWLockRelease(shared->ControlLock); } +/* + * Determine whether a segment is okay to delete. + * + * segpage is the first page of the segment, and cutoffPage is the oldest (in + * PagePrecedes order) page in the SLRU containing still-useful data. Since + * every core PagePrecedes callback implements "wrap around", check the + * segment's first and last pages: + * + * first=cutoff: no; cutoff falls inside this segment + * first>=cutoff && last=cutoff && last>=cutoff: no; every page of this segment is too young + */ +static bool +SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage) +{ + int seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1; + + Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0); + + return (ctl->PagePrecedes(segpage, cutoffPage) && + ctl->PagePrecedes(seg_last_page, cutoffPage)); +} + +#ifdef USE_ASSERT_CHECKING +static void +SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) +{ + TransactionId lhs, + rhs; + int newestPage, + oldestPage; + TransactionId newestXact, + oldestXact; + + /* + * Compare an XID pair having undefined order (see RFC 1982), a pair at + * "opposite ends" of the XID space. TransactionIdPrecedes() treats each + * as preceding the other. If RHS is oldestXact, LHS is the first XID we + * must not assign. + */ + lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */ + rhs = lhs + (1U << 31); + Assert(TransactionIdPrecedes(lhs, rhs)); + Assert(TransactionIdPrecedes(rhs, lhs)); + Assert(!TransactionIdPrecedes(lhs - 1, rhs)); + Assert(TransactionIdPrecedes(rhs, lhs - 1)); + Assert(TransactionIdPrecedes(lhs + 1, rhs)); + Assert(!TransactionIdPrecedes(rhs, lhs + 1)); + Assert(!TransactionIdFollowsOrEquals(lhs, rhs)); + Assert(!TransactionIdFollowsOrEquals(rhs, lhs)); + Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page)); + Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page)); + Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page)); + Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page)); + Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page) + || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */ + Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page) + || (1U << 31) % per_page != 0); + Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page)); + Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page)); + Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page)); + + /* + * GetNewTransactionId() has assigned the last XID it can safely use, and + * that XID is in the *LAST* page of the second segment. We must not + * delete that segment. + */ + newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; + Assert(!SlruMayDeleteSegment(ctl, + (newestPage - + newestPage % SLRU_PAGES_PER_SEGMENT), + oldestPage)); + + /* + * GetNewTransactionId() has assigned the last XID it can safely use, and + * that XID is in the *FIRST* page of the second segment. We must not + * delete that segment. + */ + newestPage = SLRU_PAGES_PER_SEGMENT; + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; + Assert(!SlruMayDeleteSegment(ctl, + (newestPage - + newestPage % SLRU_PAGES_PER_SEGMENT), + oldestPage)); +} + +/* + * Unit-test a PagePrecedes function. + * + * This assumes every uint32 >= FirstNormalTransactionId is a valid key. It + * assumes each value occupies a contiguous, fixed-size region of SLRU bytes. + * (MultiXactMemberCtl separates flags from XIDs. AsyncCtl has + * variable-length entries, no keys, and no random access. These unit tests + * do not apply to them.) + */ +void +SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page) +{ + /* Test first, middle and last entries of a page. */ + SlruPagePrecedesTestOffset(ctl, per_page, 0); + SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2); + SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1); +} +#endif + /* * SlruScanDirectory callback - * This callback reports true if there's any segment prior to the one - * containing the page passed as "data". + * This callback reports true if there's any segment wholly prior to the + * one containing the page passed as "data". */ bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data) { int cutoffPage = *(int *) data; - cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - - if (ctl->PagePrecedes(segpage, cutoffPage)) + if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) return true; /* found one; don't iterate any more */ return false; /* keep going */ @@ -1342,7 +1453,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) { int cutoffPage = *(int *) data; - if (ctl->PagePrecedes(segpage, cutoffPage)) + if (SlruMayDeleteSegment(ctl, segpage, cutoffPage)) SlruInternalDeleteSegment(ctl, filename); return false; /* keep going */ diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index f040915804..e8376939cd 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -184,6 +184,7 @@ SUBTRANSShmemInit(void) LWTRANCHE_SUBTRANS_BUFFERS); /* Override default assumption that writes should be fsync'd */ SubTransCtl->do_fsync = false; + SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE); } /* @@ -359,13 +360,8 @@ TruncateSUBTRANS(TransactionId oldestXact) /* - * Decide which of two SUBTRANS page numbers is "older" for truncation purposes. - * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. + * Decide whether a SUBTRANS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). */ static bool SubTransPagePrecedes(int page1, int page2) @@ -374,9 +370,10 @@ SubTransPagePrecedes(int page1, int page2) TransactionId xid2; xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; + xid1 += FirstNormalTransactionId + 1; xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; + xid2 += FirstNormalTransactionId + 1; - return TransactionIdPrecedes(xid1, xid2); + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1)); } diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 0176799d93..7ab5f050dd 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -404,6 +404,9 @@ static void ClearPendingActionsAndNotifies(void); /* * We will work on the page range of 0..QUEUE_MAX_PAGE. + * + * Since asyncQueueIsFull() blocks creation of a page that could precede any + * extant page, we need not assess entries within a page. */ static bool asyncQueuePagePrecedes(int p, int q) @@ -1234,8 +1237,8 @@ asyncQueueIsFull(void) * logically precedes the current global tail pointer, ie, the head * pointer would wrap around compared to the tail. We cannot create such * a head page for fear of confusing slru.c. For safety we round the tail - * pointer back to a segment boundary (compare the truncation logic in - * asyncQueueAdvanceTail). + * pointer back to a segment boundary (truncation logic in + * asyncQueueAdvanceTail does not do this, so doing it here is optional). * * Note that this test is *not* dependent on how much space there is on * the current head page. This is necessary because asyncQueueAddEntries diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 0718e9c886..b1872efe09 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -418,7 +418,7 @@ static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT static void ReleaseRWConflict(RWConflict conflict); static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); -static bool OldSerXidPagePrecedesLogically(int p, int q); +static bool OldSerXidPagePrecedesLogically(int page1, int page2); static void OldSerXidInit(void); static void OldSerXidAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo); static SerCommitSeqNo OldSerXidGetMinConflictCommitSeqNo(TransactionId xid); @@ -761,28 +761,80 @@ FlagSxactUnsafe(SERIALIZABLEXACT *sxact) /*------------------------------------------------------------------------*/ /* - * We will work on the page range of 0..OLDSERXID_MAX_PAGE. - * Compares using wraparound logic, as is required by slru.c. + * Decide whether an OldSerXid page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). */ static bool -OldSerXidPagePrecedesLogically(int p, int q) +OldSerXidPagePrecedesLogically(int page1, int page2) { - int diff; + TransactionId xid1; + TransactionId xid2; + + xid1 = ((TransactionId) page1) * OLDSERXID_ENTRIESPERPAGE; + xid1 += FirstNormalTransactionId + 1; + xid2 = ((TransactionId) page2) * OLDSERXID_ENTRIESPERPAGE; + xid2 += FirstNormalTransactionId + 1; + + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + OLDSERXID_ENTRIESPERPAGE - 1)); +} + +#ifdef USE_ASSERT_CHECKING +static void +OldSerXidPagePrecedesLogicallyUnitTests(void) +{ + int per_page = OLDSERXID_ENTRIESPERPAGE, + offset = per_page / 2; + int newestPage, + oldestPage, + headPage, + targetPage; + TransactionId newestXact, + oldestXact; + + /* GetNewTransactionId() has assigned the last XID it can safely use. */ + newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; /* nothing special */ + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; /* - * We have to compare modulo (OLDSERXID_MAX_PAGE+1)/2. Both inputs should - * be in the range 0..OLDSERXID_MAX_PAGE. + * In this scenario, the SLRU headPage pertains to the last ~1000 XIDs + * assigned. oldestXact finishes, ~2B XIDs having elapsed since it + * started. Further transactions cause us to summarize oldestXact to + * tailPage. Function must return false so OldSerXidAdd() doesn't zero + * tailPage (which may contain entries for other old, recently-finished + * XIDs) and half the SLRU. Reaching this requires burning ~2B XIDs in + * single-user mode, a negligible possibility. */ - Assert(p >= 0 && p <= OLDSERXID_MAX_PAGE); - Assert(q >= 0 && q <= OLDSERXID_MAX_PAGE); + headPage = newestPage; + targetPage = oldestPage; + Assert(!OldSerXidPagePrecedesLogically(headPage, targetPage)); - diff = p - q; - if (diff >= ((OLDSERXID_MAX_PAGE + 1) / 2)) - diff -= OLDSERXID_MAX_PAGE + 1; - else if (diff < -((int) (OLDSERXID_MAX_PAGE + 1) / 2)) - diff += OLDSERXID_MAX_PAGE + 1; - return diff < 0; + /* + * In this scenario, the SLRU headPage pertains to oldestXact. We're + * summarizing an XID near newestXact. (Assume few other XIDs used + * SERIALIZABLE, hence the minimal headPage advancement. Assume + * oldestXact was long-running and only recently reached the SLRU.) + * Function must return true to make OldSerXidAdd() create targetPage. + * + * Today's implementation mishandles this case, but it doesn't matter + * enough to fix. Verify that the defect affects just one page by + * asserting correct treatment of its prior page. Reaching this case + * requires burning ~2B XIDs in single-user mode, a negligible + * possibility. Moreover, if it does happen, the consequence would be + * mild, namely a new transaction failing in SimpleLruReadPage(). + */ + headPage = oldestPage; + targetPage = newestPage; + Assert(OldSerXidPagePrecedesLogically(headPage, targetPage - 1)); +#if 0 + Assert(OldSerXidPagePrecedesLogically(headPage, targetPage)); +#endif } +#endif /* * Initialize for the tracking of old serializable committed xids. @@ -801,6 +853,10 @@ OldSerXidInit(void) LWTRANCHE_OLDSERXID_BUFFERS); /* Override default assumption that writes should be fsync'd */ OldSerXidSlruCtl->do_fsync = false; +#ifdef USE_ASSERT_CHECKING + OldSerXidPagePrecedesLogicallyUnitTests(); +#endif + SlruPagePrecedesUnitTests(OldSerXidSlruCtl, OLDSERXID_ENTRIESPERPAGE); /* * Create or attach to the OldSerXidControl structure. @@ -1051,7 +1107,7 @@ CheckPointPredicate(void) } else { - /* + /*---------- * The SLRU is no longer needed. Truncate to head before we set head * invalid. * @@ -1060,6 +1116,25 @@ CheckPointPredicate(void) * that we leave behind will appear to be new again. In that case it * won't be removed until XID horizon advances enough to make it * current again. + * + * XXX: This should happen in vac_truncate_clog(), not in checkpoints. + * Consider this scenario, starting from a system with no in-progress + * transactions and VACUUM FREEZE having maximized oldestXact: + * - Start a SERIALIZABLE transaction. + * - Start, finish, and summarize a SERIALIZABLE transaction, creating + * one SLRU page. + * - Consume XIDs to reach xidStopLimit. + * - Finish all transactions. Due to the long-running SERIALIZABLE + * transaction, earlier checkpoints did not touch headPage. The + * next checkpoint will change it, but that checkpoint happens after + * the end of the scenario. + * - VACUUM to advance XID limits. + * - Consume ~2M XIDs, crossing the former xidWrapLimit. + * - Start, finish, and summarize a SERIALIZABLE transaction. + * OldSerXidAdd() declines to create the targetPage, because + * headPage is not regarded as in the past relative to that + * targetPage. The transaction instigating the summarize fails in + * SimpleLruReadPage(). */ tailPage = oldSerXidControl->headPage; oldSerXidControl->headPage = -1; diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 5fcebc52fb..a50d936044 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -126,9 +126,14 @@ typedef struct SlruCtlData bool do_fsync; /* - * Decide which of two page numbers is "older" for truncation purposes. We - * need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. + * Decide whether a page is "older" for truncation and as a hint for + * evicting pages in LRU order. Return true if every entry of the first + * argument is older than every entry of the second argument. Note that + * !PagePrecedes(a,b) && !PagePrecedes(b,a) need not imply a==b; it also + * arises when some entries are older and some are not. For SLRUs using + * SimpleLruTruncate(), this must use modular arithmetic. (For others, + * the behavior of this callback has no functional implications.) Use + * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria. */ bool (*PagePrecedes) (int, int); @@ -152,6 +157,11 @@ extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno); extern void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied); +#ifdef USE_ASSERT_CHECKING +extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); +#else +#define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0) +#endif extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);