diff options
author | Noah Misch <noah@leadboat.com> | 2021-01-16 12:21:35 -0800 |
---|---|---|
committer | Noah Misch <noah@leadboat.com> | 2021-01-16 12:21:39 -0800 |
commit | 1a31d8c52db4220f162ed3c4812569843daf950e (patch) | |
tree | 1e013d12393c31996acef233aa98e2ff8583f47f /src/backend/storage/lmgr/predicate.c | |
parent | fc6d08b27a3395f9a1b30a28f60ad60a01d1523b (diff) |
Prevent excess SimpleLruTruncate() deletion.
Every core SLRU wraps around. With the exception of pg_notify, the wrap
point can fall in the middle of a page. Account for this in the
PagePrecedes callback specification and in SimpleLruTruncate()'s use of
said callback. Update each callback implementation to fit the new
specification. This changes SerialPagePrecedesLogically() from the
style of asyncQueuePagePrecedes() to the style of CLOGPagePrecedes().
(Whereas pg_clog and pg_serial share a key space, pg_serial is nothing
like pg_notify.) The bug fixed here has the same symptoms and user
followup steps as 592a589a04bd456410b853d86bd05faa9432cbbb. Back-patch
to 9.5 (all supported versions).
Reviewed by Andrey Borodin and (in earlier versions) by Tom Lane.
Discussion: https://postgr.es/m/20190202083822.GC32531@gust.leadboat.com
Diffstat (limited to 'src/backend/storage/lmgr/predicate.c')
-rw-r--r-- | src/backend/storage/lmgr/predicate.c | 109 |
1 files changed, 92 insertions, 17 deletions
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 0718e9c886e..b1872efe092 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -418,7 +418,7 @@ static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT static void ReleaseRWConflict(RWConflict conflict); static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); -static bool OldSerXidPagePrecedesLogically(int p, int q); +static bool OldSerXidPagePrecedesLogically(int page1, int page2); static void OldSerXidInit(void); static void OldSerXidAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo); static SerCommitSeqNo OldSerXidGetMinConflictCommitSeqNo(TransactionId xid); @@ -761,28 +761,80 @@ FlagSxactUnsafe(SERIALIZABLEXACT *sxact) /*------------------------------------------------------------------------*/ /* - * We will work on the page range of 0..OLDSERXID_MAX_PAGE. - * Compares using wraparound logic, as is required by slru.c. + * Decide whether an OldSerXid page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes(). */ static bool -OldSerXidPagePrecedesLogically(int p, int q) +OldSerXidPagePrecedesLogically(int page1, int page2) { - int diff; + TransactionId xid1; + TransactionId xid2; + + xid1 = ((TransactionId) page1) * OLDSERXID_ENTRIESPERPAGE; + xid1 += FirstNormalTransactionId + 1; + xid2 = ((TransactionId) page2) * OLDSERXID_ENTRIESPERPAGE; + xid2 += FirstNormalTransactionId + 1; + + return (TransactionIdPrecedes(xid1, xid2) && + TransactionIdPrecedes(xid1, xid2 + OLDSERXID_ENTRIESPERPAGE - 1)); +} + +#ifdef USE_ASSERT_CHECKING +static void +OldSerXidPagePrecedesLogicallyUnitTests(void) +{ + int per_page = OLDSERXID_ENTRIESPERPAGE, + offset = per_page / 2; + int newestPage, + oldestPage, + headPage, + targetPage; + TransactionId newestXact, + oldestXact; + + /* GetNewTransactionId() has assigned the last XID it can safely use. */ + newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; /* nothing special */ + newestXact = newestPage * per_page + offset; + Assert(newestXact / per_page == newestPage); + oldestXact = newestXact + 1; + oldestXact -= 1U << 31; + oldestPage = oldestXact / per_page; /* - * We have to compare modulo (OLDSERXID_MAX_PAGE+1)/2. Both inputs should - * be in the range 0..OLDSERXID_MAX_PAGE. + * In this scenario, the SLRU headPage pertains to the last ~1000 XIDs + * assigned. oldestXact finishes, ~2B XIDs having elapsed since it + * started. Further transactions cause us to summarize oldestXact to + * tailPage. Function must return false so OldSerXidAdd() doesn't zero + * tailPage (which may contain entries for other old, recently-finished + * XIDs) and half the SLRU. Reaching this requires burning ~2B XIDs in + * single-user mode, a negligible possibility. */ - Assert(p >= 0 && p <= OLDSERXID_MAX_PAGE); - Assert(q >= 0 && q <= OLDSERXID_MAX_PAGE); - - diff = p - q; - if (diff >= ((OLDSERXID_MAX_PAGE + 1) / 2)) - diff -= OLDSERXID_MAX_PAGE + 1; - else if (diff < -((int) (OLDSERXID_MAX_PAGE + 1) / 2)) - diff += OLDSERXID_MAX_PAGE + 1; - return diff < 0; + headPage = newestPage; + targetPage = oldestPage; + Assert(!OldSerXidPagePrecedesLogically(headPage, targetPage)); + + /* + * In this scenario, the SLRU headPage pertains to oldestXact. We're + * summarizing an XID near newestXact. (Assume few other XIDs used + * SERIALIZABLE, hence the minimal headPage advancement. Assume + * oldestXact was long-running and only recently reached the SLRU.) + * Function must return true to make OldSerXidAdd() create targetPage. + * + * Today's implementation mishandles this case, but it doesn't matter + * enough to fix. Verify that the defect affects just one page by + * asserting correct treatment of its prior page. Reaching this case + * requires burning ~2B XIDs in single-user mode, a negligible + * possibility. Moreover, if it does happen, the consequence would be + * mild, namely a new transaction failing in SimpleLruReadPage(). + */ + headPage = oldestPage; + targetPage = newestPage; + Assert(OldSerXidPagePrecedesLogically(headPage, targetPage - 1)); +#if 0 + Assert(OldSerXidPagePrecedesLogically(headPage, targetPage)); +#endif } +#endif /* * Initialize for the tracking of old serializable committed xids. @@ -801,6 +853,10 @@ OldSerXidInit(void) LWTRANCHE_OLDSERXID_BUFFERS); /* Override default assumption that writes should be fsync'd */ OldSerXidSlruCtl->do_fsync = false; +#ifdef USE_ASSERT_CHECKING + OldSerXidPagePrecedesLogicallyUnitTests(); +#endif + SlruPagePrecedesUnitTests(OldSerXidSlruCtl, OLDSERXID_ENTRIESPERPAGE); /* * Create or attach to the OldSerXidControl structure. @@ -1051,7 +1107,7 @@ CheckPointPredicate(void) } else { - /* + /*---------- * The SLRU is no longer needed. Truncate to head before we set head * invalid. * @@ -1060,6 +1116,25 @@ CheckPointPredicate(void) * that we leave behind will appear to be new again. In that case it * won't be removed until XID horizon advances enough to make it * current again. + * + * XXX: This should happen in vac_truncate_clog(), not in checkpoints. + * Consider this scenario, starting from a system with no in-progress + * transactions and VACUUM FREEZE having maximized oldestXact: + * - Start a SERIALIZABLE transaction. + * - Start, finish, and summarize a SERIALIZABLE transaction, creating + * one SLRU page. + * - Consume XIDs to reach xidStopLimit. + * - Finish all transactions. Due to the long-running SERIALIZABLE + * transaction, earlier checkpoints did not touch headPage. The + * next checkpoint will change it, but that checkpoint happens after + * the end of the scenario. + * - VACUUM to advance XID limits. + * - Consume ~2M XIDs, crossing the former xidWrapLimit. + * - Start, finish, and summarize a SERIALIZABLE transaction. + * OldSerXidAdd() declines to create the targetPage, because + * headPage is not regarded as in the past relative to that + * targetPage. The transaction instigating the summarize fails in + * SimpleLruReadPage(). */ tailPage = oldSerXidControl->headPage; oldSerXidControl->headPage = -1; |