diff options
| -rw-r--r-- | src/backend/access/transam/clog.c | 27 | ||||
| -rw-r--r-- | src/backend/access/transam/commit_ts.c | 35 | ||||
| -rw-r--r-- | src/backend/access/transam/multixact.c | 38 | ||||
| -rw-r--r-- | src/backend/access/transam/slru.c | 143 | ||||
| -rw-r--r-- | src/backend/access/transam/subtrans.c | 17 | ||||
| -rw-r--r-- | src/backend/commands/async.c | 11 | ||||
| -rw-r--r-- | src/backend/storage/lmgr/predicate.c | 109 | ||||
| -rw-r--r-- | src/include/access/slru.h | 16 | 
8 files changed, 315 insertions, 81 deletions
| diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index fa3763b478d..5d6e088e8bd 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -693,6 +693,7 @@ CLOGShmemInit(void)  	XactCtl->PagePrecedes = CLOGPagePrecedes;  	SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,  				  XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER); +	SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);  }  /* @@ -926,13 +927,22 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)  /* - * Decide which of two CLOG page numbers is "older" for truncation purposes. + * Decide whether a CLOG page number is "older" for truncation purposes.   *   * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic.  However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs.  So, - * offset both xids by FirstNormalTransactionId to avoid that. + * thing with wraparound XID arithmetic.  However, TransactionIdPrecedes() + * would get weird about permanent xact IDs.  So, offset both such that xid1, + * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset + * is relevant to page 0 and to the page preceding page 0. + * + * The page containing oldestXact-2^31 is the important edge case.  The + * portion of that page equaling or following oldestXact-2^31 is expendable, + * but the portion preceding oldestXact-2^31 is not.  When oldestXact-2^31 is + * the first XID of a page and segment, the entire page and segment is + * expendable, and we could truncate the segment.  Recognizing that case would + * require making oldestXact, not just the page containing oldestXact, + * available to this callback.  The benefit would be rare and small, so we + * don't optimize that edge case.   */  static bool  CLOGPagePrecedes(int page1, int page2) @@ -941,11 +951,12 @@ CLOGPagePrecedes(int page1, int page2)  	TransactionId xid2;  	xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE; -	xid1 += FirstNormalTransactionId; +	xid1 += FirstNormalTransactionId + 1;  	xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE; -	xid2 += FirstNormalTransactionId; +	xid2 += FirstNormalTransactionId + 1; -	return TransactionIdPrecedes(xid1, xid2); +	return (TransactionIdPrecedes(xid1, xid2) && +			TransactionIdPrecedes(xid1, xid2 + CLOG_XACTS_PER_PAGE - 1));  } diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 7ae497ff000..87a69f4c210 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -495,6 +495,7 @@ CommitTsShmemInit(void)  	SimpleLruInit(CommitTsCtl, "CommitTs", CommitTsShmemBuffers(), 0,  				  CommitTsSLRULock, "pg_commit_ts",  				  LWTRANCHE_COMMITTS_BUFFER); +	SlruPagePrecedesUnitTests(CommitTsCtl, COMMIT_TS_XACTS_PER_PAGE);  	commitTsShared = ShmemInitStruct("CommitTs shared",  									 sizeof(CommitTimestampShared), @@ -877,14 +878,27 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact)  /* - * Decide which of two commitTS page numbers is "older" for truncation - * purposes. + * Decide whether a commitTS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes().   * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic.  However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs.  So, - * offset both xids by FirstNormalTransactionId to avoid that. + * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128.  This + * introduces differences compared to CLOG and the other SLRUs having (1 << + * 31) % per_page == 0.  This function never tests exactly + * TransactionIdPrecedes(x-2^31, x).  When the system reaches xidStopLimit, + * there are two possible counts of page boundaries between oldestXact and the + * latest XID assigned, depending on whether oldestXact is within the first + * 128 entries of its page.  Since this function doesn't know the location of + * oldestXact within page2, it returns false for one page that actually is + * expendable.  This is a wider (yet still negligible) version of the + * truncation opportunity that CLOGPagePrecedes() cannot recognize. + * + * For the sake of a worked example, number entries with decimal values such + * that page1==1 entries range from 1.0 to 1.999.  Let N+0.15 be the number of + * pages that 2^31 entries will span (N is an integer).  If oldestXact=N+2.1, + * then the final safe XID assignment leaves newestXact=1.95.  We keep page 2, + * because entry=2.85 is the border that toggles whether entries precede the + * last entry of the oldestXact page.  While page 2 is expendable at + * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9.   */  static bool  CommitTsPagePrecedes(int page1, int page2) @@ -893,11 +907,12 @@ CommitTsPagePrecedes(int page1, int page2)  	TransactionId xid2;  	xid1 = ((TransactionId) page1) * COMMIT_TS_XACTS_PER_PAGE; -	xid1 += FirstNormalTransactionId; +	xid1 += FirstNormalTransactionId + 1;  	xid2 = ((TransactionId) page2) * COMMIT_TS_XACTS_PER_PAGE; -	xid2 += FirstNormalTransactionId; +	xid2 += FirstNormalTransactionId + 1; -	return TransactionIdPrecedes(xid1, xid2); +	return (TransactionIdPrecedes(xid1, xid2) && +			TransactionIdPrecedes(xid1, xid2 + COMMIT_TS_XACTS_PER_PAGE - 1));  } diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index ce84dac0c40..ff960833f95 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -1832,10 +1832,12 @@ MultiXactShmemInit(void)  				  "MultiXactOffset", NUM_MULTIXACTOFFSET_BUFFERS, 0,  				  MultiXactOffsetSLRULock, "pg_multixact/offsets",  				  LWTRANCHE_MULTIXACTOFFSET_BUFFER); +	SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);  	SimpleLruInit(MultiXactMemberCtl,  				  "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0,  				  MultiXactMemberSLRULock, "pg_multixact/members",  				  LWTRANCHE_MULTIXACTMEMBER_BUFFER); +	/* doesn't call SimpleLruTruncate() or meet criteria for unit tests */  	/* Initialize our shared state struct */  	MultiXactState = ShmemInitStruct("Shared MultiXact State", @@ -2978,6 +2980,14 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)  	 * truncate the members SLRU.  So we first scan the directory to determine  	 * the earliest offsets page number that we can read without error.  	 * +	 * When nextMXact is less than one segment away from multiWrapLimit, +	 * SlruScanDirCbFindEarliest can find some early segment other than the +	 * actual earliest.  (MultiXactOffsetPagePrecedes(EARLIEST, LATEST) +	 * returns false, because not all pairs of entries have the same answer.) +	 * That can also arise when an earlier truncation attempt failed unlink() +	 * or returned early from this function.  The only consequence is +	 * returning early, which wastes space that we could have liberated. +	 *  	 * NB: It's also possible that the page that oldestMulti is on has already  	 * been truncated away, and we crashed before updating oldestMulti.  	 */ @@ -3092,15 +3102,11 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)  }  /* - * Decide which of two MultiXactOffset page numbers is "older" for truncation - * purposes. - * - * We need to use comparison of MultiXactId here in order to do the right - * thing with wraparound.  However, if we are asked about page number zero, we - * don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get - * weird.  So, offset both multis by FirstMultiXactId to avoid that. - * (Actually, the current implementation doesn't do anything weird with - * InvalidMultiXactId, but there's no harm in leaving this code like this.) + * Decide whether a MultiXactOffset page number is "older" for truncation + * purposes.  Analogous to CLOGPagePrecedes(). + * + * Offsetting the values is optional, because MultiXactIdPrecedes() has + * translational symmetry.   */  static bool  MultiXactOffsetPagePrecedes(int page1, int page2) @@ -3109,15 +3115,17 @@ MultiXactOffsetPagePrecedes(int page1, int page2)  	MultiXactId multi2;  	multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE; -	multi1 += FirstMultiXactId; +	multi1 += FirstMultiXactId + 1;  	multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE; -	multi2 += FirstMultiXactId; +	multi2 += FirstMultiXactId + 1; -	return MultiXactIdPrecedes(multi1, multi2); +	return (MultiXactIdPrecedes(multi1, multi2) && +			MultiXactIdPrecedes(multi1, +								multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));  }  /* - * Decide which of two MultiXactMember page numbers is "older" for truncation + * Decide whether a MultiXactMember page number is "older" for truncation   * purposes.  There is no "invalid offset number" so use the numbers verbatim.   */  static bool @@ -3129,7 +3137,9 @@ MultiXactMemberPagePrecedes(int page1, int page2)  	offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;  	offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE; -	return MultiXactOffsetPrecedes(offset1, offset2); +	return (MultiXactOffsetPrecedes(offset1, offset2) && +			MultiXactOffsetPrecedes(offset1, +									offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));  }  /* diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 64aabc564f8..c19a1a6808d 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1231,11 +1231,6 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage)  	pgstat_count_slru_truncate(shared->slru_stats_idx);  	/* -	 * The cutoff point is the start of the segment containing cutoffPage. -	 */ -	cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - -	/*  	 * Scan shared memory and remove any pages preceding the cutoff page, to  	 * ensure we won't rewrite them later.  (Since this is normally called in  	 * or just after a checkpoint, any dirty pages should have been flushed @@ -1247,9 +1242,7 @@ restart:;  	/*  	 * While we are holding the lock, make an important safety check: the -	 * planned cutoff point must be <= the current endpoint page. Otherwise we -	 * have already wrapped around, and proceeding with the truncation would -	 * risk removing the current segment. +	 * current endpoint page must not be eligible for removal.  	 */  	if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))  	{ @@ -1281,8 +1274,11 @@ restart:;  		 * Hmm, we have (or may have) I/O operations acting on the page, so  		 * we've got to wait for them to finish and then start again. This is  		 * the same logic as in SlruSelectLRUPage.  (XXX if page is dirty, -		 * wouldn't it be OK to just discard it without writing it?  For now, -		 * keep the logic the same as it was.) +		 * wouldn't it be OK to just discard it without writing it? +		 * SlruMayDeleteSegment() uses a stricter qualification, so we might +		 * not delete this page in the end; even if we don't delete it, we +		 * won't have cause to read its data again.  For now, keep the logic +		 * the same as it was.)  		 */  		if (shared->page_status[slotno] == SLRU_PAGE_VALID)  			SlruInternalWritePage(ctl, slotno, NULL); @@ -1373,18 +1369,133 @@ restart:  }  /* + * Determine whether a segment is okay to delete. + * + * segpage is the first page of the segment, and cutoffPage is the oldest (in + * PagePrecedes order) page in the SLRU containing still-useful data.  Since + * every core PagePrecedes callback implements "wrap around", check the + * segment's first and last pages: + * + * first<cutoff  && last<cutoff:  yes + * first<cutoff  && last>=cutoff: no; cutoff falls inside this segment + * first>=cutoff && last<cutoff:  no; wrap point falls inside this segment + * first>=cutoff && last>=cutoff: no; every page of this segment is too young + */ +static bool +SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage) +{ +	int			seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1; + +	Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0); + +	return (ctl->PagePrecedes(segpage, cutoffPage) && +			ctl->PagePrecedes(seg_last_page, cutoffPage)); +} + +#ifdef USE_ASSERT_CHECKING +static void +SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) +{ +	TransactionId lhs, +				rhs; +	int			newestPage, +				oldestPage; +	TransactionId newestXact, +				oldestXact; + +	/* +	 * Compare an XID pair having undefined order (see RFC 1982), a pair at +	 * "opposite ends" of the XID space.  TransactionIdPrecedes() treats each +	 * as preceding the other.  If RHS is oldestXact, LHS is the first XID we +	 * must not assign. +	 */ +	lhs = per_page + offset;	/* skip first page to avoid non-normal XIDs */ +	rhs = lhs + (1U << 31); +	Assert(TransactionIdPrecedes(lhs, rhs)); +	Assert(TransactionIdPrecedes(rhs, lhs)); +	Assert(!TransactionIdPrecedes(lhs - 1, rhs)); +	Assert(TransactionIdPrecedes(rhs, lhs - 1)); +	Assert(TransactionIdPrecedes(lhs + 1, rhs)); +	Assert(!TransactionIdPrecedes(rhs, lhs + 1)); +	Assert(!TransactionIdFollowsOrEquals(lhs, rhs)); +	Assert(!TransactionIdFollowsOrEquals(rhs, lhs)); +	Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page)); +	Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page)); +	Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page)); +	Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page)); +	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page)); +	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page)); +	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page) +		   || (1U << 31) % per_page != 0);	/* See CommitTsPagePrecedes() */ +	Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page) +		   || (1U << 31) % per_page != 0); +	Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page)); +	Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page)); +	Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page)); + +	/* +	 * GetNewTransactionId() has assigned the last XID it can safely use, and +	 * that XID is in the *LAST* page of the second segment.  We must not +	 * delete that segment. +	 */ +	newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1; +	newestXact = newestPage * per_page + offset; +	Assert(newestXact / per_page == newestPage); +	oldestXact = newestXact + 1; +	oldestXact -= 1U << 31; +	oldestPage = oldestXact / per_page; +	Assert(!SlruMayDeleteSegment(ctl, +								 (newestPage - +								  newestPage % SLRU_PAGES_PER_SEGMENT), +								 oldestPage)); + +	/* +	 * GetNewTransactionId() has assigned the last XID it can safely use, and +	 * that XID is in the *FIRST* page of the second segment.  We must not +	 * delete that segment. +	 */ +	newestPage = SLRU_PAGES_PER_SEGMENT; +	newestXact = newestPage * per_page + offset; +	Assert(newestXact / per_page == newestPage); +	oldestXact = newestXact + 1; +	oldestXact -= 1U << 31; +	oldestPage = oldestXact / per_page; +	Assert(!SlruMayDeleteSegment(ctl, +								 (newestPage - +								  newestPage % SLRU_PAGES_PER_SEGMENT), +								 oldestPage)); +} + +/* + * Unit-test a PagePrecedes function. + * + * This assumes every uint32 >= FirstNormalTransactionId is a valid key.  It + * assumes each value occupies a contiguous, fixed-size region of SLRU bytes. + * (MultiXactMemberCtl separates flags from XIDs.  AsyncCtl has + * variable-length entries, no keys, and no random access.  These unit tests + * do not apply to them.) + */ +void +SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page) +{ +	/* Test first, middle and last entries of a page. */ +	SlruPagePrecedesTestOffset(ctl, per_page, 0); +	SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2); +	SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1); +} +#endif + +/*   * SlruScanDirectory callback - *		This callback reports true if there's any segment prior to the one - *		containing the page passed as "data". + *		This callback reports true if there's any segment wholly prior to the + *		one containing the page passed as "data".   */  bool  SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)  {  	int			cutoffPage = *(int *) data; -	cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; - -	if (ctl->PagePrecedes(segpage, cutoffPage)) +	if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))  		return true;			/* found one; don't iterate any more */  	return false;				/* keep going */ @@ -1399,7 +1510,7 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)  {  	int			cutoffPage = *(int *) data; -	if (ctl->PagePrecedes(segpage, cutoffPage)) +	if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))  		SlruInternalDeleteSegment(ctl, filename);  	return false;				/* keep going */ diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index d9a31d3c8d2..2eb1a26114e 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -196,6 +196,7 @@ SUBTRANSShmemInit(void)  				  LWTRANCHE_SUBTRANS_BUFFER);  	/* Override default assumption that writes should be fsync'd */  	SubTransCtl->do_fsync = false; +	SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);  }  /* @@ -373,13 +374,8 @@ TruncateSUBTRANS(TransactionId oldestXact)  /* - * Decide which of two SUBTRANS page numbers is "older" for truncation purposes. - * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic.  However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs.  So, - * offset both xids by FirstNormalTransactionId to avoid that. + * Decide whether a SUBTRANS page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes().   */  static bool  SubTransPagePrecedes(int page1, int page2) @@ -388,9 +384,10 @@ SubTransPagePrecedes(int page1, int page2)  	TransactionId xid2;  	xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE; -	xid1 += FirstNormalTransactionId; +	xid1 += FirstNormalTransactionId + 1;  	xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE; -	xid2 += FirstNormalTransactionId; +	xid2 += FirstNormalTransactionId + 1; -	return TransactionIdPrecedes(xid1, xid2); +	return (TransactionIdPrecedes(xid1, xid2) && +			TransactionIdPrecedes(xid1, xid2 + SUBTRANS_XACTS_PER_PAGE - 1));  } diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 13a7409aba4..5739d2b40f0 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -493,7 +493,12 @@ asyncQueuePageDiff(int p, int q)  	return diff;  } -/* Is p < q, accounting for wraparound? */ +/* + * Is p < q, accounting for wraparound? + * + * Since asyncQueueIsFull() blocks creation of a page that could precede any + * extant page, we need not assess entries within a page. + */  static bool  asyncQueuePagePrecedes(int p, int q)  { @@ -1356,8 +1361,8 @@ asyncQueueIsFull(void)  	 * logically precedes the current global tail pointer, ie, the head  	 * pointer would wrap around compared to the tail.  We cannot create such  	 * a head page for fear of confusing slru.c.  For safety we round the tail -	 * pointer back to a segment boundary (compare the truncation logic in -	 * asyncQueueAdvanceTail). +	 * pointer back to a segment boundary (truncation logic in +	 * asyncQueueAdvanceTail does not do this, so doing it here is optional).  	 *  	 * Note that this test is *not* dependent on how much space there is on  	 * the current head page.  This is necessary because asyncQueueAddEntries diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index d24919f76b6..2d7ed83ea95 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -438,7 +438,7 @@ static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT  static void ReleaseRWConflict(RWConflict conflict);  static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); -static bool SerialPagePrecedesLogically(int p, int q); +static bool SerialPagePrecedesLogically(int page1, int page2);  static void SerialInit(void);  static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo);  static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid); @@ -784,28 +784,80 @@ FlagSxactUnsafe(SERIALIZABLEXACT *sxact)  /*------------------------------------------------------------------------*/  /* - * We will work on the page range of 0..SERIAL_MAX_PAGE. - * Compares using wraparound logic, as is required by slru.c. + * Decide whether a Serial page number is "older" for truncation purposes. + * Analogous to CLOGPagePrecedes().   */  static bool -SerialPagePrecedesLogically(int p, int q) +SerialPagePrecedesLogically(int page1, int page2)  { -	int			diff; +	TransactionId xid1; +	TransactionId xid2; + +	xid1 = ((TransactionId) page1) * SERIAL_ENTRIESPERPAGE; +	xid1 += FirstNormalTransactionId + 1; +	xid2 = ((TransactionId) page2) * SERIAL_ENTRIESPERPAGE; +	xid2 += FirstNormalTransactionId + 1; + +	return (TransactionIdPrecedes(xid1, xid2) && +			TransactionIdPrecedes(xid1, xid2 + SERIAL_ENTRIESPERPAGE - 1)); +} + +#ifdef USE_ASSERT_CHECKING +static void +SerialPagePrecedesLogicallyUnitTests(void) +{ +	int			per_page = SERIAL_ENTRIESPERPAGE, +				offset = per_page / 2; +	int			newestPage, +				oldestPage, +				headPage, +				targetPage; +	TransactionId newestXact, +				oldestXact; + +	/* GetNewTransactionId() has assigned the last XID it can safely use. */ +	newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1;	/* nothing special */ +	newestXact = newestPage * per_page + offset; +	Assert(newestXact / per_page == newestPage); +	oldestXact = newestXact + 1; +	oldestXact -= 1U << 31; +	oldestPage = oldestXact / per_page;  	/* -	 * We have to compare modulo (SERIAL_MAX_PAGE+1)/2.  Both inputs should be -	 * in the range 0..SERIAL_MAX_PAGE. +	 * In this scenario, the SLRU headPage pertains to the last ~1000 XIDs +	 * assigned.  oldestXact finishes, ~2B XIDs having elapsed since it +	 * started.  Further transactions cause us to summarize oldestXact to +	 * tailPage.  Function must return false so SerialAdd() doesn't zero +	 * tailPage (which may contain entries for other old, recently-finished +	 * XIDs) and half the SLRU.  Reaching this requires burning ~2B XIDs in +	 * single-user mode, a negligible possibility.  	 */ -	Assert(p >= 0 && p <= SERIAL_MAX_PAGE); -	Assert(q >= 0 && q <= SERIAL_MAX_PAGE); - -	diff = p - q; -	if (diff >= ((SERIAL_MAX_PAGE + 1) / 2)) -		diff -= SERIAL_MAX_PAGE + 1; -	else if (diff < -((int) (SERIAL_MAX_PAGE + 1) / 2)) -		diff += SERIAL_MAX_PAGE + 1; -	return diff < 0; +	headPage = newestPage; +	targetPage = oldestPage; +	Assert(!SerialPagePrecedesLogically(headPage, targetPage)); + +	/* +	 * In this scenario, the SLRU headPage pertains to oldestXact.  We're +	 * summarizing an XID near newestXact.  (Assume few other XIDs used +	 * SERIALIZABLE, hence the minimal headPage advancement.  Assume +	 * oldestXact was long-running and only recently reached the SLRU.) +	 * Function must return true to make SerialAdd() create targetPage. +	 * +	 * Today's implementation mishandles this case, but it doesn't matter +	 * enough to fix.  Verify that the defect affects just one page by +	 * asserting correct treatment of its prior page.  Reaching this case +	 * requires burning ~2B XIDs in single-user mode, a negligible +	 * possibility.  Moreover, if it does happen, the consequence would be +	 * mild, namely a new transaction failing in SimpleLruReadPage(). +	 */ +	headPage = oldestPage; +	targetPage = newestPage; +	Assert(SerialPagePrecedesLogically(headPage, targetPage - 1)); +#if 0 +	Assert(SerialPagePrecedesLogically(headPage, targetPage)); +#endif  } +#endif  /*   * Initialize for the tracking of old serializable committed xids. @@ -824,6 +876,10 @@ SerialInit(void)  				  LWTRANCHE_SERIAL_BUFFER);  	/* Override default assumption that writes should be fsync'd */  	SerialSlruCtl->do_fsync = false; +#ifdef USE_ASSERT_CHECKING +	SerialPagePrecedesLogicallyUnitTests(); +#endif +	SlruPagePrecedesUnitTests(SerialSlruCtl, SERIAL_ENTRIESPERPAGE);  	/*  	 * Create or attach to the SerialControl structure. @@ -1032,7 +1088,7 @@ CheckPointPredicate(void)  	}  	else  	{ -		/* +		/*----------  		 * The SLRU is no longer needed. Truncate to head before we set head  		 * invalid.  		 * @@ -1041,6 +1097,25 @@ CheckPointPredicate(void)  		 * that we leave behind will appear to be new again. In that case it  		 * won't be removed until XID horizon advances enough to make it  		 * current again. +		 * +		 * XXX: This should happen in vac_truncate_clog(), not in checkpoints. +		 * Consider this scenario, starting from a system with no in-progress +		 * transactions and VACUUM FREEZE having maximized oldestXact: +		 * - Start a SERIALIZABLE transaction. +		 * - Start, finish, and summarize a SERIALIZABLE transaction, creating +		 *   one SLRU page. +		 * - Consume XIDs to reach xidStopLimit. +		 * - Finish all transactions.  Due to the long-running SERIALIZABLE +		 *   transaction, earlier checkpoints did not touch headPage.  The +		 *   next checkpoint will change it, but that checkpoint happens after +		 *   the end of the scenario. +		 * - VACUUM to advance XID limits. +		 * - Consume ~2M XIDs, crossing the former xidWrapLimit. +		 * - Start, finish, and summarize a SERIALIZABLE transaction. +		 *   SerialAdd() declines to create the targetPage, because headPage +		 *   is not regarded as in the past relative to that targetPage.  The +		 *   transaction instigating the summarize fails in +		 *   SimpleLruReadPage().  		 */  		tailPage = serialControl->headPage;  		serialControl->headPage = -1; diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 61fbc80ef0d..19982f6e226 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -117,9 +117,14 @@ typedef struct SlruCtlData  	bool		do_fsync;  	/* -	 * Decide which of two page numbers is "older" for truncation purposes. We -	 * need to use comparison of TransactionIds here in order to do the right -	 * thing with wraparound XID arithmetic. +	 * Decide whether a page is "older" for truncation and as a hint for +	 * evicting pages in LRU order.  Return true if every entry of the first +	 * argument is older than every entry of the second argument.  Note that +	 * !PagePrecedes(a,b) && !PagePrecedes(b,a) need not imply a==b; it also +	 * arises when some entries are older and some are not.  For SLRUs using +	 * SimpleLruTruncate(), this must use modular arithmetic.  (For others, +	 * the behavior of this callback has no functional implications.)  Use +	 * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria.  	 */  	bool		(*PagePrecedes) (int, int); @@ -143,6 +148,11 @@ extern int	SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,  									   TransactionId xid);  extern void SimpleLruWritePage(SlruCtl ctl, int slotno);  extern void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied); +#ifdef USE_ASSERT_CHECKING +extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page); +#else +#define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0) +#endif  extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);  extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); | 
