5 files changed, 187 insertions, 49 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index af101689282..3acaf0266cb 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -5687,14 +5687,23 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 			Assert(TransactionIdIsValid(xid));
 
 			/*
-			 * If the xid is older than the cutoff, it has to have aborted,
-			 * otherwise the tuple would have gotten pruned away.
+			 * The updating transaction cannot possibly be still running, but
+			 * verify whether it has committed, and request to set the
+			 * COMMITTED flag if so.  (We normally don't see any tuples in
+			 * this state, because they are removed by page pruning before we
+			 * try to freeze the page; but this can happen if the updating
+			 * transaction commits after the page is pruned but before
+			 * HeapTupleSatisfiesVacuum).
 			 */
 			if (TransactionIdPrecedes(xid, cutoff_xid))
 			{
-				Assert(!TransactionIdDidCommit(xid));
-				*flags |= FRM_INVALIDATE_XMAX;
-				xid = InvalidTransactionId;		/* not strictly necessary */
+				if (TransactionIdDidCommit(xid))
+					*flags = FRM_MARK_COMMITTED | FRM_RETURN_IS_XID;
+				else
+				{
+					*flags |= FRM_INVALIDATE_XMAX;
+					xid = InvalidTransactionId;	/* not strictly necessary */
+				}
 			}
 			else
 			{
@@ -5765,18 +5774,17 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 
 			/*
 			 * It's an update; should we keep it?  If the transaction is known
-			 * aborted then it's okay to ignore it, otherwise not.  However,
-			 * if the Xid is older than the cutoff_xid, we must remove it.
-			 * Note that such an old updater cannot possibly be committed,
-			 * because HeapTupleSatisfiesVacuum would have returned
-			 * HEAPTUPLE_DEAD and we would not be trying to freeze the tuple.
-			 *
-			 * Note the TransactionIdDidAbort() test is just an optimization
-			 * and not strictly necessary for correctness.
+			 * aborted or crashed then it's okay to ignore it, otherwise not.
 			 *
 			 * As with all tuple visibility routines, it's critical to test
-			 * TransactionIdIsInProgress before the transam.c routines,
+			 * TransactionIdIsInProgress before TransactionIdDidCommit,
 			 * because of race conditions explained in detail in tqual.c.
+			 *
+			 * We normally don't see committed updating transactions earlier
+			 * than the cutoff xid, because they are removed by page pruning
+			 * before we try to freeze the page; but it can happen if the
+			 * updating transaction commits after the page is pruned but
+			 * before HeapTupleSatisfiesVacuum.
 			 */
 			if (TransactionIdIsCurrentTransactionId(xid) ||
 				TransactionIdIsInProgress(xid))
@@ -5784,46 +5792,33 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 				Assert(!TransactionIdIsValid(update_xid));
 				update_xid = xid;
 			}
-			else if (!TransactionIdDidAbort(xid))
+			else if (TransactionIdDidCommit(xid))
 			{
 				/*
-				 * Test whether to tell caller to set HEAP_XMAX_COMMITTED
-				 * while we have the Xid still in cache.  Note this can only
-				 * be done if the transaction is known not running.
+				 * The transaction committed, so we can tell caller to set
+				 * HEAP_XMAX_COMMITTED.  (We can only do this because we know
+				 * the transaction is not running.)
 				 */
-				if (TransactionIdDidCommit(xid))
-					update_committed = true;
 				Assert(!TransactionIdIsValid(update_xid));
+				update_committed = true;
 				update_xid = xid;
 			}
 
 			/*
+			 * Not in progress, not committed -- must be aborted or crashed;
+			 * we can ignore it.
+			 */
+
+			/*
 			 * If we determined that it's an Xid corresponding to an update
 			 * that must be retained, additionally add it to the list of
-			 * members of the new Multis, in case we end up using that.  (We
+			 * members of the new Multi, in case we end up using that.  (We
 			 * might still decide to use only an update Xid and not a multi,
 			 * but it's easier to maintain the list as we walk the old members
 			 * list.)
-			 *
-			 * It is possible to end up with a very old updater Xid that
-			 * crashed and thus did not mark itself as aborted in pg_clog.
-			 * That would manifest as a pre-cutoff Xid.  Make sure to ignore
-			 * it.
 			 */
 			if (TransactionIdIsValid(update_xid))
-			{
-				if (!TransactionIdPrecedes(update_xid, cutoff_xid))
-				{
-					newmembers[nnewmembers++] = members[i];
-				}
-				else
-				{
-					/* cannot have committed: would be HEAPTUPLE_DEAD */
-					Assert(!TransactionIdDidCommit(update_xid));
-					update_xid = InvalidTransactionId;
-					update_committed = false;
-				}
-			}
+				newmembers[nnewmembers++] = members[i];
 		}
 		else
 		{
@@ -5890,7 +5885,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
  *
  * It is assumed that the caller has checked the tuple with
  * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
- * (else we should be removing the tuple, not freezing it).
+ * (else we should be removing the tuple, not freezing it).  However, note
+ * that we don't remove HOT tuples even if they are dead, and it'd be incorrect
+ * to freeze them (because that would make them visible), so we mark them as
+ * update-committed, and needing further freezing later on.
  *
  * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
  * XID older than it could neither be running nor seen as running by any
@@ -5995,7 +5993,18 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
 	else if (TransactionIdIsNormal(xid) &&
 			 TransactionIdPrecedes(xid, cutoff_xid))
 	{
-		freeze_xmax = true;
+		/*
+		 * Must freeze regular XIDs older than the cutoff.  We must not freeze
+		 * a HOT-updated tuple, though; doing so would bring it back to life.
+		 */
+		if (HeapTupleHeaderIsHotUpdated(tuple))
+		{
+			frz->t_infomask |= HEAP_XMAX_COMMITTED;
+			changed = true;
+			/* must not freeze */
+		}
+		else
+			freeze_xmax = true;
 	}
 
 	if (freeze_xmax)
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 95f5952f63f..a9a19dead25 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -1688,15 +1688,15 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats,
 					   ItemPointer itemptr)
 {
 	/*
-	 * The array shouldn't overflow under normal behavior, but perhaps it
-	 * could if we are given a really small maintenance_work_mem. In that
-	 * case, just forget the last few tuples (we'll get 'em next time).
+	 * The array must never overflow, since we rely on all deletable tuples
+	 * being removed; inability to remove a tuple might cause an old XID to
+	 * persist beyond the freeze limit, which could be disastrous later on.
 	 */
-	if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
-	{
-		vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
-		vacrelstats->num_dead_tuples++;
-	}
+	if (vacrelstats->num_dead_tuples >= vacrelstats->max_dead_tuples)
+		elog(ERROR, "dead tuple array overflow");
+
+	vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
+	vacrelstats->num_dead_tuples++;
 }
 
 /*
diff --git a/src/test/isolation/expected/freeze-the-dead.out b/src/test/isolation/expected/freeze-the-dead.out
new file mode 100644
index 00000000000..dd045613f93
--- /dev/null
+++ b/src/test/isolation/expected/freeze-the-dead.out
@@ -0,0 +1,101 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s1_update s1_commit s1_vacuum s2_key_share s2_commit
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s1_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s2_commit: COMMIT;
+
+starting permutation: s1_update s1_commit s2_key_share s1_vacuum s2_commit
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s1_commit: COMMIT;
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+step s2_commit: COMMIT;
+
+starting permutation: s1_update s1_commit s2_key_share s2_commit s1_vacuum
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s1_commit: COMMIT;
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s2_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+
+starting permutation: s1_update s2_key_share s1_commit s1_vacuum s2_commit
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s1_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+step s2_commit: COMMIT;
+
+starting permutation: s1_update s2_key_share s1_commit s2_commit s1_vacuum
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s1_commit: COMMIT;
+step s2_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+
+starting permutation: s1_update s2_key_share s2_commit s1_commit s1_vacuum
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s2_commit: COMMIT;
+step s1_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+
+starting permutation: s2_key_share s1_update s1_commit s1_vacuum s2_commit
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s1_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+step s2_commit: COMMIT;
+
+starting permutation: s2_key_share s1_update s1_commit s2_commit s1_vacuum
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s1_commit: COMMIT;
+step s2_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+
+starting permutation: s2_key_share s1_update s2_commit s1_commit s1_vacuum
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s2_commit: COMMIT;
+step s1_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
+
+starting permutation: s2_key_share s2_commit s1_update s1_commit s1_vacuum
+step s2_key_share: SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE;
+id             
+
+3              
+step s2_commit: COMMIT;
+step s1_update: UPDATE tab_freeze SET x = x + 1 WHERE id = 3;
+step s1_commit: COMMIT;
+step s1_vacuum: VACUUM FREEZE tab_freeze;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 50ddb43a169..3b4fd533b77 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -30,6 +30,7 @@ test: lock-committed-keyupdate
 test: update-locked-tuple
 test: propagate-lock-delete
 test: tuplelock-conflict
+test: freeze-the-dead
 test: nowait
 test: nowait-2
 test: nowait-3
diff --git a/src/test/isolation/specs/freeze-the-dead.spec b/src/test/isolation/specs/freeze-the-dead.spec
new file mode 100644
index 00000000000..3cd9965b2fa
--- /dev/null
+++ b/src/test/isolation/specs/freeze-the-dead.spec
@@ -0,0 +1,27 @@
+# Test for interactions of tuple freezing with dead, as well as recently-dead
+# tuples using multixacts via FOR KEY SHARE.
+setup
+{
+  CREATE TABLE tab_freeze (
+    id int PRIMARY KEY,
+    name char(3),
+    x int);
+  INSERT INTO tab_freeze VALUES (1, '111', 0);
+  INSERT INTO tab_freeze VALUES (3, '333', 0);
+}
+
+teardown
+{
+  DROP TABLE tab_freeze;
+}
+
+session "s1"
+setup				{ BEGIN; }
+step "s1_update"	{ UPDATE tab_freeze SET x = x + 1 WHERE id = 3; }
+step "s1_commit"	{ COMMIT; }
+step "s1_vacuum"	{ VACUUM FREEZE tab_freeze; }
+
+session "s2"
+setup				{ BEGIN; }
+step "s2_key_share"	{ SELECT id FROM tab_freeze WHERE id = 3 FOR KEY SHARE; }
+step "s2_commit"	{ COMMIT; }