summary | refs | log | tree | commit | diff
path: root/src/backend/access
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2005-08-26 20:07:17 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2005-08-26 20:07:17 +0000
commit 26f1202ca318459753a39b2ced5cb6ea9cd8ab8d (patch)
tree 7fc767c2ad2f343dbbbe7c3e8feae7d97d388cab /src/backend/access
parent d5bd53c2c5aea50d0a9c5a445da9da9c34990bd8 (diff)
Back-patch fixes for problems with VACUUM destroying t_ctid chains too soon,
and with insufficient paranoia in code that follows t_ctid links. This patch covers the 7.3 branch.
Diffstat (limited to 'src/backend/access')
-rw-r--r-- src/backend/access/heap/heapam.c 233
1 files changed, 161 insertions, 72 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b4b20c679b3..f12a76f4977 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.149.2.1 2004/10/13 22:22:21 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.149.2.2 2005/08/26 20:07:15 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -1014,89 +1014,136 @@ heap_fetch(Relation relation,
/*
* heap_get_latest_tid - get the latest tid of a specified tuple
+ *
+ * Actually, this gets the latest version that is visible according to
+ * the passed snapshot. You can pass SnapshotDirty to get the very latest,
+ * possibly uncommitted version.
+ *
+ * *tid is both an input and an output parameter: it is updated to
+ * show the latest version of the row. Note that it will not be changed
+ * if no version of the row passes the snapshot test.
*/
-ItemPointer
+void
heap_get_latest_tid(Relation relation,
Snapshot snapshot,
ItemPointer tid)
{
- ItemId lp = NULL;
- Buffer buffer;
- PageHeader dp;
- OffsetNumber offnum;
- HeapTupleData tp;
- HeapTupleHeader t_data;
+ BlockNumber blk;
ItemPointerData ctid;
- bool invalidBlock,
- linkend,
- valid;
+ TransactionId priorXmax;
+
+ /* this is to avoid Assert failures on bad input */
+ if (!ItemPointerIsValid(tid))
+ return;
/*
- * get the buffer from the relation descriptor Note that this does a
- * buffer pin.
+ * Since this can be called with user-supplied TID, don't trust the
+ * input too much. (RelationGetNumberOfBlocks is an expensive check,
+ * so we don't check t_ctid links again this way. Note that it would
+ * not do to call it just once and save the result, either.)
*/
-
- buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
-
- if (!BufferIsValid(buffer))
- elog(ERROR, "heap_get_latest_tid: %s relation: ReadBuffer(%lx) failed",
- RelationGetRelationName(relation), (long) tid);
-
- LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ blk = ItemPointerGetBlockNumber(tid);
+ if (blk >= RelationGetNumberOfBlocks(relation))
+ elog(ERROR, "block number %u is out of range for relation \"%s\"",
+ blk, RelationGetRelationName(relation));
/*
- * get the item line pointer corresponding to the requested tid
+ * Loop to chase down t_ctid links. At top of loop, ctid is the
+ * tuple we need to examine, and *tid is the TID we will return if
+ * ctid turns out to be bogus.
+ *
+ * Note that we will loop until we reach the end of the t_ctid chain.
+ * Depending on the snapshot passed, there might be at most one visible
+ * version of the row, but we don't try to optimize for that.
*/
- dp = (PageHeader) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(tid);
- invalidBlock = true;
- if (!PageIsNew(dp))
- {
- lp = PageGetItemId(dp, offnum);
- if (ItemIdIsUsed(lp))
- invalidBlock = false;
- }
- if (invalidBlock)
+ ctid = *tid;
+ priorXmax = InvalidTransactionId; /* cannot check first XMIN */
+ for (;;)
{
- LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- ReleaseBuffer(buffer);
- return NULL;
- }
+ Buffer buffer;
+ PageHeader dp;
+ OffsetNumber offnum;
+ ItemId lp;
+ HeapTupleData tp;
+ bool valid;
- /*
- * more sanity checks
- */
+ /*
+ * Read, pin, and lock the page.
+ */
+ buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
- tp.t_datamcxt = NULL;
- t_data = tp.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
- tp.t_len = ItemIdGetLength(lp);
- tp.t_self = *tid;
- ctid = tp.t_data->t_ctid;
+ if (!BufferIsValid(buffer))
+ elog(ERROR, "ReadBuffer(\"%s\", %lu) failed",
+ RelationGetRelationName(relation),
+ (unsigned long) ItemPointerGetBlockNumber(&ctid));
- /*
- * check time qualification of tid
- */
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ dp = (PageHeader) BufferGetPage(buffer);
- HeapTupleSatisfies(&tp, relation, buffer, dp,
- snapshot, 0, (ScanKey) NULL, valid);
+ /*
+ * Check for bogus item number. This is not treated as an error
+ * condition because it can happen while following a t_ctid link.
+ * We just assume that the prior tid is OK and return it unchanged.
+ */
+ offnum = ItemPointerGetOffsetNumber(&ctid);
+ if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
+ lp = PageGetItemId(dp, offnum);
+ if (!ItemIdIsUsed(lp))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
- linkend = true;
- if ((t_data->t_infomask & HEAP_XMIN_COMMITTED) != 0 &&
- !ItemPointerEquals(tid, &ctid))
- linkend = false;
+ /* OK to access the tuple */
+ tp.t_self = ctid;
+ tp.t_datamcxt = NULL;
+ tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
+ tp.t_len = ItemIdGetLength(lp);
- LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- ReleaseBuffer(buffer);
+ /*
+ * After following a t_ctid link, we might arrive at an unrelated
+ * tuple. Check for XMIN match.
+ */
+ if (TransactionIdIsValid(priorXmax) &&
+ !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
- if (!valid)
- {
- if (linkend)
- return NULL;
- heap_get_latest_tid(relation, snapshot, &ctid);
- *tid = ctid;
- }
+ /*
+ * Check time qualification of tuple; if visible, set it as the new
+ * result candidate.
+ */
+ HeapTupleSatisfies(&tp, relation, buffer, dp,
+ snapshot, 0, NULL, valid);
+ if (valid)
+ *tid = ctid;
+
+ /*
+ * If there's a valid t_ctid link, follow it, else we're done.
+ */
+ if ((tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
+ HEAP_MARKED_FOR_UPDATE)) ||
+ ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
+ {
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ break;
+ }
- return tid;
+ ctid = tp.t_data->t_ctid;
+ priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ } /* end of loop */
}
/*
@@ -1264,7 +1311,8 @@ simple_heap_insert(Relation relation, HeapTuple tup)
*/
int
heap_delete(Relation relation, ItemPointer tid,
- ItemPointer ctid, CommandId cid)
+ ItemPointer ctid, TransactionId *update_xmax,
+ CommandId cid)
{
ItemId lp;
HeapTupleData tp;
@@ -1288,11 +1336,11 @@ heap_delete(Relation relation, ItemPointer tid,
dp = (PageHeader) BufferGetPage(buffer);
lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
+
tp.t_datamcxt = NULL;
- tp.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+ tp.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;
- tp.t_tableOid = relation->rd_id;
l1:
sv_infomask = tp.t_data->t_infomask;
@@ -1339,7 +1387,9 @@ l1:
if (result != HeapTupleMayBeUpdated)
{
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
+ Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
*ctid = tp.t_data->t_ctid;
+ *update_xmax = HeapTupleHeaderGetXmax(tp.t_data);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return result;
@@ -1429,10 +1479,13 @@ l1:
void
simple_heap_delete(Relation relation, ItemPointer tid)
{
- ItemPointerData ctid;
int result;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
- result = heap_delete(relation, tid, &ctid, GetCurrentCommandId());
+ result = heap_delete(relation, tid,
+ &update_ctid, &update_xmax,
+ GetCurrentCommandId());
switch (result)
{
case HeapTupleSelfUpdated:
@@ -1462,7 +1515,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
*/
int
heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
- ItemPointer ctid, CommandId cid)
+ ItemPointer ctid, TransactionId *update_xmax,
+ CommandId cid)
{
ItemId lp;
HeapTupleData oldtup;
@@ -1547,7 +1601,9 @@ l2:
if (result != HeapTupleMayBeUpdated)
{
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
+ Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
*ctid = oldtup.t_data->t_ctid;
+ *update_xmax = HeapTupleHeaderGetXmax(oldtup.t_data);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buffer);
return result;
@@ -1767,10 +1823,13 @@ l2:
void
simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
{
- ItemPointerData ctid;
int result;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
- result = heap_update(relation, otid, tup, &ctid, GetCurrentCommandId());
+ result = heap_update(relation, otid, tup,
+ &update_ctid, &update_xmax,
+ GetCurrentCommandId());
switch (result)
{
case HeapTupleSelfUpdated:
@@ -1794,9 +1853,34 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
/*
* heap_mark4update - mark a tuple for update
+ *
+ * Note that this acquires a buffer pin, which the caller must release.
+ *
+ * Input parameters:
+ * relation: relation containing tuple (caller must hold suitable lock)
+ * tuple->t_self: TID of tuple to lock (rest of struct need not be valid)
+ * cid: current command ID (used for visibility test, and stored into
+ * tuple's cmax if lock is successful)
+ *
+ * Output parameters:
+ * *tuple: all fields filled in
+ * *buffer: set to buffer holding tuple (pinned but not locked at exit)
+ * *ctid: set to tuple's t_ctid, but only in failure cases
+ * *update_xmax: set to tuple's xmax, but only in failure cases
+ *
+ * Function result may be:
+ * HeapTupleMayBeUpdated: lock was successfully acquired
+ * HeapTupleSelfUpdated: lock failed because tuple updated by self
+ * HeapTupleUpdated: lock failed because tuple updated by other xact
+ *
+ * In the failure cases, the routine returns the tuple's t_ctid and t_xmax.
+ * If t_ctid is the same as t_self, the tuple was deleted; if different, the
+ * tuple was updated, and t_ctid is the location of the replacement tuple.
+ * (t_xmax is needed to verify that the replacement tuple matches.)
*/
int
heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer,
+ ItemPointer ctid, TransactionId *update_xmax,
CommandId cid)
{
ItemPointer tid = &(tuple->t_self);
@@ -1818,9 +1902,12 @@ heap_mark4update(Relation relation, HeapTuple tuple, Buffer *buffer,
dp = (PageHeader) BufferGetPage(*buffer);
lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
+ Assert(ItemIdIsUsed(lp));
+
tuple->t_datamcxt = NULL;
tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
tuple->t_len = ItemIdGetLength(lp);
+ tuple->t_tableOid = RelationGetRelid(relation);
l3:
sv_infomask = tuple->t_data->t_infomask;
@@ -1867,7 +1954,9 @@ l3:
if (result != HeapTupleMayBeUpdated)
{
Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated);
- tuple->t_self = tuple->t_data->t_ctid;
+ Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
+ *ctid = tuple->t_data->t_ctid;
+ *update_xmax = HeapTupleHeaderGetXmax(tuple->t_data);
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
return result;
}