summaryrefslogtreecommitdiff
path: root/src/backend/access/nbtree/nbtpage.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2010-08-29 19:33:43 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2010-08-29 19:33:43 +0000
commita5c025cbeb9f2364b6a5db152f220869dd9f5216 (patch)
tree1a10759bd94ef59afda6424f9221ab085c9a6052 /src/backend/access/nbtree/nbtpage.c
parente8254980dff098e171ca5a896dbac54977c88aef (diff)
Reduce PANIC to ERROR in some occasionally-reported btree failure cases.
This patch changes _bt_split() and _bt_pagedel() to throw a plain ERROR, rather than PANIC, for several cases that are reported from the field from time to time: * right sibling's left-link doesn't match; * PageAddItem failure during _bt_split(); * parent page's next child isn't right sibling during _bt_pagedel(). In addition the error messages for these cases have been made a bit more verbose, with additional values included. The original motivation for PANIC here was to capture core dumps for subsequent analysis. But with so many users whose platforms don't capture core dumps by default, or who are unprepared to analyze them anyway, it's hard to justify a forced database restart when we can fairly easily detect the problems before we've reached the critical sections where PANIC would be necessary. It is not currently known whether the reports of these messages indicate well-hidden bugs in Postgres, or are a result of storage-level malfeasance; the latter possibility suggests that we ought to try to be more robust even if there is a bug here that's ultimately found. Backpatch to 8.2. The code before that is sufficiently different that it doesn't seem worth the trouble to back-port further.
Diffstat (limited to 'src/backend/access/nbtree/nbtpage.c')
-rw-r--r--src/backend/access/nbtree/nbtpage.c54
1 files changed, 42 insertions, 12 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 6bf5fb70588..911db47d510 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.101.2.1 2007/12/31 04:52:20 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.101.2.2 2010/08/29 19:33:43 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -1027,6 +1027,13 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full)
*/
rightsib = opaque->btpo_next;
rbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
+ page = BufferGetPage(rbuf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+ if (opaque->btpo_prev != target)
+ elog(ERROR, "right sibling's left-link doesn't match: "
+ "block %u links to %u instead of expected %u in index \"%s\"",
+ rightsib, opaque->btpo_prev, target,
+ RelationGetRelationName(rel));
/*
* Next find and write-lock the current parent of the target page. This is
@@ -1105,6 +1112,38 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full)
}
/*
+ * Check that the parent-page index items we're about to delete/overwrite
+ * contain what we expect. This can fail if the index has become
+ * corrupt for some reason. We want to throw any error before entering
+ * the critical section --- otherwise it'd be a PANIC.
+ *
+ * The test on the target item is just an Assert because _bt_getstackbuf
+ * should have guaranteed it has the expected contents. The test on the
+ * next-child downlink is known to sometimes fail in the field, though.
+ */
+ page = BufferGetPage(pbuf);
+ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+#ifdef USE_ASSERT_CHECKING
+ itemid = PageGetItemId(page, poffset);
+ itup = (IndexTuple) PageGetItem(page, itemid);
+ Assert(ItemPointerGetBlockNumber(&(itup->t_tid)) == target);
+#endif
+
+ if (!parent_half_dead)
+ {
+ OffsetNumber nextoffset;
+
+ nextoffset = OffsetNumberNext(poffset);
+ itemid = PageGetItemId(page, nextoffset);
+ itup = (IndexTuple) PageGetItem(page, itemid);
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) != rightsib)
+ elog(ERROR, "right sibling %u of block %u is not next child %u of block %u in index \"%s\"",
+ rightsib, target, ItemPointerGetBlockNumber(&(itup->t_tid)),
+ parent, RelationGetRelationName(rel));
+ }
+
+ /*
* Here we begin doing the deletion.
*/
@@ -1117,8 +1156,6 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full)
* to copy the right sibling's downlink over the target downlink, and then
* delete the following item.
*/
- page = BufferGetPage(pbuf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (parent_half_dead)
{
PageIndexTupleDelete(page, poffset);
@@ -1130,23 +1167,16 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full)
itemid = PageGetItemId(page, poffset);
itup = (IndexTuple) PageGetItem(page, itemid);
- Assert(ItemPointerGetBlockNumber(&(itup->t_tid)) == target);
ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
nextoffset = OffsetNumberNext(poffset);
- /* This part is just for double-checking */
- itemid = PageGetItemId(page, nextoffset);
- itup = (IndexTuple) PageGetItem(page, itemid);
- if (ItemPointerGetBlockNumber(&(itup->t_tid)) != rightsib)
- elog(PANIC, "right sibling %u of block %u is not next child of %u in index \"%s\"",
- rightsib, target, BufferGetBlockNumber(pbuf),
- RelationGetRelationName(rel));
PageIndexTupleDelete(page, nextoffset);
}
/*
* Update siblings' side-links. Note the target page's side-links will
- * continue to point to the siblings.
+ * continue to point to the siblings. Asserts here are just rechecking
+ * things we already verified above.
*/
if (BufferIsValid(lbuf))
{