summaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginvacuum.c
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-20 17:56:26 +0200
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-20 18:46:41 +0200
commit2c03216d831160bedd72d45f712601b6f7d03f1c (patch)
treeab6a03d031ffa605d848b0b7067add15e56e2207 /src/backend/access/gin/ginvacuum.c
parent8dc626defec23016dd5988208d8704b858b9d21d (diff)
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and block(s) in a standardized format. That makes it easier to write tools that need that information, like pg_rewind, prefetching the blocks to speed up recovery, etc. There's a whole new API for building WAL records, replacing the XLogRecData chains used previously. The new API consists of XLogRegister* functions, which are called for each buffer and chunk of data that is added to the record. The new API also gives more control over when a full-page image is written, by passing flags to the XLogRegisterBuffer function. This also simplifies the XLogReadBufferForRedo() calls. The function can dig the relation and block number from the WAL record, so they no longer need to be passed as arguments. For the convenience of redo routines, XLogReader now disects each WAL record after reading it, copying the main data part and the per-block data into MAXALIGNed buffers. The data chunks are not aligned within the WAL record, but the redo routines can assume that the pointers returned by XLogRecGet* functions are. Redo routines are now passed the XLogReaderState, which contains the record in the already-disected format, instead of the plain XLogRecord. The new record format also makes the fixed size XLogRecord header smaller, by removing the xl_len field. The length of the "main data" portion is now stored at the end of the WAL record, and there's a separate header after XLogRecord for it. The alignment padding at the end of XLogRecord is also removed. This compansates for the fact that the new format would otherwise be more bulky than the old format. Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera, Fujii Masao.
Diffstat (limited to 'src/backend/access/gin/ginvacuum.c')
-rw-r--r--src/backend/access/gin/ginvacuum.c114
1 files changed, 23 insertions, 91 deletions
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 3a61321a835..6f32600ed79 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -89,10 +89,6 @@ xlogVacuumPage(Relation index, Buffer buffer)
{
Page page = BufferGetPage(buffer);
XLogRecPtr recptr;
- XLogRecData rdata[3];
- ginxlogVacuumPage xlrec;
- uint16 lower;
- uint16 upper;
/* This is only used for entry tree leaf pages. */
Assert(!GinPageIsData(page));
@@ -101,57 +97,14 @@ xlogVacuumPage(Relation index, Buffer buffer)
if (!RelationNeedsWAL(index))
return;
- xlrec.node = index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
-
- /* Assume we can omit data between pd_lower and pd_upper */
- lower = ((PageHeader) page)->pd_lower;
- upper = ((PageHeader) page)->pd_upper;
-
- Assert(lower < BLCKSZ);
- Assert(upper < BLCKSZ);
-
- if (lower >= SizeOfPageHeaderData &&
- upper > lower &&
- upper <= BLCKSZ)
- {
- xlrec.hole_offset = lower;
- xlrec.hole_length = upper - lower;
- }
- else
- {
- /* No "hole" to compress out */
- xlrec.hole_offset = 0;
- xlrec.hole_length = 0;
- }
-
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(ginxlogVacuumPage);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
-
- if (xlrec.hole_length == 0)
- {
- rdata[1].data = (char *) page;
- rdata[1].len = BLCKSZ;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
- }
- else
- {
- /* must skip the hole */
- rdata[1].data = (char *) page;
- rdata[1].len = xlrec.hole_offset;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &rdata[2];
-
- rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
- rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
- rdata[2].buffer = InvalidBuffer;
- rdata[2].next = NULL;
- }
+ /*
+ * Always create a full image, we don't track the changes on the page at
+ * any more fine-grained level. This could obviously be improved...
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
PageSetLSN(page, recptr);
}
@@ -292,48 +245,27 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
if (RelationNeedsWAL(gvs->index))
{
XLogRecPtr recptr;
- XLogRecData rdata[4];
ginxlogDeletePage data;
- data.node = gvs->index->rd_node;
- data.blkno = deleteBlkno;
- data.parentBlkno = parentBlkno;
+ /*
+ * We can't pass REGBUF_STANDARD for the deleted page, because we
+ * didn't set pd_lower on pre-9.4 versions. The page might've been
+ * binary-upgraded from an older version, and hence not have pd_lower
+ * set correctly. Ditto for the left page, but removing the item from
+ * the parent updated its pd_lower, so we know that's OK at this
+ * point.
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, dBuffer, 0);
+ XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(2, lBuffer, 0);
+
data.parentOffset = myoff;
- data.leftBlkno = leftBlkno;
data.rightLink = GinPageGetOpaque(page)->rightlink;
- /*
- * We can't pass buffer_std = TRUE, because we didn't set pd_lower on
- * pre-9.4 versions. The page might've been binary-upgraded from an
- * older version, and hence not have pd_lower set correctly. Ditto for
- * the left page, but removing the item from the parent updated its
- * pd_lower, so we know that's OK at this point.
- */
- rdata[0].buffer = dBuffer;
- rdata[0].buffer_std = FALSE;
- rdata[0].data = NULL;
- rdata[0].len = 0;
- rdata[0].next = rdata + 1;
-
- rdata[1].buffer = pBuffer;
- rdata[1].buffer_std = TRUE;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = rdata + 2;
-
- rdata[2].buffer = lBuffer;
- rdata[2].buffer_std = FALSE;
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].next = rdata + 3;
-
- rdata[3].buffer = InvalidBuffer;
- rdata[3].buffer_std = FALSE;
- rdata[3].len = sizeof(ginxlogDeletePage);
- rdata[3].data = (char *) &data;
- rdata[3].next = NULL;
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
+ XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
PageSetLSN(page, recptr);
PageSetLSN(parentPage, recptr);
PageSetLSN(BufferGetPage(lBuffer), recptr);