summaryrefslogtreecommitdiff
path: root/src/backend/access/gin/gindatapage.c
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-20 17:56:26 +0200
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-20 18:46:41 +0200
commit2c03216d831160bedd72d45f712601b6f7d03f1c (patch)
treeab6a03d031ffa605d848b0b7067add15e56e2207 /src/backend/access/gin/gindatapage.c
parent8dc626defec23016dd5988208d8704b858b9d21d (diff)
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and block(s) in a standardized format. That makes it easier to write tools that need that information, like pg_rewind, prefetching the blocks to speed up recovery, etc. There's a whole new API for building WAL records, replacing the XLogRecData chains used previously. The new API consists of XLogRegister* functions, which are called for each buffer and chunk of data that is added to the record. The new API also gives more control over when a full-page image is written, by passing flags to the XLogRegisterBuffer function. This also simplifies the XLogReadBufferForRedo() calls. The function can dig the relation and block number from the WAL record, so they no longer need to be passed as arguments. For the convenience of redo routines, XLogReader now disects each WAL record after reading it, copying the main data part and the per-block data into MAXALIGNed buffers. The data chunks are not aligned within the WAL record, but the redo routines can assume that the pointers returned by XLogRecGet* functions are. Redo routines are now passed the XLogReaderState, which contains the record in the already-disected format, instead of the plain XLogRecord. The new record format also makes the fixed size XLogRecord header smaller, by removing the xl_len field. The length of the "main data" portion is now stored at the end of the WAL record, and there's a separate header after XLogRecord for it. The alignment padding at the end of XLogRecord is also removed. This compansates for the fact that the new format would otherwise be more bulky than the old format. Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera, Fujii Masao.
Diffstat (limited to 'src/backend/access/gin/gindatapage.c')
-rw-r--r--src/backend/access/gin/gindatapage.c162
1 files changed, 46 insertions, 116 deletions
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 97cd706c08e..012225eaa35 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -98,20 +98,19 @@ static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage);
+ Page *newlpage, Page *newrpage);
static disassembledLeaf *disassembleLeaf(Page page);
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
int nNewItems);
-static XLogRecData *constructLeafRecompressWALData(Buffer buf,
- disassembledLeaf *leaf);
+static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf);
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
static void dataPlaceToPageLeafSplit(Buffer buf,
disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage);
+ Page lpage, Page rpage);
/*
* Read TIDs from leaf data page to single uncompressed array. The TIDs are
@@ -428,8 +427,7 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
*/
static GinPlaceToPageRC
dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+ void *insertdata, Page *newlpage, Page *newrpage)
{
GinBtreeDataLeafInsertData *items = insertdata;
ItemPointer newItems = &items->items[items->curitem];
@@ -602,9 +600,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
*/
MemoryContextSwitchTo(oldCxt);
if (RelationNeedsWAL(btree->index))
- *prdata = constructLeafRecompressWALData(buf, leaf);
- else
- *prdata = NULL;
+ registerLeafRecompressWALData(buf, leaf);
START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buf, leaf);
@@ -685,7 +681,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
*newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
- prdata, *newlpage, *newrpage);
+ *newlpage, *newrpage);
Assert(GinPageRightMost(page) ||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
@@ -791,7 +787,6 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
*/
if (removedsomething)
{
- XLogRecData *payloadrdata = NULL;
bool modified;
/*
@@ -818,7 +813,10 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
}
if (RelationNeedsWAL(indexrel))
- payloadrdata = constructLeafRecompressWALData(buffer, leaf);
+ {
+ XLogBeginInsert();
+ registerLeafRecompressWALData(buffer, leaf);
+ }
START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buffer, leaf);
@@ -827,18 +825,8 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
if (RelationNeedsWAL(indexrel))
{
XLogRecPtr recptr;
- XLogRecData rdata;
- ginxlogVacuumDataLeafPage xlrec;
- xlrec.node = indexrel->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
-
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
- rdata.next = payloadrdata;
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
PageSetLSN(page, recptr);
}
@@ -850,13 +838,12 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
* Construct a ginxlogRecompressDataLeaf record representing the changes
* in *leaf.
*/
-static XLogRecData *
-constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
+static void
+registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
{
int nmodified = 0;
char *walbufbegin;
char *walbufend;
- XLogRecData *rdata;
dlist_iter iter;
int segno;
ginxlogRecompressDataLeaf *recompress_xlog;
@@ -871,12 +858,11 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
nmodified++;
}
- walbufbegin = palloc(
- sizeof(ginxlogRecompressDataLeaf) +
- BLCKSZ + /* max size needed to hold the segment
- * data */
- nmodified * 2 + /* (segno + action) per action */
- sizeof(XLogRecData));
+ walbufbegin =
+ palloc(sizeof(ginxlogRecompressDataLeaf) +
+ BLCKSZ + /* max size needed to hold the segment data */
+ nmodified * 2 /* (segno + action) per action */
+ );
walbufend = walbufbegin;
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
@@ -944,14 +930,10 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
segno++;
}
- rdata = (XLogRecData *) MAXALIGN(walbufend);
- rdata->buffer = buf;
- rdata->buffer_std = TRUE;
- rdata->data = walbufbegin;
- rdata->len = walbufend - walbufbegin;
- rdata->next = NULL;
- return rdata;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin);
+
}
/*
@@ -1024,7 +1006,7 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
static void
dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage)
+ Page lpage, Page rpage)
{
char *ptr;
int segsize;
@@ -1034,10 +1016,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
dlist_node *firstright;
leafSegmentInfo *seginfo;
- /* these must be static so they can be returned to caller */
- static ginxlogSplitDataLeaf split_xlog;
- static XLogRecData rdata[3];
-
/* Initialize temporary pages to hold the new left and right pages */
GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
@@ -1092,29 +1070,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
Assert(rsize == leaf->rsize);
GinDataPageSetDataSize(rpage, rsize);
*GinDataPageGetRightBound(rpage) = rbound;
-
- /* Create WAL record */
- split_xlog.lsize = lsize;
- split_xlog.rsize = rsize;
- split_xlog.lrightbound = lbound;
- split_xlog.rrightbound = rbound;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &split_xlog;
- rdata[0].len = sizeof(ginxlogSplitDataLeaf);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) GinDataLeafPageGetPostingList(lpage);
- rdata[1].len = lsize;
- rdata[1].next = &rdata[2];
-
- rdata[2].buffer = InvalidBuffer;
- rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
- rdata[2].len = rsize;
- rdata[2].next = NULL;
-
- *prdata = rdata;
}
/*
@@ -1124,29 +1079,30 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
*
* In addition to inserting the given item, the downlink of the existing item
* at 'off' is updated to point to 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns rdata that represents the split pages in *prdata.
*/
static GinPlaceToPageRC
dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
PostingItem *pitem;
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata;
+ /* this must be static so it can be returned to caller */
static ginxlogInsertDataInternal data;
/* split if we have to */
if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
{
dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
return SPLIT;
}
- *prdata = &rdata;
Assert(GinPageIsData(page));
START_CRIT_SECTION();
@@ -1159,14 +1115,15 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- data.offset = off;
- data.newitem = *pitem;
+ if (RelationNeedsWAL(btree->index))
+ {
+ data.offset = off;
+ data.newitem = *pitem;
- rdata.buffer = buf;
- rdata.buffer_std = TRUE;
- rdata.data = (char *) &data;
- rdata.len = sizeof(ginxlogInsertDataInternal);
- rdata.next = NULL;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) &data,
+ sizeof(ginxlogInsertDataInternal));
+ }
return INSERTED;
}
@@ -1178,7 +1135,6 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
static GinPlaceToPageRC
dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata,
Page *newlpage, Page *newrpage)
{
Page page = BufferGetPage(buf);
@@ -1187,11 +1143,11 @@ dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
if (GinPageIsLeaf(page))
return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
else
return dataPlaceToPageInternal(btree, buf, stack,
insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
}
/*
@@ -1202,7 +1158,7 @@ static void
dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
Page oldpage = BufferGetPage(origbuf);
OffsetNumber off = stack->off;
@@ -1215,19 +1171,13 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
Page lpage;
Page rpage;
OffsetNumber separator;
-
- /* these must be static so they can be returned to caller */
- static ginxlogSplitDataInternal data;
- static XLogRecData rdata[4];
- static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
+ PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
lpage = PageGetTempPage(oldpage);
rpage = PageGetTempPage(oldpage);
GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
- *prdata = rdata;
-
/*
* First construct a new list of PostingItems, which includes all the old
* items, and the new item.
@@ -1277,20 +1227,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
/* set up right bound for right page */
*GinDataPageGetRightBound(rpage) = oldbound;
- data.separator = separator;
- data.nitem = nitems;
- data.rightbound = oldbound;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplitDataInternal);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) allitems;
- rdata[1].len = nitems * sizeof(PostingItem);
- rdata[1].next = NULL;
-
*newlpage = lpage;
*newrpage = rpage;
}
@@ -1797,24 +1733,18 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata[2];
ginxlogCreatePostingTree data;
- data.node = index->rd_node;
- data.blkno = blkno;
data.size = rootsize;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogCreatePostingTree);
- rdata[0].next = &rdata[1];
+ XLogBeginInsert();
+ XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) GinDataLeafPageGetPostingList(page);
- rdata[1].len = rootsize;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
+ rootsize);
+ XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
PageSetLSN(page, recptr);
}