summaryrefslogtreecommitdiff
path: root/src/include/access/gin_private.h
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-20 17:56:26 +0200
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2014-11-20 18:46:41 +0200
commit2c03216d831160bedd72d45f712601b6f7d03f1c (patch)
treeab6a03d031ffa605d848b0b7067add15e56e2207 /src/include/access/gin_private.h
parent8dc626defec23016dd5988208d8704b858b9d21d (diff)
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and block(s) in a standardized format. That makes it easier to write tools that need that information, like pg_rewind, prefetching the blocks to speed up recovery, etc. There's a whole new API for building WAL records, replacing the XLogRecData chains used previously. The new API consists of XLogRegister* functions, which are called for each buffer and chunk of data that is added to the record. The new API also gives more control over when a full-page image is written, by passing flags to the XLogRegisterBuffer function. This also simplifies the XLogReadBufferForRedo() calls. The function can dig the relation and block number from the WAL record, so they no longer need to be passed as arguments. For the convenience of redo routines, XLogReader now disects each WAL record after reading it, copying the main data part and the per-block data into MAXALIGNed buffers. The data chunks are not aligned within the WAL record, but the redo routines can assume that the pointers returned by XLogRecGet* functions are. Redo routines are now passed the XLogReaderState, which contains the record in the already-disected format, instead of the plain XLogRecord. The new record format also makes the fixed size XLogRecord header smaller, by removing the xl_len field. The length of the "main data" portion is now stored at the end of the WAL record, and there's a separate header after XLogRecord for it. The alignment padding at the end of XLogRecord is also removed. This compansates for the fact that the new format would otherwise be more bulky than the old format. Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera, Fujii Masao.
Diffstat (limited to 'src/include/access/gin_private.h')
-rw-r--r--src/include/access/gin_private.h66
1 files changed, 34 insertions, 32 deletions
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 333316d78e2..3d46f20bb83 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -13,7 +13,6 @@
#include "access/genam.h"
#include "access/gin.h"
#include "access/itup.h"
-#include "access/xloginsert.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "utils/rbtree.h"
@@ -397,22 +396,22 @@ typedef struct
typedef struct ginxlogCreatePostingTree
{
- RelFileNode node;
- BlockNumber blkno;
uint32 size;
/* A compressed posting list follows */
} ginxlogCreatePostingTree;
-#define XLOG_GIN_INSERT 0x20
-
/*
* The format of the insertion record varies depending on the page type.
* ginxlogInsert is the common part between all variants.
+ *
+ * Backup Blk 0: target page
+ * Backup Blk 1: left child, if this insertion finishes an incomplete split
*/
+
+#define XLOG_GIN_INSERT 0x20
+
typedef struct
{
- RelFileNode node;
- BlockNumber blkno;
uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
/*
@@ -477,14 +476,17 @@ typedef struct
PostingItem newitem;
} ginxlogInsertDataInternal;
-
+/*
+ * Backup Blk 0: new left page (= original page, if not root split)
+ * Backup Blk 1: new right page
+ * Backup Blk 2: original page / new root page, if root split
+ * Backup Blk 3: left child, if this insertion completes an earlier split
+ */
#define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit
{
RelFileNode node;
- BlockNumber lblkno;
- BlockNumber rblkno;
BlockNumber rrlink; /* right link, or root's blocknumber if root
* split */
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
@@ -538,15 +540,6 @@ typedef struct
*/
#define XLOG_GIN_VACUUM_PAGE 0x40
-typedef struct ginxlogVacuumPage
-{
- RelFileNode node;
- BlockNumber blkno;
- uint16 hole_offset; /* number of bytes before "hole" */
- uint16 hole_length; /* number of bytes in "hole" */
- /* entire page contents (minus the hole) follow at end of record */
-} ginxlogVacuumPage;
-
/*
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
* by insertion.
@@ -555,26 +548,28 @@ typedef struct ginxlogVacuumPage
typedef struct ginxlogVacuumDataLeafPage
{
- RelFileNode node;
- BlockNumber blkno;
-
ginxlogRecompressDataLeaf data;
} ginxlogVacuumDataLeafPage;
+/*
+ * Backup Blk 0: deleted page
+ * Backup Blk 1: parent
+ * Backup Blk 2: left sibling
+ */
#define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage
{
- RelFileNode node;
- BlockNumber blkno;
- BlockNumber parentBlkno;
OffsetNumber parentOffset;
- BlockNumber leftBlkno;
BlockNumber rightLink;
} ginxlogDeletePage;
#define XLOG_GIN_UPDATE_META_PAGE 0x60
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1: tail page
+ */
typedef struct ginxlogUpdateMeta
{
RelFileNode node;
@@ -591,22 +586,29 @@ typedef struct ginxlogUpdateMeta
typedef struct ginxlogInsertListPage
{
- RelFileNode node;
- BlockNumber blkno;
BlockNumber rightlink;
int32 ntuples;
/* array of inserted tuples follows */
} ginxlogInsertListPage;
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1 to (ndeleted + 1): deleted pages
+ */
+
#define XLOG_GIN_DELETE_LISTPAGE 0x80
-#define GIN_NDELETE_AT_ONCE 16
+/*
+ * The WAL record for deleting list pages must contain a block reference to
+ * all the deleted pages, so the number of pages that can be deleted in one
+ * record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
+ * metapage.)
+ */
+#define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
typedef struct ginxlogDeleteListPages
{
- RelFileNode node;
GinMetaPageData metadata;
int32 ndeleted;
- BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
} ginxlogDeleteListPages;
@@ -673,7 +675,7 @@ typedef struct GinBtreeData
/* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
- GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *);
+ GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, Page *, Page *);
void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);