summary | refs | log | tree | commit | diff
path: root/src/backend/storage
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2004-06-02 17:28:18 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2004-06-02 17:28:18 +0000
commit: 2095206de13b3fc9643cac7eef8c0f51b56cb556 (patch)
tree: a5347eda127bfbb2497de1d74f26663b89f20d67 /src/backend/storage
parent: 4d0e47d5a9482651007f946228381d3fa0141181 (diff)
Adjust btree index build to not use shared buffers, thereby avoiding the
locking conflict against concurrent CHECKPOINT that was discussed a few weeks ago. Also, if not using WAL archiving (which is always true ATM but won't be if PITR makes it into this release), there's no need to WAL-log the index build process; it's sufficient to force-fsync the completed index before commit. This seems to gain about a factor of 2 in my tests, which is consistent with writing half as much data. I did not try it with WAL on a separate drive though --- probably the gain would be a lot less in that scenario.
Diffstat (limited to 'src/backend/storage')
-rw-r--r-- src/backend/storage/smgr/md.c 36
-rw-r--r-- src/backend/storage/smgr/smgr.c 34
2 files changed, 67 insertions, 3 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 1a0218c4e58..4f0d241215d 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.106 2004/05/31 20:31:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.107 2004/06/02 17:28:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -662,6 +662,40 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
}
/*
+ * mdimmedsync() -- Immediately sync a relation to stable storage.
+ *
+ * Calls mdnblocks() first, then FileSync() on the relation's file(s).
+ * When LET_OS_MANAGE_FILESIZE is not defined, every MdfdVec reachable
+ * from the one returned by mdopen() via mdfd_chain is synced; otherwise
+ * only the MdfdVec returned by mdopen() is synced.
+ *
+ * Returns true if every FileSync() call succeeded.  Returns false if
+ * mdnblocks() returned InvalidBlockNumber or if any FileSync() call
+ * returned a negative value; the loop stops at the first such failure.
+ *
+ * NOTE(review): the result of mdopen() is dereferenced without a NULL
+ * check in the LET_OS_MANAGE_FILESIZE branch; presumably mdopen() cannot
+ * return NULL once mdnblocks() has succeeded -- confirm against mdopen().
+ */
+bool
+mdimmedsync(SMgrRelation reln)
+{
+ MdfdVec *v;
+ BlockNumber curnblk;
+
+ /*
+ * NOTE: mdnblocks makes sure we have opened all existing segments, so
+ * that fsync loop will get them all!
+ *
+ * The block count itself is not used below; curnblk is only compared
+ * against InvalidBlockNumber to detect failure.
+ */
+ curnblk = mdnblocks(reln);
+ if (curnblk == InvalidBlockNumber)
+ return false; /* mdnblocks failed */
+
+ v = mdopen(reln, false); /* starting MdfdVec for the sync pass below */
+
+#ifndef LET_OS_MANAGE_FILESIZE
+ while (v != NULL) /* follow mdfd_chain until it ends */
+ {
+ if (FileSync(v->mdfd_vfd) < 0)
+ return false; /* give up at the first failed sync */
+ v = v->mdfd_chain;
+ }
+#else
+ if (FileSync(v->mdfd_vfd) < 0) /* no chain walk in this configuration */
+ return false;
+#endif
+
+ return true;
+}
+
+/*
* mdsync() -- Sync previous writes to stable storage.
*
* This is only called during checkpoints, and checkpoints should only
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 5320532be4b..8977f026e4f 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.72 2004/05/31 20:31:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.73 2004/06/02 17:28:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -48,6 +48,7 @@ typedef struct f_smgr
BlockNumber (*smgr_nblocks) (SMgrRelation reln);
BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
bool isTemp);
+ bool (*smgr_immedsync) (SMgrRelation reln);
bool (*smgr_commit) (void); /* may be NULL */
bool (*smgr_abort) (void); /* may be NULL */
bool (*smgr_sync) (void); /* may be NULL */
@@ -57,7 +58,8 @@ typedef struct f_smgr
static const f_smgr smgrsw[] = {
/* magnetic disk */
{mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
- mdread, mdwrite, mdnblocks, mdtruncate, NULL, NULL, mdsync
+ mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
+ NULL, NULL, mdsync
}
};
@@ -583,6 +585,34 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
}
/*
+ * smgrimmedsync() -- Force the specified relation to stable storage.
+ *
+ * Synchronously force all of the specified relation down to disk.
+ *
+ * This is really only useful for non-WAL-logged index building:
+ * instead of incrementally WAL-logging the index build steps,
+ * we can just write completed index pages to disk with smgrwrite
+ * or smgrextend, and then fsync the completed index file before
+ * committing the transaction. (This is sufficient for purposes of
+ * crash recovery, since it effectively duplicates forcing a checkpoint
+ * for the completed index. But it is *not* workable if one wishes
+ * to use the WAL log for PITR or replication purposes.)
+ *
+ * The preceding writes should specify isTemp = true to avoid
+ * duplicative fsyncs.
+ *
+ * The work is delegated to the smgr_immedsync callback selected by
+ * reln->smgr_which in the smgrsw table.  There is no return value:
+ * if the callback returns false, an ERROR is reported through ereport,
+ * identifying the relation by the tblNode and relNode fields of
+ * reln->smgr_rnode.
+ */
+void
+smgrimmedsync(SMgrRelation reln)
+{
+ if (! (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln)) /* false means the sync failed */
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not sync relation %u/%u: %m", /* %m is handled by errmsg, not plain printf */
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode)));
+}
+
+/*
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
*/
void