diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2004-06-02 17:28:18 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2004-06-02 17:28:18 +0000 |
commit | 2095206de13b3fc9643cac7eef8c0f51b56cb556 (patch) | |
tree | a5347eda127bfbb2497de1d74f26663b89f20d67 /src/backend/storage | |
parent | 4d0e47d5a9482651007f946228381d3fa0141181 (diff) |
Adjust btree index build to not use shared buffers, thereby avoiding the
locking conflict against concurrent CHECKPOINT that was discussed a few
weeks ago. Also, if not using WAL archiving (which is always true at the
moment, but won't be if PITR makes it into this release), there's no need to
WAL-log the index build process; it's sufficient to force-fsync the
completed index before commit. This seems to gain about a factor of 2
in my tests, which is consistent with writing half as much data. I did
not try it with WAL on a separate drive though --- probably the gain would
be a lot less in that scenario.
Diffstat (limited to 'src/backend/storage')
-rw-r--r-- | src/backend/storage/smgr/md.c | 36 | ||||
-rw-r--r-- | src/backend/storage/smgr/smgr.c | 34 |
2 files changed, 67 insertions, 3 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 1a0218c4e58..4f0d241215d 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.106 2004/05/31 20:31:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.107 2004/06/02 17:28:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -662,6 +662,40 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) } /* + * mdimmedsync() -- Immediately sync a relation to stable storage. + */ +bool +mdimmedsync(SMgrRelation reln) +{ + MdfdVec *v; + BlockNumber curnblk; + + /* + * NOTE: mdnblocks makes sure we have opened all existing segments, so + * that fsync loop will get them all! + */ + curnblk = mdnblocks(reln); + if (curnblk == InvalidBlockNumber) + return false; /* mdnblocks failed */ + + v = mdopen(reln, false); + +#ifndef LET_OS_MANAGE_FILESIZE + while (v != NULL) + { + if (FileSync(v->mdfd_vfd) < 0) + return false; + v = v->mdfd_chain; + } +#else + if (FileSync(v->mdfd_vfd) < 0) + return false; +#endif + + return true; +} + +/* * mdsync() -- Sync previous writes to stable storage. 
* * This is only called during checkpoints, and checkpoints should only diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 5320532be4b..8977f026e4f 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.72 2004/05/31 20:31:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.73 2004/06/02 17:28:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,6 +48,7 @@ typedef struct f_smgr BlockNumber (*smgr_nblocks) (SMgrRelation reln); BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks, bool isTemp); + bool (*smgr_immedsync) (SMgrRelation reln); bool (*smgr_commit) (void); /* may be NULL */ bool (*smgr_abort) (void); /* may be NULL */ bool (*smgr_sync) (void); /* may be NULL */ @@ -57,7 +58,8 @@ typedef struct f_smgr static const f_smgr smgrsw[] = { /* magnetic disk */ {mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend, - mdread, mdwrite, mdnblocks, mdtruncate, NULL, NULL, mdsync + mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync, + NULL, NULL, mdsync } }; @@ -583,6 +585,34 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) } /* + * smgrimmedsync() -- Force the specified relation to stable storage. + * + * Synchronously force all of the specified relation down to disk. + * + * This is really only useful for non-WAL-logged index building: + * instead of incrementally WAL-logging the index build steps, + * we can just write completed index pages to disk with smgrwrite + * or smgrextend, and then fsync the completed index file before + * committing the transaction. (This is sufficient for purposes of + * crash recovery, since it effectively duplicates forcing a checkpoint + * for the completed index. But it is *not* workable if one wishes + * to use the WAL log for PITR or replication purposes.) 
+ * + * The preceding writes should specify isTemp = true to avoid + * duplicative fsyncs. + */ +void +smgrimmedsync(SMgrRelation reln) +{ + if (! (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not sync relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); +} + +/* * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact. */ void |