diff options
Diffstat (limited to 'src/backend/storage/smgr/md.c')
-rw-r--r-- | src/backend/storage/smgr/md.c | 52 |
1 files changed, 43 insertions, 9 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 301a24bd66c..a25d07a6f76 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -359,11 +359,10 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) * During replay, we would delete the file and then recreate it, which is fine * if the contents of the file were repopulated by subsequent WAL entries. * But if we didn't WAL-log insertions, but instead relied on fsyncing the - * file after populating it (as for instance CLUSTER and CREATE INDEX do), - * the contents of the file would be lost forever. By leaving the empty file - * until after the next checkpoint, we prevent reassignment of the relfilenode - * number until it's safe, because relfilenode assignment skips over any - * existing file. + * file after populating it (as we do at wal_level=minimal), the contents of + * the file would be lost forever. By leaving the empty file until after the + * next checkpoint, we prevent reassignment of the relfilenode number until + * it's safe, because relfilenode assignment skips over any existing file. * * We do not need to go through this dance for temp relations, though, because * we never make WAL entries for temp rels, and so a temp rel poses no threat @@ -1019,12 +1018,19 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) * mdimmedsync() -- Immediately sync a relation to stable storage. * * Note that only writes already issued are synced; this routine knows - * nothing of dirty buffers that may exist inside the buffer manager. + * nothing of dirty buffers that may exist inside the buffer manager. We + * sync active and inactive segments; smgrDoPendingSyncs() relies on this. + * Consider a relation skipping WAL. Suppose a checkpoint syncs blocks of + * some segment, then mdtruncate() renders that segment inactive. If we + * crash before the next checkpoint syncs the newly-inactive segment, that + * segment may survive recovery, reintroducing unwanted data into the table. */ void mdimmedsync(SMgrRelation reln, ForkNumber forknum) { MdfdVec *v; + BlockNumber segno = 0; + bool active = true; /* * NOTE: mdnblocks makes sure we have opened all active segments, so that @@ -1034,14 +1040,42 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) v = mdopen(reln, forknum, EXTENSION_FAIL); + /* + * Temporarily open inactive segments, then close them after sync. There + * may be some inactive segments left opened after fsync() error, but that + * is harmless. We don't bother to clean them up and take a risk of + * further trouble. The next mdclose() will soon close them. + */ while (v != NULL) { - if (FileSync(v->mdfd_vfd) < 0) + File vfd = v->mdfd_vfd; + + if (active) + v = v->mdfd_chain; + else + { + Assert(v->mdfd_chain == NULL); + pfree(v); + v = NULL; + } + + if (FileSync(vfd) < 0) ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", - FilePathName(v->mdfd_vfd)))); - v = v->mdfd_chain; + FilePathName(vfd)))); + + /* Close inactive segments immediately */ + if (!active) + FileClose(vfd); + + segno++; + + if (v == NULL) + { + v = _mdfd_openseg(reln, forknum, segno, 0); + active = false; + } } } |