summaryrefslogtreecommitdiff
path: root/src/backend/access/transam/slru.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/slru.c')
-rw-r--r--src/backend/access/transam/slru.c154
1 files changed, 106 insertions, 48 deletions
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index fe7d759a8c1..16a78986971 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -63,22 +63,33 @@
snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
/*
- * During SimpleLruFlush(), we will usually not need to write/fsync more
- * than one or two physical files, but we may need to write several pages
- * per file. We can consolidate the I/O requests by leaving files open
- * until control returns to SimpleLruFlush(). This data structure remembers
- * which files are open.
+ * During SimpleLruWriteAll(), we will usually not need to write more than one
+ * or two physical files, but we may need to write several pages per file. We
+ * can consolidate the I/O requests by leaving files open until control returns
+ * to SimpleLruWriteAll(). This data structure remembers which files are open.
*/
-#define MAX_FLUSH_BUFFERS 16
+#define MAX_WRITEALL_BUFFERS 16
-typedef struct SlruFlushData
+typedef struct SlruWriteAllData
{
int num_files; /* # files actually open */
- int fd[MAX_FLUSH_BUFFERS]; /* their FD's */
- int segno[MAX_FLUSH_BUFFERS]; /* their log seg#s */
-} SlruFlushData;
+ int fd[MAX_WRITEALL_BUFFERS]; /* their FD's */
+ int segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */
+} SlruWriteAllData;
-typedef struct SlruFlushData *SlruFlush;
+typedef struct SlruWriteAllData *SlruWriteAll;
+
+/*
+ * Populate a file tag describing a segment file. We only use the segment
+ * number, since we can derive everything else we need by having separate
+ * sync handler functions for clog, multixact etc.
+ */
+#define INIT_SLRUFILETAG(a,xx_handler,xx_segno) \
+( \
+ memset(&(a), 0, sizeof(FileTag)), \
+ (a).handler = (xx_handler), \
+ (a).segno = (xx_segno) \
+)
/*
* Macro to mark a buffer slot "most recently used". Note multiple evaluation
@@ -125,10 +136,10 @@ static int slru_errno;
static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
-static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata);
+static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
- SlruFlush fdata);
+ SlruWriteAll fdata);
static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
static int SlruSelectLRUPage(SlruCtl ctl, int pageno);
@@ -173,7 +184,8 @@ SimpleLruShmemSize(int nslots, int nlsns)
*/
void
SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
- LWLock *ctllock, const char *subdir, int tranche_id)
+ LWLock *ctllock, const char *subdir, int tranche_id,
+ SyncRequestHandler sync_handler)
{
SlruShared shared;
bool found;
@@ -251,7 +263,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
* assume caller set PagePrecedes.
*/
ctl->shared = shared;
- ctl->do_fsync = true; /* default behavior */
+ ctl->sync_handler = sync_handler;
strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
}
@@ -523,7 +535,7 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
* Control lock must be held at entry, and will be held at exit.
*/
static void
-SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
+SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
{
SlruShared shared = ctl->shared;
int pageno = shared->page_number[slotno];
@@ -587,6 +599,10 @@ SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata)
/* Now it's okay to ereport if we failed */
if (!ok)
SlruReportIOError(ctl, pageno, InvalidTransactionId);
+
+ /* If part of a checkpoint, count this as a buffer written. */
+ if (fdata)
+ CheckpointStats.ckpt_bufs_written++;
}
/*
@@ -730,13 +746,13 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
*
* For now, assume it's not worth keeping a file pointer open across
* independent read/write operations. We do batch operations during
- * SimpleLruFlush, though.
+ * SimpleLruWriteAll, though.
*
* fdata is NULL for a standalone write, pointer to open-file info during
- * SimpleLruFlush.
+ * SimpleLruWriteAll.
*/
static bool
-SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
+SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
{
SlruShared shared = ctl->shared;
int segno = pageno / SLRU_PAGES_PER_SEGMENT;
@@ -791,7 +807,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
}
/*
- * During a Flush, we may already have the desired file open.
+ * During a WriteAll, we may already have the desired file open.
*/
if (fdata)
{
@@ -837,7 +853,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
if (fdata)
{
- if (fdata->num_files < MAX_FLUSH_BUFFERS)
+ if (fdata->num_files < MAX_WRITEALL_BUFFERS)
{
fdata->fd[fdata->num_files] = fd;
fdata->segno[fdata->num_files] = segno;
@@ -870,23 +886,31 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
}
pgstat_report_wait_end();
- /*
- * If not part of Flush, need to fsync now. We assume this happens
- * infrequently enough that it's not a performance issue.
- */
- if (!fdata)
+ /* Queue up a sync request for the checkpointer. */
+ if (ctl->sync_handler != SYNC_HANDLER_NONE)
{
- pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
- if (ctl->do_fsync && pg_fsync(fd) != 0)
+ FileTag tag;
+
+ INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
+ if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
{
+ /* No space to enqueue sync request. Do it synchronously. */
+ pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
+ if (pg_fsync(fd) != 0)
+ {
+ pgstat_report_wait_end();
+ slru_errcause = SLRU_FSYNC_FAILED;
+ slru_errno = errno;
+ CloseTransientFile(fd);
+ return false;
+ }
pgstat_report_wait_end();
- slru_errcause = SLRU_FSYNC_FAILED;
- slru_errno = errno;
- CloseTransientFile(fd);
- return false;
}
- pgstat_report_wait_end();
+ }
+ /* Close file, unless part of flush request. */
+ if (!fdata)
+ {
if (CloseTransientFile(fd) != 0)
{
slru_errcause = SLRU_CLOSE_FAILED;
@@ -1122,13 +1146,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
}
/*
- * Flush dirty pages to disk during checkpoint or database shutdown
+ * Write dirty pages to disk during checkpoint or database shutdown. Flushing
+ * is deferred until the next call to ProcessSyncRequests(), though we do fsync
+ * the containing directory here to make sure that newly created directory
+ * entries are on disk.
*/
void
-SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
+SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
{
SlruShared shared = ctl->shared;
- SlruFlushData fdata;
+ SlruWriteAllData fdata;
int slotno;
int pageno = 0;
int i;
@@ -1162,21 +1189,11 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
LWLockRelease(shared->ControlLock);
/*
- * Now fsync and close any files that were open
+ * Now close any files that were open
*/
ok = true;
for (i = 0; i < fdata.num_files; i++)
{
- pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC);
- if (ctl->do_fsync && pg_fsync(fdata.fd[i]) != 0)
- {
- slru_errcause = SLRU_FSYNC_FAILED;
- slru_errno = errno;
- pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
- ok = false;
- }
- pgstat_report_wait_end();
-
if (CloseTransientFile(fdata.fd[i]) != 0)
{
slru_errcause = SLRU_CLOSE_FAILED;
@@ -1189,7 +1206,7 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
SlruReportIOError(ctl, pageno, InvalidTransactionId);
/* Ensure that directory entries for new files are on disk. */
- if (ctl->do_fsync)
+ if (ctl->sync_handler != SYNC_HANDLER_NONE)
fsync_fname(ctl->Dir, true);
}
@@ -1350,6 +1367,19 @@ restart:
snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno);
ereport(DEBUG2,
(errmsg("removing file \"%s\"", path)));
+
+ /*
+ * Tell the checkpointer to forget any sync requests, before we unlink the
+ * file.
+ */
+ if (ctl->sync_handler != SYNC_HANDLER_NONE)
+ {
+ FileTag tag;
+
+ INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
+ RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
+ }
+
unlink(path);
LWLockRelease(shared->ControlLock);
@@ -1448,3 +1478,31 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
return retval;
}
+
+/*
+ * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
+ * that they can provide the correct "SlruCtl" (otherwise we don't know how to
+ * build the path), but they just forward to this common implementation that
+ * performs the fsync.
+ */
+int
+SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
+{
+ int fd;
+ int save_errno;
+ int result;
+
+ SlruFileName(ctl, path, ftag->segno);
+
+ fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
+ if (fd < 0)
+ return -1;
+
+ result = pg_fsync(fd);
+ save_errno = errno;
+
+ CloseTransientFile(fd);
+
+ errno = save_errno;
+ return result;
+}