diff options
Diffstat (limited to 'src/backend/access/transam/slru.c')
-rw-r--r-- | src/backend/access/transam/slru.c | 154 |
1 files changed, 106 insertions, 48 deletions
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index fe7d759a8c1..16a78986971 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -63,22 +63,33 @@ snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) /* - * During SimpleLruFlush(), we will usually not need to write/fsync more - * than one or two physical files, but we may need to write several pages - * per file. We can consolidate the I/O requests by leaving files open - * until control returns to SimpleLruFlush(). This data structure remembers - * which files are open. + * During SimpleLruWriteAll(), we will usually not need to write more than one + * or two physical files, but we may need to write several pages per file. We + * can consolidate the I/O requests by leaving files open until control returns + * to SimpleLruWriteAll(). This data structure remembers which files are open. */ -#define MAX_FLUSH_BUFFERS 16 +#define MAX_WRITEALL_BUFFERS 16 -typedef struct SlruFlushData +typedef struct SlruWriteAllData { int num_files; /* # files actually open */ - int fd[MAX_FLUSH_BUFFERS]; /* their FD's */ - int segno[MAX_FLUSH_BUFFERS]; /* their log seg#s */ -} SlruFlushData; + int fd[MAX_WRITEALL_BUFFERS]; /* their FD's */ + int segno[MAX_WRITEALL_BUFFERS]; /* their log seg#s */ +} SlruWriteAllData; -typedef struct SlruFlushData *SlruFlush; +typedef struct SlruWriteAllData *SlruWriteAll; + +/* + * Populate a file tag describing a segment file. We only use the segment + * number, since we can derive everything else we need by having separate + * sync handler functions for clog, multixact etc. + */ +#define INIT_SLRUFILETAG(a,xx_handler,xx_segno) \ +( \ + memset(&(a), 0, sizeof(FileTag)), \ + (a).handler = (xx_handler), \ + (a).segno = (xx_segno) \ +) /* * Macro to mark a buffer slot "most recently used". Note multiple evaluation @@ -125,10 +136,10 @@ static int slru_errno; static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); static void SimpleLruWaitIO(SlruCtl ctl, int slotno); -static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata); +static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata); static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno); static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, - SlruFlush fdata); + SlruWriteAll fdata); static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); static int SlruSelectLRUPage(SlruCtl ctl, int pageno); @@ -173,7 +184,8 @@ SimpleLruShmemSize(int nslots, int nlsns) */ void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, - LWLock *ctllock, const char *subdir, int tranche_id) + LWLock *ctllock, const char *subdir, int tranche_id, + SyncRequestHandler sync_handler) { SlruShared shared; bool found; @@ -251,7 +263,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, * assume caller set PagePrecedes. */ ctl->shared = shared; - ctl->do_fsync = true; /* default behavior */ + ctl->sync_handler = sync_handler; strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir)); } @@ -523,7 +535,7 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) * Control lock must be held at entry, and will be held at exit. */ static void -SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) +SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata) { SlruShared shared = ctl->shared; int pageno = shared->page_number[slotno]; @@ -587,6 +599,10 @@ SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); + + /* If part of a checkpoint, count this as a buffer written. */ + if (fdata) + CheckpointStats.ckpt_bufs_written++; } /* @@ -730,13 +746,13 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) * * For now, assume it's not worth keeping a file pointer open across * independent read/write operations. We do batch operations during - * SimpleLruFlush, though. + * SimpleLruWriteAll, though. * * fdata is NULL for a standalone write, pointer to open-file info during - * SimpleLruFlush. + * SimpleLruWriteAll. */ static bool -SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) +SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) { SlruShared shared = ctl->shared; int segno = pageno / SLRU_PAGES_PER_SEGMENT; @@ -791,7 +807,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) } /* - * During a Flush, we may already have the desired file open. + * During a WriteAll, we may already have the desired file open. */ if (fdata) { @@ -837,7 +853,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) if (fdata) { - if (fdata->num_files < MAX_FLUSH_BUFFERS) + if (fdata->num_files < MAX_WRITEALL_BUFFERS) { fdata->fd[fdata->num_files] = fd; fdata->segno[fdata->num_files] = segno; @@ -870,23 +886,31 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) } pgstat_report_wait_end(); - /* - * If not part of Flush, need to fsync now. We assume this happens - * infrequently enough that it's not a performance issue. - */ - if (!fdata) + /* Queue up a sync request for the checkpointer. */ + if (ctl->sync_handler != SYNC_HANDLER_NONE) { - pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC); - if (ctl->do_fsync && pg_fsync(fd) != 0) + FileTag tag; + + INIT_SLRUFILETAG(tag, ctl->sync_handler, segno); + if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false)) { + /* No space to enqueue sync request. Do it synchronously. */ + pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC); + if (pg_fsync(fd) != 0) + { + pgstat_report_wait_end(); + slru_errcause = SLRU_FSYNC_FAILED; + slru_errno = errno; + CloseTransientFile(fd); + return false; + } pgstat_report_wait_end(); - slru_errcause = SLRU_FSYNC_FAILED; - slru_errno = errno; - CloseTransientFile(fd); - return false; } - pgstat_report_wait_end(); + } + /* Close file, unless part of flush request. */ + if (!fdata) + { if (CloseTransientFile(fd) != 0) { slru_errcause = SLRU_CLOSE_FAILED; @@ -1122,13 +1146,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno) } /* - * Flush dirty pages to disk during checkpoint or database shutdown + * Write dirty pages to disk during checkpoint or database shutdown. Flushing + * is deferred until the next call to ProcessSyncRequests(), though we do fsync + * the containing directory here to make sure that newly created directory + * entries are on disk. */ void -SimpleLruFlush(SlruCtl ctl, bool allow_redirtied) +SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied) { SlruShared shared = ctl->shared; - SlruFlushData fdata; + SlruWriteAllData fdata; int slotno; int pageno = 0; int i; @@ -1162,21 +1189,11 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied) LWLockRelease(shared->ControlLock); /* - * Now fsync and close any files that were open + * Now close any files that were open */ ok = true; for (i = 0; i < fdata.num_files; i++) { - pgstat_report_wait_start(WAIT_EVENT_SLRU_FLUSH_SYNC); - if (ctl->do_fsync && pg_fsync(fdata.fd[i]) != 0) - { - slru_errcause = SLRU_FSYNC_FAILED; - slru_errno = errno; - pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT; - ok = false; - } - pgstat_report_wait_end(); - if (CloseTransientFile(fdata.fd[i]) != 0) { slru_errcause = SLRU_CLOSE_FAILED; @@ -1189,7 +1206,7 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied) SlruReportIOError(ctl, pageno, InvalidTransactionId); /* Ensure that directory entries for new files are on disk. */ - if (ctl->do_fsync) + if (ctl->sync_handler != SYNC_HANDLER_NONE) fsync_fname(ctl->Dir, true); } @@ -1350,6 +1367,19 @@ restart: snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno); ereport(DEBUG2, (errmsg("removing file \"%s\"", path))); + + /* + * Tell the checkpointer to forget any sync requests, before we unlink the + * file. + */ + if (ctl->sync_handler != SYNC_HANDLER_NONE) + { + FileTag tag; + + INIT_SLRUFILETAG(tag, ctl->sync_handler, segno); + RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true); + } + unlink(path); LWLockRelease(shared->ControlLock); @@ -1448,3 +1478,31 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data) return retval; } + +/* + * Individual SLRUs (clog, ...) have to provide a sync.c handler function so + * that they can provide the correct "SlruCtl" (otherwise we don't know how to + * build the path), but they just forward to this common implementation that + * performs the fsync. + */ +int +SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path) +{ + int fd; + int save_errno; + int result; + + SlruFileName(ctl, path, ftag->segno); + + fd = OpenTransientFile(path, O_RDWR | PG_BINARY); + if (fd < 0) + return -1; + + result = pg_fsync(fd); + save_errno = errno; + + CloseTransientFile(fd); + + errno = save_errno; + return result; +} |