diff options
author | Michael Paquier <michael@paquier.xyz> | 2018-06-25 11:20:19 +0900 |
---|---|---|
committer | Michael Paquier <michael@paquier.xyz> | 2018-06-25 11:20:19 +0900 |
commit | 6eec6724ff4519ff67c9d402d65ad8a6c27e1d0e (patch) | |
tree | f69153be7ff7760fe74f48fab6f7c29b2d20c8b0 /src/backend/replication | |
parent | 6350dcc1f43154d404f52ab4e891ebecef0c2115 (diff) |
Address set of issues with errno handling
System calls mixed up in error code paths are causing two issues which
several code paths have not correctly handled:
1) For write() calls, sometimes the system may return less bytes than
what has been written without errno being set. Some paths were careful
enough to consider that case, and assumed that errno should be set to
ENOSPC, other calls missed that.
2) errno generated by a system call is overwritten by other system calls
which may succeed once an error code path is taken, causing what is
reported to the user to be incorrect.
This patch uses the brute-force approach of correcting all those code
paths. Some refactoring could happen in the future, but this is let as
future work, which is not targeted for back-branches anyway.
Author: Michael Paquier
Reviewed-by: Ashutosh Sharma
Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz
Diffstat (limited to 'src/backend/replication')
-rw-r--r-- | src/backend/replication/basebackup.c | 3 | ||||
-rw-r--r-- | src/backend/replication/logical/origin.c | 15 | ||||
-rw-r--r-- | src/backend/replication/logical/reorderbuffer.c | 4 | ||||
-rw-r--r-- | src/backend/replication/logical/snapbuild.c | 20 | ||||
-rw-r--r-- | src/backend/replication/slot.c | 7 |
5 files changed, 47 insertions, 2 deletions
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 4a98eb9bdc0..ba4937bbd76 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -463,6 +463,8 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) fp = AllocateFile(pathbuf, "rb"); if (fp == NULL) { + int save_errno = errno; + /* * Most likely reason for this is that the file was already * removed by a checkpoint, so check for that to get a better @@ -470,6 +472,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) */ CheckXLogRemoved(segno, tli); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", pathbuf))); diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c index 88fa35a9625..6a1d8a63d92 100644 --- a/src/backend/replication/logical/origin.c +++ b/src/backend/replication/logical/origin.c @@ -579,7 +579,12 @@ CheckPointReplicationOrigin(void) /* write magic */ if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic)) { + int save_errno = errno; + CloseTransientFile(tmpfd); + + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", @@ -618,7 +623,12 @@ CheckPointReplicationOrigin(void) if ((write(tmpfd, &disk_state, sizeof(disk_state))) != sizeof(disk_state)) { + int save_errno = errno; + CloseTransientFile(tmpfd); + + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", @@ -634,7 +644,12 @@ CheckPointReplicationOrigin(void) FIN_CRC32C(crc); if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc)) { + int save_errno = errno; + CloseTransientFile(tmpfd); + + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 9e5720756fe..4c603563466 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2309,7 +2309,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, int save_errno = errno; CloseTransientFile(fd); - errno = save_errno; + + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to data file for XID %u: %m", diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index fba57a0470c..506e7790785 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1606,7 +1606,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn) pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE); if ((write(fd, ondisk, needed_length)) != needed_length) { + int save_errno = errno; + CloseTransientFile(fd); + + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", tmppath))); @@ -1624,7 +1629,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn) pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_SYNC); if (pg_fsync(fd) != 0) { + int save_errno = errno; + CloseTransientFile(fd); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", tmppath))); @@ -1709,7 +1717,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) pgstat_report_wait_end(); if (readBytes != SnapBuildOnDiskConstantSize) { + int save_errno = errno; + CloseTransientFile(fd); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not read file \"%s\", read %d of %d: %m", @@ -1737,7 +1748,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) pgstat_report_wait_end(); if (readBytes != sizeof(SnapBuild)) { + int save_errno = errno; + CloseTransientFile(fd); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not read file \"%s\", read %d of %d: %m", @@ -1754,7 +1768,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) pgstat_report_wait_end(); if (readBytes != sz) { + int save_errno = errno; + CloseTransientFile(fd); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not read file \"%s\", read %d of %d: %m", @@ -1770,7 +1787,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) pgstat_report_wait_end(); if (readBytes != sz) { + int save_errno = errno; + CloseTransientFile(fd); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not read file \"%s\", read %d of %d: %m", diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index a8a16f55e98..cf5baf686ee 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1268,7 +1268,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) pgstat_report_wait_end(); CloseTransientFile(fd); - errno = save_errno; + + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; ereport(elevel, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", @@ -1372,7 +1374,10 @@ RestoreSlotFromDisk(const char *name) pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC); if (pg_fsync(fd) != 0) { + int save_errno = errno; + CloseTransientFile(fd); + errno = save_errno; ereport(PANIC, (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", |