summaryrefslogtreecommitdiff
path: root/src/bin/pg_dump/pg_backup_db.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2016-06-02 13:27:53 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2016-06-02 13:28:17 -0400
commite652273e073566b67c2fd36a5754b3fad2bc0291 (patch)
tree6b137cd6f7ad4b2e31b5b56210ca2a4862da2496 /src/bin/pg_dump/pg_backup_db.c
parent7392eed7c2a327eb1b496f30a64be33404bcf82e (diff)
Redesign handling of SIGTERM/control-C in parallel pg_dump/pg_restore.
Formerly, Unix builds of pg_dump/pg_restore would trap SIGINT and similar signals and set a flag that was tested in various data-transfer loops. This was prone to errors of omission (cf commit 3c8aa6654); and even if the client-side response was prompt, we did nothing that would cause long-running SQL commands (e.g. CREATE INDEX) to terminate early. Also, the master process would effectively do nothing at all upon receipt of SIGINT; the only reason it seemed to work was that in typical scenarios the signal would also be delivered to the child processes. We should support termination when a signal is delivered only to the master process, though. Windows builds had no console interrupt handler, so they would just fall over immediately at control-C, again leaving long-running SQL commands to finish unmolested. To fix, remove the flag-checking approach altogether. Instead, allow the Unix signal handler to send a cancel request directly and then exit(1). In the master process, also have it forward the signal to the children. On Windows, add a console interrupt handler that behaves approximately the same. The main difference is that a single execution of the Windows handler can send all the cancel requests since all the info is available in one process, whereas on Unix each process sends a cancel only for its own database connection. In passing, fix an old problem that DisconnectDatabase tends to send a cancel request before exiting a parallel worker, even if nothing went wrong. This is at least a waste of cycles, and could lead to unexpected log messages, or maybe even data loss if it happened in pg_restore (though in the current code the problem seems to affect only pg_dump). The cause was that after a COPY step, pg_dump was leaving libpq in PGASYNC_BUSY state, causing PQtransactionStatus() to report PQTRANS_ACTIVE. That's normally harmless because the next PQexec() will silently clear the PGASYNC_BUSY state; but in a parallel worker we might exit without any additional SQL commands after a COPY step. So add an extra PQgetResult() call after a COPY to allow libpq to return to PGASYNC_IDLE state. This is a bug fix, IMO, so back-patch to 9.3 where parallel dump/restore were introduced. Thanks to Kyotaro Horiguchi for Windows testing and code suggestions. Original-Patch: <7005.1464657274@sss.pgh.pa.us> Discussion: <20160602.174941.256342236.horiguchi.kyotaro@lab.ntt.co.jp>
Diffstat (limited to 'src/bin/pg_dump/pg_backup_db.c')
-rw-r--r--src/bin/pg_dump/pg_backup_db.c32
1 files changed, 25 insertions, 7 deletions
diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c
index 818bc9efe14..352595e49fa 100644
--- a/src/bin/pg_dump/pg_backup_db.c
+++ b/src/bin/pg_dump/pg_backup_db.c
@@ -12,6 +12,7 @@
#include "postgres_fe.h"
#include "dumputils.h"
+#include "parallel.h"
#include "pg_backup_archiver.h"
#include "pg_backup_db.h"
#include "pg_backup_utils.h"
@@ -106,6 +107,9 @@ ReconnectToServer(ArchiveHandle *AH, const char *dbname, const char *username)
newConn = _connectDB(AH, newdbname, newusername);
+ /* Update ArchiveHandle's connCancel before closing old connection */
+ set_archive_cancel_info(AH, newConn);
+
PQfinish(AH->connection);
AH->connection = newConn;
@@ -327,6 +331,9 @@ ConnectDatabase(Archive *AHX,
_check_database_version(AH);
PQsetNoticeProcessor(AH->connection, notice_processor, NULL);
+
+ /* arrange for SIGINT to issue a query cancel on this connection */
+ set_archive_cancel_info(AH, AH->connection);
}
/*
@@ -337,19 +344,25 @@ void
DisconnectDatabase(Archive *AHX)
{
ArchiveHandle *AH = (ArchiveHandle *) AHX;
- PGcancel *cancel;
char errbuf[1];
if (!AH->connection)
return;
- if (PQtransactionStatus(AH->connection) == PQTRANS_ACTIVE)
+ if (AH->connCancel)
{
- if ((cancel = PQgetCancel(AH->connection)))
- {
- PQcancel(cancel, errbuf, sizeof(errbuf));
- PQfreeCancel(cancel);
- }
+ /*
+ * If we have an active query, send a cancel before closing. This is
+ * of no use for a normal exit, but might be helpful during
+ * exit_horribly().
+ */
+ if (PQtransactionStatus(AH->connection) == PQTRANS_ACTIVE)
+ PQcancel(AH->connCancel, errbuf, sizeof(errbuf));
+
+ /*
+ * Prevent signal handler from sending a cancel after this.
+ */
+ set_archive_cancel_info(AH, NULL);
}
PQfinish(AH->connection);
@@ -631,6 +644,11 @@ EndDBCopyMode(Archive *AHX, const char *tocEntryTag)
tocEntryTag, PQerrorMessage(AH->connection));
PQclear(res);
+ /* Do this to ensure we've pumped libpq back to idle state */
+ if (PQgetResult(AH->connection) != NULL)
+ write_msg(NULL, "WARNING: unexpected extra results during COPY of table \"%s\"\n",
+ tocEntryTag);
+
AH->pgCopyIn = false;
}
}