summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2017-04-22 18:18:25 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2017-04-22 18:18:25 -0400
commit 9d5f0718d75772265b476e41f8a7e97dd8856710 (patch)
tree f2e06a4ec2f4473d93006784d0ab34f8eb894fad /src
parent 551cc9af57814a93f809bc907a21595c0771b1a6 (diff)
Make PostgresNode.pm check server status more carefully.
PostgresNode blithely ignored the exit status of pg_ctl, and in general made no effort to be sure that the server was running when it should be. This caused it to miss server crashes, which is a serious shortcoming in a test scaffold. Make it complain if pg_ctl fails, and modify the start and stop logic to complain if the server doesn't start, or doesn't stop, when expected.

Also, have it turn off the "restart_after_crash" configuration parameter in created clusters, as bitter experience has shown that leaving that on can mask crashes too.

We might at some point need variant functions that allow for, e.g., server start failure to be expected. But no existing test case appears to want that, and it surely shouldn't be the default behavior.

Note that this *will* break the buildfarm, as it will expose known bugs that the previous testing failed to expose. I'm committing it despite that, to verify that we get the expected failures in the buildfarm, not just in manual testing.

Back-patch into 9.6 where PostgresNode was introduced. (The 9.6 branch is not expected to show any failures.)

Discussion: https://postgr.es/m/21432.1492886428@sss.pgh.pa.us
Diffstat (limited to 'src')
-rw-r--r-- src/test/perl/PostgresNode.pm 42
1 files changed, 26 insertions, 16 deletions
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index b55f8bf57ec..20bc90b0075 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -402,6 +402,7 @@ sub init
open my $conf, ">>$pgdata/postgresql.conf";
print $conf "\n# Added by PostgresNode.pm\n";
print $conf "fsync = off\n";
+ print $conf "restart_after_crash = off\n";
print $conf "log_statement = all\n";
print $conf "port = $port\n";
@@ -636,18 +637,19 @@ sub start
my $port = $self->port;
my $pgdata = $self->data_dir;
my $name = $self->name;
+ BAIL_OUT("node \"$name\" is already running") if defined $self->{_pid};
print("### Starting node \"$name\"\n");
my $ret = TestLib::system_log('pg_ctl', '-w', '-D', $self->data_dir, '-l',
$self->logfile, 'start');
if ($ret != 0)
{
- print "# pg_ctl failed; logfile:\n";
+ print "# pg_ctl start failed; logfile:\n";
print TestLib::slurp_file($self->logfile);
- BAIL_OUT("pg_ctl failed");
+ BAIL_OUT("pg_ctl start failed");
}
- $self->_update_pid;
+ $self->_update_pid(1);
}
=pod
@@ -656,6 +658,10 @@ sub start
Stop the node using pg_ctl -m $mode and wait for it to stop.
+Note: if the node is already known stopped, this does nothing.
+However, if we think it's running and it's not, it's important for
+this to fail. Otherwise, tests might fail to detect server crashes.
+
=cut
sub stop
@@ -667,9 +673,8 @@ sub stop
$mode = 'fast' unless defined $mode;
return unless defined $self->{_pid};
print "### Stopping node \"$name\" using mode $mode\n";
- TestLib::system_log('pg_ctl', '-D', $pgdata, '-m', $mode, 'stop');
- $self->{_pid} = undef;
- $self->_update_pid;
+ TestLib::system_or_bail('pg_ctl', '-D', $pgdata, '-m', $mode, 'stop');
+ $self->_update_pid(0);
}
=pod
@@ -687,7 +692,7 @@ sub reload
my $pgdata = $self->data_dir;
my $name = $self->name;
print "### Reloading node \"$name\"\n";
- TestLib::system_log('pg_ctl', '-D', $pgdata, 'reload');
+ TestLib::system_or_bail('pg_ctl', '-D', $pgdata, 'reload');
}
=pod
@@ -706,9 +711,9 @@ sub restart
my $logfile = $self->logfile;
my $name = $self->name;
print "### Restarting node \"$name\"\n";
- TestLib::system_log('pg_ctl', '-D', $pgdata, '-w', '-l', $logfile,
- 'restart');
- $self->_update_pid;
+ TestLib::system_or_bail('pg_ctl', '-D', $pgdata, '-w', '-l', $logfile,
+ 'restart');
+ $self->_update_pid(1);
}
=pod
@@ -727,7 +732,8 @@ sub promote
my $logfile = $self->logfile;
my $name = $self->name;
print "### Promoting node \"$name\"\n";
- TestLib::system_log('pg_ctl', '-D', $pgdata, '-l', $logfile, 'promote');
+ TestLib::system_or_bail('pg_ctl', '-D', $pgdata, '-l', $logfile,
+ 'promote');
}
# Internal routine to enable streaming replication on a standby node.
@@ -805,22 +811,26 @@ archive_command = '$copy_command'
# Internal method
sub _update_pid
{
- my $self = shift;
+ my ($self, $is_running) = @_;
my $name = $self->name;
# If we can open the PID file, read its first line and that's the PID we
- # want. If the file cannot be opened, presumably the server is not
- # running; don't be noisy in that case.
- if (open my $pidfile, $self->data_dir . "/postmaster.pid")
+ # want.
+ if (open my $pidfile, '<', $self->data_dir . "/postmaster.pid")
{
chomp($self->{_pid} = <$pidfile>);
print "# Postmaster PID for node \"$name\" is $self->{_pid}\n";
close $pidfile;
+
+ # If we found a pidfile when there shouldn't be one, complain.
+ BAIL_OUT("postmaster.pid unexpectedly present") unless $is_running;
return;
}
$self->{_pid} = undef;
- print "# No postmaster PID\n";
+ print "# No postmaster PID for node \"$name\"\n";
+ # Complain if we expected to find a pidfile.
+ BAIL_OUT("postmaster.pid unexpectedly not present") if $is_running;
}
=pod