summaryrefslogtreecommitdiff
path: root/src/bin/pg_verifybackup/pg_verifybackup.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/bin/pg_verifybackup/pg_verifybackup.c')
-rw-r--r--src/bin/pg_verifybackup/pg_verifybackup.c433
1 files changed, 393 insertions, 40 deletions
diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c
index 3fcfb167217..b8c94ada5a0 100644
--- a/src/bin/pg_verifybackup/pg_verifybackup.c
+++ b/src/bin/pg_verifybackup/pg_verifybackup.c
@@ -22,6 +22,7 @@
#include "common/parse_manifest.h"
#include "fe_utils/simple_list.h"
#include "getopt_long.h"
+#include "limits.h"
#include "pg_verifybackup.h"
#include "pgtime.h"
@@ -44,6 +45,16 @@
*/
#define READ_CHUNK_SIZE (128 * 1024)
+/*
+ * Tar file information needed for content verification.
+ *
+ * An entry is filled in by precheck_tar_backup_file for each archive that
+ * passes its checks, and is later consumed and freed by verify_tar_backup.
+ */
+typedef struct tar_file
+{
+ /* Copy of the archive's file name as found in the backup directory. */
+ char *relpath;
+ /* OID parsed from a <tablespaceoid> archive name; InvalidOid for base. */
+ Oid tblspc_oid;
+ /* Compression method deduced from the file name suffix. */
+ pg_compress_algorithm compress_algorithm;
+} tar_file;
+
static manifest_data *parse_manifest_file(char *manifest_path);
static void verifybackup_version_cb(JsonManifestParseContext *context,
int manifest_version);
@@ -62,12 +73,18 @@ static void report_manifest_error(JsonManifestParseContext *context,
const char *fmt,...)
pg_attribute_printf(2, 3) pg_attribute_noreturn();
-static void verify_backup_directory(verifier_context *context,
- char *relpath, char *fullpath);
-static void verify_backup_file(verifier_context *context,
- char *relpath, char *fullpath);
+static void verify_tar_backup(verifier_context *context, DIR *dir);
+static void verify_plain_backup_directory(verifier_context *context,
+ char *relpath, char *fullpath,
+ DIR *dir);
+static void verify_plain_backup_file(verifier_context *context, char *relpath,
+ char *fullpath);
static void verify_control_file(const char *controlpath,
uint64 manifest_system_identifier);
+static void precheck_tar_backup_file(verifier_context *context, char *relpath,
+ char *fullpath, SimplePtrList *tarfiles);
+static void verify_tar_file(verifier_context *context, char *relpath,
+ char *fullpath, astreamer *streamer);
static void report_extra_backup_files(verifier_context *context);
static void verify_backup_checksums(verifier_context *context);
static void verify_file_checksum(verifier_context *context,
@@ -76,6 +93,10 @@ static void verify_file_checksum(verifier_context *context,
static void parse_required_wal(verifier_context *context,
char *pg_waldump_path,
char *wal_directory);
+static astreamer *create_archive_verifier(verifier_context *context,
+ char *archive_name,
+ Oid tblspc_oid,
+ pg_compress_algorithm compress_algo);
static void progress_report(bool finished);
static void usage(void);
@@ -99,6 +120,7 @@ main(int argc, char **argv)
{"exit-on-error", no_argument, NULL, 'e'},
{"ignore", required_argument, NULL, 'i'},
{"manifest-path", required_argument, NULL, 'm'},
+ {"format", required_argument, NULL, 'F'},
{"no-parse-wal", no_argument, NULL, 'n'},
{"progress", no_argument, NULL, 'P'},
{"quiet", no_argument, NULL, 'q'},
@@ -114,6 +136,7 @@ main(int argc, char **argv)
bool quiet = false;
char *wal_directory = NULL;
char *pg_waldump_path = NULL;
+ DIR *dir;
pg_logging_init(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup"));
@@ -156,7 +179,7 @@ main(int argc, char **argv)
simple_string_list_append(&context.ignore_list, "recovery.signal");
simple_string_list_append(&context.ignore_list, "standby.signal");
- while ((c = getopt_long(argc, argv, "ei:m:nPqsw:", long_options, NULL)) != -1)
+ while ((c = getopt_long(argc, argv, "eF:i:m:nPqsw:", long_options, NULL)) != -1)
{
switch (c)
{
@@ -175,6 +198,15 @@ main(int argc, char **argv)
manifest_path = pstrdup(optarg);
canonicalize_path(manifest_path);
break;
+ case 'F':
+ if (strcmp(optarg, "p") == 0 || strcmp(optarg, "plain") == 0)
+ context.format = 'p';
+ else if (strcmp(optarg, "t") == 0 || strcmp(optarg, "tar") == 0)
+ context.format = 't';
+ else
+ pg_fatal("invalid backup format \"%s\", must be \"plain\" or \"tar\"",
+ optarg);
+ break;
case 'n':
no_parse_wal = true;
break;
@@ -264,25 +296,75 @@ main(int argc, char **argv)
context.manifest = parse_manifest_file(manifest_path);
/*
- * Now scan the files in the backup directory. At this stage, we verify
- * that every file on disk is present in the manifest and that the sizes
- * match. We also set the "matched" flag on every manifest entry that
- * corresponds to a file on disk.
+ * If the backup directory cannot be found, treat this as a fatal error.
+ */
+ dir = opendir(context.backup_directory);
+ if (dir == NULL)
+ report_fatal_error("could not open directory \"%s\": %m",
+ context.backup_directory);
+
+ /*
+ * At this point, we know that the backup directory exists, so it's now
+ * reasonable to check for files immediately inside it. Thus, before going
+ * further, if the user did not specify the backup format, check for
+ * PG_VERSION to distinguish between tar and plain format.
*/
- verify_backup_directory(&context, NULL, context.backup_directory);
+ if (context.format == '\0')
+ {
+ struct stat sb;
+ char *path;
+
+ path = psprintf("%s/%s", context.backup_directory, "PG_VERSION");
+ if (stat(path, &sb) == 0)
+ context.format = 'p';
+ else if (errno != ENOENT)
+ {
+ pg_log_error("could not stat file \"%s\": %m", path);
+ exit(1);
+ }
+ else
+ {
+ /* No PG_VERSION, so assume tar format. */
+ context.format = 't';
+ }
+ pfree(path);
+ }
+
+ /*
+ * XXX: In the future, we should consider enhancing pg_waldump to read
+ * WAL files from an archive.
+ */
+ if (!no_parse_wal && context.format == 't')
+ {
+ pg_log_error("pg_waldump cannot read tar files");
+ pg_log_error_hint("You must use -n or --no-parse-wal when verifying a tar-format backup.");
+ exit(1);
+ }
+
+ /*
+ * Perform the type of verification appropriate to the backup format.
+ * This will close 'dir'.
+ */
+ if (context.format == 'p')
+ verify_plain_backup_directory(&context, NULL, context.backup_directory,
+ dir);
+ else
+ verify_tar_backup(&context, dir);
/*
* The "matched" flag should now be set on every entry in the hash table.
* Any entries for which the bit is not set are files mentioned in the
- * manifest that don't exist on disk.
+ * manifest that don't exist on disk (or in the relevant tar files).
*/
report_extra_backup_files(&context);
/*
- * Now do the expensive work of verifying file checksums, unless we were
- * told to skip it.
+ * If this is a tar-format backup, checksums were already verified above;
+ * but if it's a plain-format backup, we postpone it until this point,
+ * since the earlier checks can be performed just by knowing which files
+ * are present, without needing to read all of them.
*/
- if (!context.skip_checksums)
+ if (context.format == 'p' && !context.skip_checksums)
verify_backup_checksums(&context);
/*
@@ -517,35 +599,27 @@ verifybackup_per_wal_range_cb(JsonManifestParseContext *context,
}
/*
- * Verify one directory.
+ * Verify one directory of a plain-format backup.
*
* 'relpath' is NULL if we are to verify the top-level backup directory,
* and otherwise the relative path to the directory that is to be verified.
*
* 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
* filesystem path at which it can be found.
+ *
+ * 'dir' is an open directory handle, or NULL if the caller wants us to
+ * open it. If the caller chooses to pass a handle, we'll close it when
+ * we're done with it.
*/
static void
-verify_backup_directory(verifier_context *context, char *relpath,
- char *fullpath)
+verify_plain_backup_directory(verifier_context *context, char *relpath,
+ char *fullpath, DIR *dir)
{
- DIR *dir;
struct dirent *dirent;
- dir = opendir(fullpath);
- if (dir == NULL)
+ /* Open the directory unless the caller did it. */
+ if (dir == NULL && ((dir = opendir(fullpath)) == NULL))
{
- /*
- * If even the toplevel backup directory cannot be found, treat this
- * as a fatal error.
- */
- if (relpath == NULL)
- report_fatal_error("could not open directory \"%s\": %m", fullpath);
-
- /*
- * Otherwise, treat this as a non-fatal error, but ignore any further
- * errors related to this path and anything beneath it.
- */
report_backup_error(context,
"could not open directory \"%s\": %m", fullpath);
simple_string_list_append(&context->ignore_list, relpath);
@@ -570,7 +644,7 @@ verify_backup_directory(verifier_context *context, char *relpath,
newrelpath = psprintf("%s/%s", relpath, filename);
if (!should_ignore_relpath(context, newrelpath))
- verify_backup_file(context, newrelpath, newfullpath);
+ verify_plain_backup_file(context, newrelpath, newfullpath);
pfree(newfullpath);
pfree(newrelpath);
@@ -587,11 +661,12 @@ verify_backup_directory(verifier_context *context, char *relpath,
/*
* Verify one file (which might actually be a directory or a symlink).
*
- * The arguments to this function have the same meaning as the arguments to
- * verify_backup_directory.
+ * The arguments to this function have the same meaning as the similarly named
+ * arguments to verify_plain_backup_directory.
*/
static void
-verify_backup_file(verifier_context *context, char *relpath, char *fullpath)
+verify_plain_backup_file(verifier_context *context, char *relpath,
+ char *fullpath)
{
struct stat sb;
manifest_file *m;
@@ -614,7 +689,7 @@ verify_backup_file(verifier_context *context, char *relpath, char *fullpath)
/* If it's a directory, just recurse. */
if (S_ISDIR(sb.st_mode))
{
- verify_backup_directory(context, relpath, fullpath);
+ verify_plain_backup_directory(context, relpath, fullpath, NULL);
return;
}
@@ -704,6 +779,252 @@ verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
}
/*
+ * Verify a tar-format backup.
+ *
+ * This works in two passes. First, every entry found directly inside the
+ * backup directory (other than ignored ones) is handed to
+ * precheck_tar_backup_file, which collects the archives that need content
+ * verification. Second, each collected archive is read through an archive
+ * streamer chain that verifies its contents.
+ *
+ * The caller should pass a handle to the target directory, which we will
+ * close when we're done with it.
+ */
+static void
+verify_tar_backup(verifier_context *context, DIR *dir)
+{
+	struct dirent *dirent;
+	SimplePtrList tarfiles = {NULL, NULL};
+	SimplePtrListCell *cell;
+
+	Assert(context->format != 'p');
+
+	progress_report(false);
+
+	/* First pass: scan the directory for tar files. */
+	while (errno = 0, (dirent = readdir(dir)) != NULL)
+	{
+		char	   *filename = dirent->d_name;
+
+		/* Skip "." and ".." */
+		if (filename[0] == '.' && (filename[1] == '\0'
+								   || strcmp(filename, "..") == 0))
+			continue;
+
+		/*
+		 * Unless it's something we should ignore, perform prechecks and add
+		 * it to the list.
+		 */
+		if (!should_ignore_relpath(context, filename))
+		{
+			char	   *fullpath;
+
+			fullpath = psprintf("%s/%s", context->backup_directory, filename);
+			precheck_tar_backup_file(context, filename, fullpath, &tarfiles);
+			pfree(fullpath);
+		}
+	}
+
+	/*
+	 * readdir() returns NULL both at end-of-directory and on failure; errno
+	 * was zeroed right before the call so the two cases can be told apart.
+	 * Check it now, before closedir() gets a chance to overwrite it.
+	 */
+	if (errno != 0)
+		report_backup_error(context,
+							"could not read directory \"%s\": %m",
+							context->backup_directory);
+
+	if (closedir(dir))
+	{
+		report_backup_error(context,
+							"could not close directory \"%s\": %m",
+							context->backup_directory);
+		return;
+	}
+
+	/* Second pass: Perform the final verification of the tar contents. */
+	for (cell = tarfiles.head; cell != NULL; cell = cell->next)
+	{
+		tar_file   *tar = (tar_file *) cell->ptr;
+		astreamer  *streamer;
+		char	   *fullpath;
+
+		/*
+		 * Prepares the archive streamer stack according to the tar
+		 * compression format.
+		 */
+		streamer = create_archive_verifier(context,
+										   tar->relpath,
+										   tar->tblspc_oid,
+										   tar->compress_algorithm);
+
+		/* Compute the full pathname to the target file. */
+		fullpath = psprintf("%s/%s", context->backup_directory,
+							tar->relpath);
+
+		/* Invoke the streamer for reading, decompressing, and verifying. */
+		verify_tar_file(context, tar->relpath, fullpath, streamer);
+
+		/*
+		 * Finish and destroy the streamer before releasing the strings: it
+		 * was handed tar->relpath when it was created and may still refer to
+		 * it while finalizing.
+		 */
+		astreamer_finalize(streamer);
+		astreamer_free(streamer);
+
+		/* Cleanup. */
+		pfree(fullpath);
+		pfree(tar->relpath);
+		pfree(tar);
+	}
+	simple_ptr_list_destroy(&tarfiles);
+
+	progress_report(true);
+}
+
+/*
+ * Preparatory steps for verifying files in tar format backups.
+ *
+ * Carries out basic validation of one entry found in the backup directory,
+ * detects the compression type from the file name, and appends that
+ * information to the tarfiles list. An error is reported, and nothing is
+ * appended, if the file cannot be stat'ed, is not a plain file, or has a name
+ * or compression suffix that is not expected in a tar format backup.
+ *
+ * 'relpath' is the file name relative to the backup directory, and 'fullpath'
+ * is the actual filesystem path at which it can be found. 'tarfiles' is the
+ * output list of archives to verify; each entry appended to it is allocated
+ * here and must be freed by the caller.
+ */
+static void
+precheck_tar_backup_file(verifier_context *context, char *relpath,
+						 char *fullpath, SimplePtrList *tarfiles)
+{
+	struct stat sb;
+	Oid			tblspc_oid = InvalidOid;
+	pg_compress_algorithm compress_algorithm;
+	tar_file   *tar;
+	char	   *suffix = NULL;
+
+	/* Should be tar format backup */
+	Assert(context->format == 't');
+
+	/* Get file information */
+	if (stat(fullpath, &sb) != 0)
+	{
+		report_backup_error(context,
+							"could not stat file or directory \"%s\": %m",
+							relpath);
+		return;
+	}
+
+	/* In a tar format backup, we expect only plain files. */
+	if (!S_ISREG(sb.st_mode))
+	{
+		report_backup_error(context,
+							"\"%s\" is not a plain file",
+							relpath);
+		return;
+	}
+
+	/*
+	 * We expect tar files for backing up the main directory, tablespace, and
+	 * pg_wal directory.
+	 *
+	 * pg_basebackup writes the main data directory to an archive file named
+	 * base.tar, the pg_wal directory to pg_wal.tar, and the tablespace
+	 * directory to <tablespaceoid>.tar, each followed by a compression type
+	 * extension such as .gz, .lz4, or .zst.
+	 */
+	if (strncmp("base", relpath, 4) == 0)
+		suffix = relpath + 4;
+	else if (strncmp("pg_wal", relpath, 6) == 0)
+		suffix = relpath + 6;
+	else
+	{
+		/* Expected a <tablespaceoid>.tar file here. */
+		uint64		num = 0;
+		bool		valid = false;
+
+		/*
+		 * strtoul() skips leading whitespace and accepts a sign, neither of
+		 * which can appear in a tablespace archive name, so insist that the
+		 * name starts with a digit. That also guarantees that at least one
+		 * character is consumed.
+		 */
+		if (relpath[0] >= '0' && relpath[0] <= '9')
+		{
+			errno = 0;
+			num = strtoul(relpath, &suffix, 10);
+
+			/* Reject overflow, zero, and anything too large to be an OID. */
+			valid = (errno != ERANGE && num > 0 && num <= OID_MAX);
+		}
+
+		if (!valid)
+		{
+			report_backup_error(context,
+								"file \"%s\" is not expected in a tar format backup",
+								relpath);
+			/* Don't go on to examine the suffix or queue this file. */
+			return;
+		}
+		tblspc_oid = (Oid) num;
+	}
+
+	/* Now, check the compression type of the tar */
+	if (strcmp(suffix, ".tar") == 0)
+		compress_algorithm = PG_COMPRESSION_NONE;
+	else if (strcmp(suffix, ".tgz") == 0)
+		compress_algorithm = PG_COMPRESSION_GZIP;
+	else if (strcmp(suffix, ".tar.gz") == 0)
+		compress_algorithm = PG_COMPRESSION_GZIP;
+	else if (strcmp(suffix, ".tar.lz4") == 0)
+		compress_algorithm = PG_COMPRESSION_LZ4;
+	else if (strcmp(suffix, ".tar.zst") == 0)
+		compress_algorithm = PG_COMPRESSION_ZSTD;
+	else
+	{
+		report_backup_error(context,
+							"file \"%s\" is not expected in a tar format backup",
+							relpath);
+		return;
+	}
+
+	/*
+	 * Ignore WALs, as reading and verification will be handled through
+	 * pg_waldump.
+	 */
+	if (strncmp("pg_wal", relpath, 6) == 0)
+		return;
+
+	/*
+	 * Append the information to the list for complete verification at a later
+	 * stage.
+	 */
+	tar = pg_malloc(sizeof(tar_file));
+	tar->relpath = pstrdup(relpath);
+	tar->tblspc_oid = tblspc_oid;
+	tar->compress_algorithm = compress_algorithm;
+
+	simple_ptr_list_append(tarfiles, tar);
+
+	/* Update statistics for progress report, if necessary */
+	if (show_progress)
+		total_size += sb.st_size;
+}
+
+/*
+ * Verification of a single tar file content.
+ *
+ * It reads a given tar archive in chunks of READ_CHUNK_SIZE bytes and passes
+ * each chunk to the streamer, which initiates routines for decompression (if
+ * necessary) and then verifies each member within the tar file.
+ *
+ * 'fullpath' is the path that is actually opened; 'relpath' is used only in
+ * error messages. The streamer is neither finalized nor freed here; that is
+ * left to the caller.
+ */
+static void
+verify_tar_file(verifier_context *context, char *relpath, char *fullpath,
+				astreamer *streamer)
+{
+	int			fd;
+	int			rc;
+	char	   *buffer;
+
+	pg_log_debug("reading \"%s\"", fullpath);
+
+	/* Open the target file. */
+	if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
+	{
+		report_backup_error(context, "could not open file \"%s\": %m",
+							relpath);
+		return;
+	}
+
+	buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8));
+
+	/* Perform the reads */
+	while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
+	{
+		astreamer_content(streamer, NULL, buffer, rc, ASTREAMER_UNKNOWN);
+
+		/* Report progress */
+		done_size += rc;
+		progress_report(false);
+	}
+
+	if (rc < 0)
+		report_backup_error(context, "could not read file \"%s\": %m",
+							relpath);
+
+	/* Close the file. */
+	if (close(fd) != 0)
+		report_backup_error(context, "could not close file \"%s\": %m",
+							relpath);
+
+	/*
+	 * Release the read buffer. This is done last so that it cannot disturb
+	 * errno before the %m messages above have been formatted.
+	 */
+	pfree(buffer);
+}
+
+/*
* Scan the hash table for entries where the 'matched' flag is not set; report
* that such files are present in the manifest but not on disk.
*/
@@ -830,10 +1151,10 @@ verify_file_checksum(verifier_context *context, manifest_file *m,
/*
* Double-check that we read the expected number of bytes from the file.
- * Normally, a file size mismatch would be caught in verify_backup_file
- * and this check would never be reached, but this provides additional
- * safety and clarity in the event of concurrent modifications or
- * filesystem misbehavior.
+ * Normally, mismatches would be caught in verify_plain_backup_file and
+ * this check would never be reached, but this provides additional safety
+ * and clarity in the event of concurrent modifications or filesystem
+ * misbehavior.
*/
if (bytes_read != m->size)
{
@@ -956,6 +1277,37 @@ should_ignore_relpath(verifier_context *context, const char *relpath)
}
/*
+ * Build the stack of archive streamers used to verify one archive.
+ *
+ * The stack is assembled back to front: each constructor is given the
+ * streamer built so far as its successor, so the streamer returned to the
+ * caller is the one that must be fed the raw file contents.
+ */
+static astreamer *
+create_archive_verifier(verifier_context *context, char *archive_name,
+						Oid tblspc_oid, pg_compress_algorithm compress_algo)
+{
+	astreamer  *chain;
+
+	/* Only tar-format backups are verified through streamers. */
+	Assert(context->format == 't');
+
+	/* Final stage: the actual content verification. */
+	chain = astreamer_verify_content_new(NULL, context, archive_name,
+										 tblspc_oid);
+
+	/* In front of that goes the tar parser. */
+	chain = astreamer_tar_parser_new(chain);
+
+	/* And in front of the parser, a decompressor if one is needed. */
+	switch (compress_algo)
+	{
+		case PG_COMPRESSION_GZIP:
+			chain = astreamer_gzip_decompressor_new(chain);
+			break;
+		case PG_COMPRESSION_LZ4:
+			chain = astreamer_lz4_decompressor_new(chain);
+			break;
+		case PG_COMPRESSION_ZSTD:
+			chain = astreamer_zstd_decompressor_new(chain);
+			break;
+		default:
+			/* No decompressor is added for any other value. */
+			break;
+	}
+
+	return chain;
+}
+
+/*
* Print a progress report based on the global variables.
*
* Progress report is written at maximum once per second, unless the finished
@@ -1010,6 +1362,7 @@ usage(void)
printf(_("Usage:\n %s [OPTION]... BACKUPDIR\n\n"), progname);
printf(_("Options:\n"));
printf(_(" -e, --exit-on-error exit immediately on error\n"));
+ printf(_(" -F, --format=p|t backup format (plain, tar)\n"));
printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n"));
printf(_(" -m, --manifest-path=PATH use specified path for manifest\n"));
printf(_(" -n, --no-parse-wal do not try to parse WAL files\n"));