diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2023-03-17 13:31:40 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2023-03-17 13:31:40 -0400 |
commit | 2b216da1e55dc125ada193328e87e471d49b0938 (patch) | |
tree | cf613269ffdd3edc178bf65a80f5b8f00c185d88 /src/bin/pg_dump/pg_backup_archiver.c | |
parent | ce29cea17fb323b67a6772e9acd1ec63e5c9a4b7 (diff) |
Fix pg_dump for hash partitioning on enum columns.
Hash partitioning on an enum is problematic because the hash codes are
derived from the OIDs assigned to the enum values, which will almost
certainly be different after a dump-and-reload than they were before.
This means that some rows probably end up in different partitions than
before, causing restore to fail because of partition constraint
violations. (pg_upgrade dodges this problem by using hacks to force
the enum values to keep the same OIDs, but that's not possible nor
desirable for pg_dump.)
Users can work around that by specifying --load-via-partition-root,
but since that's a dump-time not restore-time decision, one might
find out the need for it far too late. Instead, teach pg_dump to
apply that option automatically when dealing with a partitioned
table that has hash-on-enum partitioning.
Also deal with a pre-existing issue for --load-via-partition-root
mode: in a parallel restore, we try to TRUNCATE target tables just
before loading them, in order to enable some backend optimizations.
This is bad when using --load-via-partition-root because (a) we're
likely to suffer deadlocks from restore jobs trying to restore rows
into other partitions than they came from, and (b) if we miss getting
a deadlock we might still lose data due to a TRUNCATE removing rows
from some already-completed restore job.
The fix for this is conceptually simple: just don't TRUNCATE if we're
dealing with a --load-via-partition-root case. The tricky bit is for
pg_restore to identify those cases. In dumps using COPY commands we
can inspect each COPY command to see if it targets the nominal target
table or some ancestor. However, in dumps using INSERT commands it's
pretty impractical to examine the INSERTs in advance. To provide a
solution for that going forward, modify pg_dump to mark TABLE DATA
items that are using --load-via-partition-root with a comment.
(This change also responds to a complaint from Robert Haas that
the dump output for --load-via-partition-root is pretty confusing.)
pg_restore checks for the special comment as well as checking the
COPY command if present. This will fail to identify the combination
of --load-via-partition-root and --inserts in pre-existing dump files,
but that should be a pretty rare case in the field. If it does
happen you will probably get a deadlock failure that you can work
around by not using parallel restore, which is the same as before
this bug fix.
Having done this, there seems no remaining reason for the alarmism
in the pg_dump man page about combining --load-via-partition-root
with parallel restore, so remove that warning.
Patch by me; thanks to Julien Rouhaud for review. Back-patch to
v11 where hash partitioning was introduced.
Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us
Diffstat (limited to 'src/bin/pg_dump/pg_backup_archiver.c')
-rw-r--r-- | src/bin/pg_dump/pg_backup_archiver.c | 73 |
1 files changed, 64 insertions, 9 deletions
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index eed28b2af89..066f26379e7 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -91,6 +91,7 @@ static RestorePass _tocEntryRestorePass(TocEntry *te); static bool _tocEntryIsACL(TocEntry *te); static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); +static bool is_load_via_partition_root(TocEntry *te); static void buildTocEntryArrays(ArchiveHandle *AH); static void _moveBefore(TocEntry *pos, TocEntry *te); static int _discoverArchiveFormat(ArchiveHandle *AH); @@ -884,6 +885,8 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel) } else { + bool use_truncate; + _disableTriggersIfNecessary(AH, te); /* Select owner and schema as necessary */ @@ -895,13 +898,24 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel) /* * In parallel restore, if we created the table earlier in - * the run then we wrap the COPY in a transaction and - * precede it with a TRUNCATE. If archiving is not on - * this prevents WAL-logging the COPY. This obtains a - * speedup similar to that from using single_txn mode in - * non-parallel restores. + * this run (so that we know it is empty) and we are not + * restoring a load-via-partition-root data item then we + * wrap the COPY in a transaction and precede it with a + * TRUNCATE. If wal_level is set to minimal this prevents + * WAL-logging the COPY. This obtains a speedup similar + * to that from using single_txn mode in non-parallel + * restores. + * + * We mustn't do this for load-via-partition-root cases + * because some data might get moved across partition + * boundaries, risking deadlock and/or loss of previously + * loaded data. (We assume that all partitions of a + * partitioned table will be treated the same way.) */ - if (is_parallel && te->created) + use_truncate = is_parallel && te->created && + !is_load_via_partition_root(te); + + if (use_truncate) { /* * Parallel restore is always talking directly to a @@ -939,7 +953,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel) AH->outputKind = OUTPUT_SQLCMDS; /* close out the transaction started above */ - if (is_parallel && te->created) + if (use_truncate) CommitTransaction(&AH->public); _enableTriggersIfNecessary(AH, te); @@ -1032,6 +1046,43 @@ _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te) } /* + * Detect whether a TABLE DATA TOC item is performing "load via partition + * root", that is the target table is an ancestor partition rather than the + * table the TOC item is nominally for. + * + * In newer archive files this can be detected by checking for a special + * comment placed in te->defn. In older files we have to fall back to seeing + * if the COPY statement targets the named table or some other one. This + * will not work for data dumped as INSERT commands, so we could give a false + * negative in that case; fortunately, that's a rarely-used option. + */ +static bool +is_load_via_partition_root(TocEntry *te) +{ + if (te->defn && + strncmp(te->defn, "-- load via partition root ", 27) == 0) + return true; + if (te->copyStmt && *te->copyStmt) + { + PQExpBuffer copyStmt = createPQExpBuffer(); + bool result; + + /* + * Build the initial part of the COPY as it would appear if the + * nominal target table is the actual target. If we see anything + * else, it must be a load-via-partition-root case. + */ + appendPQExpBuffer(copyStmt, "COPY %s ", + fmtQualifiedId(te->namespace, te->tag)); + result = strncmp(te->copyStmt, copyStmt->data, copyStmt->len) != 0; + destroyPQExpBuffer(copyStmt); + return result; + } + /* Assume it's not load-via-partition-root */ + return false; +} + +/* * This is a routine that is part of the dumper interface, hence the 'Archive*' parameter. */ @@ -2974,8 +3025,12 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) res = res & ~REQ_DATA; } - /* If there's no definition command, there's no schema component */ - if (!te->defn || !te->defn[0]) + /* + * If there's no definition command, there's no schema component. Treat + * "load via partition root" comments as not schema. + */ + if (!te->defn || !te->defn[0] || + strncmp(te->defn, "-- load via partition root ", 27) == 0) res = res & ~REQ_SCHEMA; /* |