diff options
author | Nathan Bossart <nathan@postgresql.org> | 2025-04-04 14:51:08 -0500 |
---|---|---|
committer | Nathan Bossart <nathan@postgresql.org> | 2025-04-04 14:51:08 -0500 |
commit | 7d5c83b4e90c7156655f98b7312a30ae5eeb4d27 (patch) | |
tree | 3720c57d402fc3c497eae60e26291d5ae70202c2 /src/bin/pg_dump/pg_backup_archiver.c | |
parent | e3cc039a7d930ffe86e706944a2b3368bd3ef506 (diff) |
pg_dump: Reduce memory usage of dumps with statistics.
Right now, pg_dump stores all generated commands for statistics in
memory. These commands can be quite large and therefore can
significantly increase pg_dump's memory footprint. To fix, wait
until we are about to write out the commands before generating
them, and be sure to free the commands after writing. This is
implemented via a new defnDumper callback that works much like the
dataDumper one but is specifically designed for TOC entries.
Custom dumps that include data might write the TOC twice (to update
data offset information), which would ordinarily cause pg_dump to
run the attribute statistics queries twice. However, as a hack, we
save the length of the written-out entry in the first pass and skip
over it in the second. While there is no known technical issue
with executing the queries multiple times and rewriting the
results, it's expensive and feels risky, so let's avoid it.
As an exception, we _do_ execute the queries twice for the tar
format. This format does a second pass through the TOC to generate
the restore.sql file. pg_restore doesn't use this file, so even if
the second round of queries returns different results than the
first, it won't corrupt the output; the archive and restore.sql
file will just have different content. A follow-up commit will
teach pg_dump to gather attribute statistics in batches, which our
testing indicates more than makes up for the added expense of
running the queries twice.
Author: Corey Huinker <corey.huinker@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com
Diffstat (limited to 'src/bin/pg_dump/pg_backup_archiver.c')
-rw-r--r-- | src/bin/pg_dump/pg_backup_archiver.c | 83 |
1 files changed, 81 insertions, 2 deletions
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 1d131e5a57d..2f902494e64 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -1266,6 +1266,9 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId, newToc->dataDumperArg = opts->dumpArg; newToc->hadDumper = opts->dumpFn ? true : false; + newToc->defnDumper = opts->defnFn; + newToc->defnDumperArg = opts->defnArg; + newToc->formatData = NULL; newToc->dataLength = 0; @@ -2621,7 +2624,45 @@ WriteToc(ArchiveHandle *AH) WriteStr(AH, te->tag); WriteStr(AH, te->desc); WriteInt(AH, te->section); - WriteStr(AH, te->defn); + + if (te->defnLen) + { + /* + * defnLen should only be set for custom format's second call to + * WriteToc(), which rewrites the TOC in place to update data + * offsets. Instead of calling the defnDumper a second time + * (which could involve re-executing queries), just skip writing + * the entry. While regenerating the definition should + * theoretically produce the same result as before, it's expensive + * and feels risky. + * + * The custom format only calls WriteToc() a second time if + * fseeko() is usable (see _CloseArchive() in pg_backup_custom.c), + * so we can safely use it without checking. For other formats, + * we fail because one of our assumptions must no longer hold + * true. + * + * XXX This is a layering violation, but the alternative is an + * awkward and complicated callback infrastructure for this + * special case. This might be worth revisiting in the future. + */ + if (AH->format != archCustom) + pg_fatal("unexpected TOC entry in WriteToc(): %d %s %s", + te->dumpId, te->desc, te->tag); + + if (fseeko(AH->FH, te->defnLen, SEEK_CUR != 0)) + pg_fatal("error during file seek: %m"); + } + else if (te->defnDumper) + { + char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg); + + te->defnLen = WriteStr(AH, defn); + pg_free(defn); + } + else + WriteStr(AH, te->defn); + WriteStr(AH, te->dropStmt); WriteStr(AH, te->copyStmt); WriteStr(AH, te->namespace); @@ -3849,7 +3890,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) /* * Actually print the definition. Normally we can just print the defn - * string if any, but we have three special cases: + * string if any, but we have four special cases: * * 1. A crude hack for suppressing AUTHORIZATION clause that old pg_dump * versions put into CREATE SCHEMA. Don't mutate the variant for schema @@ -3862,6 +3903,11 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) * 3. ACL LARGE OBJECTS entries need special processing because they * contain only one copy of the ACL GRANT/REVOKE commands, which we must * apply to each large object listed in the associated BLOB METADATA. + * + * 4. Entries with a defnDumper need to call it to generate the + * definition. This is primarily intended to provide a way to save memory + * for objects that would otherwise need a lot of it (e.g., statistics + * data). */ if (ropt->noOwner && strcmp(te->desc, "SCHEMA") == 0 && strncmp(te->defn, "--", 2) != 0) @@ -3877,6 +3923,39 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) { IssueACLPerBlob(AH, te); } + else if (te->defnLen && AH->format != archTar) + { + /* + * If defnLen is set, the defnDumper has already been called for this + * TOC entry. We don't normally expect a defnDumper to be called for + * a TOC entry a second time in _printTocEntry(), but there's an + * exception. The tar format first calls WriteToc(), which scans the + * entire TOC, and then it later calls RestoreArchive() to generate + * restore.sql, which scans the TOC again. There doesn't appear to be + * a good way to prevent a second defnDumper call in this case without + * storing the definition in memory, which defeats the purpose. This + * second defnDumper invocation should generate the same output as the + * first, but even if it doesn't, the worst-case scenario is that + * restore.sql might have different statistics data than the archive. + * + * In all other cases, encountering a TOC entry a second time in + * _printTocEntry() is unexpected, so we fail because one of our + * assumptions must no longer hold true. + * + * XXX This is a layering violation, but the alternative is an awkward + * and complicated callback infrastructure for this special case. This + * might be worth revisiting in the future. + */ + pg_fatal("unexpected TOC entry in _printTocEntry(): %d %s %s", + te->dumpId, te->desc, te->tag); + } + else if (te->defnDumper) + { + char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg); + + te->defnLen = ahprintf(AH, "%s\n\n", defn); + pg_free(defn); + } else if (te->defn && strlen(te->defn) > 0) { ahprintf(AH, "%s\n\n", te->defn); |