summary | refs | log | tree | commit | diff
path: root/src/bin/pg_dump/compress_zstd.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2025-10-13 13:01:45 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2025-10-13 13:01:45 -0400
commitfe8192a95e6c7159d639e341740e32966c9cf385 (patch)
treed4b44024f130a69f30538c7b1cd5a5f207b0932b /src/bin/pg_dump/compress_zstd.c
parenta239c4a0c22627a45c0a91d2f4d9755bbc5be7be (diff)
Fix poor buffering logic in pg_dump's lz4 and zstd compression code.
Both of these modules dumped each bit of output that they got from the underlying compression library as a separate "data block" in the emitted archive file. In the case of zstd this'd frequently result in block sizes well under 100 bytes; lz4 is a little better but still produces blocks around 300 bytes, at least in the test case I tried. This bloats the archive file a little bit compared to larger block sizes, but the real problem is that when pg_restore has to skip each data block rather than seeking directly to some target data, tiny block sizes are enormously inefficient. Fix both modules so that they fill their allocated buffer reasonably well before dumping a data block. In the case of lz4, also delete some redundant logic that caused the lz4 frame header to be emitted as a separate data block. (That saves little, but I see no reason to expend extra code to get worse results.) I fixed the "stream API" code too. In those cases, feeding small amounts of data to fwrite() probably doesn't have any meaningful performance consequences. But it seems like a bad idea to leave the two sets of code doing the same thing in two different ways. In passing, remove unnecessary "extra paranoia" check in _ZstdWriteCommon. _CustomWriteFunc (the only possible referent of cs->writeF) already protects itself against zero-length writes, and it's really a modularity violation for _ZstdWriteCommon to know that the custom format disallows empty data blocks. Also, fix Zstd_read_internal to do less work when passed size == 0. Reported-by: Dimitrios Apostolou <jimis@gmx.net> Author: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Chao Li <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/3515357.1760128017@sss.pgh.pa.us
Diffstat (limited to 'src/bin/pg_dump/compress_zstd.c')
-rw-r--r--  src/bin/pg_dump/compress_zstd.c | 42
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/src/bin/pg_dump/compress_zstd.c b/src/bin/pg_dump/compress_zstd.c
index e24d45e1bbe..36c1fd264ee 100644
--- a/src/bin/pg_dump/compress_zstd.c
+++ b/src/bin/pg_dump/compress_zstd.c
@@ -98,24 +98,22 @@ _ZstdWriteCommon(ArchiveHandle *AH, CompressorState *cs, bool flush)
ZSTD_outBuffer *output = &zstdcs->output;
/* Loop while there's any input or until flushed */
- while (input->pos != input->size || flush)
+ while (input->pos < input->size || flush)
{
size_t res;
- output->pos = 0;
res = ZSTD_compressStream2(zstdcs->cstream, output,
input, flush ? ZSTD_e_end : ZSTD_e_continue);
if (ZSTD_isError(res))
pg_fatal("could not compress data: %s", ZSTD_getErrorName(res));
- /*
- * Extra paranoia: avoid zero-length chunks, since a zero length chunk
- * is the EOF marker in the custom format. This should never happen
- * but...
- */
- if (output->pos > 0)
+ /* Dump output buffer if full, or if we're told to flush */
+ if (output->pos >= output->size || flush)
+ {
cs->writeF(AH, output->dst, output->pos);
+ output->pos = 0;
+ }
if (res == 0)
break; /* End of frame or all input consumed */
@@ -289,7 +287,7 @@ Zstd_read_internal(void *ptr, size_t size, CompressFileHandle *CFH, bool exit_on
output->dst = ptr;
output->pos = 0;
- for (;;)
+ while (output->pos < output->size)
{
Assert(input->pos <= input->size);
Assert(input->size <= input_allocated_size);
@@ -343,9 +341,6 @@ Zstd_read_internal(void *ptr, size_t size, CompressFileHandle *CFH, bool exit_on
if (res == 0)
break; /* End of frame */
}
-
- if (output->pos == output->size)
- break; /* We read all the data that fits */
}
return output->pos;
@@ -367,26 +362,31 @@ Zstd_write(const void *ptr, size_t size, CompressFileHandle *CFH)
if (zstdcs->cstream == NULL)
{
zstdcs->output.size = ZSTD_CStreamOutSize();
- zstdcs->output.dst = pg_malloc0(zstdcs->output.size);
+ zstdcs->output.dst = pg_malloc(zstdcs->output.size);
+ zstdcs->output.pos = 0;
zstdcs->cstream = _ZstdCStreamParams(CFH->compression_spec);
if (zstdcs->cstream == NULL)
pg_fatal("could not initialize compression library");
}
/* Consume all input, to be flushed later */
- while (input->pos != input->size)
+ while (input->pos < input->size)
{
- output->pos = 0;
res = ZSTD_compressStream2(zstdcs->cstream, output, input, ZSTD_e_continue);
if (ZSTD_isError(res))
pg_fatal("could not write to file: %s", ZSTD_getErrorName(res));
- errno = 0;
- cnt = fwrite(output->dst, 1, output->pos, zstdcs->fp);
- if (cnt != output->pos)
+ /* Dump output buffer if full */
+ if (output->pos >= output->size)
{
- errno = (errno) ? errno : ENOSPC;
- pg_fatal("could not write to file: %m");
+ errno = 0;
+ cnt = fwrite(output->dst, 1, output->pos, zstdcs->fp);
+ if (cnt != output->pos)
+ {
+ errno = (errno) ? errno : ENOSPC;
+ pg_fatal("could not write to file: %m");
+ }
+ output->pos = 0;
}
}
}
@@ -448,7 +448,6 @@ Zstd_close(CompressFileHandle *CFH)
/* Loop until the compression buffers are fully consumed */
for (;;)
{
- output->pos = 0;
res = ZSTD_compressStream2(zstdcs->cstream, output, input, ZSTD_e_end);
if (ZSTD_isError(res))
{
@@ -466,6 +465,7 @@ Zstd_close(CompressFileHandle *CFH)
success = false;
break;
}
+ output->pos = 0;
if (res == 0)
break; /* End of frame */