From f081a48f9a91a245054e94926e2092bd7e85edff Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 23 Mar 2023 17:52:32 +0100 Subject: [PATCH] Unify buffer sizes in pg_dump compression API Prior to the introduction of the compression API in e9960732a9, pg_dump would use the ZLIB_IN_SIZE/ZLIB_OUT_SIZE to size input/output buffers. Commit 0da243fed0 introduced similar constants for LZ4, but while gzip defined both buffers to be 4kB, LZ4 used 4kB and 16kB without any clear reasoning why that's desirable. Furthermore, parts of the code unaware of which compression is used (e.g. pg_backup_directory.c) continued to use ZLIB_OUT_SIZE directly. Simplify by replacing the various constants with DEFAULT_IO_BUFFER_SIZE, set to 4kB. The compression implementations still have an option to use a custom value, but considering 4kB was fine for 20+ years, I find that unlikely (and we'd probably just increase the default buffer size). Author: Georgios Kokolatos Reviewed-by: Tomas Vondra, Justin Pryzby Discussion: https://postgr.es/m/33496f7c-3449-1426-d568-63f6bca2ac1f@gmail.com --- src/bin/pg_dump/compress_gzip.c | 22 +++++++++++----------- src/bin/pg_dump/compress_io.h | 5 ++--- src/bin/pg_dump/compress_lz4.c | 11 ++++------- src/bin/pg_dump/compress_none.c | 4 ++-- src/bin/pg_dump/pg_backup_directory.c | 4 ++-- 5 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c index d9c3969332..cec0b41fce 100644 --- a/src/bin/pg_dump/compress_gzip.c +++ b/src/bin/pg_dump/compress_gzip.c @@ -120,8 +120,8 @@ WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs, * actually allocate one extra byte because some routines want to * append a trailing zero byte to the zlib output. 
*/ - gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1); - gzipcs->outsize = ZLIB_OUT_SIZE; + gzipcs->outsize = DEFAULT_IO_BUFFER_SIZE; + gzipcs->outbuf = pg_malloc(gzipcs->outsize + 1); /* * A level of zero simply copies the input one block at the time. This @@ -158,10 +158,10 @@ ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs) zp->zfree = Z_NULL; zp->opaque = Z_NULL; - buf = pg_malloc(ZLIB_IN_SIZE); - buflen = ZLIB_IN_SIZE; + buflen = DEFAULT_IO_BUFFER_SIZE; + buf = pg_malloc(buflen); - out = pg_malloc(ZLIB_OUT_SIZE + 1); + out = pg_malloc(DEFAULT_IO_BUFFER_SIZE + 1); if (inflateInit(zp) != Z_OK) pg_fatal("could not initialize compression library: %s", @@ -176,14 +176,14 @@ ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs) while (zp->avail_in > 0) { zp->next_out = (void *) out; - zp->avail_out = ZLIB_OUT_SIZE; + zp->avail_out = DEFAULT_IO_BUFFER_SIZE; res = inflate(zp, 0); if (res != Z_OK && res != Z_STREAM_END) pg_fatal("could not uncompress data: %s", zp->msg); - out[ZLIB_OUT_SIZE - zp->avail_out] = '\0'; - ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH); + out[DEFAULT_IO_BUFFER_SIZE - zp->avail_out] = '\0'; + ahwrite(out, 1, DEFAULT_IO_BUFFER_SIZE - zp->avail_out, AH); } } @@ -192,13 +192,13 @@ ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs) while (res != Z_STREAM_END) { zp->next_out = (void *) out; - zp->avail_out = ZLIB_OUT_SIZE; + zp->avail_out = DEFAULT_IO_BUFFER_SIZE; res = inflate(zp, 0); if (res != Z_OK && res != Z_STREAM_END) pg_fatal("could not uncompress data: %s", zp->msg); - out[ZLIB_OUT_SIZE - zp->avail_out] = '\0'; - ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH); + out[DEFAULT_IO_BUFFER_SIZE - zp->avail_out] = '\0'; + ahwrite(out, 1, DEFAULT_IO_BUFFER_SIZE - zp->avail_out, AH); } if (inflateEnd(zp) != Z_OK) diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h index 7c2f9b5668..fd8752db0d 100644 --- a/src/bin/pg_dump/compress_io.h +++ b/src/bin/pg_dump/compress_io.h @@ -17,9 
+17,8 @@ #include "pg_backup_archiver.h" -/* Initial buffer sizes used in zlib compression. */ -#define ZLIB_OUT_SIZE 4096 -#define ZLIB_IN_SIZE 4096 +/* Default size used for IO buffers */ +#define DEFAULT_IO_BUFFER_SIZE 4096 extern char *supports_compression(const pg_compress_specification compression_spec); diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c index 278f262162..2f3e552f51 100644 --- a/src/bin/pg_dump/compress_lz4.c +++ b/src/bin/pg_dump/compress_lz4.c @@ -20,9 +20,6 @@ #include <lz4.h> #include <lz4frame.h> -#define LZ4_OUT_SIZE (4 * 1024) -#define LZ4_IN_SIZE (16 * 1024) - /* * LZ4F_HEADER_SIZE_MAX first appeared in v1.7.5 of the library. * Redefine it for installations with a lesser version. @@ -57,7 +54,7 @@ ReadDataFromArchiveLZ4(ArchiveHandle *AH, CompressorState *cs) size_t buflen; size_t cnt; - buflen = LZ4_IN_SIZE; + buflen = DEFAULT_IO_BUFFER_SIZE; buf = pg_malloc(buflen); decbuf = pg_malloc(buflen); @@ -208,7 +205,7 @@ LZ4File_init(LZ4File *fs, int size, bool compressing) if (fs->compressing) { - fs->buflen = LZ4F_compressBound(LZ4_IN_SIZE, &fs->prefs); + fs->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &fs->prefs); if (fs->buflen < LZ4F_HEADER_SIZE_MAX) fs->buflen = LZ4F_HEADER_SIZE_MAX; @@ -244,7 +241,7 @@ LZ4File_init(LZ4File *fs, int size, bool compressing) return false; } - fs->buflen = size > LZ4_OUT_SIZE ? size : LZ4_OUT_SIZE; + fs->buflen = Max(size, DEFAULT_IO_BUFFER_SIZE); fs->buffer = pg_malloc(fs->buflen); fs->overflowalloclen = fs->buflen; @@ -423,7 +420,7 @@ LZ4File_write(const void *ptr, size_t size, CompressFileHandle *CFH) while (remaining > 0) { - int chunk = remaining < LZ4_IN_SIZE ? 
remaining : LZ4_IN_SIZE; + int chunk = Min(remaining, DEFAULT_IO_BUFFER_SIZE); remaining -= chunk; diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c index 18f3514d11..736a7957bc 100644 --- a/src/bin/pg_dump/compress_none.c +++ b/src/bin/pg_dump/compress_none.c @@ -33,8 +33,8 @@ ReadDataFromArchiveNone(ArchiveHandle *AH, CompressorState *cs) char *buf; size_t buflen; - buf = pg_malloc(ZLIB_OUT_SIZE); - buflen = ZLIB_OUT_SIZE; + buflen = DEFAULT_IO_BUFFER_SIZE; + buf = pg_malloc(buflen); while ((cnt = cs->readF(AH, &buf, &buflen))) { diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index 525dbf9bf0..abaaa3b10e 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -394,8 +394,8 @@ _PrintFileData(ArchiveHandle *AH, char *filename) if (!CFH) pg_fatal("could not open input file \"%s\": %m", filename); - buf = pg_malloc(ZLIB_OUT_SIZE); - buflen = ZLIB_OUT_SIZE; + buflen = DEFAULT_IO_BUFFER_SIZE; + buf = pg_malloc(buflen); while (CFH->read_func(buf, buflen, &cnt, CFH) && cnt > 0) {