Read until EOF instead of stat-reported size in read_binary_file

read_binary_file(), used by SQL functions pg_read_file() and friends,
uses stat to determine file length to read, when not passed an explicit
length as an argument. This is problematic, for example, if the file
being read is a virtual file with a stat-reported length of zero.
Arrange to read until EOF, or the StringInfo data string length limit, is
reached instead.

Original complaint and patch by me, with significant review, corrections,
advice, and code optimizations by Tom Lane. Backpatched to v11. Prior to
that only paths relative to the data and log dirs were allowed for files,
so no "zero length" files were reachable anyway.

Reviewed-By: Tom Lane
Discussion: https://postgr.es/m/flat/969b8d82-5bb2-5fa8-4eb1-f0e685c5d736%40joeconway.com
Backpatch-through: 11
This commit is contained in:
Joe Conway 2020-07-04 06:29:03 -04:00
parent ef799bdd04
commit c2cdaf0cb9
2 changed files with 66 additions and 29 deletions

View File

@ -64,7 +64,7 @@ SELECT pg_file_rename('test_file1', 'test_file2');
(1 row)
SELECT pg_read_file('test_file1'); -- not there
ERROR: could not stat file "test_file1": No such file or directory
ERROR: could not open file "test_file1" for reading: No such file or directory
SELECT pg_read_file('test_file2');
pg_read_file
--------------
@ -93,7 +93,7 @@ SELECT pg_file_rename('test_file2', 'test_file3', 'test_file3_archive');
(1 row)
SELECT pg_read_file('test_file2'); -- not there
ERROR: could not stat file "test_file2": No such file or directory
ERROR: could not open file "test_file2" for reading: No such file or directory
SELECT pg_read_file('test_file3');
pg_read_file
--------------

View File

@ -103,33 +103,11 @@ read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
bool missing_ok)
{
bytea *buf;
size_t nbytes;
size_t nbytes = 0;
FILE *file;
if (bytes_to_read < 0)
{
if (seek_offset < 0)
bytes_to_read = -seek_offset;
else
{
struct stat fst;
if (stat(filename, &fst) < 0)
{
if (missing_ok && errno == ENOENT)
return NULL;
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", filename)));
}
bytes_to_read = fst.st_size - seek_offset;
}
}
/* not sure why anyone thought that int64 length was a good idea */
if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
/* clamp request size to what we can actually deliver */
if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("requested length too large")));
@ -151,9 +129,68 @@ read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
(errcode_for_file_access(),
errmsg("could not seek in file \"%s\": %m", filename)));
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
if (bytes_to_read >= 0)
{
/* If passed explicit read size just do it */
buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
}
else
{
/* Negative read size, read rest of file */
StringInfoData sbuf;
initStringInfo(&sbuf);
/* Leave room in the buffer for the varlena length word */
sbuf.len += VARHDRSZ;
Assert(sbuf.len < sbuf.maxlen);
while (!(feof(file) || ferror(file)))
{
size_t rbytes;
/* Minimum amount to read at a time */
#define MIN_READ_SIZE 4096
/*
* If not at end of file, and sbuf.len is equal to
* MaxAllocSize - 1, then either the file is too large, or
* there is nothing left to read. Attempt to read one more
* byte to see if the end of file has been reached. If not,
* the file is too large; we'd rather give the error message
* for that ourselves.
*/
if (sbuf.len == MaxAllocSize - 1)
{
char rbuf[1];
fread(rbuf, 1, 1, file);
if (!feof(file))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("file length too large")));
else
break;
}
/* OK, ensure that we can read at least MIN_READ_SIZE */
enlargeStringInfo(&sbuf, MIN_READ_SIZE);
/*
* stringinfo.c likes to allocate in powers of 2, so it's likely
* that much more space is available than we asked for. Use all
* of it, rather than making more fread calls than necessary.
*/
rbytes = fread(sbuf.data + sbuf.len, 1,
(size_t) (sbuf.maxlen - sbuf.len - 1), file);
sbuf.len += rbytes;
nbytes += rbytes;
}
/* Now we can commandeer the stringinfo's buffer as the result */
buf = (bytea *) sbuf.data;
}
if (ferror(file))
ereport(ERROR,