mirror of https://github.com/postgres/postgres
Use pg_pread() and pg_pwrite() for data files and WAL.
Cut down on system calls by doing random I/O using offset-based OS routines where available. Remove the code for tracking the 'virtual' seek position. The only reason left to call FileSeek() was to get the file's size, so provide a new function FileSize() instead.

Author: Oskari Saarenmaa, Thomas Munro
Reviewed-by: Thomas Munro, Jesper Pedersen, Tom Lane, Alvaro Herrera
Discussion: https://postgr.es/m/CAEepm=02rapCpPR3ZGF2vW=SBHSdFYO_bz_f-wwWJonmA3APgw@mail.gmail.com
Discussion: https://postgr.es/m/b8748d39-0b19-0514-a1b9-4e5a28e6a208%40gmail.com
Discussion: https://postgr.es/m/a86bd200-ebbe-d829-e3ca-0c4474b2fcb7%40ohmu.fi
This commit is contained in:
parent
3fd2a7932e
commit
c24dcd0cfd
|
@ -935,7 +935,7 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
|
|||
* Note that we deviate from the usual WAL coding practices here,
|
||||
* check the above "Logical rewrite support" comment for reasoning.
|
||||
*/
|
||||
written = FileWrite(src->vfd, waldata_start, len,
|
||||
written = FileWrite(src->vfd, waldata_start, len, src->off,
|
||||
WAIT_EVENT_LOGICAL_REWRITE_WRITE);
|
||||
if (written != len)
|
||||
ereport(ERROR,
|
||||
|
|
|
@ -2478,18 +2478,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
|||
Size nleft;
|
||||
int written;
|
||||
|
||||
/* Need to seek in the file? */
|
||||
if (openLogOff != startoffset)
|
||||
{
|
||||
if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
|
||||
ereport(PANIC,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek in log file %s to offset %u: %m",
|
||||
XLogFileNameP(ThisTimeLineID, openLogSegNo),
|
||||
startoffset)));
|
||||
openLogOff = startoffset;
|
||||
}
|
||||
|
||||
/* OK to write the page(s) */
|
||||
from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
|
||||
nbytes = npages * (Size) XLOG_BLCKSZ;
|
||||
|
@ -2498,7 +2486,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
|||
{
|
||||
errno = 0;
|
||||
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
|
||||
written = write(openLogFile, from, nleft);
|
||||
written = pg_pwrite(openLogFile, from, nleft, startoffset);
|
||||
pgstat_report_wait_end();
|
||||
if (written <= 0)
|
||||
{
|
||||
|
@ -2513,6 +2501,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
|||
}
|
||||
nleft -= written;
|
||||
from += written;
|
||||
startoffset += written;
|
||||
} while (nleft > 0);
|
||||
|
||||
/* Update state for write */
|
||||
|
@ -11821,22 +11810,9 @@ retry:
|
|||
|
||||
/* Read the requested page */
|
||||
readOff = targetPageOff;
|
||||
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
|
||||
{
|
||||
char fname[MAXFNAMELEN];
|
||||
int save_errno = errno;
|
||||
|
||||
XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
|
||||
errno = save_errno;
|
||||
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek in log segment %s to offset %u: %m",
|
||||
fname, readOff)));
|
||||
goto next_record_is_invalid;
|
||||
}
|
||||
|
||||
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
|
||||
r = read(readFile, readBuf, XLOG_BLCKSZ);
|
||||
r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
|
||||
if (r != XLOG_BLCKSZ)
|
||||
{
|
||||
char fname[MAXFNAMELEN];
|
||||
|
|
|
@ -67,12 +67,6 @@ struct BufFile
|
|||
int numFiles; /* number of physical files in set */
|
||||
/* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
|
||||
File *files; /* palloc'd array with numFiles entries */
|
||||
off_t *offsets; /* palloc'd array with numFiles entries */
|
||||
|
||||
/*
|
||||
* offsets[i] is the current seek position of files[i]. We use this to
|
||||
* avoid making redundant FileSeek calls.
|
||||
*/
|
||||
|
||||
bool isInterXact; /* keep open over transactions? */
|
||||
bool dirty; /* does buffer need to be written? */
|
||||
|
@ -116,7 +110,6 @@ makeBufFileCommon(int nfiles)
|
|||
BufFile *file = (BufFile *) palloc(sizeof(BufFile));
|
||||
|
||||
file->numFiles = nfiles;
|
||||
file->offsets = (off_t *) palloc0(sizeof(off_t) * nfiles);
|
||||
file->isInterXact = false;
|
||||
file->dirty = false;
|
||||
file->resowner = CurrentResourceOwner;
|
||||
|
@ -170,10 +163,7 @@ extendBufFile(BufFile *file)
|
|||
|
||||
file->files = (File *) repalloc(file->files,
|
||||
(file->numFiles + 1) * sizeof(File));
|
||||
file->offsets = (off_t *) repalloc(file->offsets,
|
||||
(file->numFiles + 1) * sizeof(off_t));
|
||||
file->files[file->numFiles] = pfile;
|
||||
file->offsets[file->numFiles] = 0L;
|
||||
file->numFiles++;
|
||||
}
|
||||
|
||||
|
@ -396,7 +386,6 @@ BufFileClose(BufFile *file)
|
|||
FileClose(file->files[i]);
|
||||
/* release the buffer space */
|
||||
pfree(file->files);
|
||||
pfree(file->offsets);
|
||||
pfree(file);
|
||||
}
|
||||
|
||||
|
@ -422,27 +411,17 @@ BufFileLoadBuffer(BufFile *file)
|
|||
file->curOffset = 0L;
|
||||
}
|
||||
|
||||
/*
|
||||
* May need to reposition physical file.
|
||||
*/
|
||||
thisfile = file->files[file->curFile];
|
||||
if (file->curOffset != file->offsets[file->curFile])
|
||||
{
|
||||
if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset)
|
||||
return; /* seek failed, read nothing */
|
||||
file->offsets[file->curFile] = file->curOffset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read whatever we can get, up to a full bufferload.
|
||||
*/
|
||||
thisfile = file->files[file->curFile];
|
||||
file->nbytes = FileRead(thisfile,
|
||||
file->buffer.data,
|
||||
sizeof(file->buffer),
|
||||
file->curOffset,
|
||||
WAIT_EVENT_BUFFILE_READ);
|
||||
if (file->nbytes < 0)
|
||||
file->nbytes = 0;
|
||||
file->offsets[file->curFile] += file->nbytes;
|
||||
/* we choose not to advance curOffset here */
|
||||
|
||||
if (file->nbytes > 0)
|
||||
|
@ -491,23 +470,14 @@ BufFileDumpBuffer(BufFile *file)
|
|||
if ((off_t) bytestowrite > availbytes)
|
||||
bytestowrite = (int) availbytes;
|
||||
|
||||
/*
|
||||
* May need to reposition physical file.
|
||||
*/
|
||||
thisfile = file->files[file->curFile];
|
||||
if (file->curOffset != file->offsets[file->curFile])
|
||||
{
|
||||
if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset)
|
||||
return; /* seek failed, give up */
|
||||
file->offsets[file->curFile] = file->curOffset;
|
||||
}
|
||||
bytestowrite = FileWrite(thisfile,
|
||||
file->buffer.data + wpos,
|
||||
bytestowrite,
|
||||
file->curOffset,
|
||||
WAIT_EVENT_BUFFILE_WRITE);
|
||||
if (bytestowrite <= 0)
|
||||
return; /* failed to write */
|
||||
file->offsets[file->curFile] += bytestowrite;
|
||||
file->curOffset += bytestowrite;
|
||||
wpos += bytestowrite;
|
||||
|
||||
|
@ -803,11 +773,10 @@ BufFileSize(BufFile *file)
|
|||
{
|
||||
off_t lastFileSize;
|
||||
|
||||
/* Get the size of the last physical file by seeking to end. */
|
||||
lastFileSize = FileSeek(file->files[file->numFiles - 1], 0, SEEK_END);
|
||||
/* Get the size of the last physical file. */
|
||||
lastFileSize = FileSize(file->files[file->numFiles - 1]);
|
||||
if (lastFileSize < 0)
|
||||
return -1;
|
||||
file->offsets[file->numFiles - 1] = lastFileSize;
|
||||
|
||||
return ((file->numFiles - 1) * (off_t) MAX_PHYSICAL_FILESIZE) +
|
||||
lastFileSize;
|
||||
|
@ -849,13 +818,8 @@ BufFileAppend(BufFile *target, BufFile *source)
|
|||
|
||||
target->files = (File *)
|
||||
repalloc(target->files, sizeof(File) * newNumFiles);
|
||||
target->offsets = (off_t *)
|
||||
repalloc(target->offsets, sizeof(off_t) * newNumFiles);
|
||||
for (i = target->numFiles; i < newNumFiles; i++)
|
||||
{
|
||||
target->files[i] = source->files[i - target->numFiles];
|
||||
target->offsets[i] = source->offsets[i - target->numFiles];
|
||||
}
|
||||
target->numFiles = newNumFiles;
|
||||
|
||||
return startBlock;
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
* including base tables, scratch files (e.g., sort and hash spool
|
||||
* files), and random calls to C library routines like system(3); it
|
||||
* is quite easy to exceed system limits on the number of open files a
|
||||
* single process can have. (This is around 256 on many modern
|
||||
* operating systems, but can be as low as 32 on others.)
|
||||
* single process can have. (This is around 1024 on many modern
|
||||
* operating systems, but may be lower on others.)
|
||||
*
|
||||
* VFDs are managed as an LRU pool, with actual OS file descriptors
|
||||
* being opened and closed as needed. Obviously, if a routine is
|
||||
|
@ -167,15 +167,6 @@ int max_safe_fds = 32; /* default if not changed */
|
|||
|
||||
#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED)
|
||||
|
||||
/*
|
||||
* Note: a VFD's seekPos is normally always valid, but if for some reason
|
||||
* an lseek() fails, it might become set to FileUnknownPos. We can struggle
|
||||
* along without knowing the seek position in many cases, but in some places
|
||||
* we have to fail if we don't have it.
|
||||
*/
|
||||
#define FileUnknownPos ((off_t) -1)
|
||||
#define FilePosIsUnknown(pos) ((pos) < 0)
|
||||
|
||||
/* these are the assigned bits in fdstate below: */
|
||||
#define FD_DELETE_AT_CLOSE (1 << 0) /* T = delete when closed */
|
||||
#define FD_CLOSE_AT_EOXACT (1 << 1) /* T = close at eoXact */
|
||||
|
@ -189,7 +180,6 @@ typedef struct vfd
|
|||
File nextFree; /* link to next free VFD, if in freelist */
|
||||
File lruMoreRecently; /* doubly linked recency-of-use list */
|
||||
File lruLessRecently;
|
||||
off_t seekPos; /* current logical file position, or -1 */
|
||||
off_t fileSize; /* current size of file (0 if not temporary) */
|
||||
char *fileName; /* name of file, or NULL for unused VFD */
|
||||
/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
|
||||
|
@ -407,9 +397,7 @@ pg_fdatasync(int fd)
|
|||
/*
|
||||
* pg_flush_data --- advise OS that the described dirty data should be flushed
|
||||
*
|
||||
* offset of 0 with nbytes 0 means that the entire file should be flushed;
|
||||
* in this case, this function may have side-effects on the file's
|
||||
* seek position!
|
||||
* offset of 0 with nbytes 0 means that the entire file should be flushed
|
||||
*/
|
||||
void
|
||||
pg_flush_data(int fd, off_t offset, off_t nbytes)
|
||||
|
@ -1029,22 +1017,6 @@ LruDelete(File file)
|
|||
|
||||
vfdP = &VfdCache[file];
|
||||
|
||||
/*
|
||||
* Normally we should know the seek position, but if for some reason we
|
||||
* have lost track of it, try again to get it. If we still can't get it,
|
||||
* we have a problem: we will be unable to restore the file seek position
|
||||
* when and if the file is re-opened. But we can't really throw an error
|
||||
* and refuse to close the file, or activities such as transaction cleanup
|
||||
* will be broken.
|
||||
*/
|
||||
if (FilePosIsUnknown(vfdP->seekPos))
|
||||
{
|
||||
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
|
||||
if (FilePosIsUnknown(vfdP->seekPos))
|
||||
elog(LOG, "could not seek file \"%s\" before closing: %m",
|
||||
vfdP->fileName);
|
||||
}
|
||||
|
||||
/*
|
||||
* Close the file. We aren't expecting this to fail; if it does, better
|
||||
* to leak the FD than to mess up our internal state.
|
||||
|
@ -1113,33 +1085,6 @@ LruInsert(File file)
|
|||
{
|
||||
++nfile;
|
||||
}
|
||||
|
||||
/*
|
||||
* Seek to the right position. We need no special case for seekPos
|
||||
* equal to FileUnknownPos, as lseek() will certainly reject that
|
||||
* (thus completing the logic noted in LruDelete() that we will fail
|
||||
* to re-open a file if we couldn't get its seek position before
|
||||
* closing).
|
||||
*/
|
||||
if (vfdP->seekPos != (off_t) 0)
|
||||
{
|
||||
if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0)
|
||||
{
|
||||
/*
|
||||
* If we fail to restore the seek position, treat it like an
|
||||
* open() failure.
|
||||
*/
|
||||
int save_errno = errno;
|
||||
|
||||
elog(LOG, "could not seek file \"%s\" after re-opening: %m",
|
||||
vfdP->fileName);
|
||||
(void) close(vfdP->fd);
|
||||
vfdP->fd = VFD_CLOSED;
|
||||
--nfile;
|
||||
errno = save_errno;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1406,7 +1351,6 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
|
|||
/* Saved flags are adjusted to be OK for re-opening file */
|
||||
vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
|
||||
vfdP->fileMode = fileMode;
|
||||
vfdP->seekPos = 0;
|
||||
vfdP->fileSize = 0;
|
||||
vfdP->fdstate = 0x0;
|
||||
vfdP->resowner = NULL;
|
||||
|
@ -1820,7 +1764,6 @@ FileClose(File file)
|
|||
|
||||
/*
|
||||
* FilePrefetch - initiate asynchronous read of a given range of the file.
|
||||
* The logical seek position is unaffected.
|
||||
*
|
||||
* Currently the only implementation of this function is using posix_fadvise
|
||||
* which is the simplest standardized interface that accomplishes this.
|
||||
|
@ -1867,10 +1810,6 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
|
|||
file, VfdCache[file].fileName,
|
||||
(int64) offset, (int64) nbytes));
|
||||
|
||||
/*
|
||||
* Caution: do not call pg_flush_data with nbytes = 0, it could trash the
|
||||
* file's seek position. We prefer to define that as a no-op here.
|
||||
*/
|
||||
if (nbytes <= 0)
|
||||
return;
|
||||
|
||||
|
@ -1884,7 +1823,8 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
|
|||
}
|
||||
|
||||
int
|
||||
FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
|
||||
FileRead(File file, char *buffer, int amount, off_t offset,
|
||||
uint32 wait_event_info)
|
||||
{
|
||||
int returnCode;
|
||||
Vfd *vfdP;
|
||||
|
@ -1893,7 +1833,7 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
|
|||
|
||||
DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %d %p",
|
||||
file, VfdCache[file].fileName,
|
||||
(int64) VfdCache[file].seekPos,
|
||||
(int64) offset,
|
||||
amount, buffer));
|
||||
|
||||
returnCode = FileAccess(file);
|
||||
|
@ -1904,16 +1844,10 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
|
|||
|
||||
retry:
|
||||
pgstat_report_wait_start(wait_event_info);
|
||||
returnCode = read(vfdP->fd, buffer, amount);
|
||||
returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
if (returnCode >= 0)
|
||||
{
|
||||
/* if seekPos is unknown, leave it that way */
|
||||
if (!FilePosIsUnknown(vfdP->seekPos))
|
||||
vfdP->seekPos += returnCode;
|
||||
}
|
||||
else
|
||||
if (returnCode < 0)
|
||||
{
|
||||
/*
|
||||
* Windows may run out of kernel buffers and return "Insufficient
|
||||
|
@ -1939,16 +1873,14 @@ retry:
|
|||
/* OK to retry if interrupted */
|
||||
if (errno == EINTR)
|
||||
goto retry;
|
||||
|
||||
/* Trouble, so assume we don't know the file position anymore */
|
||||
vfdP->seekPos = FileUnknownPos;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
int
|
||||
FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
|
||||
FileWrite(File file, char *buffer, int amount, off_t offset,
|
||||
uint32 wait_event_info)
|
||||
{
|
||||
int returnCode;
|
||||
Vfd *vfdP;
|
||||
|
@ -1957,7 +1889,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
|
|||
|
||||
DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %d %p",
|
||||
file, VfdCache[file].fileName,
|
||||
(int64) VfdCache[file].seekPos,
|
||||
(int64) offset,
|
||||
amount, buffer));
|
||||
|
||||
returnCode = FileAccess(file);
|
||||
|
@ -1976,26 +1908,13 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
|
|||
*/
|
||||
if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
|
||||
{
|
||||
off_t newPos;
|
||||
off_t past_write = offset + amount;
|
||||
|
||||
/*
|
||||
* Normally we should know the seek position, but if for some reason
|
||||
* we have lost track of it, try again to get it. Here, it's fine to
|
||||
* throw an error if we still can't get it.
|
||||
*/
|
||||
if (FilePosIsUnknown(vfdP->seekPos))
|
||||
{
|
||||
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR);
|
||||
if (FilePosIsUnknown(vfdP->seekPos))
|
||||
elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName);
|
||||
}
|
||||
|
||||
newPos = vfdP->seekPos + amount;
|
||||
if (newPos > vfdP->fileSize)
|
||||
if (past_write > vfdP->fileSize)
|
||||
{
|
||||
uint64 newTotal = temporary_files_size;
|
||||
|
||||
newTotal += newPos - vfdP->fileSize;
|
||||
newTotal += past_write - vfdP->fileSize;
|
||||
if (newTotal > (uint64) temp_file_limit * (uint64) 1024)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
|
||||
|
@ -2007,7 +1926,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
|
|||
retry:
|
||||
errno = 0;
|
||||
pgstat_report_wait_start(wait_event_info);
|
||||
returnCode = write(vfdP->fd, buffer, amount);
|
||||
returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
/* if write didn't set errno, assume problem is no disk space */
|
||||
|
@ -2016,10 +1935,6 @@ retry:
|
|||
|
||||
if (returnCode >= 0)
|
||||
{
|
||||
/* if seekPos is unknown, leave it that way */
|
||||
if (!FilePosIsUnknown(vfdP->seekPos))
|
||||
vfdP->seekPos += returnCode;
|
||||
|
||||
/*
|
||||
* Maintain fileSize and temporary_files_size if it's a temp file.
|
||||
*
|
||||
|
@ -2029,12 +1944,12 @@ retry:
|
|||
*/
|
||||
if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
|
||||
{
|
||||
off_t newPos = vfdP->seekPos;
|
||||
off_t past_write = offset + amount;
|
||||
|
||||
if (newPos > vfdP->fileSize)
|
||||
if (past_write > vfdP->fileSize)
|
||||
{
|
||||
temporary_files_size += newPos - vfdP->fileSize;
|
||||
vfdP->fileSize = newPos;
|
||||
temporary_files_size += past_write - vfdP->fileSize;
|
||||
vfdP->fileSize = past_write;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2060,9 +1975,6 @@ retry:
|
|||
/* OK to retry if interrupted */
|
||||
if (errno == EINTR)
|
||||
goto retry;
|
||||
|
||||
/* Trouble, so assume we don't know the file position anymore */
|
||||
vfdP->seekPos = FileUnknownPos;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
|
@ -2090,93 +2002,26 @@ FileSync(File file, uint32 wait_event_info)
|
|||
}
|
||||
|
||||
off_t
|
||||
FileSeek(File file, off_t offset, int whence)
|
||||
FileSize(File file)
|
||||
{
|
||||
Vfd *vfdP;
|
||||
|
||||
Assert(FileIsValid(file));
|
||||
|
||||
DO_DB(elog(LOG, "FileSeek: %d (%s) " INT64_FORMAT " " INT64_FORMAT " %d",
|
||||
file, VfdCache[file].fileName,
|
||||
(int64) VfdCache[file].seekPos,
|
||||
(int64) offset, whence));
|
||||
DO_DB(elog(LOG, "FileSize %d (%s)",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
vfdP = &VfdCache[file];
|
||||
|
||||
if (FileIsNotOpen(file))
|
||||
{
|
||||
switch (whence)
|
||||
{
|
||||
case SEEK_SET:
|
||||
if (offset < 0)
|
||||
{
|
||||
errno = EINVAL;
|
||||
return (off_t) -1;
|
||||
}
|
||||
vfdP->seekPos = offset;
|
||||
break;
|
||||
case SEEK_CUR:
|
||||
if (FilePosIsUnknown(vfdP->seekPos) ||
|
||||
vfdP->seekPos + offset < 0)
|
||||
{
|
||||
errno = EINVAL;
|
||||
return (off_t) -1;
|
||||
}
|
||||
vfdP->seekPos += offset;
|
||||
break;
|
||||
case SEEK_END:
|
||||
if (FileAccess(file) < 0)
|
||||
return (off_t) -1;
|
||||
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "invalid whence: %d", whence);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (whence)
|
||||
{
|
||||
case SEEK_SET:
|
||||
if (offset < 0)
|
||||
{
|
||||
errno = EINVAL;
|
||||
return (off_t) -1;
|
||||
}
|
||||
if (vfdP->seekPos != offset)
|
||||
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
|
||||
break;
|
||||
case SEEK_CUR:
|
||||
if (offset != 0 || FilePosIsUnknown(vfdP->seekPos))
|
||||
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
|
||||
break;
|
||||
case SEEK_END:
|
||||
vfdP->seekPos = lseek(vfdP->fd, offset, whence);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "invalid whence: %d", whence);
|
||||
break;
|
||||
}
|
||||
if (FileAccess(file) < 0)
|
||||
return (off_t) -1;
|
||||
}
|
||||
|
||||
return vfdP->seekPos;
|
||||
return lseek(VfdCache[file].fd, 0, SEEK_END);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX not actually used but here for completeness
|
||||
*/
|
||||
#ifdef NOT_USED
|
||||
off_t
|
||||
FileTell(File file)
|
||||
{
|
||||
Assert(FileIsValid(file));
|
||||
DO_DB(elog(LOG, "FileTell %d (%s)",
|
||||
file, VfdCache[file].fileName));
|
||||
return VfdCache[file].seekPos;
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
FileTruncate(File file, off_t offset, uint32 wait_event_info)
|
||||
{
|
||||
|
|
|
@ -522,22 +522,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
|||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
|
||||
/*
|
||||
* Note: because caller usually obtained blocknum by calling mdnblocks,
|
||||
* which did a seek(SEEK_END), this seek is often redundant and will be
|
||||
* optimized away by fd.c. It's not redundant, however, if there is a
|
||||
* partial page at the end of the file. In that case we want to try to
|
||||
* overwrite the partial page with a full page. It's also not redundant
|
||||
* if bufmgr.c had to dump another buffer of the same file to make room
|
||||
* for the new page's buffer.
|
||||
*/
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek to block %u in file \"%s\": %m",
|
||||
blocknum, FilePathName(v->mdfd_vfd))));
|
||||
|
||||
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
|
||||
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
|
||||
{
|
||||
if (nbytes < 0)
|
||||
ereport(ERROR,
|
||||
|
@ -748,13 +733,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
|||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek to block %u in file \"%s\": %m",
|
||||
blocknum, FilePathName(v->mdfd_vfd))));
|
||||
|
||||
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_READ);
|
||||
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ);
|
||||
|
||||
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
|
@ -824,13 +803,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
|||
|
||||
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
|
||||
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not seek to block %u in file \"%s\": %m",
|
||||
blocknum, FilePathName(v->mdfd_vfd))));
|
||||
|
||||
nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_WRITE);
|
||||
nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE);
|
||||
|
||||
TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
|
@ -1979,7 +1952,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
|
|||
{
|
||||
off_t len;
|
||||
|
||||
len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);
|
||||
len = FileSize(seg->mdfd_vfd);
|
||||
if (len < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
/*
|
||||
* calls:
|
||||
*
|
||||
* File {Close, Read, Write, Seek, Tell, Sync}
|
||||
* File {Close, Read, Write, Size, Sync}
|
||||
* {Path Name Open, Allocate, Free} File
|
||||
*
|
||||
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
|
||||
|
@ -42,10 +42,6 @@
|
|||
#include <dirent.h>
|
||||
|
||||
|
||||
/*
|
||||
* FileSeek uses the standard UNIX lseek(2) flags.
|
||||
*/
|
||||
|
||||
typedef int File;
|
||||
|
||||
|
||||
|
@ -68,10 +64,10 @@ extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fil
|
|||
extern File OpenTemporaryFile(bool interXact);
|
||||
extern void FileClose(File file);
|
||||
extern int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info);
|
||||
extern int FileRead(File file, char *buffer, int amount, uint32 wait_event_info);
|
||||
extern int FileWrite(File file, char *buffer, int amount, uint32 wait_event_info);
|
||||
extern int FileRead(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info);
|
||||
extern int FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info);
|
||||
extern int FileSync(File file, uint32 wait_event_info);
|
||||
extern off_t FileSeek(File file, off_t offset, int whence);
|
||||
extern off_t FileSize(File file);
|
||||
extern int FileTruncate(File file, off_t offset, uint32 wait_event_info);
|
||||
extern void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info);
|
||||
extern char *FilePathName(File file);
|
||||
|
|
Loading…
Reference in New Issue