Buffer manager modifications to keep a local buffer-dirtied bit as well
as a shared dirty bit for each shared buffer. The shared dirty bit still controls writing the buffer, but the local bit controls whether we need to fsync the buffer's file. This arrangement fixes a bug that allowed some required fsyncs to be missed, and should improve performance as well. For more information, see my post of the same date on pgsql-hackers.
This commit is contained in:
parent
9c38a8d296
commit
1f6d8b90b8
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.62 2000/03/17 02:36:05 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.63 2000/04/09 04:43:16 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Transaction aborts can now occur two ways:
|
||||
@ -642,7 +642,7 @@ RecordTransactionCommit()
|
||||
{
|
||||
FlushBufferPool();
|
||||
if (leak)
|
||||
ResetBufferPool();
|
||||
ResetBufferPool(true);
|
||||
|
||||
/*
|
||||
* have the transaction access methods record the status
|
||||
@ -658,7 +658,7 @@ RecordTransactionCommit()
|
||||
}
|
||||
|
||||
if (leak)
|
||||
ResetBufferPool();
|
||||
ResetBufferPool(true);
|
||||
}
|
||||
|
||||
|
||||
@ -759,7 +759,10 @@ RecordTransactionAbort()
|
||||
if (SharedBufferChanged && !TransactionIdDidCommit(xid))
|
||||
TransactionIdAbort(xid);
|
||||
|
||||
ResetBufferPool();
|
||||
/*
|
||||
* Tell bufmgr and smgr to release resources.
|
||||
*/
|
||||
ResetBufferPool(false); /* false -> is abort */
|
||||
}
|
||||
|
||||
/* --------------------------------
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/catalog/catalog.c,v 1.30 2000/01/26 05:56:10 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/catalog/catalog.c,v 1.31 2000/04/09 04:43:15 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -23,25 +23,87 @@
|
||||
#include "utils/syscache.h"
|
||||
|
||||
/*
|
||||
* relpath - path to the relation
|
||||
* Perhaps this should be in-line code in relopen().
|
||||
* relpath - construct path to a relation's file
|
||||
*
|
||||
* Note that this only works with relations that are visible to the current
|
||||
* backend, ie, either in the current database or shared system relations.
|
||||
*
|
||||
* Result is a palloc'd string.
|
||||
*/
|
||||
char *
|
||||
relpath(const char *relname)
|
||||
{
|
||||
char *path;
|
||||
size_t bufsize = 0;
|
||||
|
||||
if (IsSharedSystemRelationName(relname))
|
||||
{
|
||||
bufsize = strlen(DataDir) + sizeof(NameData) + 2;
|
||||
/* Shared system relations live in DataDir */
|
||||
size_t bufsize = strlen(DataDir) + sizeof(NameData) + 2;
|
||||
|
||||
path = (char *) palloc(bufsize);
|
||||
snprintf(path, bufsize, "%s/%s", DataDir, relname);
|
||||
snprintf(path, bufsize, "%s%c%s", DataDir, SEP_CHAR, relname);
|
||||
return path;
|
||||
}
|
||||
/*
|
||||
* If it is in the current database, assume it is in current working
|
||||
* directory. NB: this does not work during bootstrap!
|
||||
*/
|
||||
return pstrdup(relname);
|
||||
}
|
||||
|
||||
/*
|
||||
* relpath_blind - construct path to a relation's file
|
||||
*
|
||||
* Construct the path using only the info available to smgrblindwrt,
|
||||
* namely the names and OIDs of the database and relation. (Shared system
|
||||
* relations are identified with dbid = 0.) Note that we may have to
|
||||
* access a relation belonging to a different database!
|
||||
*
|
||||
* Result is a palloc'd string.
|
||||
*/
|
||||
|
||||
char *
|
||||
relpath_blind(const char *dbname, const char *relname,
|
||||
Oid dbid, Oid relid)
|
||||
{
|
||||
char *path;
|
||||
|
||||
if (dbid == (Oid) 0)
|
||||
{
|
||||
/* Shared system relations live in DataDir */
|
||||
path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2);
|
||||
sprintf(path, "%s%c%s", DataDir, SEP_CHAR, relname);
|
||||
}
|
||||
else if (dbid == MyDatabaseId)
|
||||
{
|
||||
/* XXX why is this inconsistent with relpath() ? */
|
||||
path = (char *) palloc(strlen(DatabasePath) + sizeof(NameData) + 2);
|
||||
sprintf(path, "%s%c%s", DatabasePath, SEP_CHAR, relname);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* this is work around only !!! */
|
||||
char dbpathtmp[MAXPGPATH];
|
||||
Oid id;
|
||||
char *dbpath;
|
||||
|
||||
GetRawDatabaseInfo(dbname, &id, dbpathtmp);
|
||||
|
||||
if (id != dbid)
|
||||
elog(FATAL, "relpath_blind: oid of db %s is not %u",
|
||||
dbname, dbid);
|
||||
dbpath = ExpandDatabasePath(dbpathtmp);
|
||||
if (dbpath == NULL)
|
||||
elog(FATAL, "relpath_blind: can't expand path for db %s",
|
||||
dbname);
|
||||
path = (char *) palloc(strlen(dbpath) + sizeof(NameData) + 2);
|
||||
sprintf(path, "%s%c%s", dbpath, SEP_CHAR, relname);
|
||||
pfree(dbpath);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsSystemRelationName
|
||||
* True iff name is the name of a system catalog relation.
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.32 2000/01/26 05:56:50 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.33 2000/04/09 04:43:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -65,9 +65,11 @@ long *NWaitIOBackendP;
|
||||
extern IpcSemaphoreId WaitIOSemId;
|
||||
|
||||
long *PrivateRefCount; /* also used in freelist.c */
|
||||
bits8 *BufferLocks; /* */
|
||||
long *CommitInfoNeedsSave;/* to write buffers where we have filled
|
||||
* in t_infomask */
|
||||
bits8 *BufferLocks; /* flag bits showing locks I have set */
|
||||
BufferTag *BufferTagLastDirtied; /* tag buffer had when last dirtied by me */
|
||||
BufferBlindId *BufferBlindLastDirtied; /* and its BlindId too */
|
||||
bool *BufferDirtiedByMe; /* T if buf has been dirtied in cur xact */
|
||||
|
||||
|
||||
/*
|
||||
* Data Structures:
|
||||
@ -247,7 +249,9 @@ InitBufferPool(IPCKey key)
|
||||
#endif
|
||||
PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
|
||||
BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8));
|
||||
CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long));
|
||||
BufferTagLastDirtied = (BufferTag *) calloc(NBuffers, sizeof(BufferTag));
|
||||
BufferBlindLastDirtied = (BufferBlindId *) calloc(NBuffers, sizeof(BufferBlindId));
|
||||
BufferDirtiedByMe = (bool *) calloc(NBuffers, sizeof(bool));
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.77 2000/03/31 02:43:31 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.78 2000/04/09 04:43:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -94,8 +94,10 @@ static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
|
||||
bool bufferLockHeld);
|
||||
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
|
||||
bool *foundPtr, bool bufferLockHeld);
|
||||
static void SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr);
|
||||
static void ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr);
|
||||
static void BufferSync(void);
|
||||
static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld);
|
||||
static int BufferReplace(BufferDesc *bufHdr);
|
||||
void PrintBufferDescs(void);
|
||||
|
||||
/* ---------------------------------------------------
|
||||
@ -176,7 +178,7 @@ is_userbuffer(Buffer buffer)
|
||||
{
|
||||
BufferDesc *buf = &BufferDescriptors[buffer - 1];
|
||||
|
||||
if (IsSystemRelationName(buf->sb_relname))
|
||||
if (IsSystemRelationName(buf->blind.relname))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -199,7 +201,7 @@ ReadBuffer_Debug(char *file,
|
||||
|
||||
fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
buffer, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
return buffer;
|
||||
@ -390,7 +392,6 @@ BufferAlloc(Relation reln,
|
||||
* If there's no IO for the buffer and the buffer
|
||||
* is BROKEN, it should be read again. So start a
|
||||
* new buffer IO here.
|
||||
|
||||
*
|
||||
* weird race condition:
|
||||
*
|
||||
@ -465,33 +466,24 @@ BufferAlloc(Relation reln,
|
||||
* in WaitIO until we're done.
|
||||
*/
|
||||
inProgress = TRUE;
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
|
||||
/*
|
||||
* All code paths that acquire this lock pin the buffer first;
|
||||
* since no one had it pinned (it just came off the free
|
||||
* list), no one else can have this lock.
|
||||
*/
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
StartBufferIO(buf, false);
|
||||
|
||||
/*
|
||||
* Write the buffer out, being careful to release BufMgrLock
|
||||
* before starting the I/O.
|
||||
*
|
||||
* This #ifndef is here because a few extra semops REALLY kill
|
||||
* you on machines that don't have spinlocks. If you don't
|
||||
* operate with much concurrency, well...
|
||||
*/
|
||||
smok = BufferReplace(buf, true);
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinAcquire(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
smok = BufferReplace(buf);
|
||||
|
||||
if (smok == FALSE)
|
||||
{
|
||||
elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s",
|
||||
buf->tag.blockNum, buf->sb_dbname, buf->sb_relname);
|
||||
buf->tag.blockNum, buf->blind.dbname, buf->blind.relname);
|
||||
inProgress = FALSE;
|
||||
buf->flags |= BM_IO_ERROR;
|
||||
buf->flags &= ~BM_IO_IN_PROGRESS;
|
||||
@ -516,7 +508,7 @@ BufferAlloc(Relation reln,
|
||||
if (buf->flags & BM_JUST_DIRTIED)
|
||||
{
|
||||
elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing",
|
||||
buf->tag.blockNum, buf->sb_relname);
|
||||
buf->tag.blockNum, buf->blind.relname);
|
||||
}
|
||||
else
|
||||
buf->flags &= ~BM_DIRTY;
|
||||
@ -562,6 +554,7 @@ BufferAlloc(Relation reln,
|
||||
*/
|
||||
if (buf != NULL)
|
||||
{
|
||||
buf->flags &= ~BM_IO_IN_PROGRESS;
|
||||
TerminateBufferIO(buf);
|
||||
/* give up the buffer since we don't need it any more */
|
||||
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
|
||||
@ -572,7 +565,6 @@ BufferAlloc(Relation reln,
|
||||
AddBufferToFreelist(buf);
|
||||
buf->flags |= BM_FREE;
|
||||
}
|
||||
buf->flags &= ~BM_IO_IN_PROGRESS;
|
||||
}
|
||||
|
||||
PinBuffer(buf2);
|
||||
@ -619,8 +611,8 @@ BufferAlloc(Relation reln,
|
||||
}
|
||||
|
||||
/* record the database name and relation name for this buffer */
|
||||
strcpy(buf->sb_relname, RelationGetPhysicalRelationName(reln));
|
||||
strcpy(buf->sb_dbname, DatabaseName);
|
||||
strcpy(buf->blind.dbname, DatabaseName);
|
||||
strcpy(buf->blind.relname, RelationGetPhysicalRelationName(reln));
|
||||
|
||||
INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
|
||||
if (!BufTableInsert(buf))
|
||||
@ -683,9 +675,9 @@ WriteBuffer(Buffer buffer)
|
||||
SpinAcquire(BufMgrLock);
|
||||
Assert(bufHdr->refcount > 0);
|
||||
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
|
||||
SetBufferDirtiedByMe(buffer, bufHdr);
|
||||
UnpinBuffer(bufHdr);
|
||||
SpinRelease(BufMgrLock);
|
||||
CommitInfoNeedsSave[buffer - 1] = 0;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
@ -702,7 +694,7 @@ WriteBuffer_Debug(char *file, int line, Buffer buffer)
|
||||
buf = &BufferDescriptors[buffer - 1];
|
||||
fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
buffer, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
}
|
||||
@ -767,8 +759,9 @@ DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest)
|
||||
*
|
||||
* 'buffer' is known to be dirty/pinned, so there should not be a
|
||||
* problem reading the BufferDesc members without the BufMgrLock
|
||||
* (nobody should be able to change tags, flags, etc. out from under
|
||||
* us). Unpin if 'release' is TRUE.
|
||||
* (nobody should be able to change tags out from under us).
|
||||
*
|
||||
* Unpin if 'release' is TRUE.
|
||||
*/
|
||||
int
|
||||
FlushBuffer(Buffer buffer, bool release)
|
||||
@ -784,6 +777,8 @@ FlushBuffer(Buffer buffer, bool release)
|
||||
if (BAD_BUFFER_ID(buffer))
|
||||
return STATUS_ERROR;
|
||||
|
||||
Assert(PrivateRefCount[buffer - 1] > 0); /* else caller didn't pin */
|
||||
|
||||
bufHdr = &BufferDescriptors[buffer - 1];
|
||||
bufdb = bufHdr->tag.relId.dbId;
|
||||
|
||||
@ -809,7 +804,7 @@ FlushBuffer(Buffer buffer, bool release)
|
||||
if (status == SM_FAIL)
|
||||
{
|
||||
elog(ERROR, "FlushBuffer: cannot flush block %u of the relation %s",
|
||||
bufHdr->tag.blockNum, bufHdr->sb_relname);
|
||||
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
||||
return STATUS_ERROR;
|
||||
}
|
||||
BufferFlushCount++;
|
||||
@ -820,19 +815,21 @@ FlushBuffer(Buffer buffer, bool release)
|
||||
|
||||
/*
|
||||
* If this buffer was marked by someone as DIRTY while we were
|
||||
* flushing it out we must not clear DIRTY flag - vadim 01/17/97
|
||||
* flushing it out we must not clear shared DIRTY flag - vadim 01/17/97
|
||||
*
|
||||
* ... but we can clear BufferDirtiedByMe anyway - tgl 3/31/00
|
||||
*/
|
||||
if (bufHdr->flags & BM_JUST_DIRTIED)
|
||||
{
|
||||
elog(NOTICE, "FlushBuffer: content of block %u (%s) changed while flushing",
|
||||
bufHdr->tag.blockNum, bufHdr->sb_relname);
|
||||
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
||||
}
|
||||
else
|
||||
bufHdr->flags &= ~BM_DIRTY;
|
||||
ClearBufferDirtiedByMe(buffer, bufHdr);
|
||||
if (release)
|
||||
UnpinBuffer(bufHdr);
|
||||
SpinRelease(BufMgrLock);
|
||||
CommitInfoNeedsSave[buffer - 1] = 0;
|
||||
|
||||
return STATUS_OK;
|
||||
}
|
||||
@ -857,9 +854,10 @@ WriteNoReleaseBuffer(Buffer buffer)
|
||||
SharedBufferChanged = true;
|
||||
|
||||
SpinAcquire(BufMgrLock);
|
||||
Assert(bufHdr->refcount > 0);
|
||||
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
|
||||
SetBufferDirtiedByMe(buffer, bufHdr);
|
||||
SpinRelease(BufMgrLock);
|
||||
CommitInfoNeedsSave[buffer - 1] = 0;
|
||||
|
||||
return STATUS_OK;
|
||||
}
|
||||
@ -901,11 +899,6 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
AddBufferToFreelist(bufHdr);
|
||||
bufHdr->flags |= BM_FREE;
|
||||
}
|
||||
if (CommitInfoNeedsSave[buffer - 1])
|
||||
{
|
||||
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
|
||||
CommitInfoNeedsSave[buffer - 1] = 0;
|
||||
}
|
||||
retbuf = ReadBufferWithBufferLock(relation, blockNum, true);
|
||||
return retbuf;
|
||||
}
|
||||
@ -915,13 +908,120 @@ ReleaseAndReadBuffer(Buffer buffer,
|
||||
return ReadBuffer(relation, blockNum);
|
||||
}
|
||||
|
||||
/*
|
||||
* SetBufferDirtiedByMe -- mark a shared buffer as being dirtied by this xact
|
||||
*
|
||||
* This flag essentially remembers that we need to write and fsync this buffer
|
||||
* before we can commit the transaction. The write might end up getting done
|
||||
* by another backend, but we must do the fsync ourselves (else we could
|
||||
* commit before the data actually reaches disk). We do not issue fsync
|
||||
* instantly upon write; the storage manager keeps track of which files need
|
||||
* to be fsync'd before commit can occur. A key aspect of this data structure
|
||||
* is that we will be able to notify the storage manager that an fsync is
|
||||
* needed even after another backend has done the physical write and replaced
|
||||
* the buffer contents with something else!
|
||||
*
|
||||
* NB: we must be holding the bufmgr lock at entry, and the buffer must be
|
||||
* pinned so that no other backend can take it away from us.
|
||||
*/
|
||||
static void
|
||||
SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr)
|
||||
{
|
||||
BufferTag *tagLastDirtied = & BufferTagLastDirtied[buffer - 1];
|
||||
Relation reln;
|
||||
int status;
|
||||
|
||||
/*
|
||||
* If the flag is already set, check to see whether the buffertag is
|
||||
* the same. If not, some other backend already wrote the buffer data
|
||||
* that we dirtied. We must tell the storage manager to make an fsync
|
||||
* pending on that file before we can overwrite the old tag value.
|
||||
*/
|
||||
if (BufferDirtiedByMe[buffer - 1])
|
||||
{
|
||||
if (bufHdr->tag.relId.dbId == tagLastDirtied->relId.dbId &&
|
||||
bufHdr->tag.relId.relId == tagLastDirtied->relId.relId &&
|
||||
bufHdr->tag.blockNum == tagLastDirtied->blockNum)
|
||||
return; /* Same tag already dirtied, so no work */
|
||||
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinRelease(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
|
||||
reln = RelationIdCacheGetRelation(tagLastDirtied->relId.relId);
|
||||
|
||||
if (reln == (Relation) NULL)
|
||||
{
|
||||
status = smgrblindmarkdirty(DEFAULT_SMGR,
|
||||
BufferBlindLastDirtied[buffer - 1].dbname,
|
||||
BufferBlindLastDirtied[buffer - 1].relname,
|
||||
tagLastDirtied->relId.dbId,
|
||||
tagLastDirtied->relId.relId,
|
||||
tagLastDirtied->blockNum);
|
||||
}
|
||||
else
|
||||
{
|
||||
status = smgrmarkdirty(DEFAULT_SMGR, reln,
|
||||
tagLastDirtied->blockNum);
|
||||
/* drop relcache refcnt incremented by RelationIdCacheGetRelation */
|
||||
RelationDecrementReferenceCount(reln);
|
||||
}
|
||||
if (status == SM_FAIL)
|
||||
{
|
||||
elog(ERROR, "SetBufferDirtiedByMe: cannot mark %u for %s",
|
||||
tagLastDirtied->blockNum,
|
||||
BufferBlindLastDirtied[buffer - 1].relname);
|
||||
}
|
||||
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinAcquire(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
|
||||
}
|
||||
|
||||
*tagLastDirtied = bufHdr->tag;
|
||||
BufferBlindLastDirtied[buffer - 1] = bufHdr->blind;
|
||||
BufferDirtiedByMe[buffer - 1] = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* ClearBufferDirtiedByMe -- mark a shared buffer as no longer needing fsync
|
||||
*
|
||||
* If we write out a buffer ourselves, then the storage manager will set its
|
||||
* needs-fsync flag for that file automatically, and so we can clear our own
|
||||
* flag that says it needs to be done later.
|
||||
*
|
||||
* NB: we must be holding the bufmgr lock at entry.
|
||||
*/
|
||||
static void
|
||||
ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr)
|
||||
{
|
||||
BufferTag *tagLastDirtied = & BufferTagLastDirtied[buffer - 1];
|
||||
|
||||
/*
|
||||
* Do *not* clear the flag if it refers to some other buffertag than
|
||||
* the data we just wrote. This is unlikely, but possible if some
|
||||
* other backend replaced the buffer contents since we set our flag.
|
||||
*/
|
||||
if (bufHdr->tag.relId.dbId == tagLastDirtied->relId.dbId &&
|
||||
bufHdr->tag.relId.relId == tagLastDirtied->relId.relId &&
|
||||
bufHdr->tag.blockNum == tagLastDirtied->blockNum)
|
||||
{
|
||||
BufferDirtiedByMe[buffer - 1] = false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* BufferSync -- Flush all dirty buffers in the pool.
|
||||
*
|
||||
* This is called at transaction commit time. It does the wrong thing,
|
||||
* right now. We should flush only our own changes to stable storage,
|
||||
* and we should obey the lock protocol on the buffer manager metadata
|
||||
* as we do it. Also, we need to be sure that no other transaction is
|
||||
* This is called at transaction commit time. We find all buffers
|
||||
* that have been dirtied by the current xact and flush them to disk.
|
||||
* We do *not* flush dirty buffers that have been dirtied by other xacts.
|
||||
* (This is a substantial change from pre-7.0 behavior.)
|
||||
*
|
||||
* OLD COMMENTS (do these still apply?)
|
||||
*
|
||||
* Also, we need to be sure that no other transaction is
|
||||
* modifying the page as we flush it. This is only a problem for objects
|
||||
* that use a non-two-phase locking protocol, like btree indices. For
|
||||
* those objects, we would like to set a write lock for the duration of
|
||||
@ -936,21 +1036,49 @@ static void
|
||||
BufferSync()
|
||||
{
|
||||
int i;
|
||||
Oid bufdb;
|
||||
Oid bufrel;
|
||||
Relation reln;
|
||||
BufferDesc *bufHdr;
|
||||
int status;
|
||||
Relation reln;
|
||||
bool didwrite;
|
||||
|
||||
SpinAcquire(BufMgrLock);
|
||||
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
|
||||
{
|
||||
/* Ignore buffers that were not dirtied by me */
|
||||
if (! BufferDirtiedByMe[i])
|
||||
continue;
|
||||
|
||||
SpinAcquire(BufMgrLock);
|
||||
|
||||
/*
|
||||
* We only need to write if the buffer is still dirty and still
|
||||
* contains the same disk page that it contained when we dirtied it.
|
||||
* Otherwise, someone else has already written our changes for us,
|
||||
* and we need only fsync.
|
||||
*
|
||||
* (NOTE: it's still possible to do an unnecessary write, if other
|
||||
* xacts have written and then re-dirtied the page since our last
|
||||
* change to it. But that should be pretty uncommon, and there's
|
||||
* no easy way to detect it anyway.)
|
||||
*/
|
||||
reln = NULL;
|
||||
didwrite = false;
|
||||
if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
|
||||
{
|
||||
Oid bufdb;
|
||||
Oid bufrel;
|
||||
|
||||
bufdb = bufHdr->tag.relId.dbId;
|
||||
bufrel = bufHdr->tag.relId.relId;
|
||||
if (bufdb == MyDatabaseId || bufdb == (Oid) 0)
|
||||
if (bufdb == BufferTagLastDirtied[i].relId.dbId &&
|
||||
bufrel == BufferTagLastDirtied[i].relId.relId &&
|
||||
bufHdr->tag.blockNum == BufferTagLastDirtied[i].blockNum)
|
||||
{
|
||||
/*
|
||||
* Try to find relation for buf. This could fail, if the
|
||||
* rel has been flushed from the relcache since we dirtied
|
||||
* the page. That should be uncommon, so paying the extra
|
||||
* cost of a blind write when it happens seems OK.
|
||||
*/
|
||||
reln = RelationIdCacheGetRelation(bufrel);
|
||||
|
||||
/*
|
||||
@ -970,14 +1098,11 @@ BufferSync()
|
||||
if (bufHdr->flags & BM_IO_ERROR)
|
||||
{
|
||||
elog(ERROR, "BufferSync: write error %u for %s",
|
||||
bufHdr->tag.blockNum, bufHdr->sb_relname);
|
||||
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
||||
}
|
||||
/* drop refcnt from RelationIdCacheGetRelation */
|
||||
if (reln != (Relation) NULL)
|
||||
RelationDecrementReferenceCount(reln);
|
||||
continue;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/*
|
||||
* To check if block content changed while flushing (see
|
||||
* below). - vadim 01/17/97
|
||||
@ -987,19 +1112,20 @@ BufferSync()
|
||||
StartBufferIO(bufHdr, false); /* output IO start */
|
||||
|
||||
/*
|
||||
* If we didn't have the reldesc in our local cache, flush
|
||||
* If we didn't have the reldesc in our local cache, write
|
||||
* this page out using the 'blind write' storage manager
|
||||
* routine. If we did find it, use the standard
|
||||
* interface.
|
||||
*/
|
||||
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinRelease(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
if (reln == (Relation) NULL)
|
||||
{
|
||||
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->sb_dbname,
|
||||
bufHdr->sb_relname, bufdb, bufrel,
|
||||
status = smgrblindwrt(DEFAULT_SMGR,
|
||||
bufHdr->blind.dbname,
|
||||
bufHdr->blind.relname,
|
||||
bufdb, bufrel,
|
||||
bufHdr->tag.blockNum,
|
||||
(char *) MAKE_PTR(bufHdr->data));
|
||||
}
|
||||
@ -1018,26 +1144,68 @@ BufferSync()
|
||||
{
|
||||
bufHdr->flags |= BM_IO_ERROR;
|
||||
elog(ERROR, "BufferSync: cannot write %u for %s",
|
||||
bufHdr->tag.blockNum, bufHdr->sb_relname);
|
||||
bufHdr->tag.blockNum, bufHdr->blind.relname);
|
||||
}
|
||||
bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
|
||||
TerminateBufferIO(bufHdr); /* Sync IO finished */
|
||||
BufferFlushCount++;
|
||||
didwrite = true;
|
||||
|
||||
/*
|
||||
* If this buffer was marked by someone as DIRTY while we
|
||||
* were flushing it out we must not clear DIRTY flag -
|
||||
* vadim 01/17/97
|
||||
*
|
||||
* but it is OK to clear BufferDirtiedByMe - tgl 3/31/00
|
||||
*/
|
||||
if (!(bufHdr->flags & BM_JUST_DIRTIED))
|
||||
bufHdr->flags &= ~BM_DIRTY;
|
||||
/* drop refcnt from RelationIdCacheGetRelation */
|
||||
}
|
||||
|
||||
/* drop refcnt obtained by RelationIdCacheGetRelation */
|
||||
if (reln != (Relation) NULL)
|
||||
RelationDecrementReferenceCount(reln);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we did not write the buffer (because someone else did),
|
||||
* we must still fsync the file containing it, to ensure that the
|
||||
* write is down to disk before we commit.
|
||||
*/
|
||||
if (! didwrite)
|
||||
{
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinRelease(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
|
||||
reln = RelationIdCacheGetRelation(BufferTagLastDirtied[i].relId.relId);
|
||||
if (reln == (Relation) NULL)
|
||||
{
|
||||
status = smgrblindmarkdirty(DEFAULT_SMGR,
|
||||
BufferBlindLastDirtied[i].dbname,
|
||||
BufferBlindLastDirtied[i].relname,
|
||||
BufferTagLastDirtied[i].relId.dbId,
|
||||
BufferTagLastDirtied[i].relId.relId,
|
||||
BufferTagLastDirtied[i].blockNum);
|
||||
}
|
||||
else
|
||||
{
|
||||
status = smgrmarkdirty(DEFAULT_SMGR, reln,
|
||||
BufferTagLastDirtied[i].blockNum);
|
||||
/* drop relcache refcnt incremented by RelationIdCacheGetRelation */
|
||||
RelationDecrementReferenceCount(reln);
|
||||
|
||||
}
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinAcquire(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
}
|
||||
|
||||
BufferDirtiedByMe[i] = false;
|
||||
|
||||
SpinRelease(BufMgrLock);
|
||||
}
|
||||
|
||||
LocalBufferSync();
|
||||
}
|
||||
@ -1166,13 +1334,19 @@ ResetBufferUsage()
|
||||
/* ----------------------------------------------
|
||||
* ResetBufferPool
|
||||
*
|
||||
* this routine is supposed to be called when a transaction aborts.
|
||||
* This routine is supposed to be called when a transaction aborts.
|
||||
* it will release all the buffer pins held by the transaction.
|
||||
* Currently, we also call it during commit if BufferPoolCheckLeak
|
||||
* detected a problem --- in that case, isCommit is TRUE, and we
|
||||
* only clean up buffer pin counts.
|
||||
*
|
||||
* During abort, we also forget any pending fsync requests. Dirtied buffers
|
||||
* will still get written, eventually, but there will be no fsync for them.
|
||||
*
|
||||
* ----------------------------------------------
|
||||
*/
|
||||
void
|
||||
ResetBufferPool()
|
||||
ResetBufferPool(bool isCommit)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -1193,10 +1367,15 @@ ResetBufferPool()
|
||||
SpinRelease(BufMgrLock);
|
||||
}
|
||||
PrivateRefCount[i] = 0;
|
||||
CommitInfoNeedsSave[i] = 0;
|
||||
|
||||
if (! isCommit)
|
||||
BufferDirtiedByMe[i] = false;
|
||||
}
|
||||
|
||||
ResetLocalBufferPool();
|
||||
|
||||
if (! isCommit)
|
||||
smgrabort();
|
||||
}
|
||||
|
||||
/* -----------------------------------------------
|
||||
@ -1222,7 +1401,7 @@ BufferPoolCheckLeak()
|
||||
"Buffer Leak: [%03d] (freeNext=%ld, freePrev=%ld, \
|
||||
relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)",
|
||||
i - 1, buf->freeNext, buf->freePrev,
|
||||
buf->sb_relname, buf->tag.blockNum, buf->flags,
|
||||
buf->blind.relname, buf->tag.blockNum, buf->flags,
|
||||
buf->refcount, PrivateRefCount[i - 1]);
|
||||
result = 1;
|
||||
}
|
||||
@ -1306,25 +1485,25 @@ BufferGetRelation(Buffer buffer)
|
||||
/*
|
||||
* BufferReplace
|
||||
*
|
||||
* Flush the buffer corresponding to 'bufHdr'
|
||||
* Write out the buffer corresponding to 'bufHdr'
|
||||
*
|
||||
* This routine used to flush the data to disk (ie, force immediate fsync)
|
||||
* but that's no longer necessary because BufferSync is smarter than before.
|
||||
*
|
||||
* BufMgrLock must be held at entry, and the buffer must be pinned.
|
||||
*/
|
||||
static int
|
||||
BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld)
|
||||
BufferReplace(BufferDesc *bufHdr)
|
||||
{
|
||||
Relation reln;
|
||||
Oid bufdb,
|
||||
bufrel;
|
||||
int status;
|
||||
|
||||
if (!bufferLockHeld)
|
||||
SpinAcquire(BufMgrLock);
|
||||
|
||||
/*
|
||||
* first try to find the reldesc in the cache, if no luck, don't
|
||||
* bother to build the reldesc from scratch, just do a blind write.
|
||||
*/
|
||||
|
||||
bufdb = bufHdr->tag.relId.dbId;
|
||||
bufrel = bufHdr->tag.relId.relId;
|
||||
|
||||
@ -1336,22 +1515,27 @@ BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld)
|
||||
/* To check if block content changed while flushing. - vadim 01/17/97 */
|
||||
bufHdr->flags &= ~BM_JUST_DIRTIED;
|
||||
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinRelease(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
|
||||
if (reln != (Relation) NULL)
|
||||
{
|
||||
status = smgrflush(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
|
||||
status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
|
||||
(char *) MAKE_PTR(bufHdr->data));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* blind write always flushes */
|
||||
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->sb_dbname,
|
||||
bufHdr->sb_relname, bufdb, bufrel,
|
||||
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->blind.dbname,
|
||||
bufHdr->blind.relname, bufdb, bufrel,
|
||||
bufHdr->tag.blockNum,
|
||||
(char *) MAKE_PTR(bufHdr->data));
|
||||
}
|
||||
|
||||
#ifndef OPTIMIZE_SINGLE
|
||||
SpinAcquire(BufMgrLock);
|
||||
#endif /* OPTIMIZE_SINGLE */
|
||||
|
||||
/* drop relcache refcnt incremented by RelationIdCacheGetRelation */
|
||||
if (reln != (Relation) NULL)
|
||||
RelationDecrementReferenceCount(reln);
|
||||
@ -1359,6 +1543,11 @@ BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld)
|
||||
if (status == SM_FAIL)
|
||||
return FALSE;
|
||||
|
||||
/* If we had marked this buffer as needing to be fsync'd, we can forget
|
||||
* about that, because it's now the storage manager's responsibility.
|
||||
*/
|
||||
ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
|
||||
|
||||
BufferFlushCount++;
|
||||
|
||||
return TRUE;
|
||||
@ -1440,7 +1629,7 @@ ReleaseRelationBuffers(Relation rel)
|
||||
}
|
||||
/* Now we can do what we came for */
|
||||
buf->flags &= ~ ( BM_DIRTY | BM_JUST_DIRTIED);
|
||||
CommitInfoNeedsSave[i - 1] = 0;
|
||||
ClearBufferDirtiedByMe(i, buf);
|
||||
/*
|
||||
* Release any refcount we may have.
|
||||
*
|
||||
@ -1502,6 +1691,7 @@ DropBuffers(Oid dbid)
|
||||
}
|
||||
/* Now we can do what we came for */
|
||||
buf->flags &= ~ ( BM_DIRTY | BM_JUST_DIRTIED);
|
||||
ClearBufferDirtiedByMe(i, buf);
|
||||
/*
|
||||
* The thing should be free, if caller has checked that
|
||||
* no backends are running in that database.
|
||||
@ -1533,7 +1723,7 @@ PrintBufferDescs()
|
||||
elog(DEBUG, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \
|
||||
blockNum=%d, flags=0x%x, refcount=%d %ld)",
|
||||
i, buf->freeNext, buf->freePrev,
|
||||
buf->sb_relname, buf->tag.blockNum, buf->flags,
|
||||
buf->blind.relname, buf->tag.blockNum, buf->flags,
|
||||
buf->refcount, PrivateRefCount[i]);
|
||||
}
|
||||
SpinRelease(BufMgrLock);
|
||||
@ -1544,7 +1734,7 @@ blockNum=%d, flags=0x%x, refcount=%d %ld)",
|
||||
for (i = 0; i < NBuffers; ++i, ++buf)
|
||||
{
|
||||
printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n",
|
||||
i, buf->sb_relname, buf->tag.blockNum,
|
||||
i, buf->blind.relname, buf->tag.blockNum,
|
||||
buf->flags, buf->refcount, PrivateRefCount[i]);
|
||||
}
|
||||
}
|
||||
@ -1562,7 +1752,7 @@ PrintPinnedBufs()
|
||||
if (PrivateRefCount[i] > 0)
|
||||
elog(NOTICE, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \
|
||||
blockNum=%d, flags=0x%x, refcount=%d %ld)\n",
|
||||
i, buf->freeNext, buf->freePrev, buf->sb_relname,
|
||||
i, buf->freeNext, buf->freePrev, buf->blind.relname,
|
||||
buf->tag.blockNum, buf->flags,
|
||||
buf->refcount, PrivateRefCount[i]);
|
||||
}
|
||||
@ -1601,33 +1791,42 @@ BufferPoolBlowaway()
|
||||
* FlushRelationBuffers
|
||||
*
|
||||
* This function removes from the buffer pool all pages of a relation
|
||||
* that have blocknumber >= specified block. If doFlush is true,
|
||||
* dirty buffers are written out --- otherwise it's an error for any
|
||||
* of the buffers to be dirty.
|
||||
* that have blocknumber >= specified block. Pages that are dirty are
|
||||
* written out first. If expectDirty is false, a notice is emitted
|
||||
* warning of dirty buffers, but we proceed anyway. An error code is
|
||||
* returned if we fail to dump a dirty buffer or if we find one of
|
||||
* the target pages is pinned into the cache.
|
||||
*
|
||||
* This is used by VACUUM before truncating the relation to the given
|
||||
* number of blocks. For VACUUM, we pass doFlush = false since it would
|
||||
* mean a bug in VACUUM if any of the unwanted pages were still dirty.
|
||||
* (TRUNCATE TABLE also uses it in the same way.)
|
||||
* number of blocks. For VACUUM, we pass expectDirty = false since it
|
||||
* could mean a bug in VACUUM if any of the unwanted pages were still
|
||||
* dirty. (TRUNCATE TABLE also uses it in the same way.)
|
||||
*
|
||||
* This is also used by RENAME TABLE (with block = 0 and doFlush = true)
|
||||
* This is also used by RENAME TABLE (with block=0 and expectDirty=true)
|
||||
* to clear out the buffer cache before renaming the physical files of
|
||||
* a relation. Without that, some other backend might try to do a
|
||||
* blind write of a buffer page (relying on the sb_relname of the buffer)
|
||||
* blind write of a buffer page (relying on the BlindId of the buffer)
|
||||
* and fail because it's not got the right filename anymore.
|
||||
*
|
||||
* In both cases, the caller should be holding AccessExclusiveLock on
|
||||
* the target relation to ensure that no other backend is busy reading
|
||||
* more blocks of the relation...
|
||||
* more blocks of the relation.
|
||||
*
|
||||
* Returns: 0 - Ok, -1 - DIRTY, -2 - PINNED
|
||||
* Formerly, we considered it an error condition if we found unexpectedly
|
||||
* dirty buffers. However, since BufferSync no longer forces out all
|
||||
* dirty buffers at every xact commit, it's possible for dirty buffers
|
||||
* to still be present in the cache due to failure of an earlier
|
||||
* transaction. So, downgrade the error to a mere notice. Maybe we
|
||||
* shouldn't even emit a notice...
|
||||
*
|
||||
* Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED
|
||||
*
|
||||
* XXX currently it sequentially searches the buffer pool, should be
|
||||
* changed to more clever ways of searching.
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
int
|
||||
FlushRelationBuffers(Relation rel, BlockNumber block, bool doFlush)
|
||||
FlushRelationBuffers(Relation rel, BlockNumber block, bool expectDirty)
|
||||
{
|
||||
int i;
|
||||
BufferDesc *buf;
|
||||
@ -1642,8 +1841,10 @@ FlushRelationBuffers(Relation rel, BlockNumber block, bool doFlush)
|
||||
{
|
||||
if (buf->flags & BM_DIRTY)
|
||||
{
|
||||
if (doFlush)
|
||||
{
|
||||
if (! expectDirty)
|
||||
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty",
|
||||
RelationGetRelationName(rel),
|
||||
block, buf->tag.blockNum);
|
||||
if (FlushBuffer(-i-1, false) != STATUS_OK)
|
||||
{
|
||||
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it",
|
||||
@ -1652,14 +1853,6 @@ FlushRelationBuffers(Relation rel, BlockNumber block, bool doFlush)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty",
|
||||
RelationGetRelationName(rel),
|
||||
block, buf->tag.blockNum);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (LocalRefCount[i] > 0)
|
||||
{
|
||||
elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)",
|
||||
@ -1676,39 +1869,42 @@ FlushRelationBuffers(Relation rel, BlockNumber block, bool doFlush)
|
||||
SpinAcquire(BufMgrLock);
|
||||
for (i = 0; i < NBuffers; i++)
|
||||
{
|
||||
recheck:
|
||||
buf = &BufferDescriptors[i];
|
||||
if (buf->tag.relId.dbId == MyDatabaseId &&
|
||||
buf->tag.relId.relId == RelationGetRelid(rel) &&
|
||||
if (buf->tag.relId.relId == RelationGetRelid(rel) &&
|
||||
(buf->tag.relId.dbId == MyDatabaseId ||
|
||||
buf->tag.relId.dbId == (Oid) NULL) &&
|
||||
buf->tag.blockNum >= block)
|
||||
{
|
||||
if (buf->flags & BM_DIRTY)
|
||||
{
|
||||
if (doFlush)
|
||||
{
|
||||
PinBuffer(buf);
|
||||
SpinRelease(BufMgrLock);
|
||||
if (FlushBuffer(i+1, false) != STATUS_OK)
|
||||
if (! expectDirty)
|
||||
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d)",
|
||||
RelationGetRelationName(rel), block,
|
||||
buf->tag.blockNum,
|
||||
PrivateRefCount[i], buf->refcount);
|
||||
if (FlushBuffer(i+1, true) != STATUS_OK)
|
||||
{
|
||||
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d), could not flush it",
|
||||
buf->sb_relname, block, buf->tag.blockNum,
|
||||
RelationGetRelationName(rel), block,
|
||||
buf->tag.blockNum,
|
||||
PrivateRefCount[i], buf->refcount);
|
||||
return -1;
|
||||
}
|
||||
SpinAcquire(BufMgrLock);
|
||||
}
|
||||
else
|
||||
{
|
||||
SpinRelease(BufMgrLock);
|
||||
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d)",
|
||||
buf->sb_relname, block, buf->tag.blockNum,
|
||||
PrivateRefCount[i], buf->refcount);
|
||||
return -1;
|
||||
}
|
||||
/* Buffer could already be reassigned, so must recheck
|
||||
* whether it still belongs to rel before freeing it!
|
||||
*/
|
||||
goto recheck;
|
||||
}
|
||||
if (!(buf->flags & BM_FREE))
|
||||
{
|
||||
SpinRelease(BufMgrLock);
|
||||
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)",
|
||||
buf->sb_relname, block, buf->tag.blockNum,
|
||||
RelationGetRelationName(rel), block,
|
||||
buf->tag.blockNum,
|
||||
PrivateRefCount[i], buf->refcount);
|
||||
return -2;
|
||||
}
|
||||
@ -1755,11 +1951,6 @@ ReleaseBuffer(Buffer buffer)
|
||||
AddBufferToFreelist(bufHdr);
|
||||
bufHdr->flags |= BM_FREE;
|
||||
}
|
||||
if (CommitInfoNeedsSave[buffer - 1])
|
||||
{
|
||||
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
|
||||
CommitInfoNeedsSave[buffer - 1] = 0;
|
||||
}
|
||||
SpinRelease(BufMgrLock);
|
||||
}
|
||||
|
||||
@ -1777,7 +1968,7 @@ IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)
|
||||
|
||||
fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
buffer, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
}
|
||||
@ -1795,7 +1986,7 @@ ReleaseBuffer_Debug(char *file, int line, Buffer buffer)
|
||||
|
||||
fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
buffer, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
}
|
||||
@ -1822,7 +2013,7 @@ ReleaseAndReadBuffer_Debug(char *file,
|
||||
|
||||
fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
buffer, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer))
|
||||
@ -1831,7 +2022,7 @@ refcount = %ld, file: %s, line: %d\n",
|
||||
|
||||
fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
b, buf->sb_relname, buf->tag.blockNum,
|
||||
b, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[b - 1], file, line);
|
||||
}
|
||||
return b;
|
||||
@ -1983,11 +2174,43 @@ _bm_die(Oid dbId, Oid relId, int blkNo, int bufNo,
|
||||
|
||||
#endif /* BMTRACE */
|
||||
|
||||
/*
|
||||
* SetBufferCommitInfoNeedsSave
|
||||
*
|
||||
* Mark a buffer dirty when we have updated tuple commit-status bits in it.
|
||||
*
|
||||
* This is similar to WriteNoReleaseBuffer, except that we do not set
|
||||
* SharedBufferChanged or BufferDirtiedByMe, because we have not made a
|
||||
* critical change that has to be flushed to disk before xact commit --- the
|
||||
* status-bit update could be redone by someone else just as easily. The
|
||||
* buffer will be marked dirty, but it will not be written to disk until
|
||||
* there is another reason to write it.
|
||||
*
|
||||
* This routine might get called many times on the same page, if we are making
|
||||
* the first scan after commit of an xact that added/deleted many tuples.
|
||||
* So, be as quick as we can if the buffer is already dirty.
|
||||
*/
|
||||
void
|
||||
SetBufferCommitInfoNeedsSave(Buffer buffer)
|
||||
{
|
||||
if (!BufferIsLocal(buffer))
|
||||
CommitInfoNeedsSave[buffer - 1]++;
|
||||
BufferDesc *bufHdr;
|
||||
|
||||
if (BufferIsLocal(buffer))
|
||||
return;
|
||||
|
||||
if (BAD_BUFFER_ID(buffer))
|
||||
return;
|
||||
|
||||
bufHdr = &BufferDescriptors[buffer - 1];
|
||||
|
||||
if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
|
||||
(BM_DIRTY | BM_JUST_DIRTIED))
|
||||
{
|
||||
SpinAcquire(BufMgrLock);
|
||||
Assert(bufHdr->refcount > 0);
|
||||
bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
|
||||
SpinRelease(BufMgrLock);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -2175,7 +2398,16 @@ static void StartBufferIO(BufferDesc *buf, bool forInput)
|
||||
Assert(!(buf->flags & BM_IO_IN_PROGRESS));
|
||||
buf->flags |= BM_IO_IN_PROGRESS;
|
||||
#ifdef HAS_TEST_AND_SET
|
||||
Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)))
|
||||
/*
|
||||
* There used to be
|
||||
*
|
||||
* Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)));
|
||||
*
|
||||
* here, but that's wrong because of the way WaitIO works: someone else
|
||||
* waiting for the I/O to complete will succeed in grabbing the lock for
|
||||
* a few instructions, and if we context-swap back to here the Assert
|
||||
* could fail. Tiny window for failure, but I've seen it happen -- tgl
|
||||
*/
|
||||
S_LOCK(&(buf->io_in_progress_lock));
|
||||
#endif /* HAS_TEST_AND_SET */
|
||||
InProgressBuf = buf;
|
||||
@ -2217,7 +2449,7 @@ static void ContinueBufferIO(BufferDesc *buf, bool forInput)
|
||||
IsForInput = forInput;
|
||||
}
|
||||
|
||||
extern void InitBufferIO(void)
|
||||
void InitBufferIO(void)
|
||||
{
|
||||
InProgressBuf = (BufferDesc *)0;
|
||||
}
|
||||
@ -2229,7 +2461,7 @@ extern void InitBufferIO(void)
|
||||
* set in case of output, this routine would kill all
|
||||
* backends and reset postmaster.
|
||||
*/
|
||||
extern void AbortBufferIO(void)
|
||||
void AbortBufferIO(void)
|
||||
{
|
||||
BufferDesc *buf = InProgressBuf;
|
||||
if (buf)
|
||||
@ -2252,8 +2484,8 @@ extern void AbortBufferIO(void)
|
||||
buf->flags |= BM_DIRTY;
|
||||
}
|
||||
buf->flags |= BM_IO_ERROR;
|
||||
TerminateBufferIO(buf);
|
||||
buf->flags &= ~BM_IO_IN_PROGRESS;
|
||||
TerminateBufferIO(buf);
|
||||
SpinRelease(BufMgrLock);
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.20 2000/01/26 05:56:52 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.21 2000/04/09 04:43:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -122,7 +122,7 @@ PinBuffer_Debug(char *file, int line, BufferDesc *buf)
|
||||
|
||||
fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
buffer, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
}
|
||||
@ -168,7 +168,7 @@ UnpinBuffer_Debug(char *file, int line, BufferDesc *buf)
|
||||
|
||||
fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \
|
||||
refcount = %ld, file: %s, line: %d\n",
|
||||
buffer, buf->sb_relname, buf->tag.blockNum,
|
||||
buffer, buf->blind.relname, buf->tag.blockNum,
|
||||
PrivateRefCount[buffer - 1], file, line);
|
||||
}
|
||||
}
|
||||
@ -304,7 +304,7 @@ PrintBufferFreeList()
|
||||
int i = (buf - BufferDescriptors);
|
||||
|
||||
printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld, nxt=%ld prv=%ld)\n",
|
||||
i, buf->sb_relname, buf->tag.blockNum,
|
||||
i, buf->blind.relname, buf->tag.blockNum,
|
||||
buf->flags, buf->refcount, PrivateRefCount[i],
|
||||
buf->freeNext, buf->freePrev);
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.54 2000/03/17 02:36:19 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.55 2000/04/09 04:43:19 tgl Exp $
|
||||
*
|
||||
* NOTES:
|
||||
*
|
||||
@ -293,7 +293,7 @@ LruDelete(File file)
|
||||
vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR);
|
||||
Assert(vfdP->seekPos != -1);
|
||||
|
||||
/* if we have written to the file, sync it */
|
||||
/* if we have written to the file, sync it before closing */
|
||||
if (vfdP->fdstate & FD_DIRTY)
|
||||
{
|
||||
returnValue = pg_fsync(vfdP->fd);
|
||||
@ -381,9 +381,6 @@ tryAgain:
|
||||
returnValue = lseek(vfdP->fd, vfdP->seekPos, SEEK_SET);
|
||||
Assert(returnValue != -1);
|
||||
}
|
||||
|
||||
/* Update state as appropriate for re-open (needed?) */
|
||||
vfdP->fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -804,7 +801,7 @@ FileWrite(File file, char *buffer, int amount)
|
||||
if (returnCode > 0)
|
||||
VfdCache[file].seekPos += returnCode;
|
||||
|
||||
/* record the write */
|
||||
/* mark the file as needing fsync */
|
||||
VfdCache[file].fdstate |= FD_DIRTY;
|
||||
|
||||
return returnCode;
|
||||
@ -873,6 +870,35 @@ FileTruncate(File file, long offset)
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/*
|
||||
* FileSync --- if a file is marked as dirty, fsync it.
|
||||
*
|
||||
* The FD_DIRTY bit is slightly misnamed: it doesn't mean that we need to
|
||||
* write the file, but that we *have* written it and need to execute an
|
||||
* fsync() to ensure the changes are down on disk before we mark the current
|
||||
* transaction committed.
|
||||
*
|
||||
* FD_DIRTY is set by FileWrite or by an explicit FileMarkDirty() call.
|
||||
* It is cleared after successfully fsync'ing the file. FileClose() will
|
||||
* fsync a dirty File that is about to be closed, since there will be no
|
||||
* other place to remember the need to fsync after the VFD is gone.
|
||||
*
|
||||
* Note that the DIRTY bit is logically associated with the actual disk file,
|
||||
* not with any particular kernel FD we might have open for it. We assume
|
||||
* that fsync will force out any dirty buffers for that file, whether or not
|
||||
* they were written through the FD being used for the fsync call --- they
|
||||
* might even have been written by some other backend!
|
||||
*
|
||||
* Note also that LruDelete currently fsyncs a dirty file that it is about
|
||||
* to close the kernel file descriptor for. The idea there is to avoid
|
||||
* having to re-open the kernel descriptor later. But it's not real clear
|
||||
* that this is a performance win; we could end up fsyncing the same file
|
||||
* multiple times in a transaction, which would probably cost more time
|
||||
* than is saved by avoiding an open() call. This should be studied.
|
||||
*
|
||||
* This routine used to think it could skip the fsync if the file is
|
||||
* physically closed, but that is now WRONG; see comments for FileMarkDirty.
|
||||
*/
|
||||
int
|
||||
FileSync(File file)
|
||||
{
|
||||
@ -880,23 +906,66 @@ FileSync(File file)
|
||||
|
||||
Assert(FileIsValid(file));
|
||||
|
||||
/*
|
||||
* If the file isn't open, then we don't need to sync it; we always
|
||||
* sync files when we close them. Also, if we haven't done any writes
|
||||
* that we haven't already synced, we can ignore the request.
|
||||
*/
|
||||
|
||||
if (VfdCache[file].fd < 0 || !(VfdCache[file].fdstate & FD_DIRTY))
|
||||
if (!(VfdCache[file].fdstate & FD_DIRTY))
|
||||
{
|
||||
/* Need not sync if file is not dirty. */
|
||||
returnCode = 0;
|
||||
}
|
||||
else if (disableFsync)
|
||||
{
|
||||
/* Don't force the file open if pg_fsync isn't gonna sync it. */
|
||||
returnCode = 0;
|
||||
VfdCache[file].fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We don't use FileAccess() because we don't want to force the
|
||||
* file to the front of the LRU ring; we aren't expecting to
|
||||
* access it again soon.
|
||||
*/
|
||||
if (FileIsNotOpen(file))
|
||||
{
|
||||
returnCode = LruInsert(file);
|
||||
if (returnCode != 0)
|
||||
return returnCode;
|
||||
}
|
||||
returnCode = pg_fsync(VfdCache[file].fd);
|
||||
if (returnCode == 0)
|
||||
VfdCache[file].fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
|
||||
return returnCode;
|
||||
}
|
||||
|
||||
/*
|
||||
* FileMarkDirty --- mark a file as needing fsync at transaction commit.
|
||||
*
|
||||
* Since FileWrite marks the file dirty, this routine is not needed in
|
||||
* normal use. It is called when the buffer manager detects that some other
|
||||
* backend has written out a shared buffer that this backend dirtied (but
|
||||
* didn't write) in the current xact. In that scenario, we need to fsync
|
||||
* the file before we can commit. We cannot assume that the other backend
|
||||
* has fsync'd the file yet; we need to do our own fsync to ensure that
|
||||
* (a) the disk page is written and (b) this backend's commit is delayed
|
||||
* until the write is complete.
|
||||
*
|
||||
* Note we are assuming that an fsync issued by this backend will write
|
||||
* kernel disk buffers that were dirtied by another backend. Furthermore,
|
||||
* it doesn't matter whether we currently have the file physically open;
|
||||
* we must fsync even if we have to re-open the file to do it.
|
||||
*/
|
||||
void
|
||||
FileMarkDirty(File file)
|
||||
{
|
||||
Assert(FileIsValid(file));
|
||||
|
||||
DO_DB(elog(DEBUG, "FileMarkDirty: %d (%s)",
|
||||
file, VfdCache[file].fileName));
|
||||
|
||||
VfdCache[file].fdstate |= FD_DIRTY;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Routines that want to use stdio (ie, FILE*) should use AllocateFile
|
||||
* rather than plain fopen(). This lets fd.c deal with freeing FDs if
|
||||
@ -992,6 +1061,12 @@ closeAllVfds()
|
||||
* exit (it doesn't particularly care which). All still-open temporary-file
|
||||
* VFDs are closed, which also causes the underlying files to be deleted.
|
||||
* Furthermore, all "allocated" stdio files are closed.
|
||||
*
|
||||
* This routine is not involved in fsync'ing non-temporary files at xact
|
||||
* commit; that is done by FileSync under control of the buffer manager.
|
||||
* During a commit, that is done *before* control gets here. If we still
|
||||
* have any needs-fsync bits set when we get here, we assume this is abort
|
||||
* and clear them.
|
||||
*/
|
||||
void
|
||||
AtEOXact_Files(void)
|
||||
@ -1006,6 +1081,8 @@ AtEOXact_Files(void)
|
||||
if ((VfdCache[i].fdstate & FD_TEMPORARY) &&
|
||||
VfdCache[i].fileName != NULL)
|
||||
FileClose(i);
|
||||
else
|
||||
VfdCache[i].fdstate &= ~FD_DIRTY;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.64 2000/02/07 02:38:18 inoue Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.65 2000/04/09 04:43:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -48,7 +48,12 @@
|
||||
typedef struct _MdfdVec
|
||||
{
|
||||
int mdfd_vfd; /* fd number in vfd pool */
|
||||
uint16 mdfd_flags; /* clean, dirty, free */
|
||||
int mdfd_flags; /* free, temporary */
|
||||
|
||||
/* these are the assigned bits in mdfd_flags: */
|
||||
#define MDFD_FREE (1 << 0)/* unused entry */
|
||||
#define MDFD_TEMP (1 << 1)/* close this entry at transaction end */
|
||||
|
||||
int mdfd_lstbcnt; /* most recent block count */
|
||||
int mdfd_nextFree; /* next free vector */
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
@ -62,13 +67,13 @@ static int Md_Free = -1; /* head of freelist of unused fdvec entries */
|
||||
static int CurFd = 0; /* first never-used fdvec index */
|
||||
static MemoryContext MdCxt; /* context for all my allocations */
|
||||
|
||||
#define MDFD_DIRTY (uint16) 0x01
|
||||
#define MDFD_FREE (uint16) 0x02
|
||||
|
||||
/* routines declared here */
|
||||
static void mdclose_fd(int fd);
|
||||
static int _mdfd_getrelnfd(Relation reln);
|
||||
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
|
||||
static MdfdVec *_mdfd_getseg(Relation reln, int blkno);
|
||||
static MdfdVec *_mdfd_blind_getseg(char *dbname, char *relname,
|
||||
Oid dbid, Oid relid, int blkno);
|
||||
static int _fdvec_alloc(void);
|
||||
static void _fdvec_free(int);
|
||||
static BlockNumber _mdnblocks(File file, Size blcksz);
|
||||
@ -186,6 +191,8 @@ mdcreate(Relation reln)
|
||||
#endif
|
||||
Md_fdvec[vfd].mdfd_lstbcnt = 0;
|
||||
|
||||
pfree(path);
|
||||
|
||||
return vfd;
|
||||
}
|
||||
|
||||
@ -290,9 +297,6 @@ mdextend(Relation reln, char *buffer)
|
||||
return SM_FAIL;
|
||||
}
|
||||
|
||||
/* remember that we did a write, so we can sync at xact commit */
|
||||
v->mdfd_flags |= MDFD_DIRTY;
|
||||
|
||||
/* try to keep the last block count current, though it's just a hint */
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
if ((v->mdfd_lstbcnt = (++nblocks % RELSEG_SIZE)) == 0)
|
||||
@ -367,6 +371,8 @@ mdopen(Relation reln)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
pfree(path);
|
||||
|
||||
return vfd;
|
||||
}
|
||||
|
||||
@ -382,13 +388,24 @@ int
|
||||
mdclose(Relation reln)
|
||||
{
|
||||
int fd;
|
||||
MdfdVec *v;
|
||||
MemoryContext oldcxt;
|
||||
|
||||
fd = RelationGetFile(reln);
|
||||
if (fd < 0)
|
||||
return SM_SUCCESS; /* already closed, so no work */
|
||||
|
||||
mdclose_fd(fd);
|
||||
|
||||
reln->rd_fd = -1;
|
||||
|
||||
return SM_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
mdclose_fd(int fd)
|
||||
{
|
||||
MdfdVec *v;
|
||||
MemoryContext oldcxt;
|
||||
|
||||
oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)
|
||||
@ -398,17 +415,14 @@ mdclose(Relation reln)
|
||||
/* if not closed already */
|
||||
if (v->mdfd_vfd >= 0)
|
||||
{
|
||||
|
||||
/*
|
||||
* We sync the file descriptor so that we don't need to reopen
|
||||
* it at transaction commit to force changes to disk.
|
||||
* it at transaction commit to force changes to disk. (This
|
||||
* is not really optional, because we are about to forget that
|
||||
* the file even exists...)
|
||||
*/
|
||||
|
||||
FileSync(v->mdfd_vfd);
|
||||
FileClose(v->mdfd_vfd);
|
||||
|
||||
/* mark this file descriptor as clean in our private table */
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
}
|
||||
/* Now free vector */
|
||||
v = v->mdfd_chain;
|
||||
@ -423,28 +437,20 @@ mdclose(Relation reln)
|
||||
{
|
||||
if (v->mdfd_vfd >= 0)
|
||||
{
|
||||
|
||||
/*
|
||||
* We sync the file descriptor so that we don't need to reopen
|
||||
* it at transaction commit to force changes to disk.
|
||||
* it at transaction commit to force changes to disk. (This
|
||||
* is not really optional, because we are about to forget that
|
||||
* the file even exists...)
|
||||
*/
|
||||
|
||||
FileSync(v->mdfd_vfd);
|
||||
FileClose(v->mdfd_vfd);
|
||||
|
||||
/* mark this file descriptor as clean in our private table */
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
MemoryContextSwitchTo(oldcxt);
|
||||
|
||||
_fdvec_free(fd);
|
||||
|
||||
/* be sure to mark relation closed */
|
||||
reln->rd_fd = -1;
|
||||
|
||||
return SM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -521,8 +527,6 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
|
||||
status = SM_FAIL;
|
||||
|
||||
v->mdfd_flags |= MDFD_DIRTY;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -560,14 +564,6 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
|| FileSync(v->mdfd_vfd) < 0)
|
||||
status = SM_FAIL;
|
||||
|
||||
/*
|
||||
* By here, the block is written and changes have been forced to
|
||||
* stable storage. Mark the descriptor as clean until the next write,
|
||||
* so we don't sync it again unnecessarily at transaction commit.
|
||||
*/
|
||||
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -575,141 +571,89 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
|
||||
* mdblindwrt() -- Write a block to disk blind.
|
||||
*
|
||||
* We have to be able to do this using only the name and OID of
|
||||
* the database and relation in which the block belongs. This
|
||||
* is a synchronous write.
|
||||
* the database and relation in which the block belongs. Otherwise
|
||||
* this is just like mdwrite().
|
||||
*/
|
||||
int
|
||||
mdblindwrt(char *dbstr,
|
||||
char *relstr,
|
||||
mdblindwrt(char *dbname,
|
||||
char *relname,
|
||||
Oid dbid,
|
||||
Oid relid,
|
||||
BlockNumber blkno,
|
||||
char *buffer)
|
||||
{
|
||||
int fd;
|
||||
int segno;
|
||||
long seekpos;
|
||||
int status;
|
||||
char *path;
|
||||
long seekpos;
|
||||
MdfdVec *v;
|
||||
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
int nchars;
|
||||
v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
|
||||
|
||||
/* be sure we have enough space for the '.segno', if any */
|
||||
segno = blkno / RELSEG_SIZE;
|
||||
if (segno > 0)
|
||||
nchars = 10;
|
||||
else
|
||||
nchars = 0;
|
||||
|
||||
/* construct the path to the file and open it */
|
||||
/* system table? then put in system area... */
|
||||
if (dbid == (Oid) 0)
|
||||
{
|
||||
path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars);
|
||||
if (segno == 0)
|
||||
sprintf(path, "%s/%s", DataDir, relstr);
|
||||
else
|
||||
sprintf(path, "%s/%s.%d", DataDir, relstr, segno);
|
||||
}
|
||||
/* user table? then put in user database area... */
|
||||
else if (dbid == MyDatabaseId)
|
||||
{
|
||||
path = (char *) palloc(strlen(DatabasePath) + 2 * sizeof(NameData) + 2 + nchars);
|
||||
if (segno == 0)
|
||||
sprintf(path, "%s%c%s", DatabasePath, SEP_CHAR, relstr);
|
||||
else
|
||||
sprintf(path, "%s%c%s.%d", DatabasePath, SEP_CHAR, relstr, segno);
|
||||
}
|
||||
else
|
||||
/* this is a workaround only !!! */
|
||||
{
|
||||
char dbpath[MAXPGPATH];
|
||||
Oid id;
|
||||
char *tmpPath;
|
||||
|
||||
GetRawDatabaseInfo(dbstr, &id, dbpath);
|
||||
|
||||
if (id != dbid)
|
||||
elog(FATAL, "mdblindwrt: oid of db %s is not %u", dbstr, dbid);
|
||||
tmpPath = ExpandDatabasePath(dbpath);
|
||||
if (tmpPath == NULL)
|
||||
elog(FATAL, "mdblindwrt: can't expand path for db %s", dbstr);
|
||||
path = (char *) palloc(strlen(tmpPath) + 2 * sizeof(NameData) + 2 + nchars);
|
||||
if (segno == 0)
|
||||
sprintf(path, "%s%c%s", tmpPath, SEP_CHAR, relstr);
|
||||
else
|
||||
sprintf(path, "%s%c%s.%d", tmpPath, SEP_CHAR, relstr, segno);
|
||||
pfree(tmpPath);
|
||||
}
|
||||
#else
|
||||
/* construct the path to the file and open it */
|
||||
/* system table? then put in system area... */
|
||||
if (dbid == (Oid) 0)
|
||||
{
|
||||
path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2);
|
||||
sprintf(path, "%s/%s", DataDir, relstr);
|
||||
}
|
||||
/* user table? then put in user database area... */
|
||||
else if (dbid == MyDatabaseId)
|
||||
{
|
||||
path = (char *) palloc(strlen(DatabasePath) + 2 * sizeof(NameData) + 2);
|
||||
sprintf(path, "%s%c%s", DatabasePath, SEP_CHAR, relstr);
|
||||
}
|
||||
else
|
||||
/* this is a workaround only !!! */
|
||||
{
|
||||
char dbpath[MAXPGPATH];
|
||||
Oid id;
|
||||
char *tmpPath;
|
||||
|
||||
GetRawDatabaseInfo(dbstr, &id, dbpath);
|
||||
|
||||
if (id != dbid)
|
||||
elog(FATAL, "mdblindwrt: oid of db %s is not %u", dbstr, dbid);
|
||||
tmpPath = ExpandDatabasePath(dbpath);
|
||||
if (tmpPath == NULL)
|
||||
elog(FATAL, "mdblindwrt: can't expand path for db %s", dbstr);
|
||||
path = (char *) palloc(strlen(tmpPath) + 2 * sizeof(NameData) + 2);
|
||||
sprintf(path, "%s%c%s", tmpPath, SEP_CHAR, relstr);
|
||||
pfree(tmpPath);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef __CYGWIN32__
|
||||
if ((fd = open(path, O_RDWR, 0600)) < 0)
|
||||
#else
|
||||
if ((fd = open(path, O_RDWR | O_BINARY, 0600)) < 0)
|
||||
#endif
|
||||
if (v == NULL)
|
||||
return SM_FAIL;
|
||||
|
||||
/* seek to the right spot */
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE));
|
||||
#ifdef DIAGNOSTIC
|
||||
if (seekpos >= BLCKSZ * RELSEG_SIZE)
|
||||
elog(FATAL, "seekpos too big!");
|
||||
#endif
|
||||
#else
|
||||
seekpos = (long) (BLCKSZ * (blkno));
|
||||
#endif
|
||||
|
||||
if (lseek(fd, seekpos, SEEK_SET) != seekpos)
|
||||
{
|
||||
close(fd);
|
||||
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
|
||||
return SM_FAIL;
|
||||
}
|
||||
|
||||
status = SM_SUCCESS;
|
||||
|
||||
/* write and sync the block */
|
||||
if (write(fd, buffer, BLCKSZ) != BLCKSZ || (pg_fsync(fd) < 0))
|
||||
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
|
||||
status = SM_FAIL;
|
||||
|
||||
if (close(fd) < 0)
|
||||
status = SM_FAIL;
|
||||
|
||||
pfree(path);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* mdmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
|
||||
*
|
||||
* Returns SM_SUCCESS or SM_FAIL.
|
||||
*/
|
||||
int
|
||||
mdmarkdirty(Relation reln, BlockNumber blkno)
|
||||
{
|
||||
MdfdVec *v;
|
||||
|
||||
v = _mdfd_getseg(reln, blkno);
|
||||
|
||||
FileMarkDirty(v->mdfd_vfd);
|
||||
|
||||
return SM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* mdblindmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
|
||||
*
|
||||
* We have to be able to do this using only the name and OID of
|
||||
* the database and relation in which the block belongs. Otherwise
|
||||
* this is just like mdmarkdirty().
|
||||
*/
|
||||
int
|
||||
mdblindmarkdirty(char *dbname,
|
||||
char *relname,
|
||||
Oid dbid,
|
||||
Oid relid,
|
||||
BlockNumber blkno)
|
||||
{
|
||||
MdfdVec *v;
|
||||
|
||||
v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
|
||||
|
||||
if (v == NULL)
|
||||
return SM_FAIL;
|
||||
|
||||
FileMarkDirty(v->mdfd_vfd);
|
||||
|
||||
return SM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* mdnblocks() -- Get the number of blocks stored in a relation.
|
||||
*
|
||||
@ -873,19 +817,26 @@ mdcommit()
|
||||
|
||||
for (i = 0; i < CurFd; i++)
|
||||
{
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain)
|
||||
#else
|
||||
v = &Md_fdvec[i];
|
||||
if (v->mdfd_flags & MDFD_FREE)
|
||||
continue;
|
||||
if (v->mdfd_flags & MDFD_TEMP)
|
||||
{
|
||||
/* Sync and close the file */
|
||||
mdclose_fd(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Sync, but keep the file entry */
|
||||
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
|
||||
#else
|
||||
if (v != (MdfdVec *) NULL)
|
||||
#endif
|
||||
{
|
||||
if (v->mdfd_flags & MDFD_DIRTY)
|
||||
{
|
||||
if (FileSync(v->mdfd_vfd) < 0)
|
||||
return SM_FAIL;
|
||||
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -908,13 +859,14 @@ mdabort()
|
||||
|
||||
for (i = 0; i < CurFd; i++)
|
||||
{
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain)
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
#else
|
||||
v = &Md_fdvec[i];
|
||||
v->mdfd_flags &= ~MDFD_DIRTY;
|
||||
#endif
|
||||
if (v->mdfd_flags & MDFD_FREE)
|
||||
continue;
|
||||
if (v->mdfd_flags & MDFD_TEMP)
|
||||
{
|
||||
/* Close the file */
|
||||
mdclose_fd(i);
|
||||
}
|
||||
}
|
||||
|
||||
return SM_SUCCESS;
|
||||
@ -995,7 +947,6 @@ _fdvec_free(int fdvec)
|
||||
Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
|
||||
Md_fdvec[fdvec].mdfd_flags = MDFD_FREE;
|
||||
Md_Free = fdvec;
|
||||
|
||||
}
|
||||
|
||||
static MdfdVec *
|
||||
@ -1004,19 +955,17 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
|
||||
MemoryContext oldcxt;
|
||||
MdfdVec *v;
|
||||
int fd;
|
||||
bool dofree;
|
||||
char *path,
|
||||
*fullpath;
|
||||
|
||||
/* be sure we have enough space for the '.segno', if any */
|
||||
path = relpath(RelationGetPhysicalRelationName(reln));
|
||||
|
||||
dofree = false;
|
||||
if (segno > 0)
|
||||
{
|
||||
dofree = true;
|
||||
fullpath = (char *) palloc(strlen(path) + 12);
|
||||
sprintf(fullpath, "%s.%d", path, segno);
|
||||
pfree(path);
|
||||
}
|
||||
else
|
||||
fullpath = path;
|
||||
@ -1028,7 +977,6 @@ _mdfd_openseg(Relation reln, int segno, int oflags)
|
||||
fd = FileNameOpenFile(fullpath, O_RDWR | O_BINARY | oflags, 0600);
|
||||
#endif
|
||||
|
||||
if (dofree)
|
||||
pfree(fullpath);
|
||||
|
||||
if (fd < 0)
|
||||
@ -1109,6 +1057,104 @@ _mdfd_getseg(Relation reln, int blkno)
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Find the segment of the relation holding the specified block.
|
||||
* This is the same as _mdfd_getseg() except that we must work
|
||||
* "blind" with no Relation struct.
|
||||
*
|
||||
* NOTE: we have no easy way to tell whether a FD already exists for the
|
||||
* target relation, so we always make a new one. This should probably
|
||||
* be improved somehow, but I doubt it's a significant performance issue
|
||||
* under normal circumstances. The FD is marked to be closed at end of xact
|
||||
* so that we don't accumulate a lot of dead FDs.
|
||||
*/
|
||||
|
||||
static MdfdVec *
|
||||
_mdfd_blind_getseg(char *dbname, char *relname, Oid dbid, Oid relid,
|
||||
int blkno)
|
||||
{
|
||||
MdfdVec *v;
|
||||
char *path;
|
||||
int fd;
|
||||
int vfd;
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
int segno;
|
||||
int targsegno;
|
||||
#endif
|
||||
|
||||
/* construct the path to the file and open it */
|
||||
path = relpath_blind(dbname, relname, dbid, relid);
|
||||
|
||||
#ifndef __CYGWIN32__
|
||||
fd = FileNameOpenFile(path, O_RDWR, 0600);
|
||||
#else
|
||||
fd = FileNameOpenFile(path, O_RDWR | O_BINARY, 0600);
|
||||
#endif
|
||||
|
||||
if (fd < 0)
|
||||
return NULL;
|
||||
|
||||
vfd = _fdvec_alloc();
|
||||
if (vfd < 0)
|
||||
return NULL;
|
||||
|
||||
Md_fdvec[vfd].mdfd_vfd = fd;
|
||||
Md_fdvec[vfd].mdfd_flags = MDFD_TEMP;
|
||||
Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
|
||||
#ifndef LET_OS_MANAGE_FILESIZE
|
||||
Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
|
||||
|
||||
#ifdef DIAGNOSTIC
|
||||
if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE)
|
||||
elog(FATAL, "segment too big on relopen!");
|
||||
#endif
|
||||
|
||||
targsegno = blkno / RELSEG_SIZE;
|
||||
for (v = &Md_fdvec[vfd], segno = 1; segno <= targsegno; segno++)
|
||||
{
|
||||
char *segpath;
|
||||
MdfdVec *newv;
|
||||
MemoryContext oldcxt;
|
||||
|
||||
segpath = (char *) palloc(strlen(path) + 12);
|
||||
sprintf(segpath, "%s.%d", path, segno);
|
||||
|
||||
#ifndef __CYGWIN32__
|
||||
fd = FileNameOpenFile(segpath, O_RDWR | O_CREAT, 0600);
|
||||
#else
|
||||
fd = FileNameOpenFile(segpath, O_RDWR | O_BINARY | O_CREAT, 0600);
|
||||
#endif
|
||||
|
||||
pfree(segpath);
|
||||
|
||||
if (fd < 0)
|
||||
return (MdfdVec *) NULL;
|
||||
|
||||
/* allocate an mdfdvec entry for it */
|
||||
oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||
newv = (MdfdVec *) palloc(sizeof(MdfdVec));
|
||||
MemoryContextSwitchTo(oldcxt);
|
||||
|
||||
/* fill the entry */
|
||||
newv->mdfd_vfd = fd;
|
||||
newv->mdfd_flags = MDFD_TEMP;
|
||||
newv->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
|
||||
newv->mdfd_chain = (MdfdVec *) NULL;
|
||||
#ifdef DIAGNOSTIC
|
||||
if (newv->mdfd_lstbcnt > RELSEG_SIZE)
|
||||
elog(FATAL, "segment too big on open!");
|
||||
#endif
|
||||
v->mdfd_chain = newv;
|
||||
v = newv;
|
||||
}
|
||||
#else
|
||||
v = &Md_fdvec[vfd];
|
||||
#endif
|
||||
|
||||
pfree(path);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static BlockNumber
|
||||
_mdnblocks(File file, Size blcksz)
|
||||
{
|
||||
|
@ -11,7 +11,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.32 2000/01/26 05:57:05 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.33 2000/04/09 04:43:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -23,21 +23,30 @@ static void smgrshutdown(int dummy);
|
||||
|
||||
typedef struct f_smgr
|
||||
{
|
||||
int (*smgr_init) ();/* may be NULL */
|
||||
int (*smgr_shutdown) (); /* may be NULL */
|
||||
int (*smgr_create) ();
|
||||
int (*smgr_unlink) ();
|
||||
int (*smgr_extend) ();
|
||||
int (*smgr_open) ();
|
||||
int (*smgr_close) ();
|
||||
int (*smgr_read) ();
|
||||
int (*smgr_write) ();
|
||||
int (*smgr_flush) ();
|
||||
int (*smgr_blindwrt) ();
|
||||
int (*smgr_nblocks) ();
|
||||
int (*smgr_truncate) ();
|
||||
int (*smgr_commit) (); /* may be NULL */
|
||||
int (*smgr_abort) (); /* may be NULL */
|
||||
int (*smgr_init) (void); /* may be NULL */
|
||||
int (*smgr_shutdown) (void); /* may be NULL */
|
||||
int (*smgr_create) (Relation reln);
|
||||
int (*smgr_unlink) (Relation reln);
|
||||
int (*smgr_extend) (Relation reln, char *buffer);
|
||||
int (*smgr_open) (Relation reln);
|
||||
int (*smgr_close) (Relation reln);
|
||||
int (*smgr_read) (Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
int (*smgr_write) (Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
int (*smgr_flush) (Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
int (*smgr_blindwrt) (char *dbname, char *relname,
|
||||
Oid dbid, Oid relid,
|
||||
BlockNumber blkno, char *buffer);
|
||||
int (*smgr_markdirty) (Relation reln, BlockNumber blkno);
|
||||
int (*smgr_blindmarkdirty) (char *dbname, char *relname,
|
||||
Oid dbid, Oid relid,
|
||||
BlockNumber blkno);
|
||||
int (*smgr_nblocks) (Relation reln);
|
||||
int (*smgr_truncate) (Relation reln, int nblocks);
|
||||
int (*smgr_commit) (void); /* may be NULL */
|
||||
int (*smgr_abort) (void); /* may be NULL */
|
||||
} f_smgr;
|
||||
|
||||
/*
|
||||
@ -49,14 +58,14 @@ static f_smgr smgrsw[] = {
|
||||
|
||||
/* magnetic disk */
|
||||
{mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
|
||||
mdread, mdwrite, mdflush, mdblindwrt, mdnblocks, mdtruncate,
|
||||
mdcommit, mdabort},
|
||||
mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty,
|
||||
mdnblocks, mdtruncate, mdcommit, mdabort},
|
||||
|
||||
#ifdef STABLE_MEMORY_STORAGE
|
||||
/* main memory */
|
||||
{mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
|
||||
mmread, mmwrite, mmflush, mmblindwrt, mmnblocks, NULL,
|
||||
mmcommit, mmabort},
|
||||
mmread, mmwrite, mmflush, mmblindwrt, mmmarkdirty, mmblindmarkdirty,
|
||||
mmnblocks, NULL, mmcommit, mmabort},
|
||||
|
||||
#endif
|
||||
};
|
||||
@ -299,6 +308,7 @@ smgrblindwrt(int16 which,
|
||||
char *relstr;
|
||||
int status;
|
||||
|
||||
/* strdup here is probably redundant */
|
||||
dbstr = pstrdup(dbname);
|
||||
relstr = pstrdup(relname);
|
||||
|
||||
@ -315,6 +325,67 @@ smgrblindwrt(int16 which,
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrmarkdirty() -- Mark a page dirty (needs fsync).
|
||||
*
|
||||
* Mark the specified page as needing to be fsync'd before commit.
|
||||
* Ordinarily, the storage manager will do this implicitly during
|
||||
* smgrwrite(). However, the buffer manager may discover that some
|
||||
* other backend has written a buffer that we dirtied in the current
|
||||
* transaction. In that case, we still need to fsync the file to be
|
||||
* sure the page is down to disk before we commit.
|
||||
*/
|
||||
int
|
||||
smgrmarkdirty(int16 which,
|
||||
Relation reln,
|
||||
BlockNumber blkno)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = (*(smgrsw[which].smgr_markdirty)) (reln, blkno);
|
||||
|
||||
if (status == SM_FAIL)
|
||||
elog(ERROR, "cannot mark block %d of %s",
|
||||
blkno, RelationGetRelationName(reln));
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrblindmarkdirty() -- Mark a page dirty, "blind".
|
||||
*
|
||||
* Just like smgrmarkdirty, except we don't have a reldesc.
|
||||
*/
|
||||
int
|
||||
smgrblindmarkdirty(int16 which,
|
||||
char *dbname,
|
||||
char *relname,
|
||||
Oid dbid,
|
||||
Oid relid,
|
||||
BlockNumber blkno)
|
||||
{
|
||||
char *dbstr;
|
||||
char *relstr;
|
||||
int status;
|
||||
|
||||
/* strdup here is probably redundant */
|
||||
dbstr = pstrdup(dbname);
|
||||
relstr = pstrdup(relname);
|
||||
|
||||
status = (*(smgrsw[which].smgr_blindmarkdirty)) (dbstr, relstr,
|
||||
dbid, relid,
|
||||
blkno);
|
||||
|
||||
if (status == SM_FAIL)
|
||||
elog(ERROR, "cannot mark block %d of %s [%s] blind",
|
||||
blkno, relstr, dbstr);
|
||||
|
||||
pfree(dbstr);
|
||||
pfree(relstr);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* smgrnblocks() -- Calculate the number of POSTGRES blocks in the
|
||||
* supplied relation.
|
||||
@ -378,7 +449,6 @@ smgrcommit()
|
||||
return SM_SUCCESS;
|
||||
}
|
||||
|
||||
#ifdef NOT_USED
|
||||
int
|
||||
smgrabort()
|
||||
{
|
||||
@ -396,8 +466,6 @@ smgrabort()
|
||||
return SM_SUCCESS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef NOT_USED
|
||||
bool
|
||||
smgriswo(int16 smgrno)
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: catalog.h,v 1.10 2000/01/26 05:57:56 momjian Exp $
|
||||
* $Id: catalog.h,v 1.11 2000/04/09 04:43:14 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -17,6 +17,8 @@
|
||||
#include "access/tupdesc.h"
|
||||
|
||||
extern char *relpath(const char *relname);
|
||||
extern char *relpath_blind(const char *dbname, const char *relname,
|
||||
Oid dbid, Oid relid);
|
||||
extern bool IsSystemRelationName(const char *relname);
|
||||
extern bool IsSharedSystemRelationName(const char *relname);
|
||||
extern Oid newoid(void);
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: buf_internals.h,v 1.35 2000/01/26 05:58:32 momjian Exp $
|
||||
* $Id: buf_internals.h,v 1.36 2000/04/09 04:43:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -61,6 +61,16 @@ typedef struct buftag
|
||||
(a)->relId = (xx_reln)->rd_lockInfo.lockRelId \
|
||||
)
|
||||
|
||||
/* If we have to write a buffer "blind" (without a relcache entry),
|
||||
* the BufferTag is not enough information. BufferBlindId carries the
|
||||
* additional information needed.
|
||||
*/
|
||||
typedef struct bufblindid
|
||||
{
|
||||
char dbname[NAMEDATALEN]; /* name of db in which buf belongs */
|
||||
char relname[NAMEDATALEN]; /* name of reln */
|
||||
} BufferBlindId;
|
||||
|
||||
#define BAD_BUFFER_ID(bid) ((bid) < 1 || (bid) > NBuffers)
|
||||
#define INVALID_DESCRIPTOR (-3)
|
||||
|
||||
@ -98,8 +108,7 @@ typedef struct sbufdesc
|
||||
bool ri_lock; /* read-intent lock */
|
||||
bool w_lock; /* context exclusively locked */
|
||||
|
||||
char sb_dbname[NAMEDATALEN]; /* name of db in which buf belongs */
|
||||
char sb_relname[NAMEDATALEN]; /* name of reln */
|
||||
BufferBlindId blind; /* extra info to support blind write */
|
||||
} BufferDesc;
|
||||
|
||||
/*
|
||||
@ -164,7 +173,9 @@ extern BufferDesc *BufferDescriptors;
|
||||
extern BufferBlock BufferBlocks;
|
||||
extern long *PrivateRefCount;
|
||||
extern bits8 *BufferLocks;
|
||||
extern long *CommitInfoNeedsSave;
|
||||
extern BufferTag *BufferTagLastDirtied;
|
||||
extern BufferBlindId *BufferBlindLastDirtied;
|
||||
extern bool *BufferDirtiedByMe;
|
||||
extern SPINLOCK BufMgrLock;
|
||||
|
||||
/* localbuf.c */
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: bufmgr.h,v 1.35 2000/03/31 02:43:30 tgl Exp $
|
||||
* $Id: bufmgr.h,v 1.36 2000/04/09 04:43:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -164,7 +164,7 @@ extern int FlushBuffer(Buffer buffer, bool release);
|
||||
extern void InitBufferPool(IPCKey key);
|
||||
extern void PrintBufferUsage(FILE *statfp);
|
||||
extern void ResetBufferUsage(void);
|
||||
extern void ResetBufferPool(void);
|
||||
extern void ResetBufferPool(bool isCommit);
|
||||
extern int BufferPoolCheckLeak(void);
|
||||
extern void FlushBufferPool(void);
|
||||
extern BlockNumber BufferGetBlockNumber(Buffer buffer);
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: fd.h,v 1.19 2000/01/26 05:58:32 momjian Exp $
|
||||
* $Id: fd.h,v 1.20 2000/04/09 04:43:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -15,7 +15,7 @@
|
||||
/*
|
||||
* calls:
|
||||
*
|
||||
* File {Close, Read, Write, Seek, Tell, Sync}
|
||||
* File {Close, Read, Write, Seek, Tell, MarkDirty, Sync}
|
||||
* {File Name Open, Allocate, Free} File
|
||||
*
|
||||
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
|
||||
@ -58,6 +58,7 @@ extern int FileWrite(File file, char *buffer, int amount);
|
||||
extern long FileSeek(File file, long offset, int whence);
|
||||
extern int FileTruncate(File file, long offset);
|
||||
extern int FileSync(File file);
|
||||
extern void FileMarkDirty(File file);
|
||||
|
||||
/* Operations that allow use of regular stdio --- USE WITH CAUTION */
|
||||
extern FILE *AllocateFile(char *name, char *mode);
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: smgr.h,v 1.17 2000/01/26 05:58:33 momjian Exp $
|
||||
* $Id: smgr.h,v 1.18 2000/04/09 04:43:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -35,11 +35,17 @@ extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
extern int smgrflush(int16 which, Relation reln, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
extern int smgrblindwrt(int16 which, char *dbname, char *relname, Oid dbid,
|
||||
Oid relid, BlockNumber blkno, char *buffer);
|
||||
extern int smgrblindwrt(int16 which, char *dbname, char *relname,
|
||||
Oid dbid, Oid relid,
|
||||
BlockNumber blkno, char *buffer);
|
||||
extern int smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno);
|
||||
extern int smgrblindmarkdirty(int16 which, char *dbname, char *relname,
|
||||
Oid dbid, Oid relid,
|
||||
BlockNumber blkno);
|
||||
extern int smgrnblocks(int16 which, Relation reln);
|
||||
extern int smgrtruncate(int16 which, Relation reln, int nblocks);
|
||||
extern int smgrcommit(void);
|
||||
extern int smgrabort(void);
|
||||
|
||||
|
||||
|
||||
@ -55,8 +61,11 @@ extern int mdclose(Relation reln);
|
||||
extern int mdread(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mdflush(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mdblindwrt(char *dbstr, char *relstr, Oid dbid, Oid relid,
|
||||
extern int mdblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
|
||||
BlockNumber blkno, char *buffer);
|
||||
extern int mdmarkdirty(Relation reln, BlockNumber blkno);
|
||||
extern int mdblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
|
||||
BlockNumber blkno);
|
||||
extern int mdnblocks(Relation reln);
|
||||
extern int mdtruncate(Relation reln, int nblocks);
|
||||
extern int mdcommit(void);
|
||||
@ -66,7 +75,6 @@ extern int mdabort(void);
|
||||
extern SPINLOCK MMCacheLock;
|
||||
|
||||
extern int mminit(void);
|
||||
extern int mmshutdown(void);
|
||||
extern int mmcreate(Relation reln);
|
||||
extern int mmunlink(Relation reln);
|
||||
extern int mmextend(Relation reln, char *buffer);
|
||||
@ -75,11 +83,17 @@ extern int mmclose(Relation reln);
|
||||
extern int mmread(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mmflush(Relation reln, BlockNumber blocknum, char *buffer);
|
||||
extern int mmblindwrt(char *dbstr, char *relstr, Oid dbid, Oid relid,
|
||||
extern int mmblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
|
||||
BlockNumber blkno, char *buffer);
|
||||
extern int mmmarkdirty(Relation reln, BlockNumber blkno);
|
||||
extern int mmblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
|
||||
BlockNumber blkno);
|
||||
extern int mmnblocks(Relation reln);
|
||||
extern int mmtruncate(Relation reln, int nblocks);
|
||||
extern int mmcommit(void);
|
||||
extern int mmabort(void);
|
||||
|
||||
extern int mmshutdown(void);
|
||||
extern int MMShmemSize(void);
|
||||
|
||||
/* smgrtype.c */
|
||||
|
Loading…
x
Reference in New Issue
Block a user