Prevent re-use of a deleted relation's relfilenode until after the next
checkpoint. This guards against an unlikely data-loss scenario in which we re-use the relfilenode, then crash, then replay the deletion and recreation of the file. Even then we'd be OK if all insertions into the new relation had been WAL-logged ... but that's not guaranteed given all the no-WAL-logging optimizations that have recently been added. Patch by Heikki Linnakangas, per a discussion last month.
This commit is contained in:
parent
7a550cb95c
commit
6cc4451b5c
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.286 2007/10/12 19:39:59 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.287 2007/11/15 20:36:40 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -45,6 +45,7 @@
|
|||||||
#include "storage/fd.h"
|
#include "storage/fd.h"
|
||||||
#include "storage/pmsignal.h"
|
#include "storage/pmsignal.h"
|
||||||
#include "storage/procarray.h"
|
#include "storage/procarray.h"
|
||||||
|
#include "storage/smgr.h"
|
||||||
#include "storage/spin.h"
|
#include "storage/spin.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "utils/pg_locale.h"
|
#include "utils/pg_locale.h"
|
||||||
@ -5663,6 +5664,14 @@ CreateCheckPoint(int flags)
|
|||||||
UpdateControlFile();
|
UpdateControlFile();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Let smgr prepare for checkpoint; this has to happen before we
|
||||||
|
* determine the REDO pointer. Note that smgr must not do anything
|
||||||
|
* that'd have to be undone if we decide no checkpoint is needed.
|
||||||
|
*/
|
||||||
|
smgrpreckpt();
|
||||||
|
|
||||||
|
/* Begin filling in the checkpoint WAL record */
|
||||||
MemSet(&checkPoint, 0, sizeof(checkPoint));
|
MemSet(&checkPoint, 0, sizeof(checkPoint));
|
||||||
checkPoint.ThisTimeLineID = ThisTimeLineID;
|
checkPoint.ThisTimeLineID = ThisTimeLineID;
|
||||||
checkPoint.time = time(NULL);
|
checkPoint.time = time(NULL);
|
||||||
@ -5886,6 +5895,11 @@ CreateCheckPoint(int flags)
|
|||||||
*/
|
*/
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Let smgr do post-checkpoint cleanup (eg, deleting old files).
|
||||||
|
*/
|
||||||
|
smgrpostckpt();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Delete old log files (those no longer needed even for previous
|
* Delete old log files (those no longer needed even for previous
|
||||||
* checkpoint).
|
* checkpoint).
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.49 2007/08/01 22:45:08 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.50 2007/11/15 20:36:40 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -57,6 +57,7 @@
|
|||||||
#include "commands/comment.h"
|
#include "commands/comment.h"
|
||||||
#include "commands/tablespace.h"
|
#include "commands/tablespace.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
#include "postmaster/bgwriter.h"
|
||||||
#include "storage/fd.h"
|
#include "storage/fd.h"
|
||||||
#include "utils/acl.h"
|
#include "utils/acl.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
@ -460,13 +461,29 @@ DropTableSpace(DropTableSpaceStmt *stmt)
|
|||||||
LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
|
LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try to remove the physical infrastructure
|
* Try to remove the physical infrastructure.
|
||||||
*/
|
*/
|
||||||
if (!remove_tablespace_directories(tablespaceoid, false))
|
if (!remove_tablespace_directories(tablespaceoid, false))
|
||||||
ereport(ERROR,
|
{
|
||||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
/*
|
||||||
errmsg("tablespace \"%s\" is not empty",
|
* Not all files deleted? However, there can be lingering empty files
|
||||||
tablespacename)));
|
* in the directories, left behind by for example DROP TABLE, that
|
||||||
|
* have been scheduled for deletion at next checkpoint (see comments
|
||||||
|
* in mdunlink() for details). We could just delete them immediately,
|
||||||
|
* but we can't tell them apart from important data files that we
|
||||||
|
* mustn't delete. So instead, we force a checkpoint which will clean
|
||||||
|
* out any lingering files, and try again.
|
||||||
|
*/
|
||||||
|
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
|
||||||
|
if (!remove_tablespace_directories(tablespaceoid, false))
|
||||||
|
{
|
||||||
|
/* Still not empty, the files must be important then */
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||||
|
errmsg("tablespace \"%s\" is not empty",
|
||||||
|
tablespacename)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Record the filesystem change in XLOG */
|
/* Record the filesystem change in XLOG */
|
||||||
{
|
{
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.129 2007/07/03 14:51:24 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.130 2007/11/15 20:36:40 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -34,6 +34,7 @@
|
|||||||
/* special values for the segno arg to RememberFsyncRequest */
|
/* special values for the segno arg to RememberFsyncRequest */
|
||||||
#define FORGET_RELATION_FSYNC (InvalidBlockNumber)
|
#define FORGET_RELATION_FSYNC (InvalidBlockNumber)
|
||||||
#define FORGET_DATABASE_FSYNC (InvalidBlockNumber-1)
|
#define FORGET_DATABASE_FSYNC (InvalidBlockNumber-1)
|
||||||
|
#define UNLINK_RELATION_REQUEST (InvalidBlockNumber-2)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On Windows, we have to interpret EACCES as possibly meaning the same as
|
* On Windows, we have to interpret EACCES as possibly meaning the same as
|
||||||
@ -113,6 +114,10 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
|
|||||||
* table remembers the pending operations. We use a hash table mostly as
|
* table remembers the pending operations. We use a hash table mostly as
|
||||||
* a convenient way of eliminating duplicate requests.
|
* a convenient way of eliminating duplicate requests.
|
||||||
*
|
*
|
||||||
|
* We use a similar mechanism to remember no-longer-needed files that can
|
||||||
|
* be deleted after the next checkpoint, but we use a linked list instead of
|
||||||
|
* a hash table, because we don't expect there to be any duplicate requests.
|
||||||
|
*
|
||||||
* (Regular backends do not track pending operations locally, but forward
|
* (Regular backends do not track pending operations locally, but forward
|
||||||
* them to the bgwriter.)
|
* them to the bgwriter.)
|
||||||
*/
|
*/
|
||||||
@ -131,9 +136,17 @@ typedef struct
|
|||||||
CycleCtr cycle_ctr; /* mdsync_cycle_ctr when request was made */
|
CycleCtr cycle_ctr; /* mdsync_cycle_ctr when request was made */
|
||||||
} PendingOperationEntry;
|
} PendingOperationEntry;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
RelFileNode rnode; /* the dead relation to delete */
|
||||||
|
CycleCtr cycle_ctr; /* mdckpt_cycle_ctr when request was made */
|
||||||
|
} PendingUnlinkEntry;
|
||||||
|
|
||||||
static HTAB *pendingOpsTable = NULL;
|
static HTAB *pendingOpsTable = NULL;
|
||||||
|
static List *pendingUnlinks = NIL;
|
||||||
|
|
||||||
static CycleCtr mdsync_cycle_ctr = 0;
|
static CycleCtr mdsync_cycle_ctr = 0;
|
||||||
|
static CycleCtr mdckpt_cycle_ctr = 0;
|
||||||
|
|
||||||
|
|
||||||
typedef enum /* behavior for mdopen & _mdfd_getseg */
|
typedef enum /* behavior for mdopen & _mdfd_getseg */
|
||||||
@ -146,6 +159,7 @@ typedef enum /* behavior for mdopen & _mdfd_getseg */
|
|||||||
/* local routines */
|
/* local routines */
|
||||||
static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
|
static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
|
||||||
static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
|
static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
|
||||||
|
static void register_unlink(RelFileNode rnode);
|
||||||
static MdfdVec *_fdvec_alloc(void);
|
static MdfdVec *_fdvec_alloc(void);
|
||||||
|
|
||||||
#ifndef LET_OS_MANAGE_FILESIZE
|
#ifndef LET_OS_MANAGE_FILESIZE
|
||||||
@ -188,6 +202,7 @@ mdinit(void)
|
|||||||
100L,
|
100L,
|
||||||
&hash_ctl,
|
&hash_ctl,
|
||||||
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
|
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
|
||||||
|
pendingUnlinks = NIL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -254,14 +269,37 @@ mdcreate(SMgrRelation reln, bool isRedo)
|
|||||||
* Note that we're passed a RelFileNode --- by the time this is called,
|
* Note that we're passed a RelFileNode --- by the time this is called,
|
||||||
* there won't be an SMgrRelation hashtable entry anymore.
|
* there won't be an SMgrRelation hashtable entry anymore.
|
||||||
*
|
*
|
||||||
|
* Actually, we don't unlink the first segment file of the relation, but
|
||||||
|
* just truncate it to zero length, and record a request to unlink it after
|
||||||
|
* the next checkpoint. Additional segments can be unlinked immediately,
|
||||||
|
* however. Leaving the empty file in place prevents that relfilenode
|
||||||
|
* number from being reused. The scenario this protects us from is:
|
||||||
|
* 1. We delete a relation (and commit, and actually remove its file).
|
||||||
|
* 2. We create a new relation, which by chance gets the same relfilenode as
|
||||||
|
* the just-deleted one (OIDs must've wrapped around for that to happen).
|
||||||
|
* 3. We crash before another checkpoint occurs.
|
||||||
|
* During replay, we would delete the file and then recreate it, which is fine
|
||||||
|
* if the contents of the file were repopulated by subsequent WAL entries.
|
||||||
|
* But if we didn't WAL-log insertions, but instead relied on fsyncing the
|
||||||
|
* file after populating it (as for instance CLUSTER and CREATE INDEX do),
|
||||||
|
* the contents of the file would be lost forever. By leaving the empty file
|
||||||
|
* until after the next checkpoint, we prevent reassignment of the relfilenode
|
||||||
|
* number until it's safe, because relfilenode assignment skips over any
|
||||||
|
* existing file.
|
||||||
|
*
|
||||||
* If isRedo is true, it's okay for the relation to be already gone.
|
* If isRedo is true, it's okay for the relation to be already gone.
|
||||||
* Also, any failure should be reported as WARNING not ERROR, because
|
* Also, we should remove the file immediately instead of queuing a request
|
||||||
|
* for later, since during redo there's no possibility of creating a
|
||||||
|
* conflicting relation.
|
||||||
|
*
|
||||||
|
* Note: any failure should be reported as WARNING not ERROR, because
|
||||||
* we are usually not in a transaction anymore when this is called.
|
* we are usually not in a transaction anymore when this is called.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
mdunlink(RelFileNode rnode, bool isRedo)
|
mdunlink(RelFileNode rnode, bool isRedo)
|
||||||
{
|
{
|
||||||
char *path;
|
char *path;
|
||||||
|
int ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have to clean out any pending fsync requests for the doomed relation,
|
* We have to clean out any pending fsync requests for the doomed relation,
|
||||||
@ -271,8 +309,15 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
|||||||
|
|
||||||
path = relpath(rnode);
|
path = relpath(rnode);
|
||||||
|
|
||||||
/* Delete the first segment, or only segment if not doing segmenting */
|
/*
|
||||||
if (unlink(path) < 0)
|
* Delete or truncate the first segment, or only segment if not doing
|
||||||
|
* segmenting
|
||||||
|
*/
|
||||||
|
if (isRedo)
|
||||||
|
ret = unlink(path);
|
||||||
|
else
|
||||||
|
ret = truncate(path, 0);
|
||||||
|
if (ret < 0)
|
||||||
{
|
{
|
||||||
if (!isRedo || errno != ENOENT)
|
if (!isRedo || errno != ENOENT)
|
||||||
ereport(WARNING,
|
ereport(WARNING,
|
||||||
@ -316,6 +361,10 @@ mdunlink(RelFileNode rnode, bool isRedo)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
pfree(path);
|
pfree(path);
|
||||||
|
|
||||||
|
/* Register request to unlink first segment later */
|
||||||
|
if (!isRedo)
|
||||||
|
register_unlink(rnode);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1063,6 +1112,91 @@ mdsync(void)
|
|||||||
mdsync_in_progress = false;
|
mdsync_in_progress = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mdpreckpt() -- Do pre-checkpoint work
|
||||||
|
*
|
||||||
|
* To distinguish unlink requests that arrived before this checkpoint
|
||||||
|
* started from those that arrived during the checkpoint, we use a cycle
|
||||||
|
* counter similar to the one we use for fsync requests. That cycle
|
||||||
|
* counter is incremented here.
|
||||||
|
*
|
||||||
|
* This must be called *before* the checkpoint REDO point is determined.
|
||||||
|
* That ensures that we won't delete files too soon.
|
||||||
|
*
|
||||||
|
* Note that we can't do anything here that depends on the assumption
|
||||||
|
* that the checkpoint will be completed.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
mdpreckpt(void)
|
||||||
|
{
|
||||||
|
ListCell *cell;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In case the prior checkpoint wasn't completed, stamp all entries in
|
||||||
|
* the list with the current cycle counter. Anything that's in the
|
||||||
|
* list at the start of checkpoint can surely be deleted after the
|
||||||
|
* checkpoint is finished, regardless of when the request was made.
|
||||||
|
*/
|
||||||
|
foreach(cell, pendingUnlinks)
|
||||||
|
{
|
||||||
|
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell);
|
||||||
|
|
||||||
|
entry->cycle_ctr = mdckpt_cycle_ctr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Any unlink requests arriving after this point will be assigned the
|
||||||
|
* next cycle counter, and won't be unlinked until next checkpoint.
|
||||||
|
*/
|
||||||
|
mdckpt_cycle_ctr++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mdpostckpt() -- Do post-checkpoint work
|
||||||
|
*
|
||||||
|
* Remove any lingering files that can now be safely removed.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
mdpostckpt(void)
|
||||||
|
{
|
||||||
|
while (pendingUnlinks != NIL)
|
||||||
|
{
|
||||||
|
PendingUnlinkEntry *entry = (PendingUnlinkEntry *) linitial(pendingUnlinks);
|
||||||
|
char *path;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* New entries are appended to the end, so if the entry is new
|
||||||
|
* we've reached the end of old entries.
|
||||||
|
*/
|
||||||
|
if (entry->cycle_ctr == mdsync_cycle_ctr)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Else assert we haven't missed it */
|
||||||
|
Assert((CycleCtr) (entry->cycle_ctr + 1) == mdckpt_cycle_ctr);
|
||||||
|
|
||||||
|
/* Unlink the file */
|
||||||
|
path = relpath(entry->rnode);
|
||||||
|
if (unlink(path) < 0)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* ENOENT shouldn't happen either, but it doesn't really matter
|
||||||
|
* because we would've deleted it now anyway.
|
||||||
|
*/
|
||||||
|
if (errno != ENOENT)
|
||||||
|
ereport(WARNING,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not remove relation %u/%u/%u: %m",
|
||||||
|
entry->rnode.spcNode,
|
||||||
|
entry->rnode.dbNode,
|
||||||
|
entry->rnode.relNode)));
|
||||||
|
}
|
||||||
|
pfree(path);
|
||||||
|
|
||||||
|
pendingUnlinks = list_delete_first(pendingUnlinks);
|
||||||
|
pfree(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* register_dirty_segment() -- Mark a relation segment as needing fsync
|
* register_dirty_segment() -- Mark a relation segment as needing fsync
|
||||||
*
|
*
|
||||||
@ -1096,19 +1230,53 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* register_unlink() -- Schedule a file to be deleted after next checkpoint
|
||||||
|
*
|
||||||
|
* As with register_dirty_segment, this could involve either a local or
|
||||||
|
* a remote pending-ops table.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
register_unlink(RelFileNode rnode)
|
||||||
|
{
|
||||||
|
if (pendingOpsTable)
|
||||||
|
{
|
||||||
|
/* push it into local pending-ops table */
|
||||||
|
RememberFsyncRequest(rnode, UNLINK_RELATION_REQUEST);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Notify the bgwriter about it. If we fail to queue the request
|
||||||
|
* message, we have to sleep and try again, because we can't simply
|
||||||
|
* delete the file now. Ugly, but hopefully won't happen often.
|
||||||
|
*
|
||||||
|
* XXX should we just leave the file orphaned instead?
|
||||||
|
*/
|
||||||
|
Assert(IsUnderPostmaster);
|
||||||
|
while (!ForwardFsyncRequest(rnode, UNLINK_RELATION_REQUEST))
|
||||||
|
pg_usleep(10000L); /* 10 msec seems a good number */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* RememberFsyncRequest() -- callback from bgwriter side of fsync request
|
* RememberFsyncRequest() -- callback from bgwriter side of fsync request
|
||||||
*
|
*
|
||||||
* We stuff the fsync request into the local hash table for execution
|
* We stuff most fsync requests into the local hash table for execution
|
||||||
* during the bgwriter's next checkpoint.
|
* during the bgwriter's next checkpoint. UNLINK requests go into a
|
||||||
|
* separate linked list, however, because they get processed separately.
|
||||||
*
|
*
|
||||||
* The range of possible segment numbers is way less than the range of
|
* The range of possible segment numbers is way less than the range of
|
||||||
* BlockNumber, so we can reserve high values of segno for special purposes.
|
* BlockNumber, so we can reserve high values of segno for special purposes.
|
||||||
* We define two: FORGET_RELATION_FSYNC means to cancel pending fsyncs for
|
* We define three:
|
||||||
* a relation, and FORGET_DATABASE_FSYNC means to cancel pending fsyncs for
|
* - FORGET_RELATION_FSYNC means to cancel pending fsyncs for a relation
|
||||||
* a whole database. (These are a tad slow because the hash table has to be
|
* - FORGET_DATABASE_FSYNC means to cancel pending fsyncs for a whole database
|
||||||
* searched linearly, but it doesn't seem worth rethinking the table structure
|
* - UNLINK_RELATION_REQUEST is a request to delete the file after the next
|
||||||
* for them.)
|
* checkpoint.
|
||||||
|
*
|
||||||
|
* (Handling the FORGET_* requests is a tad slow because the hash table has
|
||||||
|
* to be searched linearly, but it doesn't seem worth rethinking the table
|
||||||
|
* structure for them.)
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
||||||
@ -1147,6 +1315,20 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (segno == UNLINK_RELATION_REQUEST)
|
||||||
|
{
|
||||||
|
/* Unlink request: put it in the linked list */
|
||||||
|
MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
|
||||||
|
PendingUnlinkEntry *entry;
|
||||||
|
|
||||||
|
entry = palloc(sizeof(PendingUnlinkEntry));
|
||||||
|
entry->rnode = rnode;
|
||||||
|
entry->cycle_ctr = mdckpt_cycle_ctr;
|
||||||
|
|
||||||
|
pendingUnlinks = lappend(pendingUnlinks, entry);
|
||||||
|
|
||||||
|
MemoryContextSwitchTo(oldcxt);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Normal case: enter a request to fsync this segment */
|
/* Normal case: enter a request to fsync this segment */
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.106 2007/09/05 18:10:48 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.107 2007/11/15 20:36:40 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -55,9 +55,11 @@ typedef struct f_smgr
|
|||||||
void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
|
void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
|
||||||
bool isTemp);
|
bool isTemp);
|
||||||
void (*smgr_immedsync) (SMgrRelation reln);
|
void (*smgr_immedsync) (SMgrRelation reln);
|
||||||
void (*smgr_commit) (void); /* may be NULL */
|
void (*smgr_commit) (void); /* may be NULL */
|
||||||
void (*smgr_abort) (void); /* may be NULL */
|
void (*smgr_abort) (void); /* may be NULL */
|
||||||
void (*smgr_sync) (void); /* may be NULL */
|
void (*smgr_pre_ckpt) (void); /* may be NULL */
|
||||||
|
void (*smgr_sync) (void); /* may be NULL */
|
||||||
|
void (*smgr_post_ckpt) (void); /* may be NULL */
|
||||||
} f_smgr;
|
} f_smgr;
|
||||||
|
|
||||||
|
|
||||||
@ -65,7 +67,7 @@ static const f_smgr smgrsw[] = {
|
|||||||
/* magnetic disk */
|
/* magnetic disk */
|
||||||
{mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
|
{mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
|
||||||
mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
|
mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
|
||||||
NULL, NULL, mdsync
|
NULL, NULL, mdpreckpt, mdsync, mdpostckpt
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -778,7 +780,22 @@ smgrabort(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* smgrsync() -- Sync files to disk at checkpoint time.
|
* smgrpreckpt() -- Prepare for checkpoint.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
smgrpreckpt(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < NSmgr; i++)
|
||||||
|
{
|
||||||
|
if (smgrsw[i].smgr_pre_ckpt)
|
||||||
|
(*(smgrsw[i].smgr_pre_ckpt)) ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* smgrsync() -- Sync files to disk during checkpoint.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
smgrsync(void)
|
smgrsync(void)
|
||||||
@ -792,6 +809,21 @@ smgrsync(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* smgrpostckpt() -- Post-checkpoint cleanup.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
smgrpostckpt(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < NSmgr; i++)
|
||||||
|
{
|
||||||
|
if (smgrsw[i].smgr_post_ckpt)
|
||||||
|
(*(smgrsw[i].smgr_post_ckpt)) ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
|
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.59 2007/09/05 18:10:48 tgl Exp $
|
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.60 2007/11/15 20:36:40 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -83,7 +83,9 @@ extern void AtSubAbort_smgr(void);
|
|||||||
extern void PostPrepare_smgr(void);
|
extern void PostPrepare_smgr(void);
|
||||||
extern void smgrcommit(void);
|
extern void smgrcommit(void);
|
||||||
extern void smgrabort(void);
|
extern void smgrabort(void);
|
||||||
|
extern void smgrpreckpt(void);
|
||||||
extern void smgrsync(void);
|
extern void smgrsync(void);
|
||||||
|
extern void smgrpostckpt(void);
|
||||||
|
|
||||||
extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
|
extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
|
||||||
extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
|
extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
|
||||||
@ -104,7 +106,9 @@ extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
|
|||||||
extern BlockNumber mdnblocks(SMgrRelation reln);
|
extern BlockNumber mdnblocks(SMgrRelation reln);
|
||||||
extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp);
|
extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp);
|
||||||
extern void mdimmedsync(SMgrRelation reln);
|
extern void mdimmedsync(SMgrRelation reln);
|
||||||
|
extern void mdpreckpt(void);
|
||||||
extern void mdsync(void);
|
extern void mdsync(void);
|
||||||
|
extern void mdpostckpt(void);
|
||||||
|
|
||||||
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
|
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
|
||||||
extern void ForgetRelationFsyncRequests(RelFileNode rnode);
|
extern void ForgetRelationFsyncRequests(RelFileNode rnode);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user