
While looking at Robert Foggia's report, I noticed a passel of other issues in the same area: * The scheme for backslash-quoting newlines in pathnames is just wrong; it will misbehave if the last ordinary character in a pathname is a backslash. I'm not sure why we're bothering to allow newlines in tablespace paths, but if we're going to do it we should do it without introducing other problems. Hence, backslashes themselves have to be backslashed too. * The author hadn't read the sscanf man page very carefully, because this code would drop any leading whitespace from the path. (I doubt that a tablespace path with leading whitespace could happen in practice; but if we're bothering to allow newlines in the path, it sure seems like leading whitespace is little less implausible.) Using sscanf for the task of finding the first space is overkill anyway. * While I'm not 100% sure what the rationale for escaping both \r and \n is, if the idea is to allow Windows newlines in the file then this code failed, because it'd throw an error if it saw \r followed by \n. * There's no cross-check for an incomplete final line in the map file, which would be a likely apparent symptom of the improper-escaping bug. On the generation end, aside from the escaping issue we have: * If needtblspcmapfile is true then do_pg_start_backup will pass back escaped strings in tablespaceinfo->path values, which no caller wants or is prepared to deal with. I'm not sure if there's a live bug from that, but it looks like there might be (given the dubious assumption that anyone actually has newlines in their tablespace paths). * It's not being very paranoid about the possibility of random stuff in the pg_tblspc directory. IMO we should ignore anything without an OID-like name. The escaping rule change doesn't seem back-patchable: it'll require doubling of backslashes in the tablespace_map file, which is basically a basebackup format change. The odds of that causing trouble are considerably more than the odds of the existing bug causing trouble. The rest of this seems somewhat unlikely to cause problems too, so no back-patch.
407 lines
14 KiB
C
407 lines
14 KiB
C
/*
|
|
* xlog.h
|
|
*
|
|
* PostgreSQL write-ahead log manager
|
|
*
|
|
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* src/include/access/xlog.h
|
|
*/
|
|
#ifndef XLOG_H
|
|
#define XLOG_H
|
|
|
|
#include "access/rmgr.h"
|
|
#include "access/xlogdefs.h"
|
|
#include "access/xloginsert.h"
|
|
#include "access/xlogreader.h"
|
|
#include "datatype/timestamp.h"
|
|
#include "lib/stringinfo.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "storage/fd.h"
|
|
|
|
|
|
/* Sync methods */
|
|
#define SYNC_METHOD_FSYNC 0
|
|
#define SYNC_METHOD_FDATASYNC 1
|
|
#define SYNC_METHOD_OPEN 2 /* for O_SYNC */
|
|
#define SYNC_METHOD_FSYNC_WRITETHROUGH 3
|
|
#define SYNC_METHOD_OPEN_DSYNC 4 /* for O_DSYNC */
|
|
extern int sync_method;
|
|
|
|
extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current TLI */
|
|
|
|
/*
|
|
* Prior to 8.4, all activity during recovery was carried out by the startup
|
|
* process. This local variable continues to be used in many parts of the
|
|
* code to indicate actions taken by RecoveryManagers. Other processes that
|
|
* potentially perform work during recovery should check RecoveryInProgress().
|
|
* See XLogCtl notes in xlog.c.
|
|
*/
|
|
extern bool InRecovery;
|
|
|
|
/*
|
|
* Like InRecovery, standbyState is only valid in the startup process.
|
|
* In all other processes it will have the value STANDBY_DISABLED (so
|
|
* InHotStandby will read as false).
|
|
*
|
|
* In DISABLED state, we're performing crash recovery or hot standby was
|
|
* disabled in postgresql.conf.
|
|
*
|
|
* In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but
|
|
* we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record
|
|
* to initialize our primary-transaction tracking system.
|
|
*
|
|
* When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING
|
|
* state. The tracked information might still be incomplete, so we can't allow
|
|
* connections yet, but redo functions must update the in-memory state when
|
|
* appropriate.
|
|
*
|
|
* In SNAPSHOT_READY mode, we have full knowledge of transactions that are
|
|
* (or were) running on the primary at the current WAL location. Snapshots
|
|
* can be taken, and read-only queries can be run.
|
|
*/
|
|
typedef enum
|
|
{
|
|
STANDBY_DISABLED,
|
|
STANDBY_INITIALIZED,
|
|
STANDBY_SNAPSHOT_PENDING,
|
|
STANDBY_SNAPSHOT_READY
|
|
} HotStandbyState;
|
|
|
|
extern HotStandbyState standbyState;
|
|
|
|
#define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING)
|
|
|
|
/*
|
|
* Recovery target type.
|
|
* Only set during a Point in Time recovery, not when in standby mode.
|
|
*/
|
|
typedef enum
|
|
{
|
|
RECOVERY_TARGET_UNSET,
|
|
RECOVERY_TARGET_XID,
|
|
RECOVERY_TARGET_TIME,
|
|
RECOVERY_TARGET_NAME,
|
|
RECOVERY_TARGET_LSN,
|
|
RECOVERY_TARGET_IMMEDIATE
|
|
} RecoveryTargetType;
|
|
|
|
/*
|
|
* Recovery target TimeLine goal
|
|
*/
|
|
typedef enum
|
|
{
|
|
RECOVERY_TARGET_TIMELINE_CONTROLFILE,
|
|
RECOVERY_TARGET_TIMELINE_LATEST,
|
|
RECOVERY_TARGET_TIMELINE_NUMERIC
|
|
} RecoveryTargetTimeLineGoal;
|
|
|
|
extern XLogRecPtr ProcLastRecPtr;
|
|
extern XLogRecPtr XactLastRecEnd;
|
|
extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
|
|
|
|
extern bool reachedConsistency;
|
|
|
|
/* these variables are GUC parameters related to XLOG */
|
|
extern int wal_segment_size;
|
|
extern int min_wal_size_mb;
|
|
extern int max_wal_size_mb;
|
|
extern int wal_keep_size_mb;
|
|
extern int max_slot_wal_keep_size_mb;
|
|
extern int XLOGbuffers;
|
|
extern int XLogArchiveTimeout;
|
|
extern int wal_retrieve_retry_interval;
|
|
extern char *XLogArchiveCommand;
|
|
extern bool EnableHotStandby;
|
|
extern bool fullPageWrites;
|
|
extern bool wal_log_hints;
|
|
extern bool wal_compression;
|
|
extern bool wal_init_zero;
|
|
extern bool wal_recycle;
|
|
extern bool *wal_consistency_checking;
|
|
extern char *wal_consistency_checking_string;
|
|
extern bool log_checkpoints;
|
|
extern char *recoveryRestoreCommand;
|
|
extern char *recoveryEndCommand;
|
|
extern char *archiveCleanupCommand;
|
|
extern bool recoveryTargetInclusive;
|
|
extern int recoveryTargetAction;
|
|
extern int recovery_min_apply_delay;
|
|
extern char *PrimaryConnInfo;
|
|
extern char *PrimarySlotName;
|
|
extern bool wal_receiver_create_temp_slot;
|
|
extern bool track_wal_io_timing;
|
|
|
|
/* indirectly set via GUC system */
|
|
extern TransactionId recoveryTargetXid;
|
|
extern char *recovery_target_time_string;
|
|
extern const char *recoveryTargetName;
|
|
extern XLogRecPtr recoveryTargetLSN;
|
|
extern RecoveryTargetType recoveryTarget;
|
|
extern char *PromoteTriggerFile;
|
|
extern RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal;
|
|
extern TimeLineID recoveryTargetTLIRequested;
|
|
extern TimeLineID recoveryTargetTLI;
|
|
|
|
extern int CheckPointSegments;
|
|
|
|
/* option set locally in startup process only when signal files exist */
|
|
extern bool StandbyModeRequested;
|
|
extern bool StandbyMode;
|
|
|
|
/* Archive modes */
|
|
typedef enum ArchiveMode
|
|
{
|
|
ARCHIVE_MODE_OFF = 0, /* disabled */
|
|
ARCHIVE_MODE_ON, /* enabled while server is running normally */
|
|
ARCHIVE_MODE_ALWAYS /* enabled always (even during recovery) */
|
|
} ArchiveMode;
|
|
extern int XLogArchiveMode;
|
|
|
|
/* WAL levels */
|
|
typedef enum WalLevel
|
|
{
|
|
WAL_LEVEL_MINIMAL = 0,
|
|
WAL_LEVEL_REPLICA,
|
|
WAL_LEVEL_LOGICAL
|
|
} WalLevel;
|
|
|
|
/* Recovery states */
|
|
typedef enum RecoveryState
|
|
{
|
|
RECOVERY_STATE_CRASH = 0, /* crash recovery */
|
|
RECOVERY_STATE_ARCHIVE, /* archive recovery */
|
|
RECOVERY_STATE_DONE /* currently in production */
|
|
} RecoveryState;
|
|
|
|
/* Recovery pause states */
|
|
typedef enum RecoveryPauseState
|
|
{
|
|
RECOVERY_NOT_PAUSED, /* pause not requested */
|
|
RECOVERY_PAUSE_REQUESTED, /* pause requested, but not yet paused */
|
|
RECOVERY_PAUSED /* recovery is paused */
|
|
} RecoveryPauseState;
|
|
|
|
extern PGDLLIMPORT int wal_level;
|
|
|
|
/* Is WAL archiving enabled (always or only while server is running normally)? */
|
|
#define XLogArchivingActive() \
|
|
(AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode > ARCHIVE_MODE_OFF)
|
|
/* Is WAL archiving enabled always (even during recovery)? */
|
|
#define XLogArchivingAlways() \
|
|
(AssertMacro(XLogArchiveMode == ARCHIVE_MODE_OFF || wal_level >= WAL_LEVEL_REPLICA), XLogArchiveMode == ARCHIVE_MODE_ALWAYS)
|
|
#define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0')
|
|
|
|
/*
|
|
* Is WAL-logging necessary for archival or log-shipping, or can we skip
|
|
* WAL-logging if we fsync() the data before committing instead?
|
|
*/
|
|
#define XLogIsNeeded() (wal_level >= WAL_LEVEL_REPLICA)
|
|
|
|
/*
|
|
* Is a full-page image needed for hint bit updates?
|
|
*
|
|
* Normally, we don't WAL-log hint bit updates, but if checksums are enabled,
|
|
* we have to protect them against torn page writes. When you only set
|
|
* individual bits on a page, it's still consistent no matter what combination
|
|
* of the bits make it to disk, but the checksum wouldn't match. Also WAL-log
|
|
* them if forced by wal_log_hints=on.
|
|
*/
|
|
#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints)
|
|
|
|
/* Do we need to WAL-log information required only for Hot Standby and logical replication? */
|
|
#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA)
|
|
|
|
/* Do we need to WAL-log information required only for logical replication? */
|
|
#define XLogLogicalInfoActive() (wal_level >= WAL_LEVEL_LOGICAL)
|
|
|
|
#ifdef WAL_DEBUG
|
|
extern bool XLOG_DEBUG;
|
|
#endif
|
|
|
|
/*
|
|
* OR-able request flag bits for checkpoints. The "cause" bits are used only
|
|
* for logging purposes. Note: the flags must be defined so that it's
|
|
* sensible to OR together request flags arising from different requestors.
|
|
*/
|
|
|
|
/* These directly affect the behavior of CreateCheckPoint and subsidiaries */
|
|
#define CHECKPOINT_IS_SHUTDOWN 0x0001 /* Checkpoint is for shutdown */
|
|
#define CHECKPOINT_END_OF_RECOVERY 0x0002 /* Like shutdown checkpoint, but
|
|
* issued at end of WAL recovery */
|
|
#define CHECKPOINT_IMMEDIATE 0x0004 /* Do it without delays */
|
|
#define CHECKPOINT_FORCE 0x0008 /* Force even if no activity */
|
|
#define CHECKPOINT_FLUSH_ALL 0x0010 /* Flush all pages, including those
|
|
* belonging to unlogged tables */
|
|
/* These are important to RequestCheckpoint */
|
|
#define CHECKPOINT_WAIT 0x0020 /* Wait for completion */
|
|
#define CHECKPOINT_REQUESTED 0x0040 /* Checkpoint request has been made */
|
|
/* These indicate the cause of a checkpoint request */
|
|
#define CHECKPOINT_CAUSE_XLOG 0x0080 /* XLOG consumption */
|
|
#define CHECKPOINT_CAUSE_TIME 0x0100 /* Elapsed time */
|
|
|
|
/*
|
|
* Flag bits for the record being inserted, set using XLogSetRecordFlags().
|
|
*/
|
|
#define XLOG_INCLUDE_ORIGIN 0x01 /* include the replication origin */
|
|
#define XLOG_MARK_UNIMPORTANT 0x02 /* record not important for durability */
|
|
#define XLOG_INCLUDE_XID 0x04 /* include XID of top-level xact */
|
|
|
|
|
|
/* Checkpoint statistics */
|
|
typedef struct CheckpointStatsData
|
|
{
|
|
TimestampTz ckpt_start_t; /* start of checkpoint */
|
|
TimestampTz ckpt_write_t; /* start of flushing buffers */
|
|
TimestampTz ckpt_sync_t; /* start of fsyncs */
|
|
TimestampTz ckpt_sync_end_t; /* end of fsyncs */
|
|
TimestampTz ckpt_end_t; /* end of checkpoint */
|
|
|
|
int ckpt_bufs_written; /* # of buffers written */
|
|
|
|
int ckpt_segs_added; /* # of new xlog segments created */
|
|
int ckpt_segs_removed; /* # of xlog segments deleted */
|
|
int ckpt_segs_recycled; /* # of xlog segments recycled */
|
|
|
|
int ckpt_sync_rels; /* # of relations synced */
|
|
uint64 ckpt_longest_sync; /* Longest sync for one relation */
|
|
uint64 ckpt_agg_sync_time; /* The sum of all the individual sync
|
|
* times, which is not necessarily the
|
|
* same as the total elapsed time for the
|
|
* entire sync phase. */
|
|
} CheckpointStatsData;
|
|
|
|
extern CheckpointStatsData CheckpointStats;
|
|
|
|
/*
|
|
* GetWALAvailability return codes
|
|
*/
|
|
typedef enum WALAvailability
|
|
{
|
|
WALAVAIL_INVALID_LSN, /* parameter error */
|
|
WALAVAIL_RESERVED, /* WAL segment is within max_wal_size */
|
|
WALAVAIL_EXTENDED, /* WAL segment is reserved by a slot or
|
|
* wal_keep_size */
|
|
WALAVAIL_UNRESERVED, /* no longer reserved, but not removed yet */
|
|
WALAVAIL_REMOVED /* WAL segment has been removed */
|
|
} WALAvailability;
|
|
|
|
struct XLogRecData;
|
|
|
|
extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
|
|
XLogRecPtr fpw_lsn,
|
|
uint8 flags,
|
|
int num_fpi);
|
|
extern void XLogFlush(XLogRecPtr RecPtr);
|
|
extern bool XLogBackgroundFlush(void);
|
|
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
|
|
extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
|
|
extern int XLogFileOpen(XLogSegNo segno);
|
|
|
|
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
|
|
extern XLogSegNo XLogGetLastRemovedSegno(void);
|
|
extern void XLogSetAsyncXactLSN(XLogRecPtr record);
|
|
extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn);
|
|
|
|
extern void xlog_redo(XLogReaderState *record);
|
|
extern void xlog_desc(StringInfo buf, XLogReaderState *record);
|
|
extern const char *xlog_identify(uint8 info);
|
|
|
|
extern void issue_xlog_fsync(int fd, XLogSegNo segno);
|
|
|
|
extern bool RecoveryInProgress(void);
|
|
extern RecoveryState GetRecoveryState(void);
|
|
extern bool HotStandbyActive(void);
|
|
extern bool HotStandbyActiveInReplay(void);
|
|
extern bool XLogInsertAllowed(void);
|
|
extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
|
|
extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
|
|
extern XLogRecPtr GetXLogInsertRecPtr(void);
|
|
extern XLogRecPtr GetXLogWriteRecPtr(void);
|
|
extern RecoveryPauseState GetRecoveryPauseState(void);
|
|
extern void SetRecoveryPause(bool recoveryPause);
|
|
extern TimestampTz GetLatestXTime(void);
|
|
extern TimestampTz GetCurrentChunkReplayStartTime(void);
|
|
|
|
extern void UpdateControlFile(void);
|
|
extern uint64 GetSystemIdentifier(void);
|
|
extern char *GetMockAuthenticationNonce(void);
|
|
extern bool DataChecksumsEnabled(void);
|
|
extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
|
|
extern Size XLOGShmemSize(void);
|
|
extern void XLOGShmemInit(void);
|
|
extern void BootStrapXLOG(void);
|
|
extern void LocalProcessControlFile(bool reset);
|
|
extern void StartupXLOG(void);
|
|
extern void ShutdownXLOG(int code, Datum arg);
|
|
extern void InitXLOGAccess(void);
|
|
extern void CreateCheckPoint(int flags);
|
|
extern bool CreateRestartPoint(int flags);
|
|
extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN);
|
|
extern XLogRecPtr CalculateMaxmumSafeLSN(void);
|
|
extern void XLogPutNextOid(Oid nextOid);
|
|
extern XLogRecPtr XLogRestorePoint(const char *rpName);
|
|
extern void UpdateFullPageWrites(void);
|
|
extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);
|
|
extern XLogRecPtr GetRedoRecPtr(void);
|
|
extern XLogRecPtr GetInsertRecPtr(void);
|
|
extern XLogRecPtr GetFlushRecPtr(void);
|
|
extern XLogRecPtr GetLastImportantRecPtr(void);
|
|
extern void RemovePromoteSignalFiles(void);
|
|
|
|
extern bool PromoteIsTriggered(void);
|
|
extern bool CheckPromoteSignal(void);
|
|
extern void WakeupRecovery(void);
|
|
extern void SetWalWriterSleeping(bool sleeping);
|
|
|
|
extern void StartupRequestWalReceiverRestart(void);
|
|
extern void XLogRequestWalReceiverReply(void);
|
|
|
|
extern void assign_max_wal_size(int newval, void *extra);
|
|
extern void assign_checkpoint_completion_target(double newval, void *extra);
|
|
|
|
/*
|
|
* Routines to start, stop, and get status of a base backup.
|
|
*/
|
|
|
|
/*
|
|
* Session-level status of base backups
|
|
*
|
|
* This is used in parallel with the shared memory status to control parallel
|
|
* execution of base backup functions for a given session, be it a backend
|
|
* dedicated to replication or a normal backend connected to a database. The
|
|
* update of the session-level status happens at the same time as the shared
|
|
* memory counters to keep a consistent global and local state of the backups
|
|
* running.
|
|
*/
|
|
typedef enum SessionBackupState
|
|
{
|
|
SESSION_BACKUP_NONE,
|
|
SESSION_BACKUP_EXCLUSIVE,
|
|
SESSION_BACKUP_NON_EXCLUSIVE
|
|
} SessionBackupState;
|
|
|
|
extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast,
|
|
TimeLineID *starttli_p, StringInfo labelfile,
|
|
List **tablespaces, StringInfo tblspcmapfile);
|
|
extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive,
|
|
TimeLineID *stoptli_p);
|
|
extern void do_pg_abort_backup(int code, Datum arg);
|
|
extern void register_persistent_abort_backup_handler(void);
|
|
extern SessionBackupState get_backup_status(void);
|
|
|
|
/* File path names (all relative to $PGDATA) */
|
|
#define RECOVERY_SIGNAL_FILE "recovery.signal"
|
|
#define STANDBY_SIGNAL_FILE "standby.signal"
|
|
#define BACKUP_LABEL_FILE "backup_label"
|
|
#define BACKUP_LABEL_OLD "backup_label.old"
|
|
|
|
#define TABLESPACE_MAP "tablespace_map"
|
|
#define TABLESPACE_MAP_OLD "tablespace_map.old"
|
|
|
|
/* files to signal promotion to primary */
|
|
#define PROMOTE_SIGNAL_FILE "promote"
|
|
|
|
#endif /* XLOG_H */
|