From 507069de6dbe18c2163f27fbc780673eef8c5622 Mon Sep 17 00:00:00 2001 From: Magnus Hagander Date: Sun, 30 Jan 2011 21:30:09 +0100 Subject: [PATCH] Add option to include WAL in base backup When included, this makes the base backup a complete working "clone" of the initial database, ready to have a postmaster started against it without the need to set up any log archiving or similar. Magnus Hagander, reviewed by Fujii Masao and Heikki Linnakangas --- doc/src/sgml/protocol.sgml | 19 +++- doc/src/sgml/ref/pg_basebackup.sgml | 32 +++++- src/backend/replication/basebackup.c | 149 ++++++++++++++++++++----- src/backend/replication/repl_gram.y | 10 +- src/backend/replication/repl_scanner.l | 1 + src/backend/replication/walsender.c | 8 +- src/bin/pg_basebackup/pg_basebackup.c | 21 +++- src/include/replication/walsender.h | 1 + 8 files changed, 197 insertions(+), 44 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 73f26b432d..0775b7a8f4 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1460,7 +1460,7 @@ The commands accepted in walsender mode are: - BASE_BACKUP [LABEL 'label'] [PROGRESS] [FAST] + BASE_BACKUP [LABEL 'label'] [PROGRESS] [FAST] [WAL] Instructs the server to start streaming a base backup. @@ -1505,6 +1505,18 @@ The commands accepted in walsender mode are: + + + WAL + + + Include the necessary WAL segments in the backup. This will include + all the files between start and stop backup in the + pg_xlog directory of the base directory tar + file. + + + @@ -1561,7 +1573,10 @@ The commands accepted in walsender mode are: - pg_xlog (including subdirectories) + pg_xlog, including subdirectories. If the backup is run + with WAL files included, a synthesized version of pg_xlog will be + included, but it will only contain the files necessary for the + backup to work, not the rest of the contents. 
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 321c8cade1..f4f78fbbfc 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -144,6 +144,31 @@ PostgreSQL documentation + + + + + + Includes the required transaction log files (WAL files) in the + backup. This will include all transaction logs generated during + the backup. If this option is specified, it is possible to start + a postmaster directly in the extracted directory without the need + to consult the log archive, thus making this a completely standalone + backup. + + + + The transaction log files are collected at the end of the backup. + Therefore, it is necessary for the wal_keep_segments + parameter to be set high + enough that the log is not removed before the end of the backup. + If the log has been rotated when it's time to transfer it, the + backup will fail and be unusable. + + + + + @@ -164,7 +189,7 @@ PostgreSQL documentation - + Sets checkpoint mode to fast or spread (default). @@ -191,7 +216,10 @@ PostgreSQL documentation Enables progress reporting. Turning this on will deliver an approximate progress report during the backup. Since the database may change during the backup, this is only an approximation and may not end at exactly - 100%. + 100%. In particular, when WAL is included in the + backup, the total amount of data cannot be estimated in advance, and + in this case the progress report will only count towards the total + amount of data without WAL. 
When this is enabled, the backup will start by enumerating the size of diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 251ed8e083..d0248f6dcc 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -37,6 +37,7 @@ typedef struct const char *label; bool progress; bool fastcheckpoint; + bool includewal; } basebackup_options; @@ -46,11 +47,17 @@ static void _tarWriteHeader(char *filename, char *linktarget, struct stat * statbuf); static void send_int8_string(StringInfoData *buf, int64 intval); static void SendBackupHeader(List *tablespaces); -static void SendBackupDirectory(char *location, char *spcoid); static void base_backup_cleanup(int code, Datum arg); static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir); static void parse_basebackup_options(List *options, basebackup_options *opt); +/* + * Size of each block sent into the tar stream for larger files. + * + * XLogSegSize *MUST* be evenly dividable by this + */ +#define TAR_SEND_SIZE 32768 + typedef struct { char *oid; @@ -78,7 +85,10 @@ base_backup_cleanup(int code, Datum arg) static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir) { - do_pg_start_backup(opt->label, opt->fastcheckpoint); + XLogRecPtr startptr; + XLogRecPtr endptr; + + startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint); PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); { @@ -87,12 +97,6 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) struct dirent *de; tablespaceinfo *ti; - - /* Add a node for the base directory */ - ti = palloc0(sizeof(tablespaceinfo)); - ti->size = opt->progress ? 
sendDir(".", 1, true) : -1; - tablespaces = lappend(tablespaces, ti); - /* Collect information about all tablespaces */ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) { @@ -120,6 +124,10 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) tablespaces = lappend(tablespaces, ti); } + /* Add a node for the base directory at the end */ + ti = palloc0(sizeof(tablespaceinfo)); + ti->size = opt->progress ? sendDir(".", 1, true) : -1; + tablespaces = lappend(tablespaces, ti); /* Send tablespace header */ SendBackupHeader(tablespaces); @@ -128,13 +136,102 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) foreach(lc, tablespaces) { tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc); + StringInfoData buf; - SendBackupDirectory(ti->path, ti->oid); + /* Send CopyOutResponse message */ + pq_beginmessage(&buf, 'H'); + pq_sendbyte(&buf, 0); /* overall format */ + pq_sendint(&buf, 0, 2); /* natts */ + pq_endmessage(&buf); + + sendDir(ti->path == NULL ? "." : ti->path, + ti->path == NULL ? 1 : strlen(ti->path), + false); + + /* + * If we're including WAL, and this is the main data directory we + * don't terminate the tar stream here. Instead, we will append + * the xlog files below and terminate it then. This is safe since + * the main data directory is always sent *last*. + */ + if (opt->includewal && ti->path == NULL) + { + Assert(lnext(lc) == NULL); + } + else + pq_putemptymessage('c'); /* CopyDone */ } } PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); - do_pg_stop_backup(); + endptr = do_pg_stop_backup(); + + if (opt->includewal) + { + /* + * We've left the last tar file "open", so we can now append the + * required WAL files to it. 
+ */ + uint32 logid, + logseg; + uint32 endlogid, + endlogseg; + struct stat statbuf; + + MemSet(&statbuf, 0, sizeof(statbuf)); + statbuf.st_mode = S_IRUSR | S_IWUSR; +#ifndef WIN32 + statbuf.st_uid = geteuid(); + statbuf.st_gid = getegid(); +#endif + statbuf.st_size = XLogSegSize; + statbuf.st_mtime = time(NULL); + + XLByteToSeg(startptr, logid, logseg); + XLByteToPrevSeg(endptr, endlogid, endlogseg); + + while (true) + { + /* Send another xlog segment */ + char fn[MAXPGPATH]; + int i; + + XLogFilePath(fn, ThisTimeLineID, logid, logseg); + _tarWriteHeader(fn, NULL, &statbuf); + + /* Send the actual WAL file contents, block-by-block */ + for (i = 0; i < XLogSegSize / TAR_SEND_SIZE; i++) + { + char buf[TAR_SEND_SIZE]; + XLogRecPtr ptr; + + ptr.xlogid = logid; + ptr.xrecoff = logseg * XLogSegSize + TAR_SEND_SIZE * i; + + XLogRead(buf, ptr, TAR_SEND_SIZE); + if (pq_putmessage('d', buf, TAR_SEND_SIZE)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + } + + /* + * Files are always fixed size, and always end on a 512 byte + * boundary, so padding is never necessary. + */ + + + /* Advance to the next WAL file */ + NextLogSeg(logid, logseg); + + /* Have we reached our stop position yet? 
*/ + if (logid > endlogid || + (logid == endlogid && logseg > endlogseg)) + break; + } + + /* Send CopyDone message for the last tar file */ + pq_putemptymessage('c'); + } } /* @@ -147,6 +244,7 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_label = false; bool o_progress = false; bool o_fast = false; + bool o_wal = false; MemSet(opt, 0, sizeof(*opt)); foreach(lopt, options) @@ -180,6 +278,15 @@ parse_basebackup_options(List *options, basebackup_options *opt) opt->fastcheckpoint = true; o_fast = true; } + else if (strcmp(defel->defname, "wal") == 0) + { + if (o_wal) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + opt->includewal = true; + o_wal = true; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); @@ -316,26 +423,6 @@ SendBackupHeader(List *tablespaces) pq_puttextmessage('C', "SELECT"); } -static void -SendBackupDirectory(char *location, char *spcoid) -{ - StringInfoData buf; - - /* Send CopyOutResponse message */ - pq_beginmessage(&buf, 'H'); - pq_sendbyte(&buf, 0); /* overall format */ - pq_sendint(&buf, 0, 2); /* natts */ - pq_endmessage(&buf); - - /* tar up the data directory if NULL, otherwise the tablespace */ - sendDir(location == NULL ? "." : location, - location == NULL ? 
1 : strlen(location), - false); - - /* Send CopyDone message */ - pq_putemptymessage('c'); -} - static int64 sendDir(char *path, int basepathlen, bool sizeonly) @@ -506,7 +593,7 @@ static void sendFile(char *filename, int basepathlen, struct stat * statbuf) { FILE *fp; - char buf[32768]; + char buf[TAR_SEND_SIZE]; size_t cnt; pgoff_t len = 0; size_t pad; diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index 879a0bd7db..e1a4a51a0e 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -71,6 +71,7 @@ Node *replication_parse_result; %token K_LABEL %token K_PROGRESS %token K_FAST +%token K_WAL %token K_START_REPLICATION %type command @@ -106,7 +107,7 @@ identify_system: ; /* - * BASE_BACKUP [LABEL