From 64f34eb2e2ce4bca7351d8c88a6999aeed000c4a Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 8 Jul 2024 16:18:00 -0500 Subject: [PATCH] Use CREATE DATABASE ... STRATEGY = FILE_COPY in pg_upgrade. While this strategy is ordinarily quite costly because it requires performing two checkpoints, testing shows that it tends to be a faster choice than WAL_LOG during pg_upgrade, presumably because fsync is turned off. Furthermore, we can skip the checkpoints altogether because the problems they are intended to prevent don't apply to pg_upgrade. Instead, we just need to CHECKPOINT once in the new cluster after making any changes to template0 and before restoring the rest of the databases. This ensures that said template0 changes are written out to disk prior to creating the databases via FILE_COPY. Co-authored-by: Matthias van de Meent Reviewed-by: Ranier Vilela, Dilip Kumar, Robert Haas, Michael Paquier Discussion: https://postgr.es/m/Zl9ta3FtgdjizkJ5%40nathan --- src/backend/commands/dbcommands.c | 18 +++++++++++++++--- src/bin/pg_dump/pg_dump.c | 8 +++++++- src/bin/pg_upgrade/pg_upgrade.c | 12 ++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index be629ea92c..7026352bc9 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -563,9 +563,14 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, * happened while we're copying files, a file might be deleted just when * we're about to copy it, causing the lstat() call in copydir() to fail * with ENOENT. + * + * In binary upgrade mode, we can skip this checkpoint because pg_upgrade + * is careful to ensure that template0 is fully written to disk prior to + * any CREATE DATABASE commands. */ - RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | - CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL); + if (!IsBinaryUpgrade) + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | + CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL); /* * Iterate through all tablespaces of the template database, and copy each @@ -657,10 +662,17 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid, * seem to be much we can do about that except document it as a * limitation. * + * In binary upgrade mode, we can skip this checkpoint because neither of + * these problems applies: we don't ever replay the WAL generated during + * pg_upgrade, and we don't support taking base backups during pg_upgrade + * (not to mention that we don't concurrently modify template0, either). + * * See CreateDatabaseUsingWalLog() for a less cheesy CREATE DATABASE * strategy that avoids these problems. */ - RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); + if (!IsBinaryUpgrade) + RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | + CHECKPOINT_WAIT); } /* diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 5426f1177c..b8b1888bd3 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3144,10 +3144,16 @@ dumpDatabase(Archive *fout) * since those can't be altered later. Other DB properties are left to * the DATABASE PROPERTIES entry, so that they can be applied after * reconnecting to the target DB. + * + * For binary upgrade, we use the FILE_COPY strategy because testing has + * shown it to be faster. When the server is in binary upgrade mode, it + * will also skip the checkpoints this strategy ordinarily performs. */ if (dopt->binary_upgrade) { - appendPQExpBuffer(creaQry, "CREATE DATABASE %s WITH TEMPLATE = template0 OID = %u", + appendPQExpBuffer(creaQry, + "CREATE DATABASE %s WITH TEMPLATE = template0 " + "OID = %u STRATEGY = FILE_COPY", qdatname, dbCatId.oid); } else diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index af370768b6..03eb738fd7 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -534,9 +534,21 @@ static void create_new_objects(void) { int dbnum; + PGconn *conn_new_template1; prep_status_progress("Restoring database schemas in the new cluster"); + /* + * Ensure that any changes to template0 are fully written out to disk + * prior to restoring the databases. This is necessary because we use the + * FILE_COPY strategy to create the databases (which testing has shown to + * be faster), and when the server is in binary upgrade mode, it skips the + * checkpoints this strategy ordinarily performs. + */ + conn_new_template1 = connectToServer(&new_cluster, "template1"); + PQclear(executeQueryOrDie(conn_new_template1, "CHECKPOINT")); + PQfinish(conn_new_template1); + /* * We cannot process the template1 database concurrently with others, * because when it's transiently dropped, connection attempts would fail.