Improve performance of dumpSequenceData().

As one might guess, this function dumps the sequence data.  It is
called once per sequence, and each such call executes a query to
retrieve the relevant data for a single sequence.  This can cause
pg_dump to take significantly longer, especially when there are
many sequences.

This commit improves the performance of this function by gathering
all the sequence data with a single query at the beginning of
pg_dump.  This information is stored in a sorted array that
dumpSequenceData() can bsearch() for what it needs.  This follows an
approach similar to that of previous commits that introduced sorted
arrays for role information, pg_class information, and sequence metadata.
As with those commits, this patch will cause pg_dump to use more
memory, but that isn't expected to be too egregious.

Note that we use the brand new function pg_sequence_read_tuple() in
the query that gathers all sequence data, so we must continue to
use the preexisting query-per-sequence approach when dumping from
servers older than version 18.

Reviewed-by: Euler Taveira, Michael Paquier, Tom Lane
Discussion: https://postgr.es/m/20240503025140.GA1227404%40nathanxps13
This commit is contained in:
Nathan Bossart 2024-07-31 10:12:42 -05:00
parent c8b06bb969
commit bd15b7db48
1 changed file with 64 additions and 19 deletions

View File

@ -132,6 +132,8 @@ typedef struct
int64 startv; /* start value */
int64 incby; /* increment value */
int64 cache; /* cache size */
int64 last_value; /* last value of sequence */
bool is_called; /* whether nextval advances before returning */
} SequenceItem;
typedef enum OidOptions
@ -17330,16 +17332,30 @@ collectSequences(Archive *fout)
* Before Postgres 10, sequence metadata is in the sequence itself. With
* some extra effort, we might be able to use the sorted table for those
* versions, but for now it seems unlikely to be worth it.
*
* Since version 18, we can gather the sequence data in this query with
* pg_sequence_read_tuple(), but we only do so for non-schema-only dumps.
*/
if (fout->remoteVersion < 100000)
return;
else if (fout->remoteVersion < 180000 ||
(fout->dopt->schemaOnly && !fout->dopt->sequence_data))
query = "SELECT seqrelid, format_type(seqtypid, NULL), "
"seqstart, seqincrement, "
"seqmax, seqmin, "
"seqcache, seqcycle, "
"NULL, 'f' "
"FROM pg_catalog.pg_sequence "
"ORDER BY seqrelid";
else
query = "SELECT seqrelid, format_type(seqtypid, NULL), "
"seqstart, seqincrement, "
"seqmax, seqmin, "
"seqcache, seqcycle "
"FROM pg_catalog.pg_sequence "
"ORDER BY seqrelid";
"seqcache, seqcycle, "
"last_value, is_called "
"FROM pg_catalog.pg_sequence, "
"pg_sequence_read_tuple(seqrelid) "
"ORDER BY seqrelid;";
res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
@ -17356,6 +17372,8 @@ collectSequences(Archive *fout)
sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10);
sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10);
sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0);
sequences[i].last_value = strtoi64(PQgetvalue(res, i, 8), NULL, 10);
sequences[i].is_called = (strcmp(PQgetvalue(res, i, 9), "t") == 0);
}
PQclear(res);
@ -17622,11 +17640,22 @@ static void
dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
{
TableInfo *tbinfo = tdinfo->tdtable;
PGresult *res;
char *last;
int64 last;
bool called;
PQExpBuffer query = createPQExpBuffer();
/*
* For versions >= 18, the sequence information is gathered in the sorted
* array before any calls to dumpSequenceData(). See collectSequences()
* for more information.
*
* For older versions, we have to query the sequence relations
* individually.
*/
if (fout->remoteVersion < 180000)
{
PGresult *res;
appendPQExpBuffer(query,
"SELECT last_value, is_called FROM %s",
fmtQualifiedDumpable(tbinfo));
@ -17639,13 +17668,31 @@ dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
PQntuples(res)),
tbinfo->dobj.name, PQntuples(res));
last = PQgetvalue(res, 0, 0);
last = strtoi64(PQgetvalue(res, 0, 0), NULL, 10);
called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
PQclear(res);
}
else
{
SequenceItem key = {0};
SequenceItem *entry;
Assert(sequences);
Assert(tbinfo->dobj.catId.oid);
key.oid = tbinfo->dobj.catId.oid;
entry = bsearch(&key, sequences, nsequences,
sizeof(SequenceItem), SequenceItemCmp);
last = entry->last_value;
called = entry->is_called;
}
resetPQExpBuffer(query);
appendPQExpBufferStr(query, "SELECT pg_catalog.setval(");
appendStringLiteralAH(query, fmtQualifiedDumpable(tbinfo), fout);
appendPQExpBuffer(query, ", %s, %s);\n",
appendPQExpBuffer(query, ", " INT64_FORMAT ", %s);\n",
last, (called ? "true" : "false"));
if (tdinfo->dobj.dump & DUMP_COMPONENT_DATA)
@ -17659,8 +17706,6 @@ dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
.deps = &(tbinfo->dobj.dumpId),
.nDeps = 1));
PQclear(res);
destroyPQExpBuffer(query);
}