Check relkind before using TABLESAMPLE in postgres_fdw

Check the remote relkind before trying to use TABLESAMPLE to acquire
sample from the remote relation. Even if the remote server version has
TABLESAMPLE support, the foreign table may point to incompatible relkind
(e.g. a view or a sequence).

If the relkind does not support TABLESAMPLE, error out if TABLESAMPLE
was requested specifically (as system/bernoulli), or fallback to random
just like we do for old server versions.

We currently end up disabling sampling for such relkind values anyway,
due to reltuples being -1 or 1, but that seems rather accidental, and
might get broken by improving reltuples estimates, etc.  So better to
make the check explicit.

Reported-by: Tom Lane
Discussion: https://postgr.es/m/951485.1672461744%40sss.pgh.pa.us
This commit is contained in:
Tomas Vondra 2023-01-07 14:22:09 +01:00
parent d913928c9c
commit 57d11ef028
3 changed files with 47 additions and 22 deletions

View File

@ -2368,14 +2368,15 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel)
}
/*
* Construct SELECT statement to acquire the number of rows of a relation.
* Construct SELECT statement to acquire the number of rows and the relkind of
* a relation.
*
* Note: we just return the remote server's reltuples value, which might
* be off a good deal, but it doesn't seem worth working harder. See
* comments in postgresAcquireSampleRowsFunc.
*/
void
deparseAnalyzeTuplesSql(StringInfo buf, Relation rel)
deparseAnalyzeInfoSql(StringInfo buf, Relation rel)
{
StringInfoData relname;
@ -2383,7 +2384,7 @@ deparseAnalyzeTuplesSql(StringInfo buf, Relation rel)
initStringInfo(&relname);
deparseRelation(&relname, rel);
appendStringInfoString(buf, "SELECT reltuples FROM pg_catalog.pg_class WHERE oid = ");
appendStringInfoString(buf, "SELECT reltuples, relkind FROM pg_catalog.pg_class WHERE oid = ");
deparseStringLiteral(buf, relname.data);
appendStringInfoString(buf, "::pg_catalog.regclass");
}

View File

@ -4974,11 +4974,14 @@ postgresAnalyzeForeignTable(Relation relation,
}
/*
* postgresCountTuplesForForeignTable
* postgresGetAnalyzeInfoForForeignTable
* Count tuples in foreign table (just get pg_class.reltuples).
*
* can_tablesample determines if the remote relation supports acquiring the
* sample using TABLESAMPLE.
*/
static double
postgresCountTuplesForForeignTable(Relation relation)
postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample)
{
ForeignTable *table;
UserMapping *user;
@ -4986,6 +4989,10 @@ postgresCountTuplesForForeignTable(Relation relation)
StringInfoData sql;
PGresult *volatile res = NULL;
volatile double reltuples = -1;
volatile char relkind = 0;
/* assume the remote relation does not support TABLESAMPLE */
*can_tablesample = false;
/*
* Get the connection to use. We do the remote access as the table's
@ -4999,7 +5006,7 @@ postgresCountTuplesForForeignTable(Relation relation)
* Construct command to get page count for relation.
*/
initStringInfo(&sql);
deparseAnalyzeTuplesSql(&sql, relation);
deparseAnalyzeInfoSql(&sql, relation);
/* In what follows, do not risk leaking any PGresults. */
PG_TRY();
@ -5008,9 +5015,10 @@ postgresCountTuplesForForeignTable(Relation relation)
if (PQresultStatus(res) != PGRES_TUPLES_OK)
pgfdw_report_error(ERROR, res, conn, false, sql.data);
if (PQntuples(res) != 1 || PQnfields(res) != 1)
if (PQntuples(res) != 1 || PQnfields(res) != 2)
elog(ERROR, "unexpected result from deparseAnalyzeTuplesSql query");
reltuples = strtod(PQgetvalue(res, 0, 0), NULL);
relkind = *(PQgetvalue(res, 0, 1));
}
PG_FINALLY();
{
@ -5021,6 +5029,11 @@ postgresCountTuplesForForeignTable(Relation relation)
ReleaseConnection(conn);
/* TABLESAMPLE is supported only for regular tables and matviews */
*can_tablesample = (relkind == RELKIND_RELATION ||
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_PARTITIONED_TABLE);
return reltuples;
}
@ -5147,19 +5160,6 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("remote server does not support TABLESAMPLE feature")));
/*
* For "auto" method, pick the one we believe is best. For servers with
* TABLESAMPLE support we pick BERNOULLI, for old servers we fall-back to
* random() to at least reduce network transfer.
*/
if (method == ANALYZE_SAMPLE_AUTO)
{
if (server_version_num < 95000)
method = ANALYZE_SAMPLE_RANDOM;
else
method = ANALYZE_SAMPLE_BERNOULLI;
}
/*
* If we've decided to do remote sampling, calculate the sampling rate. We
* need to get the number of tuples from the remote server, but skip that
@ -5167,7 +5167,18 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
*/
if (method != ANALYZE_SAMPLE_OFF)
{
reltuples = postgresCountTuplesForForeignTable(relation);
bool can_tablesample;
reltuples = postgresGetAnalyzeInfoForForeignTable(relation,
&can_tablesample);
/*
* Make sure we're not choosing TABLESAMPLE when the remote relation does
* not support that. But only do this for "auto" - if the user explicitly
* requested BERNOULLI/SYSTEM, it's better to fail.
*/
if (!can_tablesample && (method == ANALYZE_SAMPLE_AUTO))
method = ANALYZE_SAMPLE_RANDOM;
/*
* Remote's reltuples could be 0 or -1 if the table has never been
@ -5212,6 +5223,19 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
}
}
/*
* For "auto" method, pick the one we believe is best. For servers with
* TABLESAMPLE support we pick BERNOULLI, for old servers we fall-back to
* random() to at least reduce network transfer.
*/
if (method == ANALYZE_SAMPLE_AUTO)
{
if (server_version_num < 95000)
method = ANALYZE_SAMPLE_RANDOM;
else
method = ANALYZE_SAMPLE_BERNOULLI;
}
/*
* Construct cursor that retrieves whole rows from remote.
*/

View File

@ -223,7 +223,7 @@ extern void deparseDirectDeleteSql(StringInfo buf, PlannerInfo *root,
List *returningList,
List **retrieved_attrs);
extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
extern void deparseAnalyzeTuplesSql(StringInfo buf, Relation rel);
extern void deparseAnalyzeInfoSql(StringInfo buf, Relation rel);
extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
PgFdwSamplingMethod sample_method,
double sample_frac,