postgres/contrib/postgres_fdw/connection.c

2227 lines
67 KiB
C

/*-------------------------------------------------------------------------
*
* connection.c
* Connection management functions for postgres_fdw
*
* Portions Copyright (c) 2012-2023, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/postgres_fdw/connection.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/pg_user_mapping.h"
#include "commands/defrem.h"
#include "funcapi.h"
#include "libpq/libpq-be.h"
#include "libpq/libpq-be-fe-helpers.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postgres_fdw.h"
#include "storage/fd.h"
#include "storage/latch.h"
#include "utils/builtins.h"
#include "utils/datetime.h"
#include "utils/hsearch.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
/*
* Connection cache hash table entry
*
* The lookup key in this hash table is the user mapping OID. We use just one
* connection per user mapping ID, which ensures that all the scans use the
* same snapshot during a query. Using the user mapping OID rather than
* the foreign server OID + user OID avoids creating multiple connections when
* the public user mapping applies to all user OIDs.
*
* The "conn" pointer can be NULL if we don't currently have a live connection.
* When we do have a connection, xact_depth tracks the current depth of
* transactions and subtransactions open on the remote side. We need to issue
* commands at the same nesting depth on the remote as we're executing at
* ourselves, so that rolling back a subtransaction will kill the right
* queries and not the wrong ones.
*/
typedef Oid ConnCacheKey;
typedef struct ConnCacheEntry
{
ConnCacheKey key; /* hash key (must be first) */
PGconn *conn; /* connection to foreign server, or NULL */
/* Remaining fields are invalid when conn is NULL: */
int xact_depth; /* 0 = no xact open, 1 = main xact open, 2 =
* one level of subxact open, etc */
bool have_prep_stmt; /* have we prepared any stmts in this xact? */
bool have_error; /* have any subxacts aborted in this xact? */
bool changing_xact_state; /* xact state change in process */
bool parallel_commit; /* do we commit (sub)xacts in parallel? */
bool parallel_abort; /* do we abort (sub)xacts in parallel? */
bool invalidated; /* true if reconnect is pending */
bool keep_connections; /* setting value of keep_connections
* server option */
Oid serverid; /* foreign server OID used to get server name */
uint32 server_hashvalue; /* hash value of foreign server OID */
uint32 mapping_hashvalue; /* hash value of user mapping OID */
PgFdwConnState state; /* extra per-connection state */
} ConnCacheEntry;
/*
* Connection cache (initialized on first use)
*/
static HTAB *ConnectionHash = NULL;
/* for assigning cursor numbers and prepared statement numbers */
static unsigned int cursor_number = 0;
static unsigned int prep_stmt_number = 0;
/* tracks whether any work is needed in callback functions */
static bool xact_got_connection = false;
/*
* Milliseconds to wait to cancel an in-progress query or execute a cleanup
* query; if it takes longer than 30 seconds to do these, we assume the
* connection is dead.
*/
#define CONNECTION_CLEANUP_TIMEOUT 30000
/* Macro for constructing abort command to be sent */
#define CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel) \
do { \
if (toplevel) \
snprintf((sql), sizeof(sql), \
"ABORT TRANSACTION"); \
else \
snprintf((sql), sizeof(sql), \
"ROLLBACK TO SAVEPOINT s%d; RELEASE SAVEPOINT s%d", \
(entry)->xact_depth, (entry)->xact_depth); \
} while(0)
/*
* SQL functions
*/
PG_FUNCTION_INFO_V1(postgres_fdw_get_connections);
PG_FUNCTION_INFO_V1(postgres_fdw_disconnect);
PG_FUNCTION_INFO_V1(postgres_fdw_disconnect_all);
/* prototypes of private functions */
static void make_new_connection(ConnCacheEntry *entry, UserMapping *user);
static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
static void disconnect_pg_server(ConnCacheEntry *entry);
static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
static void configure_remote_session(PGconn *conn);
static void do_sql_command_begin(PGconn *conn, const char *sql);
static void do_sql_command_end(PGconn *conn, const char *sql,
bool consume_input);
static void begin_remote_xact(ConnCacheEntry *entry);
static void pgfdw_xact_callback(XactEvent event, void *arg);
static void pgfdw_subxact_callback(SubXactEvent event,
SubTransactionId mySubid,
SubTransactionId parentSubid,
void *arg);
static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
static void pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel);
static bool pgfdw_cancel_query(PGconn *conn);
static bool pgfdw_cancel_query_begin(PGconn *conn);
static bool pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime,
bool consume_input);
static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
bool ignore_errors);
static bool pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query);
static bool pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
TimestampTz endtime,
bool consume_input,
bool ignore_errors);
static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
PGresult **result, bool *timed_out);
static void pgfdw_abort_cleanup(ConnCacheEntry *entry, bool toplevel);
static bool pgfdw_abort_cleanup_begin(ConnCacheEntry *entry, bool toplevel,
List **pending_entries,
List **cancel_requested);
static void pgfdw_finish_pre_commit_cleanup(List *pending_entries);
static void pgfdw_finish_pre_subcommit_cleanup(List *pending_entries,
int curlevel);
static void pgfdw_finish_abort_cleanup(List *pending_entries,
List *cancel_requested,
bool toplevel);
static void pgfdw_security_check(const char **keywords, const char **values,
UserMapping *user, PGconn *conn);
static bool UserMappingPasswordRequired(UserMapping *user);
static bool disconnect_cached_connections(Oid serverid);
/*
* Get a PGconn which can be used to execute queries on the remote PostgreSQL
* server with the user's authorization. A new connection is established
* if we don't already have a suitable one, and a transaction is opened at
* the right subtransaction nesting depth if we didn't do that already.
*
* will_prep_stmt must be true if caller intends to create any prepared
* statements. Since those don't go away automatically at transaction end
* (not even on error), we need this flag to cue manual cleanup.
*
* If state is not NULL, *state receives the per-connection state associated
* with the PGconn.
*/
PGconn *
GetConnection(UserMapping *user, bool will_prep_stmt, PgFdwConnState **state)
{
bool found;
bool retry = false;
ConnCacheEntry *entry;
ConnCacheKey key;
MemoryContext ccxt = CurrentMemoryContext;
/* First time through, initialize connection cache hashtable */
if (ConnectionHash == NULL)
{
HASHCTL ctl;
ctl.keysize = sizeof(ConnCacheKey);
ctl.entrysize = sizeof(ConnCacheEntry);
ConnectionHash = hash_create("postgres_fdw connections", 8,
&ctl,
HASH_ELEM | HASH_BLOBS);
/*
* Register some callback functions that manage connection cleanup.
* This should be done just once in each backend.
*/
RegisterXactCallback(pgfdw_xact_callback, NULL);
RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
pgfdw_inval_callback, (Datum) 0);
CacheRegisterSyscacheCallback(USERMAPPINGOID,
pgfdw_inval_callback, (Datum) 0);
}
/* Set flag that we did GetConnection during the current transaction */
xact_got_connection = true;
/* Create hash key for the entry. Assume no pad bytes in key struct */
key = user->umid;
/*
* Find or create cached entry for requested connection.
*/
entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found);
if (!found)
{
/*
* We need only clear "conn" here; remaining fields will be filled
* later when "conn" is set.
*/
entry->conn = NULL;
}
/* Reject further use of connections which failed abort cleanup. */
pgfdw_reject_incomplete_xact_state_change(entry);
/*
* If the connection needs to be remade due to invalidation, disconnect as
* soon as we're out of all transactions.
*/
if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
{
elog(DEBUG3, "closing connection %p for option changes to take effect",
entry->conn);
disconnect_pg_server(entry);
}
/*
* If cache entry doesn't have a connection, we have to establish a new
* connection. (If connect_pg_server throws an error, the cache entry
* will remain in a valid empty state, ie conn == NULL.)
*/
if (entry->conn == NULL)
make_new_connection(entry, user);
/*
* We check the health of the cached connection here when using it. In
* cases where we're out of all transactions, if a broken connection is
* detected, we try to reestablish a new connection later.
*/
PG_TRY();
{
/* Process a pending asynchronous request if any. */
if (entry->state.pendingAreq)
process_pending_request(entry->state.pendingAreq);
/* Start a new transaction or subtransaction if needed. */
begin_remote_xact(entry);
}
PG_CATCH();
{
MemoryContext ecxt = MemoryContextSwitchTo(ccxt);
ErrorData *errdata = CopyErrorData();
/*
* Determine whether to try to reestablish the connection.
*
* After a broken connection is detected in libpq, any error other
* than connection failure (e.g., out-of-memory) can be thrown
* somewhere between return from libpq and the expected ereport() call
* in pgfdw_report_error(). In this case, since PQstatus() indicates
* CONNECTION_BAD, checking only PQstatus() causes the false detection
* of connection failure. To avoid this, we also verify that the
* error's sqlstate is ERRCODE_CONNECTION_FAILURE. Note that also
* checking only the sqlstate can cause another false detection
* because pgfdw_report_error() may report ERRCODE_CONNECTION_FAILURE
* for any libpq-originated error condition.
*/
if (errdata->sqlerrcode != ERRCODE_CONNECTION_FAILURE ||
PQstatus(entry->conn) != CONNECTION_BAD ||
entry->xact_depth > 0)
{
MemoryContextSwitchTo(ecxt);
PG_RE_THROW();
}
/* Clean up the error state */
FlushErrorState();
FreeErrorData(errdata);
errdata = NULL;
retry = true;
}
PG_END_TRY();
/*
* If a broken connection is detected, disconnect it, reestablish a new
* connection and retry a new remote transaction. If connection failure is
* reported again, we give up getting a connection.
*/
if (retry)
{
Assert(entry->xact_depth == 0);
ereport(DEBUG3,
(errmsg_internal("could not start remote transaction on connection %p",
entry->conn)),
errdetail_internal("%s", pchomp(PQerrorMessage(entry->conn))));
elog(DEBUG3, "closing connection %p to reestablish a new one",
entry->conn);
disconnect_pg_server(entry);
make_new_connection(entry, user);
begin_remote_xact(entry);
}
/* Remember if caller will prepare statements */
entry->have_prep_stmt |= will_prep_stmt;
/* If caller needs access to the per-connection state, return it. */
if (state)
*state = &entry->state;
return entry->conn;
}
/*
* Reset all transient state fields in the cached connection entry and
* establish new connection to the remote server.
*/
static void
make_new_connection(ConnCacheEntry *entry, UserMapping *user)
{
ForeignServer *server = GetForeignServer(user->serverid);
ListCell *lc;
Assert(entry->conn == NULL);
/* Reset all transient state fields, to be sure all are clean */
entry->xact_depth = 0;
entry->have_prep_stmt = false;
entry->have_error = false;
entry->changing_xact_state = false;
entry->invalidated = false;
entry->serverid = server->serverid;
entry->server_hashvalue =
GetSysCacheHashValue1(FOREIGNSERVEROID,
ObjectIdGetDatum(server->serverid));
entry->mapping_hashvalue =
GetSysCacheHashValue1(USERMAPPINGOID,
ObjectIdGetDatum(user->umid));
memset(&entry->state, 0, sizeof(entry->state));
/*
* Determine whether to keep the connection that we're about to make here
* open even after the transaction using it ends, so that the subsequent
* transactions can re-use it.
*
* By default, all the connections to any foreign servers are kept open.
*
* Also determine whether to commit/abort (sub)transactions opened on the
* remote server in parallel at (sub)transaction end, which is disabled by
* default.
*
* Note: it's enough to determine these only when making a new connection
* because if these settings for it are changed, it will be closed and
* re-made later.
*/
entry->keep_connections = true;
entry->parallel_commit = false;
entry->parallel_abort = false;
foreach(lc, server->options)
{
DefElem *def = (DefElem *) lfirst(lc);
if (strcmp(def->defname, "keep_connections") == 0)
entry->keep_connections = defGetBoolean(def);
else if (strcmp(def->defname, "parallel_commit") == 0)
entry->parallel_commit = defGetBoolean(def);
else if (strcmp(def->defname, "parallel_abort") == 0)
entry->parallel_abort = defGetBoolean(def);
}
/* Now try to make the connection */
entry->conn = connect_pg_server(server, user);
elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
entry->conn, server->servername, user->umid, user->userid);
}
/*
* Check that non-superuser has used password or delegated credentials
* to establish connection; otherwise, he's piggybacking on the
* postgres server's user identity. See also dblink_security_check()
* in contrib/dblink and check_conn_params.
*/
static void
pgfdw_security_check(const char **keywords, const char **values, UserMapping *user, PGconn *conn)
{
/* Superusers bypass the check */
if (superuser_arg(user->userid))
return;
#ifdef ENABLE_GSS
/* Connected via GSSAPI with delegated credentials- all good. */
if (PQconnectionUsedGSSAPI(conn) && be_gssapi_get_delegation(MyProcPort))
return;
#endif
/* Ok if superuser set PW required false. */
if (!UserMappingPasswordRequired(user))
return;
/* Connected via PW, with PW required true, and provided non-empty PW. */
if (PQconnectionUsedPassword(conn))
{
/* ok if params contain a non-empty password */
for (int i = 0; keywords[i] != NULL; i++)
{
if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
return;
}
}
ereport(ERROR,
(errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
errmsg("password or GSSAPI delegated credentials required"),
errdetail("Non-superuser cannot connect if the server does not request a password or use GSSAPI with delegated credentials."),
errhint("Target server's authentication method must be changed or password_required=false set in the user mapping attributes.")));
}
/*
* Connect to remote server using specified server and user mapping properties.
*/
static PGconn *
connect_pg_server(ForeignServer *server, UserMapping *user)
{
PGconn *volatile conn = NULL;
/*
* Use PG_TRY block to ensure closing connection on error.
*/
PG_TRY();
{
const char **keywords;
const char **values;
char *appname = NULL;
int n;
/*
* Construct connection params from generic options of ForeignServer
* and UserMapping. (Some of them might not be libpq options, in
* which case we'll just waste a few array slots.) Add 4 extra slots
* for application_name, fallback_application_name, client_encoding,
* end marker.
*/
n = list_length(server->options) + list_length(user->options) + 4;
keywords = (const char **) palloc(n * sizeof(char *));
values = (const char **) palloc(n * sizeof(char *));
n = 0;
n += ExtractConnectionOptions(server->options,
keywords + n, values + n);
n += ExtractConnectionOptions(user->options,
keywords + n, values + n);
/*
* Use pgfdw_application_name as application_name if set.
*
* PQconnectdbParams() processes the parameter arrays from start to
* end. If any key word is repeated, the last value is used. Therefore
* note that pgfdw_application_name must be added to the arrays after
* options of ForeignServer are, so that it can override
* application_name set in ForeignServer.
*/
if (pgfdw_application_name && *pgfdw_application_name != '\0')
{
keywords[n] = "application_name";
values[n] = pgfdw_application_name;
n++;
}
/*
* Search the parameter arrays to find application_name setting, and
* replace escape sequences in it with status information if found.
* The arrays are searched backwards because the last value is used if
* application_name is repeatedly set.
*/
for (int i = n - 1; i >= 0; i--)
{
if (strcmp(keywords[i], "application_name") == 0 &&
*(values[i]) != '\0')
{
/*
* Use this application_name setting if it's not empty string
* even after any escape sequences in it are replaced.
*/
appname = process_pgfdw_appname(values[i]);
if (appname[0] != '\0')
{
values[i] = appname;
break;
}
/*
* This empty application_name is not used, so we set
* values[i] to NULL and keep searching the array to find the
* next one.
*/
values[i] = NULL;
pfree(appname);
appname = NULL;
}
}
/* Use "postgres_fdw" as fallback_application_name */
keywords[n] = "fallback_application_name";
values[n] = "postgres_fdw";
n++;
/* Set client_encoding so that libpq can convert encoding properly. */
keywords[n] = "client_encoding";
values[n] = GetDatabaseEncodingName();
n++;
keywords[n] = values[n] = NULL;
/* verify the set of connection parameters */
check_conn_params(keywords, values, user);
/* OK to make connection */
conn = libpqsrv_connect_params(keywords, values,
false, /* expand_dbname */
WAIT_EVENT_EXTENSION);
if (!conn || PQstatus(conn) != CONNECTION_OK)
ereport(ERROR,
(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
errmsg("could not connect to server \"%s\"",
server->servername),
errdetail_internal("%s", pchomp(PQerrorMessage(conn)))));
/* Perform post-connection security checks */
pgfdw_security_check(keywords, values, user, conn);
/* Prepare new session for use */
configure_remote_session(conn);
if (appname != NULL)
pfree(appname);
pfree(keywords);
pfree(values);
}
PG_CATCH();
{
libpqsrv_disconnect(conn);
PG_RE_THROW();
}
PG_END_TRY();
return conn;
}
/*
* Disconnect any open connection for a connection cache entry.
*/
static void
disconnect_pg_server(ConnCacheEntry *entry)
{
if (entry->conn != NULL)
{
libpqsrv_disconnect(entry->conn);
entry->conn = NULL;
}
}
/*
* Return true if the password_required is defined and false for this user
* mapping, otherwise false. The mapping has been pre-validated.
*/
static bool
UserMappingPasswordRequired(UserMapping *user)
{
ListCell *cell;
foreach(cell, user->options)
{
DefElem *def = (DefElem *) lfirst(cell);
if (strcmp(def->defname, "password_required") == 0)
return defGetBoolean(def);
}
return true;
}
/*
* For non-superusers, insist that the connstr specify a password or that the
* user provided their own GSSAPI delegated credentials. This
* prevents a password from being picked up from .pgpass, a service file, the
* environment, etc. We don't want the postgres user's passwords,
* certificates, etc to be accessible to non-superusers. (See also
* dblink_connstr_check in contrib/dblink.)
*/
static void
check_conn_params(const char **keywords, const char **values, UserMapping *user)
{
int i;
/* no check required if superuser */
if (superuser_arg(user->userid))
return;
#ifdef ENABLE_GSS
/* ok if the user provided their own delegated credentials */
if (be_gssapi_get_delegation(MyProcPort))
return;
#endif
/* ok if params contain a non-empty password */
for (i = 0; keywords[i] != NULL; i++)
{
if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
return;
}
/* ok if the superuser explicitly said so at user mapping creation time */
if (!UserMappingPasswordRequired(user))
return;
ereport(ERROR,
(errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
errmsg("password or GSSAPI delegated credentials required"),
errdetail("Non-superusers must delegate GSSAPI credentials or provide a password in the user mapping.")));
}
/*
* Issue SET commands to make sure remote session is configured properly.
*
* We do this just once at connection, assuming nothing will change the
* values later. Since we'll never send volatile function calls to the
* remote, there shouldn't be any way to break this assumption from our end.
* It's possible to think of ways to break it at the remote end, eg making
* a foreign table point to a view that includes a set_config call ---
* but once you admit the possibility of a malicious view definition,
* there are any number of ways to break things.
*/
static void
configure_remote_session(PGconn *conn)
{
int remoteversion = PQserverVersion(conn);
/* Force the search path to contain only pg_catalog (see deparse.c) */
do_sql_command(conn, "SET search_path = pg_catalog");
/*
* Set remote timezone; this is basically just cosmetic, since all
* transmitted and returned timestamptzs should specify a zone explicitly
* anyway. However it makes the regression test outputs more predictable.
*
* We don't risk setting remote zone equal to ours, since the remote
* server might use a different timezone database. Instead, use UTC
* (quoted, because very old servers are picky about case).
*/
do_sql_command(conn, "SET timezone = 'UTC'");
/*
* Set values needed to ensure unambiguous data output from remote. (This
* logic should match what pg_dump does. See also set_transmission_modes
* in postgres_fdw.c.)
*/
do_sql_command(conn, "SET datestyle = ISO");
if (remoteversion >= 80400)
do_sql_command(conn, "SET intervalstyle = postgres");
if (remoteversion >= 90000)
do_sql_command(conn, "SET extra_float_digits = 3");
else
do_sql_command(conn, "SET extra_float_digits = 2");
}
/*
* Convenience subroutine to issue a non-data-returning SQL command to remote
*/
void
do_sql_command(PGconn *conn, const char *sql)
{
do_sql_command_begin(conn, sql);
do_sql_command_end(conn, sql, false);
}
static void
do_sql_command_begin(PGconn *conn, const char *sql)
{
if (!PQsendQuery(conn, sql))
pgfdw_report_error(ERROR, NULL, conn, false, sql);
}
static void
do_sql_command_end(PGconn *conn, const char *sql, bool consume_input)
{
PGresult *res;
/*
* If requested, consume whatever data is available from the socket. (Note
* that if all data is available, this allows pgfdw_get_result to call
* PQgetResult without forcing the overhead of WaitLatchOrSocket, which
* would be large compared to the overhead of PQconsumeInput.)
*/
if (consume_input && !PQconsumeInput(conn))
pgfdw_report_error(ERROR, NULL, conn, false, sql);
res = pgfdw_get_result(conn, sql);
if (PQresultStatus(res) != PGRES_COMMAND_OK)
pgfdw_report_error(ERROR, res, conn, true, sql);
PQclear(res);
}
/*
* Start remote transaction or subtransaction, if needed.
*
* Note that we always use at least REPEATABLE READ in the remote session.
* This is so that, if a query initiates multiple scans of the same or
* different foreign tables, we will get snapshot-consistent results from
* those scans. A disadvantage is that we can't provide sane emulation of
* READ COMMITTED behavior --- it would be nice if we had some other way to
* control which remote queries share a snapshot.
*/
static void
begin_remote_xact(ConnCacheEntry *entry)
{
int curlevel = GetCurrentTransactionNestLevel();
/* Start main transaction if we haven't yet */
if (entry->xact_depth <= 0)
{
const char *sql;
elog(DEBUG3, "starting remote transaction on connection %p",
entry->conn);
if (IsolationIsSerializable())
sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
else
sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
entry->changing_xact_state = true;
do_sql_command(entry->conn, sql);
entry->xact_depth = 1;
entry->changing_xact_state = false;
}
/*
* If we're in a subtransaction, stack up savepoints to match our level.
* This ensures we can rollback just the desired effects when a
* subtransaction aborts.
*/
while (entry->xact_depth < curlevel)
{
char sql[64];
snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1);
entry->changing_xact_state = true;
do_sql_command(entry->conn, sql);
entry->xact_depth++;
entry->changing_xact_state = false;
}
}
/*
* Release connection reference count created by calling GetConnection.
*/
void
ReleaseConnection(PGconn *conn)
{
/*
* Currently, we don't actually track connection references because all
* cleanup is managed on a transaction or subtransaction basis instead. So
* there's nothing to do here.
*/
}
/*
* Assign a "unique" number for a cursor.
*
* These really only need to be unique per connection within a transaction.
* For the moment we ignore the per-connection point and assign them across
* all connections in the transaction, but we ask for the connection to be
* supplied in case we want to refine that.
*
* Note that even if wraparound happens in a very long transaction, actual
* collisions are highly improbable; just be sure to use %u not %d to print.
*/
unsigned int
GetCursorNumber(PGconn *conn)
{
return ++cursor_number;
}
/*
* Assign a "unique" number for a prepared statement.
*
* This works much like GetCursorNumber, except that we never reset the counter
* within a session. That's because we can't be 100% sure we've gotten rid
* of all prepared statements on all connections, and it's not really worth
* increasing the risk of prepared-statement name collisions by resetting.
*/
unsigned int
GetPrepStmtNumber(PGconn *conn)
{
return ++prep_stmt_number;
}
/*
* Submit a query and wait for the result.
*
* This function is interruptible by signals.
*
* Caller is responsible for the error handling on the result.
*/
PGresult *
pgfdw_exec_query(PGconn *conn, const char *query, PgFdwConnState *state)
{
/* First, process a pending asynchronous request, if any. */
if (state && state->pendingAreq)
process_pending_request(state->pendingAreq);
/*
* Submit a query. Since we don't use non-blocking mode, this also can
* block. But its risk is relatively small, so we ignore that for now.
*/
if (!PQsendQuery(conn, query))
pgfdw_report_error(ERROR, NULL, conn, false, query);
/* Wait for the result. */
return pgfdw_get_result(conn, query);
}
/*
* Wait for the result from a prior asynchronous execution function call.
*
* This function offers quick responsiveness by checking for any interruptions.
*
* This function emulates PQexec()'s behavior of returning the last result
* when there are many.
*
* Caller is responsible for the error handling on the result.
*/
PGresult *
pgfdw_get_result(PGconn *conn, const char *query)
{
PGresult *volatile last_res = NULL;
/* In what follows, do not leak any PGresults on an error. */
PG_TRY();
{
for (;;)
{
PGresult *res;
while (PQisBusy(conn))
{
int wc;
/* Sleep until there's something to do */
wc = WaitLatchOrSocket(MyLatch,
WL_LATCH_SET | WL_SOCKET_READABLE |
WL_EXIT_ON_PM_DEATH,
PQsocket(conn),
-1L, WAIT_EVENT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
/* Data available in socket? */
if (wc & WL_SOCKET_READABLE)
{
if (!PQconsumeInput(conn))
pgfdw_report_error(ERROR, NULL, conn, false, query);
}
}
res = PQgetResult(conn);
if (res == NULL)
break; /* query is complete */
PQclear(last_res);
last_res = res;
}
}
PG_CATCH();
{
PQclear(last_res);
PG_RE_THROW();
}
PG_END_TRY();
return last_res;
}
/*
* Report an error we got from the remote server.
*
* elevel: error level to use (typically ERROR, but might be less)
* res: PGresult containing the error
* conn: connection we did the query on
* clear: if true, PQclear the result (otherwise caller will handle it)
* sql: NULL, or text of remote command we tried to execute
*
* Note: callers that choose not to throw ERROR for a remote error are
* responsible for making sure that the associated ConnCacheEntry gets
* marked with have_error = true.
*/
void
pgfdw_report_error(int elevel, PGresult *res, PGconn *conn,
bool clear, const char *sql)
{
/* If requested, PGresult must be released before leaving this function. */
PG_TRY();
{
char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
int sqlstate;
if (diag_sqlstate)
sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
diag_sqlstate[1],
diag_sqlstate[2],
diag_sqlstate[3],
diag_sqlstate[4]);
else
sqlstate = ERRCODE_CONNECTION_FAILURE;
/*
* If we don't get a message from the PGresult, try the PGconn. This
* is needed because for connection-level failures, PQexec may just
* return NULL, not a PGresult at all.
*/
if (message_primary == NULL)
message_primary = pchomp(PQerrorMessage(conn));
ereport(elevel,
(errcode(sqlstate),
(message_primary != NULL && message_primary[0] != '\0') ?
errmsg_internal("%s", message_primary) :
errmsg("could not obtain message string for remote error"),
message_detail ? errdetail_internal("%s", message_detail) : 0,
message_hint ? errhint("%s", message_hint) : 0,
message_context ? errcontext("%s", message_context) : 0,
sql ? errcontext("remote SQL command: %s", sql) : 0));
}
PG_FINALLY();
{
if (clear)
PQclear(res);
}
PG_END_TRY();
}
/*
* pgfdw_xact_callback --- cleanup at main-transaction end.
*
* This runs just late enough that it must not enter user-defined code
* locally. (Entering such code on the remote side is fine. Its remote
* COMMIT TRANSACTION may run deferred triggers.)
*/
static void
pgfdw_xact_callback(XactEvent event, void *arg)
{
HASH_SEQ_STATUS scan;
ConnCacheEntry *entry;
List *pending_entries = NIL;
List *cancel_requested = NIL;
/* Quick exit if no connections were touched in this transaction. */
if (!xact_got_connection)
return;
/*
* Scan all connection cache entries to find open remote transactions, and
* close them.
*/
hash_seq_init(&scan, ConnectionHash);
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
{
PGresult *res;
/* Ignore cache entry if no open connection right now */
if (entry->conn == NULL)
continue;
/* If it has an open remote transaction, try to close it */
if (entry->xact_depth > 0)
{
elog(DEBUG3, "closing remote transaction on connection %p",
entry->conn);
switch (event)
{
case XACT_EVENT_PARALLEL_PRE_COMMIT:
case XACT_EVENT_PRE_COMMIT:
/*
* If abort cleanup previously failed for this connection,
* we can't issue any more commands against it.
*/
pgfdw_reject_incomplete_xact_state_change(entry);
/* Commit all remote transactions during pre-commit */
entry->changing_xact_state = true;
if (entry->parallel_commit)
{
do_sql_command_begin(entry->conn, "COMMIT TRANSACTION");
pending_entries = lappend(pending_entries, entry);
continue;
}
do_sql_command(entry->conn, "COMMIT TRANSACTION");
entry->changing_xact_state = false;
/*
* If there were any errors in subtransactions, and we
* made prepared statements, do a DEALLOCATE ALL to make
* sure we get rid of all prepared statements. This is
* annoying and not terribly bulletproof, but it's
* probably not worth trying harder.
*
* DEALLOCATE ALL only exists in 8.3 and later, so this
* constrains how old a server postgres_fdw can
* communicate with. We intentionally ignore errors in
* the DEALLOCATE, so that we can hobble along to some
* extent with older servers (leaking prepared statements
* as we go; but we don't really support update operations
* pre-8.3 anyway).
*/
if (entry->have_prep_stmt && entry->have_error)
{
res = PQexec(entry->conn, "DEALLOCATE ALL");
PQclear(res);
}
entry->have_prep_stmt = false;
entry->have_error = false;
break;
case XACT_EVENT_PRE_PREPARE:
/*
* We disallow any remote transactions, since it's not
* very reasonable to hold them open until the prepared
* transaction is committed. For the moment, throw error
* unconditionally; later we might allow read-only cases.
* Note that the error will cause us to come right back
* here with event == XACT_EVENT_ABORT, so we'll clean up
* the connection state at that point.
*/
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot PREPARE a transaction that has operated on postgres_fdw foreign tables")));
break;
case XACT_EVENT_PARALLEL_COMMIT:
case XACT_EVENT_COMMIT:
case XACT_EVENT_PREPARE:
/* Pre-commit should have closed the open transaction */
elog(ERROR, "missed cleaning up connection during pre-commit");
break;
case XACT_EVENT_PARALLEL_ABORT:
case XACT_EVENT_ABORT:
/* Rollback all remote transactions during abort */
if (entry->parallel_abort)
{
if (pgfdw_abort_cleanup_begin(entry, true,
&pending_entries,
&cancel_requested))
continue;
}
else
pgfdw_abort_cleanup(entry, true);
break;
}
}
/* Reset state to show we're out of a transaction */
pgfdw_reset_xact_state(entry, true);
}
/* If there are any pending connections, finish cleaning them up */
if (pending_entries || cancel_requested)
{
if (event == XACT_EVENT_PARALLEL_PRE_COMMIT ||
event == XACT_EVENT_PRE_COMMIT)
{
Assert(cancel_requested == NIL);
pgfdw_finish_pre_commit_cleanup(pending_entries);
}
else
{
Assert(event == XACT_EVENT_PARALLEL_ABORT ||
event == XACT_EVENT_ABORT);
pgfdw_finish_abort_cleanup(pending_entries, cancel_requested,
true);
}
}
/*
* Regardless of the event type, we can now mark ourselves as out of the
* transaction. (Note: if we are here during PRE_COMMIT or PRE_PREPARE,
* this saves a useless scan of the hashtable during COMMIT or PREPARE.)
*/
xact_got_connection = false;
/* Also reset cursor numbering for next transaction */
cursor_number = 0;
}
/*
* pgfdw_subxact_callback --- cleanup at subtransaction end.
*/
static void
pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
SubTransactionId parentSubid, void *arg)
{
HASH_SEQ_STATUS scan;
ConnCacheEntry *entry;
int curlevel;
List *pending_entries = NIL;
List *cancel_requested = NIL;
/* Nothing to do at subxact start, nor after commit. */
if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB ||
event == SUBXACT_EVENT_ABORT_SUB))
return;
/* Quick exit if no connections were touched in this transaction. */
if (!xact_got_connection)
return;
/*
* Scan all connection cache entries to find open remote subtransactions
* of the current level, and close them.
*/
curlevel = GetCurrentTransactionNestLevel();
hash_seq_init(&scan, ConnectionHash);
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
{
char sql[100];
/*
* We only care about connections with open remote subtransactions of
* the current level.
*/
if (entry->conn == NULL || entry->xact_depth < curlevel)
continue;
if (entry->xact_depth > curlevel)
elog(ERROR, "missed cleaning up remote subtransaction at level %d",
entry->xact_depth);
if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
{
/*
* If abort cleanup previously failed for this connection, we
* can't issue any more commands against it.
*/
pgfdw_reject_incomplete_xact_state_change(entry);
/* Commit all remote subtransactions during pre-commit */
snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
entry->changing_xact_state = true;
if (entry->parallel_commit)
{
do_sql_command_begin(entry->conn, sql);
pending_entries = lappend(pending_entries, entry);
continue;
}
do_sql_command(entry->conn, sql);
entry->changing_xact_state = false;
}
else
{
/* Rollback all remote subtransactions during abort */
if (entry->parallel_abort)
{
if (pgfdw_abort_cleanup_begin(entry, false,
&pending_entries,
&cancel_requested))
continue;
}
else
pgfdw_abort_cleanup(entry, false);
}
/* OK, we're outta that level of subtransaction */
pgfdw_reset_xact_state(entry, false);
}
/* If there are any pending connections, finish cleaning them up */
if (pending_entries || cancel_requested)
{
if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
{
Assert(cancel_requested == NIL);
pgfdw_finish_pre_subcommit_cleanup(pending_entries, curlevel);
}
else
{
Assert(event == SUBXACT_EVENT_ABORT_SUB);
pgfdw_finish_abort_cleanup(pending_entries, cancel_requested,
false);
}
}
}
/*
* Connection invalidation callback function
*
* After a change to a pg_foreign_server or pg_user_mapping catalog entry,
* close connections depending on that entry immediately if current transaction
* has not used those connections yet. Otherwise, mark those connections as
* invalid and then make pgfdw_xact_callback() close them at the end of current
* transaction, since they cannot be closed in the midst of the transaction
* using them. Closed connections will be remade at the next opportunity if
* necessary.
*
* Although most cache invalidation callbacks blow away all the related stuff
* regardless of the given hashvalue, connections are expensive enough that
* it's worth trying to avoid that.
*
* NB: We could avoid unnecessary disconnection more strictly by examining
* individual option values, but it seems too much effort for the gain.
*/
static void
pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
{
HASH_SEQ_STATUS scan;
ConnCacheEntry *entry;
Assert(cacheid == FOREIGNSERVEROID || cacheid == USERMAPPINGOID);
/* ConnectionHash must exist already, if we're registered */
hash_seq_init(&scan, ConnectionHash);
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
{
/* Ignore invalid entries */
if (entry->conn == NULL)
continue;
/* hashvalue == 0 means a cache reset, must clear all state */
if (hashvalue == 0 ||
(cacheid == FOREIGNSERVEROID &&
entry->server_hashvalue == hashvalue) ||
(cacheid == USERMAPPINGOID &&
entry->mapping_hashvalue == hashvalue))
{
/*
* Close the connection immediately if it's not used yet in this
* transaction. Otherwise mark it as invalid so that
* pgfdw_xact_callback() can close it at the end of this
* transaction.
*/
if (entry->xact_depth == 0)
{
elog(DEBUG3, "discarding connection %p", entry->conn);
disconnect_pg_server(entry);
}
else
entry->invalidated = true;
}
}
}
/*
* Raise an error if the given connection cache entry is marked as being
* in the middle of an xact state change. This should be called at which no
* such change is expected to be in progress; if one is found to be in
* progress, it means that we aborted in the middle of a previous state change
* and now don't know what the remote transaction state actually is.
* Such connections can't safely be further used. Re-establishing the
* connection would change the snapshot and roll back any writes already
* performed, so that's not an option, either. Thus, we must abort.
*/
static void
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
{
ForeignServer *server;
/* nothing to do for inactive entries and entries of sane state */
if (entry->conn == NULL || !entry->changing_xact_state)
return;
/* make sure this entry is inactive */
disconnect_pg_server(entry);
/* find server name to be shown in the message below */
server = GetForeignServer(entry->serverid);
ereport(ERROR,
(errcode(ERRCODE_CONNECTION_EXCEPTION),
errmsg("connection to server \"%s\" was lost",
server->servername)));
}
/*
* Reset state to show we're out of a (sub)transaction.
*/
static void
pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel)
{
if (toplevel)
{
/* Reset state to show we're out of a transaction */
entry->xact_depth = 0;
/*
* If the connection isn't in a good idle state, it is marked as
* invalid or keep_connections option of its server is disabled, then
* discard it to recover. Next GetConnection will open a new
* connection.
*/
if (PQstatus(entry->conn) != CONNECTION_OK ||
PQtransactionStatus(entry->conn) != PQTRANS_IDLE ||
entry->changing_xact_state ||
entry->invalidated ||
!entry->keep_connections)
{
elog(DEBUG3, "discarding connection %p", entry->conn);
disconnect_pg_server(entry);
}
}
else
{
/* Reset state to show we're out of a subtransaction */
entry->xact_depth--;
}
}
/*
* Cancel the currently-in-progress query (whose query text we do not have)
* and ignore the result. Returns true if we successfully cancel the query
* and discard any pending result, and false if not.
*
* It's not a huge problem if we throw an ERROR here, but if we get into error
* recursion trouble, we'll end up slamming the connection shut, which will
* necessitate failing the entire toplevel transaction even if subtransactions
* were used. Try to use WARNING where we can.
*
* XXX: if the query was one sent by fetch_more_data_begin(), we could get the
* query text from the pendingAreq saved in the per-connection state, then
* report the query using it.
*/
static bool
pgfdw_cancel_query(PGconn *conn)
{
TimestampTz endtime;
/*
* If it takes too long to cancel the query and discard the result, assume
* the connection is dead.
*/
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
CONNECTION_CLEANUP_TIMEOUT);
if (!pgfdw_cancel_query_begin(conn))
return false;
return pgfdw_cancel_query_end(conn, endtime, false);
}
static bool
pgfdw_cancel_query_begin(PGconn *conn)
{
PGcancel *cancel;
char errbuf[256];
/*
* Issue cancel request. Unfortunately, there's no good way to limit the
* amount of time that we might block inside PQgetCancel().
*/
if ((cancel = PQgetCancel(conn)))
{
if (!PQcancel(cancel, errbuf, sizeof(errbuf)))
{
ereport(WARNING,
(errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("could not send cancel request: %s",
errbuf)));
PQfreeCancel(cancel);
return false;
}
PQfreeCancel(cancel);
}
return true;
}
static bool
pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime, bool consume_input)
{
PGresult *result = NULL;
bool timed_out;
/*
* If requested, consume whatever data is available from the socket. (Note
* that if all data is available, this allows pgfdw_get_cleanup_result to
* call PQgetResult without forcing the overhead of WaitLatchOrSocket,
* which would be large compared to the overhead of PQconsumeInput.)
*/
if (consume_input && !PQconsumeInput(conn))
{
ereport(WARNING,
(errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("could not get result of cancel request: %s",
pchomp(PQerrorMessage(conn)))));
return false;
}
/* Get and discard the result of the query. */
if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
{
if (timed_out)
ereport(WARNING,
(errmsg("could not get result of cancel request due to timeout")));
else
ereport(WARNING,
(errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("could not get result of cancel request: %s",
pchomp(PQerrorMessage(conn)))));
return false;
}
PQclear(result);
return true;
}
/*
* Submit a query during (sub)abort cleanup and wait up to 30 seconds for the
* result. If the query is executed without error, the return value is true.
* If the query is executed successfully but returns an error, the return
* value is true if and only if ignore_errors is set. If the query can't be
* sent or times out, the return value is false.
*
* It's not a huge problem if we throw an ERROR here, but if we get into error
* recursion trouble, we'll end up slamming the connection shut, which will
* necessitate failing the entire toplevel transaction even if subtransactions
* were used. Try to use WARNING where we can.
*/
static bool
pgfdw_exec_cleanup_query(PGconn *conn, const char *query, bool ignore_errors)
{
TimestampTz endtime;
/*
* If it takes too long to execute a cleanup query, assume the connection
* is dead. It's fairly likely that this is why we aborted in the first
* place (e.g. statement timeout, user cancel), so the timeout shouldn't
* be too long.
*/
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
CONNECTION_CLEANUP_TIMEOUT);
if (!pgfdw_exec_cleanup_query_begin(conn, query))
return false;
return pgfdw_exec_cleanup_query_end(conn, query, endtime,
false, ignore_errors);
}
static bool
pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query)
{
/*
* Submit a query. Since we don't use non-blocking mode, this also can
* block. But its risk is relatively small, so we ignore that for now.
*/
if (!PQsendQuery(conn, query))
{
pgfdw_report_error(WARNING, NULL, conn, false, query);
return false;
}
return true;
}
static bool
pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
TimestampTz endtime, bool consume_input,
bool ignore_errors)
{
PGresult *result = NULL;
bool timed_out;
/*
* If requested, consume whatever data is available from the socket. (Note
* that if all data is available, this allows pgfdw_get_cleanup_result to
* call PQgetResult without forcing the overhead of WaitLatchOrSocket,
* which would be large compared to the overhead of PQconsumeInput.)
*/
if (consume_input && !PQconsumeInput(conn))
{
pgfdw_report_error(WARNING, NULL, conn, false, query);
return false;
}
/* Get the result of the query. */
if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
{
if (timed_out)
ereport(WARNING,
(errmsg("could not get query result due to timeout"),
query ? errcontext("remote SQL command: %s", query) : 0));
else
pgfdw_report_error(WARNING, NULL, conn, false, query);
return false;
}
/* Issue a warning if not successful. */
if (PQresultStatus(result) != PGRES_COMMAND_OK)
{
pgfdw_report_error(WARNING, result, conn, true, query);
return ignore_errors;
}
PQclear(result);
return true;
}
/*
* Get, during abort cleanup, the result of a query that is in progress. This
* might be a query that is being interrupted by transaction abort, or it might
* be a query that was initiated as part of transaction abort to get the remote
* side back to the appropriate state.
*
* endtime is the time at which we should give up and assume the remote
* side is dead. Returns true if the timeout expired or connection trouble
* occurred, false otherwise. Sets *result except in case of a timeout.
* Sets timed_out to true only when the timeout expired.
*/
static bool
pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result,
bool *timed_out)
{
volatile bool failed = false;
PGresult *volatile last_res = NULL;
*timed_out = false;
/* In what follows, do not leak any PGresults on an error. */
PG_TRY();
{
for (;;)
{
PGresult *res;
while (PQisBusy(conn))
{
int wc;
TimestampTz now = GetCurrentTimestamp();
long cur_timeout;
/* If timeout has expired, give up, else get sleep time. */
cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
if (cur_timeout <= 0)
{
*timed_out = true;
failed = true;
goto exit;
}
/* Sleep until there's something to do */
wc = WaitLatchOrSocket(MyLatch,
WL_LATCH_SET | WL_SOCKET_READABLE |
WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
PQsocket(conn),
cur_timeout, WAIT_EVENT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
/* Data available in socket? */
if (wc & WL_SOCKET_READABLE)
{
if (!PQconsumeInput(conn))
{
/* connection trouble */
failed = true;
goto exit;
}
}
}
res = PQgetResult(conn);
if (res == NULL)
break; /* query is complete */
PQclear(last_res);
last_res = res;
}
exit: ;
}
PG_CATCH();
{
PQclear(last_res);
PG_RE_THROW();
}
PG_END_TRY();
if (failed)
PQclear(last_res);
else
*result = last_res;
return failed;
}
/*
* Abort remote transaction or subtransaction.
*
* "toplevel" should be set to true if toplevel (main) transaction is
* rollbacked, false otherwise.
*
* Set entry->changing_xact_state to false on success, true on failure.
*/
static void
pgfdw_abort_cleanup(ConnCacheEntry *entry, bool toplevel)
{
char sql[100];
/*
* Don't try to clean up the connection if we're already in error
* recursion trouble.
*/
if (in_error_recursion_trouble())
entry->changing_xact_state = true;
/*
* If connection is already unsalvageable, don't touch it further.
*/
if (entry->changing_xact_state)
return;
/*
* Mark this connection as in the process of changing transaction state.
*/
entry->changing_xact_state = true;
/* Assume we might have lost track of prepared statements */
entry->have_error = true;
/*
* If a command has been submitted to the remote server by using an
* asynchronous execution function, the command might not have yet
* completed. Check to see if a command is still being processed by the
* remote server, and if so, request cancellation of the command.
*/
if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
!pgfdw_cancel_query(entry->conn))
return; /* Unable to cancel running query */
CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
if (!pgfdw_exec_cleanup_query(entry->conn, sql, false))
return; /* Unable to abort remote (sub)transaction */
if (toplevel)
{
if (entry->have_prep_stmt && entry->have_error &&
!pgfdw_exec_cleanup_query(entry->conn,
"DEALLOCATE ALL",
true))
return; /* Trouble clearing prepared statements */
entry->have_prep_stmt = false;
entry->have_error = false;
}
/*
* If pendingAreq of the per-connection state is not NULL, it means that
* an asynchronous fetch begun by fetch_more_data_begin() was not done
* successfully and thus the per-connection state was not reset in
* fetch_more_data(); in that case reset the per-connection state here.
*/
if (entry->state.pendingAreq)
memset(&entry->state, 0, sizeof(entry->state));
/* Disarm changing_xact_state if it all worked */
entry->changing_xact_state = false;
}
/*
* Like pgfdw_abort_cleanup, submit an abort command or cancel request, but
* don't wait for the result.
*
* Returns true if the abort command or cancel request is successfully issued,
* false otherwise. If the abort command is successfully issued, the given
* connection cache entry is appended to *pending_entries. Otherwise, if the
* cancel request is successfully issued, it is appended to *cancel_requested.
*/
static bool
pgfdw_abort_cleanup_begin(ConnCacheEntry *entry, bool toplevel,
List **pending_entries, List **cancel_requested)
{
/*
* Don't try to clean up the connection if we're already in error
* recursion trouble.
*/
if (in_error_recursion_trouble())
entry->changing_xact_state = true;
/*
* If connection is already unsalvageable, don't touch it further.
*/
if (entry->changing_xact_state)
return false;
/*
* Mark this connection as in the process of changing transaction state.
*/
entry->changing_xact_state = true;
/* Assume we might have lost track of prepared statements */
entry->have_error = true;
/*
* If a command has been submitted to the remote server by using an
* asynchronous execution function, the command might not have yet
* completed. Check to see if a command is still being processed by the
* remote server, and if so, request cancellation of the command.
*/
if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE)
{
if (!pgfdw_cancel_query_begin(entry->conn))
return false; /* Unable to cancel running query */
*cancel_requested = lappend(*cancel_requested, entry);
}
else
{
char sql[100];
CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
if (!pgfdw_exec_cleanup_query_begin(entry->conn, sql))
return false; /* Unable to abort remote transaction */
*pending_entries = lappend(*pending_entries, entry);
}
return true;
}
/*
* Finish pre-commit cleanup of connections on each of which we've sent a
* COMMIT command to the remote server.
*/
static void
pgfdw_finish_pre_commit_cleanup(List *pending_entries)
{
ConnCacheEntry *entry;
List *pending_deallocs = NIL;
ListCell *lc;
Assert(pending_entries);
/*
* Get the result of the COMMIT command for each of the pending entries
*/
foreach(lc, pending_entries)
{
entry = (ConnCacheEntry *) lfirst(lc);
Assert(entry->changing_xact_state);
/*
* We might already have received the result on the socket, so pass
* consume_input=true to try to consume it first
*/
do_sql_command_end(entry->conn, "COMMIT TRANSACTION", true);
entry->changing_xact_state = false;
/* Do a DEALLOCATE ALL in parallel if needed */
if (entry->have_prep_stmt && entry->have_error)
{
/* Ignore errors (see notes in pgfdw_xact_callback) */
if (PQsendQuery(entry->conn, "DEALLOCATE ALL"))
{
pending_deallocs = lappend(pending_deallocs, entry);
continue;
}
}
entry->have_prep_stmt = false;
entry->have_error = false;
pgfdw_reset_xact_state(entry, true);
}
/* No further work if no pending entries */
if (!pending_deallocs)
return;
/*
* Get the result of the DEALLOCATE command for each of the pending
* entries
*/
foreach(lc, pending_deallocs)
{
PGresult *res;
entry = (ConnCacheEntry *) lfirst(lc);
/* Ignore errors (see notes in pgfdw_xact_callback) */
while ((res = PQgetResult(entry->conn)) != NULL)
{
PQclear(res);
/* Stop if the connection is lost (else we'll loop infinitely) */
if (PQstatus(entry->conn) == CONNECTION_BAD)
break;
}
entry->have_prep_stmt = false;
entry->have_error = false;
pgfdw_reset_xact_state(entry, true);
}
}
/*
* Finish pre-subcommit cleanup of connections on each of which we've sent a
* RELEASE command to the remote server.
*/
static void
pgfdw_finish_pre_subcommit_cleanup(List *pending_entries, int curlevel)
{
ConnCacheEntry *entry;
char sql[100];
ListCell *lc;
Assert(pending_entries);
/*
* Get the result of the RELEASE command for each of the pending entries
*/
snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
foreach(lc, pending_entries)
{
entry = (ConnCacheEntry *) lfirst(lc);
Assert(entry->changing_xact_state);
/*
* We might already have received the result on the socket, so pass
* consume_input=true to try to consume it first
*/
do_sql_command_end(entry->conn, sql, true);
entry->changing_xact_state = false;
pgfdw_reset_xact_state(entry, false);
}
}
/*
* Finish abort cleanup of connections on each of which we've sent an abort
* command or cancel request to the remote server.
*/
static void
pgfdw_finish_abort_cleanup(List *pending_entries, List *cancel_requested,
bool toplevel)
{
List *pending_deallocs = NIL;
ListCell *lc;
/*
* For each of the pending cancel requests (if any), get and discard the
* result of the query, and submit an abort command to the remote server.
*/
if (cancel_requested)
{
foreach(lc, cancel_requested)
{
ConnCacheEntry *entry = (ConnCacheEntry *) lfirst(lc);
TimestampTz endtime;
char sql[100];
Assert(entry->changing_xact_state);
/*
* Set end time. You might think we should do this before issuing
* cancel request like in normal mode, but that is problematic,
* because if, for example, it took longer than 30 seconds to
* process the first few entries in the cancel_requested list, it
* would cause a timeout error when processing each of the
* remaining entries in the list, leading to slamming that entry's
* connection shut.
*/
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
CONNECTION_CLEANUP_TIMEOUT);
if (!pgfdw_cancel_query_end(entry->conn, endtime, true))
{
/* Unable to cancel running query */
pgfdw_reset_xact_state(entry, toplevel);
continue;
}
/* Send an abort command in parallel if needed */
CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
if (!pgfdw_exec_cleanup_query_begin(entry->conn, sql))
{
/* Unable to abort remote (sub)transaction */
pgfdw_reset_xact_state(entry, toplevel);
}
else
pending_entries = lappend(pending_entries, entry);
}
}
/* No further work if no pending entries */
if (!pending_entries)
return;
/*
* Get the result of the abort command for each of the pending entries
*/
foreach(lc, pending_entries)
{
ConnCacheEntry *entry = (ConnCacheEntry *) lfirst(lc);
TimestampTz endtime;
char sql[100];
Assert(entry->changing_xact_state);
/*
* Set end time. We do this now, not before issuing the command like
* in normal mode, for the same reason as for the cancel_requested
* entries.
*/
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
CONNECTION_CLEANUP_TIMEOUT);
CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
if (!pgfdw_exec_cleanup_query_end(entry->conn, sql, endtime,
true, false))
{
/* Unable to abort remote (sub)transaction */
pgfdw_reset_xact_state(entry, toplevel);
continue;
}
if (toplevel)
{
/* Do a DEALLOCATE ALL in parallel if needed */
if (entry->have_prep_stmt && entry->have_error)
{
if (!pgfdw_exec_cleanup_query_begin(entry->conn,
"DEALLOCATE ALL"))
{
/* Trouble clearing prepared statements */
pgfdw_reset_xact_state(entry, toplevel);
}
else
pending_deallocs = lappend(pending_deallocs, entry);
continue;
}
entry->have_prep_stmt = false;
entry->have_error = false;
}
/* Reset the per-connection state if needed */
if (entry->state.pendingAreq)
memset(&entry->state, 0, sizeof(entry->state));
/* We're done with this entry; unset the changing_xact_state flag */
entry->changing_xact_state = false;
pgfdw_reset_xact_state(entry, toplevel);
}
/* No further work if no pending entries */
if (!pending_deallocs)
return;
Assert(toplevel);
/*
* Get the result of the DEALLOCATE command for each of the pending
* entries
*/
foreach(lc, pending_deallocs)
{
ConnCacheEntry *entry = (ConnCacheEntry *) lfirst(lc);
TimestampTz endtime;
Assert(entry->changing_xact_state);
Assert(entry->have_prep_stmt);
Assert(entry->have_error);
/*
* Set end time. We do this now, not before issuing the command like
* in normal mode, for the same reason as for the cancel_requested
* entries.
*/
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
CONNECTION_CLEANUP_TIMEOUT);
if (!pgfdw_exec_cleanup_query_end(entry->conn, "DEALLOCATE ALL",
endtime, true, true))
{
/* Trouble clearing prepared statements */
pgfdw_reset_xact_state(entry, toplevel);
continue;
}
entry->have_prep_stmt = false;
entry->have_error = false;
/* Reset the per-connection state if needed */
if (entry->state.pendingAreq)
memset(&entry->state, 0, sizeof(entry->state));
/* We're done with this entry; unset the changing_xact_state flag */
entry->changing_xact_state = false;
pgfdw_reset_xact_state(entry, toplevel);
}
}
/*
* List active foreign server connections.
*
* This function takes no input parameter and returns setof record made of
* following values:
* - server_name - server name of active connection. In case the foreign server
* is dropped but still the connection is active, then the server name will
* be NULL in output.
* - valid - true/false representing whether the connection is valid or not.
* Note that the connections can get invalidated in pgfdw_inval_callback.
*
* No records are returned when there are no cached connections at all.
*/
Datum
postgres_fdw_get_connections(PG_FUNCTION_ARGS)
{
#define POSTGRES_FDW_GET_CONNECTIONS_COLS 2
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
HASH_SEQ_STATUS scan;
ConnCacheEntry *entry;
InitMaterializedSRF(fcinfo, 0);
/* If cache doesn't exist, we return no records */
if (!ConnectionHash)
PG_RETURN_VOID();
hash_seq_init(&scan, ConnectionHash);
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
{
ForeignServer *server;
Datum values[POSTGRES_FDW_GET_CONNECTIONS_COLS] = {0};
bool nulls[POSTGRES_FDW_GET_CONNECTIONS_COLS] = {0};
/* We only look for open remote connections */
if (!entry->conn)
continue;
server = GetForeignServerExtended(entry->serverid, FSV_MISSING_OK);
/*
* The foreign server may have been dropped in current explicit
* transaction. It is not possible to drop the server from another
* session when the connection associated with it is in use in the
* current transaction, if tried so, the drop query in another session
* blocks until the current transaction finishes.
*
* Even though the server is dropped in the current transaction, the
* cache can still have associated active connection entry, say we
* call such connections dangling. Since we can not fetch the server
* name from system catalogs for dangling connections, instead we show
* NULL value for server name in output.
*
* We could have done better by storing the server name in the cache
* entry instead of server oid so that it could be used in the output.
* But the server name in each cache entry requires 64 bytes of
* memory, which is huge, when there are many cached connections and
* the use case i.e. dropping the foreign server within the explicit
* current transaction seems rare. So, we chose to show NULL value for
* server name in output.
*
* Such dangling connections get closed either in next use or at the
* end of current explicit transaction in pgfdw_xact_callback.
*/
if (!server)
{
/*
* If the server has been dropped in the current explicit
* transaction, then this entry would have been invalidated in
* pgfdw_inval_callback at the end of drop server command. Note
* that this connection would not have been closed in
* pgfdw_inval_callback because it is still being used in the
* current explicit transaction. So, assert that here.
*/
Assert(entry->conn && entry->xact_depth > 0 && entry->invalidated);
/* Show null, if no server name was found */
nulls[0] = true;
}
else
values[0] = CStringGetTextDatum(server->servername);
values[1] = BoolGetDatum(!entry->invalidated);
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
}
PG_RETURN_VOID();
}
/*
* Disconnect the specified cached connections.
*
* This function discards the open connections that are established by
* postgres_fdw from the local session to the foreign server with
* the given name. Note that there can be multiple connections to
* the given server using different user mappings. If the connections
* are used in the current local transaction, they are not disconnected
* and warning messages are reported. This function returns true
* if it disconnects at least one connection, otherwise false. If no
* foreign server with the given name is found, an error is reported.
*/
Datum
postgres_fdw_disconnect(PG_FUNCTION_ARGS)
{
ForeignServer *server;
char *servername;
servername = text_to_cstring(PG_GETARG_TEXT_PP(0));
server = GetForeignServerByName(servername, false);
PG_RETURN_BOOL(disconnect_cached_connections(server->serverid));
}
/*
* Disconnect all the cached connections.
*
* This function discards all the open connections that are established by
* postgres_fdw from the local session to the foreign servers.
* If the connections are used in the current local transaction, they are
* not disconnected and warning messages are reported. This function
* returns true if it disconnects at least one connection, otherwise false.
*/
Datum
postgres_fdw_disconnect_all(PG_FUNCTION_ARGS)
{
PG_RETURN_BOOL(disconnect_cached_connections(InvalidOid));
}
/*
* Workhorse to disconnect cached connections.
*
* This function scans all the connection cache entries and disconnects
* the open connections whose foreign server OID matches with
* the specified one. If InvalidOid is specified, it disconnects all
* the cached connections.
*
* This function emits a warning for each connection that's used in
* the current transaction and doesn't close it. It returns true if
* it disconnects at least one connection, otherwise false.
*
* Note that this function disconnects even the connections that are
* established by other users in the same local session using different
* user mappings. This leads even non-superuser to be able to close
* the connections established by superusers in the same local session.
*
* XXX As of now we don't see any security risk doing this. But we should
* set some restrictions on that, for example, prevent non-superuser
* from closing the connections established by superusers even
* in the same session?
*/
static bool
disconnect_cached_connections(Oid serverid)
{
HASH_SEQ_STATUS scan;
ConnCacheEntry *entry;
bool all = !OidIsValid(serverid);
bool result = false;
/*
* Connection cache hashtable has not been initialized yet in this
* session, so return false.
*/
if (!ConnectionHash)
return false;
hash_seq_init(&scan, ConnectionHash);
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
{
/* Ignore cache entry if no open connection right now. */
if (!entry->conn)
continue;
if (all || entry->serverid == serverid)
{
/*
* Emit a warning because the connection to close is used in the
* current transaction and cannot be disconnected right now.
*/
if (entry->xact_depth > 0)
{
ForeignServer *server;
server = GetForeignServerExtended(entry->serverid,
FSV_MISSING_OK);
if (!server)
{
/*
* If the foreign server was dropped while its connection
* was used in the current transaction, the connection
* must have been marked as invalid by
* pgfdw_inval_callback at the end of DROP SERVER command.
*/
Assert(entry->invalidated);
ereport(WARNING,
(errmsg("cannot close dropped server connection because it is still in use")));
}
else
ereport(WARNING,
(errmsg("cannot close connection for server \"%s\" because it is still in use",
server->servername)));
}
else
{
elog(DEBUG3, "discarding connection %p", entry->conn);
disconnect_pg_server(entry);
result = true;
}
}
}
return result;
}