2013-02-21 14:26:23 +04:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* connection.c
|
|
|
|
* Connection management functions for postgres_fdw
|
|
|
|
*
|
2022-01-08 03:04:57 +03:00
|
|
|
* Portions Copyright (c) 2012-2022, PostgreSQL Global Development Group
|
2013-02-21 14:26:23 +04:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* contrib/postgres_fdw/connection.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
#include "access/htup_details.h"
|
2013-02-21 14:26:23 +04:00
|
|
|
#include "access/xact.h"
|
2019-06-11 07:39:31 +03:00
|
|
|
#include "catalog/pg_user_mapping.h"
|
2019-12-20 08:53:34 +03:00
|
|
|
#include "commands/defrem.h"
|
2021-01-18 09:11:08 +03:00
|
|
|
#include "funcapi.h"
|
2013-02-21 14:26:23 +04:00
|
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#include "miscadmin.h"
|
2016-10-04 17:50:13 +03:00
|
|
|
#include "pgstat.h"
|
2019-10-23 06:56:22 +03:00
|
|
|
#include "postgres_fdw.h"
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
#include "storage/fd.h"
|
2016-04-21 17:46:09 +03:00
|
|
|
#include "storage/latch.h"
|
2021-01-18 09:11:08 +03:00
|
|
|
#include "utils/builtins.h"
|
2020-08-17 10:50:13 +03:00
|
|
|
#include "utils/datetime.h"
|
2013-02-21 14:26:23 +04:00
|
|
|
#include "utils/hsearch.h"
|
2017-07-21 19:51:38 +03:00
|
|
|
#include "utils/inval.h"
|
2013-02-21 14:26:23 +04:00
|
|
|
#include "utils/memutils.h"
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
#include "utils/syscache.h"
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Connection cache hash table entry
|
|
|
|
*
|
Avoid multiple foreign server connections when all use same user mapping.
Previously, postgres_fdw's connection cache was keyed by user OID and
server OID, but this can lead to multiple connections when it's not
really necessary. In particular, if all relevant users are mapped to
the public user mapping, then their connection options are certainly
the same, so one connection can be used for all of them.
While we're cleaning things up here, drop the "server" argument to
GetConnection(), which isn't really needed. This saves a few cycles
because callers no longer have to look this up; the function itself
does, but only when establishing a new connection, not when reusing
an existing one.
Ashutosh Bapat, with a few small changes by me.
2016-01-28 20:05:19 +03:00
|
|
|
* The lookup key in this hash table is the user mapping OID. We use just one
|
|
|
|
* connection per user mapping ID, which ensures that all the scans use the
|
|
|
|
* same snapshot during a query. Using the user mapping OID rather than
|
|
|
|
* the foreign server OID + user OID avoids creating multiple connections when
|
|
|
|
* the public user mapping applies to all user OIDs.
|
2013-02-21 14:26:23 +04:00
|
|
|
*
|
|
|
|
* The "conn" pointer can be NULL if we don't currently have a live connection.
|
|
|
|
* When we do have a connection, xact_depth tracks the current depth of
|
|
|
|
* transactions and subtransactions open on the remote side. We need to issue
|
|
|
|
* commands at the same nesting depth on the remote as we're executing at
|
|
|
|
* ourselves, so that rolling back a subtransaction will kill the right
|
|
|
|
* queries and not the wrong ones.
|
|
|
|
*/
|
Avoid multiple foreign server connections when all use same user mapping.
Previously, postgres_fdw's connection cache was keyed by user OID and
server OID, but this can lead to multiple connections when it's not
really necessary. In particular, if all relevant users are mapped to
the public user mapping, then their connection options are certainly
the same, so one connection can be used for all of them.
While we're cleaning things up here, drop the "server" argument to
GetConnection(), which isn't really needed. This saves a few cycles
because callers no longer have to look this up; the function itself
does, but only when establishing a new connection, not when reusing
an existing one.
Ashutosh Bapat, with a few small changes by me.
2016-01-28 20:05:19 +03:00
|
|
|
/* Hash key type of the connection cache: the OID of a user mapping. */
typedef Oid ConnCacheKey;
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/*
 * ConnCacheEntry: one entry of the ConnectionHash connection cache.
 *
 * The key (a user mapping OID) has to stay the first member, as the
 * field comment below requires. Every field other than key and conn
 * is meaningful only while conn is non-NULL.
 */
typedef struct ConnCacheEntry
|
|
|
|
{
|
|
|
|
ConnCacheKey key; /* hash key: user mapping OID (must be first member) */
|
|
|
|
PGconn *conn; /* connection to foreign server, or NULL */
|
2017-07-21 19:51:38 +03:00
|
|
|
/* Remaining fields are invalid when conn is NULL: */
|
2013-02-21 14:26:23 +04:00
|
|
|
int xact_depth; /* 0 = no xact open, 1 = main xact open, 2 =
|
|
|
|
* one level of subxact open, etc */
|
2013-03-10 22:14:53 +04:00
|
|
|
bool have_prep_stmt; /* have we prepared any stmts in this xact? */
|
|
|
|
bool have_error; /* have any subxacts aborted in this xact? */
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
bool changing_xact_state; /* xact state change in process */
|
2022-02-24 08:30:00 +03:00
|
|
|
bool parallel_commit; /* do we commit (sub)xacts in parallel? */
|
2017-07-21 19:51:38 +03:00
|
|
|
bool invalidated; /* true if reconnect is pending */
|
2021-04-02 13:45:42 +03:00
|
|
|
bool keep_connections; /* setting value of keep_connections
|
|
|
|
* server option */
|
2021-01-15 04:30:19 +03:00
|
|
|
Oid serverid; /* foreign server OID used to get server name */
|
2017-07-21 19:51:38 +03:00
|
|
|
uint32 server_hashvalue; /* hash value of foreign server OID */
|
|
|
|
uint32 mapping_hashvalue; /* hash value of user mapping OID */
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
PgFdwConnState state; /* extra per-connection state */
|
2013-02-21 14:26:23 +04:00
|
|
|
} ConnCacheEntry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Connection cache (initialized on first use)
|
|
|
|
*/
|
|
|
|
static HTAB *ConnectionHash = NULL;
|
|
|
|
|
2013-03-10 22:14:53 +04:00
|
|
|
/* for assigning cursor numbers and prepared statement numbers */
|
2013-02-21 14:26:23 +04:00
|
|
|
static unsigned int cursor_number = 0;
|
2013-03-10 22:14:53 +04:00
|
|
|
static unsigned int prep_stmt_number = 0;
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/* tracks whether any work is needed in callback functions */
|
|
|
|
static bool xact_got_connection = false;
|
|
|
|
|
2021-01-18 09:11:08 +03:00
|
|
|
/*
|
|
|
|
* SQL functions
|
|
|
|
*/
|
|
|
|
PG_FUNCTION_INFO_V1(postgres_fdw_get_connections);
|
2021-01-25 21:54:46 +03:00
|
|
|
PG_FUNCTION_INFO_V1(postgres_fdw_disconnect);
|
|
|
|
PG_FUNCTION_INFO_V1(postgres_fdw_disconnect_all);
|
2021-01-18 09:11:08 +03:00
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
/* prototypes of private functions */
|
2020-10-16 07:58:45 +03:00
|
|
|
static void make_new_connection(ConnCacheEntry *entry, UserMapping *user);
|
2013-02-21 14:26:23 +04:00
|
|
|
static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
|
2017-07-21 19:51:38 +03:00
|
|
|
static void disconnect_pg_server(ConnCacheEntry *entry);
|
postgres_fdw: Judge password use by run-as user, not session user.
This is a backward incompatibility which should be noted in the
release notes for PostgreSQL 11.
For security reasons, we require that a postgres_fdw foreign table use
password authentication when accessing a remote server, so that an
unprivileged user cannot usurp the server's credentials. Superusers
are exempt from this requirement, because we assume they are entitled
to usurp the server's credentials or, at least, can find some other
way to do it.
But what should happen when the foreign table is accessed by a view
owned by a user different from the session user? Is it the view owner
that must be a superuser in order to avoid the requirement of using a
password, or the session user? Historically it was the latter, but
this requirement makes it the former instead. This allows superusers
to delegate to other users the right to select from a foreign table
that doesn't use password authentication by creating a view over the
foreign table and handing out rights to the view. It is also more
consistent with the idea that access to a view should use the view
owner's privileges rather than the session user's privileges.
The upshot of this change is that a superuser selecting from a view
created by a non-superuser may now get an error complaining that no
password was used, while a non-superuser selecting from a view
created by a superuser will no longer receive such an error.
No documentation changes are present in this patch because the
wording of the documentation already suggests that it works this
way. We should perhaps adjust the documentation in the back-branches,
but that's a task for another patch.
Originally proposed by Jeff Janes, but with different semantics;
adjusted to work like this by me per discussion.
Discussion: http://postgr.es/m/CA+TgmoaY4HsVZJv5SqEjCKLDwtCTSwXzKpRftgj50wmMMBwciA@mail.gmail.com
2017-12-05 19:19:45 +03:00
|
|
|
static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
|
2013-02-22 15:03:46 +04:00
|
|
|
static void configure_remote_session(PGconn *conn);
|
2022-02-24 08:30:00 +03:00
|
|
|
static void do_sql_command_begin(PGconn *conn, const char *sql);
|
|
|
|
static void do_sql_command_end(PGconn *conn, const char *sql,
|
|
|
|
bool consume_input);
|
2013-02-21 14:26:23 +04:00
|
|
|
static void begin_remote_xact(ConnCacheEntry *entry);
|
|
|
|
static void pgfdw_xact_callback(XactEvent event, void *arg);
|
|
|
|
static void pgfdw_subxact_callback(SubXactEvent event,
|
|
|
|
SubTransactionId mySubid,
|
|
|
|
SubTransactionId parentSubid,
|
|
|
|
void *arg);
|
2017-07-21 19:51:38 +03:00
|
|
|
static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
|
2022-02-24 08:30:00 +03:00
|
|
|
static void pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
static bool pgfdw_cancel_query(PGconn *conn);
|
|
|
|
static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
|
|
|
|
bool ignore_errors);
|
|
|
|
static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
|
2021-12-08 17:31:46 +03:00
|
|
|
PGresult **result, bool *timed_out);
|
2022-03-25 09:30:00 +03:00
|
|
|
static void pgfdw_abort_cleanup(ConnCacheEntry *entry, bool toplevel);
|
2022-02-24 08:30:00 +03:00
|
|
|
static void pgfdw_finish_pre_commit_cleanup(List *pending_entries);
|
|
|
|
static void pgfdw_finish_pre_subcommit_cleanup(List *pending_entries,
|
|
|
|
int curlevel);
|
2019-12-20 08:53:34 +03:00
|
|
|
static bool UserMappingPasswordRequired(UserMapping *user);
|
2021-01-25 21:54:46 +03:00
|
|
|
static bool disconnect_cached_connections(Oid serverid);
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a PGconn which can be used to execute queries on the remote PostgreSQL
|
|
|
|
* server with the user's authorization. A new connection is established
|
|
|
|
* if we don't already have a suitable one, and a transaction is opened at
|
|
|
|
* the right subtransaction nesting depth if we didn't do that already.
|
|
|
|
*
|
2013-03-10 22:14:53 +04:00
|
|
|
* will_prep_stmt must be true if caller intends to create any prepared
|
|
|
|
* statements. Since those don't go away automatically at transaction end
|
|
|
|
* (not even on error), we need this flag to cue manual cleanup.
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
*
|
|
|
|
* If state is not NULL, *state receives the per-connection state associated
|
|
|
|
* with the PGconn.
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
|
|
|
PGconn *
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
GetConnection(UserMapping *user, bool will_prep_stmt, PgFdwConnState **state)
|
2013-02-21 14:26:23 +04:00
|
|
|
{
|
|
|
|
bool found;
|
2020-10-16 07:58:45 +03:00
|
|
|
bool retry = false;
|
2013-02-21 14:26:23 +04:00
|
|
|
ConnCacheEntry *entry;
|
|
|
|
ConnCacheKey key;
|
2020-10-16 07:58:45 +03:00
|
|
|
MemoryContext ccxt = CurrentMemoryContext;
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/* First time through, initialize connection cache hashtable */
|
|
|
|
if (ConnectionHash == NULL)
|
|
|
|
{
|
|
|
|
HASHCTL ctl;
|
|
|
|
|
|
|
|
ctl.keysize = sizeof(ConnCacheKey);
|
|
|
|
ctl.entrysize = sizeof(ConnCacheEntry);
|
|
|
|
ConnectionHash = hash_create("postgres_fdw connections", 8,
|
|
|
|
&ctl,
|
Improve hash_create()'s API for some added robustness.
Invent a new flag bit HASH_STRINGS to specify C-string hashing, which
was formerly the default; and add assertions insisting that exactly
one of the bits HASH_STRINGS, HASH_BLOBS, and HASH_FUNCTION be set.
This is in hopes of preventing recurrences of the type of oversight
fixed in commit a1b8aa1e4 (i.e., mistakenly omitting HASH_BLOBS).
Also, when HASH_STRINGS is specified, insist that the keysize be
more than 8 bytes. This is a heuristic, but it should catch
accidental use of HASH_STRINGS for integer or pointer keys.
(Nearly all existing use-cases set the keysize to NAMEDATALEN or
more, so there's little reason to think this restriction should
be problematic.)
Tweak hash_create() to insist that the HASH_ELEM flag be set, and
remove the defaults it had for keysize and entrysize. Since those
defaults were undocumented and basically useless, no callers
omitted HASH_ELEM anyway.
Also, remove memset's zeroing the HASHCTL parameter struct from
those callers that had one. This has never been really necessary,
and while it wasn't a bad coding convention it was confusing that
some callers did it and some did not. We might as well save a few
cycles by standardizing on "not".
Also improve the documentation for hash_create().
In passing, improve reinit.c's usage of a hash table by storing
the key as a binary Oid rather than a string; and, since that's
a temporary hash table, allocate it in CurrentMemoryContext for
neatness.
Discussion: https://postgr.es/m/590625.1607878171@sss.pgh.pa.us
2020-12-15 19:38:53 +03:00
|
|
|
HASH_ELEM | HASH_BLOBS);
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Register some callback functions that manage connection cleanup.
|
|
|
|
* This should be done just once in each backend.
|
|
|
|
*/
|
|
|
|
RegisterXactCallback(pgfdw_xact_callback, NULL);
|
|
|
|
RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
|
2017-07-21 19:51:38 +03:00
|
|
|
CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
|
|
|
|
pgfdw_inval_callback, (Datum) 0);
|
|
|
|
CacheRegisterSyscacheCallback(USERMAPPINGOID,
|
|
|
|
pgfdw_inval_callback, (Datum) 0);
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Set flag that we did GetConnection during the current transaction */
|
|
|
|
xact_got_connection = true;
|
|
|
|
|
|
|
|
/* Create hash key for the entry. Assume no pad bytes in key struct */
|
Avoid multiple foreign server connections when all use same user mapping.
Previously, postgres_fdw's connection cache was keyed by user OID and
server OID, but this can lead to multiple connections when it's not
really necessary. In particular, if all relevant users are mapped to
the public user mapping, then their connection options are certainly
the same, so one connection can be used for all of them.
While we're cleaning things up here, drop the "server" argument to
GetConnection(), which isn't really needed. This saves a few cycles
because callers no longer have to look this up; the function itself
does, but only when establishing a new connection, not when reusing
an existing one.
Ashutosh Bapat, with a few small changes by me.
2016-01-28 20:05:19 +03:00
|
|
|
key = user->umid;
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Find or create cached entry for requested connection.
|
|
|
|
*/
|
|
|
|
entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found);
|
|
|
|
if (!found)
|
|
|
|
{
|
2017-07-21 19:51:38 +03:00
|
|
|
/*
|
|
|
|
* We need only clear "conn" here; remaining fields will be filled
|
|
|
|
* later when "conn" is set.
|
|
|
|
*/
|
2013-02-21 14:26:23 +04:00
|
|
|
entry->conn = NULL;
|
|
|
|
}
|
|
|
|
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
/* Reject further use of connections which failed abort cleanup. */
|
|
|
|
pgfdw_reject_incomplete_xact_state_change(entry);
|
|
|
|
|
2017-07-21 19:51:38 +03:00
|
|
|
/*
|
|
|
|
* If the connection needs to be remade due to invalidation, disconnect as
|
2020-10-16 07:58:45 +03:00
|
|
|
* soon as we're out of all transactions.
|
2017-07-21 19:51:38 +03:00
|
|
|
*/
|
2020-10-16 07:58:45 +03:00
|
|
|
if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
|
2017-07-21 19:51:38 +03:00
|
|
|
{
|
2020-10-16 07:58:45 +03:00
|
|
|
elog(DEBUG3, "closing connection %p for option changes to take effect",
|
|
|
|
entry->conn);
|
2017-07-21 19:51:38 +03:00
|
|
|
disconnect_pg_server(entry);
|
|
|
|
}
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
/*
|
|
|
|
* If cache entry doesn't have a connection, we have to establish a new
|
|
|
|
* connection. (If connect_pg_server throws an error, the cache entry
|
2017-07-21 19:51:38 +03:00
|
|
|
* will remain in a valid empty state, ie conn == NULL.)
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
|
|
|
if (entry->conn == NULL)
|
2020-10-16 07:58:45 +03:00
|
|
|
make_new_connection(entry, user);
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/*
|
2021-10-07 12:15:00 +03:00
|
|
|
* We check the health of the cached connection here when using it. In
|
|
|
|
* cases where we're out of all transactions, if a broken connection is
|
|
|
|
* detected, we try to reestablish a new connection later.
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
2020-10-06 04:31:09 +03:00
|
|
|
PG_TRY();
|
|
|
|
{
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
/* Process a pending asynchronous request if any. */
|
|
|
|
if (entry->state.pendingAreq)
|
|
|
|
process_pending_request(entry->state.pendingAreq);
|
2020-10-06 04:31:09 +03:00
|
|
|
/* Start a new transaction or subtransaction if needed. */
|
|
|
|
begin_remote_xact(entry);
|
|
|
|
}
|
|
|
|
PG_CATCH();
|
|
|
|
{
|
2020-10-16 07:58:45 +03:00
|
|
|
MemoryContext ecxt = MemoryContextSwitchTo(ccxt);
|
|
|
|
ErrorData *errdata = CopyErrorData();
|
|
|
|
|
|
|
|
/*
|
2021-10-07 12:15:00 +03:00
|
|
|
* Determine whether to try to reestablish the connection.
|
2020-10-16 07:58:45 +03:00
|
|
|
*
|
|
|
|
* After a broken connection is detected in libpq, any error other
|
|
|
|
* than connection failure (e.g., out-of-memory) can be thrown
|
|
|
|
* somewhere between return from libpq and the expected ereport() call
|
|
|
|
* in pgfdw_report_error(). In this case, since PQstatus() indicates
|
|
|
|
* CONNECTION_BAD, checking only PQstatus() causes the false detection
|
|
|
|
* of connection failure. To avoid this, we also verify that the
|
|
|
|
* error's sqlstate is ERRCODE_CONNECTION_FAILURE. Note that also
|
|
|
|
* checking only the sqlstate can cause another false detection
|
|
|
|
* because pgfdw_report_error() may report ERRCODE_CONNECTION_FAILURE
|
|
|
|
* for any libpq-originated error condition.
|
|
|
|
*/
|
|
|
|
if (errdata->sqlerrcode != ERRCODE_CONNECTION_FAILURE ||
|
|
|
|
PQstatus(entry->conn) != CONNECTION_BAD ||
|
|
|
|
entry->xact_depth > 0)
|
|
|
|
{
|
|
|
|
MemoryContextSwitchTo(ecxt);
|
2020-10-06 04:31:09 +03:00
|
|
|
PG_RE_THROW();
|
2020-10-16 07:58:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Clean up the error state */
|
|
|
|
FlushErrorState();
|
|
|
|
FreeErrorData(errdata);
|
|
|
|
errdata = NULL;
|
|
|
|
|
|
|
|
retry = true;
|
2020-10-06 04:31:09 +03:00
|
|
|
}
|
|
|
|
PG_END_TRY();
|
|
|
|
|
2020-10-16 07:58:45 +03:00
|
|
|
/*
|
|
|
|
* If a broken connection is detected, disconnect it, reestablish a new
|
|
|
|
* connection and retry a new remote transaction. If connection failure is
|
|
|
|
* reported again, we give up getting a connection.
|
|
|
|
*/
|
|
|
|
if (retry)
|
2020-10-06 04:31:09 +03:00
|
|
|
{
|
2020-10-16 07:58:45 +03:00
|
|
|
Assert(entry->xact_depth == 0);
|
|
|
|
|
2020-10-06 04:31:09 +03:00
|
|
|
ereport(DEBUG3,
|
|
|
|
(errmsg_internal("could not start remote transaction on connection %p",
|
|
|
|
entry->conn)),
|
|
|
|
errdetail_internal("%s", pchomp(PQerrorMessage(entry->conn))));
|
2020-10-16 07:58:45 +03:00
|
|
|
|
|
|
|
elog(DEBUG3, "closing connection %p to reestablish a new one",
|
|
|
|
entry->conn);
|
|
|
|
disconnect_pg_server(entry);
|
|
|
|
|
|
|
|
if (entry->conn == NULL)
|
|
|
|
make_new_connection(entry, user);
|
|
|
|
|
|
|
|
begin_remote_xact(entry);
|
2020-10-06 04:31:09 +03:00
|
|
|
}
|
2013-02-21 14:26:23 +04:00
|
|
|
|
2013-03-10 22:14:53 +04:00
|
|
|
/* Remember if caller will prepare statements */
|
|
|
|
entry->have_prep_stmt |= will_prep_stmt;
|
|
|
|
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
/* If caller needs access to the per-connection state, return it. */
|
|
|
|
if (state)
|
|
|
|
*state = &entry->state;
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
return entry->conn;
|
|
|
|
}
|
|
|
|
|
2020-10-16 07:58:45 +03:00
|
|
|
/*
|
|
|
|
* Reset all transient state fields in the cached connection entry and
|
|
|
|
* establish new connection to the remote server.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
make_new_connection(ConnCacheEntry *entry, UserMapping *user)
|
|
|
|
{
|
|
|
|
ForeignServer *server = GetForeignServer(user->serverid);
|
2021-04-02 13:45:42 +03:00
|
|
|
ListCell *lc;
|
2020-10-16 07:58:45 +03:00
|
|
|
|
|
|
|
Assert(entry->conn == NULL);
|
|
|
|
|
|
|
|
/* Reset all transient state fields, to be sure all are clean */
|
|
|
|
entry->xact_depth = 0;
|
|
|
|
entry->have_prep_stmt = false;
|
|
|
|
entry->have_error = false;
|
|
|
|
entry->changing_xact_state = false;
|
|
|
|
entry->invalidated = false;
|
2021-01-15 04:30:19 +03:00
|
|
|
entry->serverid = server->serverid;
|
2020-10-16 07:58:45 +03:00
|
|
|
entry->server_hashvalue =
|
|
|
|
GetSysCacheHashValue1(FOREIGNSERVEROID,
|
|
|
|
ObjectIdGetDatum(server->serverid));
|
|
|
|
entry->mapping_hashvalue =
|
|
|
|
GetSysCacheHashValue1(USERMAPPINGOID,
|
|
|
|
ObjectIdGetDatum(user->umid));
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
memset(&entry->state, 0, sizeof(entry->state));
|
2020-10-16 07:58:45 +03:00
|
|
|
|
2021-04-02 13:45:42 +03:00
|
|
|
/*
|
|
|
|
* Determine whether to keep the connection that we're about to make here
|
|
|
|
* open even after the transaction using it ends, so that the subsequent
|
|
|
|
* transactions can re-use it.
|
|
|
|
*
|
|
|
|
* By default, all the connections to any foreign servers are kept open.
|
2022-02-24 08:30:00 +03:00
|
|
|
*
|
|
|
|
* Also determine whether to commit (sub)transactions opened on the remote
|
2022-05-12 11:30:00 +03:00
|
|
|
* server in parallel at (sub)transaction end, which is disabled by
|
|
|
|
* default.
|
|
|
|
*
|
|
|
|
* Note: it's enough to determine these only when making a new connection
|
|
|
|
* because these settings for it are changed, it will be closed and
|
|
|
|
* re-made later.
|
2021-04-02 13:45:42 +03:00
|
|
|
*/
|
|
|
|
entry->keep_connections = true;
|
2022-02-24 08:30:00 +03:00
|
|
|
entry->parallel_commit = false;
|
2021-04-02 13:45:42 +03:00
|
|
|
foreach(lc, server->options)
|
|
|
|
{
|
|
|
|
DefElem *def = (DefElem *) lfirst(lc);
|
|
|
|
|
|
|
|
if (strcmp(def->defname, "keep_connections") == 0)
|
|
|
|
entry->keep_connections = defGetBoolean(def);
|
2022-02-24 08:30:00 +03:00
|
|
|
else if (strcmp(def->defname, "parallel_commit") == 0)
|
|
|
|
entry->parallel_commit = defGetBoolean(def);
|
2021-04-02 13:45:42 +03:00
|
|
|
}
|
|
|
|
|
2020-10-16 07:58:45 +03:00
|
|
|
/* Now try to make the connection */
|
|
|
|
entry->conn = connect_pg_server(server, user);
|
|
|
|
|
|
|
|
elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
|
|
|
|
entry->conn, server->servername, user->umid, user->userid);
|
|
|
|
}
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
/*
|
|
|
|
* Connect to remote server using specified server and user mapping properties.
|
|
|
|
*/
|
|
|
|
static PGconn *
|
|
|
|
connect_pg_server(ForeignServer *server, UserMapping *user)
|
|
|
|
{
|
|
|
|
PGconn *volatile conn = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use PG_TRY block to ensure closing connection on error.
|
|
|
|
*/
|
|
|
|
PG_TRY();
|
|
|
|
{
|
|
|
|
const char **keywords;
|
|
|
|
const char **values;
|
2021-12-24 10:55:11 +03:00
|
|
|
char *appname = NULL;
|
2013-02-21 14:26:23 +04:00
|
|
|
int n;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Construct connection params from generic options of ForeignServer
|
|
|
|
* and UserMapping. (Some of them might not be libpq options, in
|
2021-09-07 06:27:30 +03:00
|
|
|
* which case we'll just waste a few array slots.) Add 4 extra slots
|
|
|
|
* for application_name, fallback_application_name, client_encoding,
|
|
|
|
* end marker.
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
2021-09-07 06:27:30 +03:00
|
|
|
n = list_length(server->options) + list_length(user->options) + 4;
|
2013-02-21 14:26:23 +04:00
|
|
|
keywords = (const char **) palloc(n * sizeof(char *));
|
|
|
|
values = (const char **) palloc(n * sizeof(char *));
|
|
|
|
|
|
|
|
n = 0;
|
|
|
|
n += ExtractConnectionOptions(server->options,
|
|
|
|
keywords + n, values + n);
|
|
|
|
n += ExtractConnectionOptions(user->options,
|
|
|
|
keywords + n, values + n);
|
|
|
|
|
2021-09-07 06:27:30 +03:00
|
|
|
/*
|
|
|
|
* Use pgfdw_application_name as application_name if set.
|
|
|
|
*
|
|
|
|
* PQconnectdbParams() processes the parameter arrays from start to
|
|
|
|
* end. If any key word is repeated, the last value is used. Therefore
|
|
|
|
* note that pgfdw_application_name must be added to the arrays after
|
|
|
|
* options of ForeignServer are, so that it can override
|
|
|
|
* application_name set in ForeignServer.
|
|
|
|
*/
|
|
|
|
if (pgfdw_application_name && *pgfdw_application_name != '\0')
|
|
|
|
{
|
|
|
|
keywords[n] = "application_name";
|
|
|
|
values[n] = pgfdw_application_name;
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
|
2021-12-24 10:55:11 +03:00
|
|
|
/*
|
|
|
|
* Search the parameter arrays to find application_name setting, and
|
|
|
|
* replace escape sequences in it with status information if found.
|
|
|
|
* The arrays are searched backwards because the last value is used if
|
|
|
|
* application_name is repeatedly set.
|
|
|
|
*/
|
|
|
|
for (int i = n - 1; i >= 0; i--)
|
|
|
|
{
|
|
|
|
if (strcmp(keywords[i], "application_name") == 0 &&
|
|
|
|
*(values[i]) != '\0')
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Use this application_name setting if it's not empty string
|
|
|
|
* even after any escape sequences in it are replaced.
|
|
|
|
*/
|
|
|
|
appname = process_pgfdw_appname(values[i]);
|
|
|
|
if (appname[0] != '\0')
|
|
|
|
{
|
|
|
|
values[i] = appname;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This empty application_name is not used, so we set
|
|
|
|
* values[i] to NULL and keep searching the array to find the
|
|
|
|
* next one.
|
|
|
|
*/
|
|
|
|
values[i] = NULL;
|
|
|
|
pfree(appname);
|
|
|
|
appname = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-07 06:27:30 +03:00
|
|
|
/* Use "postgres_fdw" as fallback_application_name */
|
2013-02-21 14:26:23 +04:00
|
|
|
keywords[n] = "fallback_application_name";
|
|
|
|
values[n] = "postgres_fdw";
|
|
|
|
n++;
|
|
|
|
|
|
|
|
/* Set client_encoding so that libpq can convert encoding properly. */
|
|
|
|
keywords[n] = "client_encoding";
|
|
|
|
values[n] = GetDatabaseEncodingName();
|
|
|
|
n++;
|
|
|
|
|
|
|
|
keywords[n] = values[n] = NULL;
|
|
|
|
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
/* verify the set of connection parameters */
|
postgres_fdw: Judge password use by run-as user, not session user.
This is a backward incompatibility which should be noted in the
release notes for PostgreSQL 11.
For security reasons, we require that a postgres_fdw foreign table use
password authentication when accessing a remote server, so that an
unprivileged user cannot usurp the server's credentials. Superusers
are exempt from this requirement, because we assume they are entitled
to usurp the server's credentials or, at least, can find some other
way to do it.
But what should happen when the foreign table is accessed by a view
owned by a user different from the session user? Is it the view owner
that must be a superuser in order to avoid the requirement of using a
password, or the session user? Historically it was the latter, but
this requirement makes it the former instead. This allows superusers
to delegate to other users the right to select from a foreign table
that doesn't use password authentication by creating a view over the
foreign table and handing out rights to the view. It is also more
consistent with the idea that access to a view should use the view
owner's privileges rather than the session user's privileges.
The upshot of this change is that a superuser selecting from a view
created by a non-superuser may now get an error complaining that no
password was used, while a non-superuser selecting from a view
created by a superuser will no longer receive such an error.
No documentation changes are present in this patch because the
wording of the documentation already suggests that it works this
way. We should perhaps adjust the documentation in the back-branches,
but that's a task for another patch.
Originally proposed by Jeff Janes, but with different semantics;
adjusted to work like this by me per discussion.
Discussion: http://postgr.es/m/CA+TgmoaY4HsVZJv5SqEjCKLDwtCTSwXzKpRftgj50wmMMBwciA@mail.gmail.com
2017-12-05 19:19:45 +03:00
|
|
|
check_conn_params(keywords, values, user);
|
2013-02-21 14:26:23 +04:00
|
|
|
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
/*
|
|
|
|
* We must obey fd.c's limit on non-virtual file descriptors. Assume
|
|
|
|
* that a PGconn represents one long-lived FD. (Doing this here also
|
|
|
|
* ensures that VFDs are closed if needed to make room.)
|
|
|
|
*/
|
|
|
|
if (!AcquireExternalFD())
|
2020-02-25 02:43:23 +03:00
|
|
|
{
|
|
|
|
#ifndef WIN32 /* can't write #if within ereport() macro */
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
|
|
|
|
errmsg("could not connect to server \"%s\"",
|
|
|
|
server->servername),
|
|
|
|
errdetail("There are too many open files on the local server."),
|
2020-02-25 02:43:23 +03:00
|
|
|
errhint("Raise the server's max_files_per_process and/or \"ulimit -n\" limits.")));
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
#else
|
2020-02-25 02:43:23 +03:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
|
|
|
|
errmsg("could not connect to server \"%s\"",
|
|
|
|
server->servername),
|
|
|
|
errdetail("There are too many open files on the local server."),
|
|
|
|
errhint("Raise the server's max_files_per_process setting.")));
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
#endif
|
2020-02-25 02:43:23 +03:00
|
|
|
}
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
|
|
|
|
/* OK to make connection */
|
2013-02-21 14:26:23 +04:00
|
|
|
conn = PQconnectdbParams(keywords, values, false);
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
|
|
|
|
if (!conn)
|
|
|
|
ReleaseExternalFD(); /* because the PG_CATCH block won't */
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
if (!conn || PQstatus(conn) != CONNECTION_OK)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
|
|
|
|
errmsg("could not connect to server \"%s\"",
|
|
|
|
server->servername),
|
2017-02-27 16:30:06 +03:00
|
|
|
errdetail_internal("%s", pchomp(PQerrorMessage(conn)))));
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check that non-superuser has used password to establish connection;
|
|
|
|
* otherwise, he's piggybacking on the postgres server's user
|
2019-12-20 08:53:34 +03:00
|
|
|
* identity. See also dblink_security_check() in contrib/dblink and
|
|
|
|
* check_conn_params.
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
2019-12-20 08:53:34 +03:00
|
|
|
if (!superuser_arg(user->userid) && UserMappingPasswordRequired(user) &&
|
|
|
|
!PQconnectionUsedPassword(conn))
|
2013-02-21 14:26:23 +04:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
|
|
|
|
errmsg("password is required"),
|
|
|
|
errdetail("Non-superuser cannot connect if the server does not request a password."),
|
2019-12-20 08:53:34 +03:00
|
|
|
errhint("Target server's authentication method must be changed or password_required=false set in the user mapping attributes.")));
|
2013-02-21 14:26:23 +04:00
|
|
|
|
2013-02-22 15:03:46 +04:00
|
|
|
/* Prepare new session for use */
|
|
|
|
configure_remote_session(conn);
|
|
|
|
|
2021-12-24 10:55:11 +03:00
|
|
|
if (appname != NULL)
|
|
|
|
pfree(appname);
|
2013-02-21 14:26:23 +04:00
|
|
|
pfree(keywords);
|
|
|
|
pfree(values);
|
|
|
|
}
|
|
|
|
PG_CATCH();
|
|
|
|
{
|
|
|
|
/* Release PGconn data structure if we managed to create one */
|
|
|
|
if (conn)
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
{
|
2013-02-21 14:26:23 +04:00
|
|
|
PQfinish(conn);
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
ReleaseExternalFD();
|
|
|
|
}
|
2013-02-21 14:26:23 +04:00
|
|
|
PG_RE_THROW();
|
|
|
|
}
|
|
|
|
PG_END_TRY();
|
|
|
|
|
|
|
|
return conn;
|
|
|
|
}
|
|
|
|
|
2017-07-21 19:51:38 +03:00
|
|
|
/*
|
|
|
|
* Disconnect any open connection for a connection cache entry.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
disconnect_pg_server(ConnCacheEntry *entry)
|
|
|
|
{
|
|
|
|
if (entry->conn != NULL)
|
|
|
|
{
|
|
|
|
PQfinish(entry->conn);
|
|
|
|
entry->conn = NULL;
|
Account explicitly for long-lived FDs that are allocated outside fd.c.
The comments in fd.c have long claimed that all file allocations should
go through that module, but in reality that's not always practical.
fd.c doesn't supply APIs for invoking some FD-producing syscalls like
pipe() or epoll_create(); and the APIs it does supply for non-virtual
FDs are mostly insistent on releasing those FDs at transaction end;
and in some cases the actual open() call is in code that can't be made
to use fd.c, such as libpq.
This has led to a situation where, in a modern server, there are likely
to be seven or so long-lived FDs per backend process that are not known
to fd.c. Since NUM_RESERVED_FDS is only 10, that meant we had *very*
few spare FDs if max_files_per_process is >= the system ulimit and
fd.c had opened all the files it thought it safely could. The
contrib/postgres_fdw regression test, in particular, could easily be
made to fall over by running it under a restrictive ulimit.
To improve matters, invent functions Acquire/Reserve/ReleaseExternalFD
that allow outside callers to tell fd.c that they have or want to allocate
a FD that's not directly managed by fd.c. Add calls to track all the
fixed FDs in a standard backend session, so that we are honestly
guaranteeing that NUM_RESERVED_FDS FDs remain unused below the EMFILE
limit in a backend's idle state. The coding rules for these functions say
that there's no need to call them in code that just allocates one FD over
a fairly short interval; we can dip into NUM_RESERVED_FDS for such cases.
That means that there aren't all that many places where we need to worry.
But postgres_fdw and dblink must use this facility to account for
long-lived FDs consumed by libpq connections. There may be other places
where it's worth doing such accounting, too, but this seems like enough
to solve the immediate problem.
Internally to fd.c, "external" FDs are limited to max_safe_fds/3 FDs.
(Callers can choose to ignore this limit, but of course it's unwise
to do so except for fixed file allocations.) I also reduced the limit
on "allocated" files to max_safe_fds/3 FDs (it had been max_safe_fds/2).
Conceivably a smarter rule could be used here --- but in practice,
on reasonable systems, max_safe_fds should be large enough that this
isn't much of an issue, so KISS for now. To avoid possible regression
in the number of external or allocated files that can be opened,
increase FD_MINFREE and the lower limit on max_files_per_process a
little bit; we now insist that the effective "ulimit -n" be at least 64.
This seems like pretty clearly a bug fix, but in view of the lack of
field complaints, I'll refrain from risking a back-patch.
Discussion: https://postgr.es/m/E1izCmM-0005pV-Co@gemulon.postgresql.org
2020-02-25 01:28:33 +03:00
|
|
|
ReleaseExternalFD();
|
2017-07-21 19:51:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-20 08:53:34 +03:00
|
|
|
/*
|
|
|
|
* Return false only if password_required is defined and set to false for this
|
|
|
|
* user mapping; otherwise return true. The mapping has been pre-validated.
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
UserMappingPasswordRequired(UserMapping *user)
|
|
|
|
{
|
|
|
|
ListCell *cell;
|
|
|
|
|
|
|
|
foreach(cell, user->options)
|
|
|
|
{
|
|
|
|
DefElem *def = (DefElem *) lfirst(cell);
|
2020-05-14 20:06:38 +03:00
|
|
|
|
2019-12-20 08:53:34 +03:00
|
|
|
if (strcmp(def->defname, "password_required") == 0)
|
|
|
|
return defGetBoolean(def);
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
/*
|
|
|
|
* For non-superusers, insist that the connstr specify a password. This
|
2019-12-20 08:53:34 +03:00
|
|
|
* prevents a password from being picked up from .pgpass, a service file, the
|
|
|
|
* environment, etc. We don't want the postgres user's passwords,
|
|
|
|
* certificates, etc to be accessible to non-superusers. (See also
|
|
|
|
* dblink_connstr_check in contrib/dblink.)
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
|
|
|
static void
|
postgres_fdw: Judge password use by run-as user, not session user.
This is a backward incompatibility which should be noted in the
release notes for PostgreSQL 11.
For security reasons, we require that a postgres_fdw foreign table use
password authentication when accessing a remote server, so that an
unprivileged user cannot usurp the server's credentials. Superusers
are exempt from this requirement, because we assume they are entitled
to usurp the server's credentials or, at least, can find some other
way to do it.
But what should happen when the foreign table is accessed by a view
owned by a user different from the session user? Is it the view owner
that must be a superuser in order to avoid the requirement of using a
password, or the session user? Historically it was the latter, but
this requirement makes it the former instead. This allows superusers
to delegate to other users the right to select from a foreign table
that doesn't use password authentication by creating a view over the
foreign table and handing out rights to the view. It is also more
consistent with the idea that access to a view should use the view
owner's privileges rather than the session user's privileges.
The upshot of this change is that a superuser selecting from a view
created by a non-superuser may now get an error complaining that no
password was used, while a non-superuser selecting from a view
created by a superuser will no longer receive such an error.
No documentation changes are present in this patch because the
wording of the documentation already suggests that it works this
way. We should perhaps adjust the documentation in the back-branches,
but that's a task for another patch.
Originally proposed by Jeff Janes, but with different semantics;
adjusted to work like this by me per discussion.
Discussion: http://postgr.es/m/CA+TgmoaY4HsVZJv5SqEjCKLDwtCTSwXzKpRftgj50wmMMBwciA@mail.gmail.com
2017-12-05 19:19:45 +03:00
|
|
|
check_conn_params(const char **keywords, const char **values, UserMapping *user)
|
2013-02-21 14:26:23 +04:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* no check required if superuser */
|
postgres_fdw: Judge password use by run-as user, not session user.
This is a backward incompatibility which should be noted in the
release notes for PostgreSQL 11.
For security reasons, we require that a postgres_fdw foreign table use
password authentication when accessing a remote server, so that an
unprivileged user cannot usurp the server's credentials. Superusers
are exempt from this requirement, because we assume they are entitled
to usurp the server's credentials or, at least, can find some other
way to do it.
But what should happen when the foreign table is accessed by a view
owned by a user different from the session user? Is it the view owner
that must be a superuser in order to avoid the requirement of using a
password, or the session user? Historically it was the latter, but
this requirement makes it the former instead. This allows superusers
to delegate to other users the right to select from a foreign table
that doesn't use password authentication by creating a view over the
foreign table and handing out rights to the view. It is also more
consistent with the idea that access to a view should use the view
owner's privileges rather than the session user's privileges.
The upshot of this change is that a superuser selecting from a view
created by a non-superuser may now get an error complaining that no
password was used, while a non-superuser selecting from a view
created by a superuser will no longer receive such an error.
No documentation changes are present in this patch because the
wording of the documentation already suggests that it works this
way. We should perhaps adjust the documentation in the back-branches,
but that's a task for another patch.
Originally proposed by Jeff Janes, but with different semantics;
adjusted to work like this by me per discussion.
Discussion: http://postgr.es/m/CA+TgmoaY4HsVZJv5SqEjCKLDwtCTSwXzKpRftgj50wmMMBwciA@mail.gmail.com
2017-12-05 19:19:45 +03:00
|
|
|
if (superuser_arg(user->userid))
|
2013-02-21 14:26:23 +04:00
|
|
|
return;
|
|
|
|
|
|
|
|
/* ok if params contain a non-empty password */
|
|
|
|
for (i = 0; keywords[i] != NULL; i++)
|
|
|
|
{
|
|
|
|
if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-12-20 08:53:34 +03:00
|
|
|
/* ok if the superuser explicitly said so at user mapping creation time */
|
|
|
|
if (!UserMappingPasswordRequired(user))
|
|
|
|
return;
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
|
|
|
|
errmsg("password is required"),
|
|
|
|
errdetail("Non-superusers must provide a password in the user mapping.")));
|
|
|
|
}
|
|
|
|
|
2013-02-22 15:03:46 +04:00
|
|
|
/*
|
|
|
|
* Issue SET commands to make sure remote session is configured properly.
|
|
|
|
*
|
|
|
|
* We do this just once at connection, assuming nothing will change the
|
|
|
|
* values later. Since we'll never send volatile function calls to the
|
|
|
|
* remote, there shouldn't be any way to break this assumption from our end.
|
|
|
|
* It's possible to think of ways to break it at the remote end, eg making
|
|
|
|
* a foreign table point to a view that includes a set_config call ---
|
|
|
|
* but once you admit the possibility of a malicious view definition,
|
|
|
|
* there are any number of ways to break things.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
configure_remote_session(PGconn *conn)
|
|
|
|
{
|
2013-03-12 05:31:28 +04:00
|
|
|
int remoteversion = PQserverVersion(conn);
|
2013-02-22 15:03:46 +04:00
|
|
|
|
|
|
|
/* Force the search path to contain only pg_catalog (see deparse.c) */
|
2013-03-12 05:31:28 +04:00
|
|
|
do_sql_command(conn, "SET search_path = pg_catalog");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set remote timezone; this is basically just cosmetic, since all
|
|
|
|
* transmitted and returned timestamptzs should specify a zone explicitly
|
|
|
|
* anyway. However it makes the regression test outputs more predictable.
|
|
|
|
*
|
|
|
|
* We don't risk setting remote zone equal to ours, since the remote
|
2013-03-23 01:22:31 +04:00
|
|
|
* server might use a different timezone database. Instead, use UTC
|
|
|
|
* (quoted, because very old servers are picky about case).
|
2013-03-12 05:31:28 +04:00
|
|
|
*/
|
2013-03-23 01:22:31 +04:00
|
|
|
do_sql_command(conn, "SET timezone = 'UTC'");
|
2013-03-12 05:31:28 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set values needed to ensure unambiguous data output from remote. (This
|
|
|
|
* logic should match what pg_dump does. See also set_transmission_modes
|
|
|
|
* in postgres_fdw.c.)
|
|
|
|
*/
|
|
|
|
do_sql_command(conn, "SET datestyle = ISO");
|
|
|
|
if (remoteversion >= 80400)
|
|
|
|
do_sql_command(conn, "SET intervalstyle = postgres");
|
|
|
|
if (remoteversion >= 90000)
|
|
|
|
do_sql_command(conn, "SET extra_float_digits = 3");
|
|
|
|
else
|
|
|
|
do_sql_command(conn, "SET extra_float_digits = 2");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convenience subroutine to issue a non-data-returning SQL command to remote
|
|
|
|
*/
|
Allow TRUNCATE command to truncate foreign tables.
This commit introduces new foreign data wrapper API for TRUNCATE.
It extends TRUNCATE command so that it accepts foreign tables as
the targets to truncate and invokes that API. Also it extends postgres_fdw
so that it can issue TRUNCATE command to foreign servers, by adding
new routine for that TRUNCATE API.
The information about options specified in TRUNCATE command, e.g.,
ONLY, CASCADE, etc., is passed to FDW via API. The list of foreign tables to
truncate is also passed to FDW. FDW truncates the foreign data sources
that the passed foreign tables specify, based on that information.
For example, postgres_fdw constructs TRUNCATE command using them
and issues it to the foreign server.
For performance, TRUNCATE command invokes the FDW routine for
TRUNCATE once per foreign server that foreign tables to truncate belong to.
Author: Kazutaka Onishi, Kohei KaiGai, slightly modified by Fujii Masao
Reviewed-by: Bharath Rupireddy, Michael Paquier, Zhihong Yu, Alvaro Herrera, Stephen Frost, Ashutosh Bapat, Amit Langote, Daniel Gustafsson, Ibrar Ahmed, Fujii Masao
Discussion: https://postgr.es/m/CAOP8fzb_gkReLput7OvOK+8NHgw-RKqNv59vem7=524krQTcWA@mail.gmail.com
Discussion: https://postgr.es/m/CAJuF6cMWDDqU-vn_knZgma+2GMaout68YUgn1uyDnexRhqqM5Q@mail.gmail.com
2021-04-08 14:56:08 +03:00
|
|
|
void
|
2013-03-12 05:31:28 +04:00
|
|
|
do_sql_command(PGconn *conn, const char *sql)
|
|
|
|
{
|
2022-02-24 08:30:00 +03:00
|
|
|
do_sql_command_begin(conn, sql);
|
|
|
|
do_sql_command_end(conn, sql, false);
|
|
|
|
}
|
2013-03-12 05:31:28 +04:00
|
|
|
|
2022-02-24 08:30:00 +03:00
|
|
|
static void
|
|
|
|
do_sql_command_begin(PGconn *conn, const char *sql)
|
|
|
|
{
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
if (!PQsendQuery(conn, sql))
|
|
|
|
pgfdw_report_error(ERROR, NULL, conn, false, sql);
|
2022-02-24 08:30:00 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
do_sql_command_end(PGconn *conn, const char *sql, bool consume_input)
|
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If requested, consume whatever data is available from the socket.
|
|
|
|
* (Note that if all data is available, this allows pgfdw_get_result to
|
|
|
|
* call PQgetResult without forcing the overhead of WaitLatchOrSocket,
|
|
|
|
* which would be large compared to the overhead of PQconsumeInput.)
|
|
|
|
*/
|
|
|
|
if (consume_input && !PQconsumeInput(conn))
|
|
|
|
pgfdw_report_error(ERROR, NULL, conn, false, sql);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
res = pgfdw_get_result(conn, sql);
|
2013-02-22 15:03:46 +04:00
|
|
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
2014-02-04 06:30:02 +04:00
|
|
|
pgfdw_report_error(ERROR, res, conn, true, sql);
|
2013-02-22 15:03:46 +04:00
|
|
|
PQclear(res);
|
|
|
|
}
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
/*
|
|
|
|
* Start remote transaction or subtransaction, if needed.
|
|
|
|
*
|
|
|
|
* Note that we always use at least REPEATABLE READ in the remote session.
|
|
|
|
* This is so that, if a query initiates multiple scans of the same or
|
|
|
|
* different foreign tables, we will get snapshot-consistent results from
|
|
|
|
* those scans. A disadvantage is that we can't provide sane emulation of
|
|
|
|
* READ COMMITTED behavior --- it would be nice if we had some other way to
|
|
|
|
* control which remote queries share a snapshot.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
begin_remote_xact(ConnCacheEntry *entry)
|
|
|
|
{
|
|
|
|
int curlevel = GetCurrentTransactionNestLevel();
|
|
|
|
|
|
|
|
/* Start main transaction if we haven't yet */
|
|
|
|
if (entry->xact_depth <= 0)
|
|
|
|
{
|
|
|
|
const char *sql;
|
|
|
|
|
|
|
|
elog(DEBUG3, "starting remote transaction on connection %p",
|
|
|
|
entry->conn);
|
|
|
|
|
2013-02-21 18:28:42 +04:00
|
|
|
if (IsolationIsSerializable())
|
2013-02-21 14:26:23 +04:00
|
|
|
sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
|
|
|
|
else
|
|
|
|
sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = true;
|
2013-03-12 05:31:28 +04:00
|
|
|
do_sql_command(entry->conn, sql);
|
2013-02-21 14:26:23 +04:00
|
|
|
entry->xact_depth = 1;
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = false;
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we're in a subtransaction, stack up savepoints to match our level.
|
|
|
|
* This ensures we can rollback just the desired effects when a
|
|
|
|
* subtransaction aborts.
|
|
|
|
*/
|
|
|
|
while (entry->xact_depth < curlevel)
|
|
|
|
{
|
|
|
|
char sql[64];
|
|
|
|
|
|
|
|
snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = true;
|
2013-03-12 05:31:28 +04:00
|
|
|
do_sql_command(entry->conn, sql);
|
2013-02-21 14:26:23 +04:00
|
|
|
entry->xact_depth++;
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = false;
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Release connection reference count created by calling GetConnection.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ReleaseConnection(PGconn *conn)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Currently, we don't actually track connection references because all
|
|
|
|
* cleanup is managed on a transaction or subtransaction basis instead. So
|
|
|
|
* there's nothing to do here.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assign a "unique" number for a cursor.
|
|
|
|
*
|
|
|
|
* These really only need to be unique per connection within a transaction.
|
|
|
|
* For the moment we ignore the per-connection point and assign them across
|
|
|
|
* all connections in the transaction, but we ask for the connection to be
|
|
|
|
* supplied in case we want to refine that.
|
|
|
|
*
|
|
|
|
* Note that even if wraparound happens in a very long transaction, actual
|
|
|
|
* collisions are highly improbable; just be sure to use %u not %d to print.
|
|
|
|
*/
|
|
|
|
unsigned int
|
|
|
|
GetCursorNumber(PGconn *conn)
|
|
|
|
{
|
|
|
|
return ++cursor_number;
|
|
|
|
}
|
|
|
|
|
2013-03-10 22:14:53 +04:00
|
|
|
/*
|
|
|
|
* Assign a "unique" number for a prepared statement.
|
|
|
|
*
|
|
|
|
* This works much like GetCursorNumber, except that we never reset the counter
|
|
|
|
* within a session. That's because we can't be 100% sure we've gotten rid
|
|
|
|
* of all prepared statements on all connections, and it's not really worth
|
|
|
|
* increasing the risk of prepared-statement name collisions by resetting.
|
|
|
|
*/
|
|
|
|
unsigned int
|
|
|
|
GetPrepStmtNumber(PGconn *conn)
|
|
|
|
{
|
|
|
|
return ++prep_stmt_number;
|
|
|
|
}
|
|
|
|
|
2016-04-21 17:46:09 +03:00
|
|
|
/*
|
|
|
|
* Submit a query and wait for the result.
|
|
|
|
*
|
|
|
|
* This function is interruptible by signals.
|
|
|
|
*
|
|
|
|
* Caller is responsible for the error handling on the result.
|
|
|
|
*/
|
|
|
|
PGresult *
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
pgfdw_exec_query(PGconn *conn, const char *query, PgFdwConnState *state)
|
2016-04-21 17:46:09 +03:00
|
|
|
{
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
/* First, process a pending asynchronous request, if any. */
|
|
|
|
if (state && state->pendingAreq)
|
|
|
|
process_pending_request(state->pendingAreq);
|
|
|
|
|
2016-04-21 17:46:09 +03:00
|
|
|
/*
|
|
|
|
* Submit a query. Since we don't use non-blocking mode, this also can
|
|
|
|
* block. But its risk is relatively small, so we ignore that for now.
|
|
|
|
*/
|
|
|
|
if (!PQsendQuery(conn, query))
|
|
|
|
pgfdw_report_error(ERROR, NULL, conn, false, query);
|
|
|
|
|
|
|
|
/* Wait for the result. */
|
|
|
|
return pgfdw_get_result(conn, query);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wait for the result from a prior asynchronous execution function call.
|
|
|
|
*
|
|
|
|
* This function offers quick responsiveness by checking for any interruptions.
|
|
|
|
*
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
* This function emulates PQexec()'s behavior of returning the last result
|
2016-04-21 17:46:09 +03:00
|
|
|
* when there are many.
|
|
|
|
*
|
|
|
|
* Caller is responsible for the error handling on the result.
|
|
|
|
*/
|
|
|
|
PGresult *
|
|
|
|
pgfdw_get_result(PGconn *conn, const char *query)
|
|
|
|
{
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PGresult *volatile last_res = NULL;
|
2016-04-21 17:46:09 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
/* In what follows, do not leak any PGresults on an error. */
|
|
|
|
PG_TRY();
|
2016-04-21 17:46:09 +03:00
|
|
|
{
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
for (;;)
|
2016-04-21 17:46:09 +03:00
|
|
|
{
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PGresult *res;
|
2016-04-21 17:46:09 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
while (PQisBusy(conn))
|
|
|
|
{
|
|
|
|
int wc;
|
2016-04-21 17:46:09 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
/* Sleep until there's something to do */
|
|
|
|
wc = WaitLatchOrSocket(MyLatch,
|
Add WL_EXIT_ON_PM_DEATH pseudo-event.
Users of the WaitEventSet and WaitLatch() APIs can now choose between
asking for WL_POSTMASTER_DEATH and then handling it explicitly, or asking
for WL_EXIT_ON_PM_DEATH to trigger immediate exit on postmaster death.
This reduces code duplication, since almost all callers want the latter.
Repair all code that was previously ignoring postmaster death completely,
or requesting the event but ignoring it, or requesting the event but then
doing an unconditional PostmasterIsAlive() call every time through its
event loop (which is an expensive syscall on platforms for which we don't
have USE_POSTMASTER_DEATH_SIGNAL support).
Assert that callers of WaitLatchXXX() under the postmaster remember to
ask for either WL_POSTMASTER_DEATH or WL_EXIT_ON_PM_DEATH, to prevent
future bugs.
The only process that doesn't handle postmaster death is syslogger. It
waits until all backends holding the write end of the syslog pipe
(including the postmaster) have closed it by exiting, to be sure to
capture any parting messages. By using the WaitEventSet API directly
it avoids the new assertion, and as a by-product it may be slightly
more efficient on platforms that have epoll().
Author: Thomas Munro
Reviewed-by: Kyotaro Horiguchi, Heikki Linnakangas, Tom Lane
Discussion: https://postgr.es/m/CAEepm%3D1TCviRykkUb69ppWLr_V697rzd1j3eZsRMmbXvETfqbQ%40mail.gmail.com,
https://postgr.es/m/CAEepm=2LqHzizbe7muD7-2yHUbTOoF7Q+qkSD5Q41kuhttRTwA@mail.gmail.com
2018-11-23 10:16:41 +03:00
|
|
|
WL_LATCH_SET | WL_SOCKET_READABLE |
|
|
|
|
WL_EXIT_ON_PM_DEATH,
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PQsocket(conn),
|
|
|
|
-1L, PG_WAIT_EXTENSION);
|
|
|
|
ResetLatch(MyLatch);
|
2016-04-21 17:46:09 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
|
|
|
|
/* Data available in socket? */
|
|
|
|
if (wc & WL_SOCKET_READABLE)
|
|
|
|
{
|
|
|
|
if (!PQconsumeInput(conn))
|
|
|
|
pgfdw_report_error(ERROR, NULL, conn, false, query);
|
|
|
|
}
|
2016-04-21 17:46:09 +03:00
|
|
|
}
|
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
res = PQgetResult(conn);
|
|
|
|
if (res == NULL)
|
|
|
|
break; /* query is complete */
|
2016-04-21 17:46:09 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PQclear(last_res);
|
|
|
|
last_res = res;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
PG_CATCH();
|
|
|
|
{
|
2016-04-21 17:46:09 +03:00
|
|
|
PQclear(last_res);
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PG_RE_THROW();
|
2016-04-21 17:46:09 +03:00
|
|
|
}
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PG_END_TRY();
|
2016-04-21 17:46:09 +03:00
|
|
|
|
|
|
|
return last_res;
|
|
|
|
}
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
/*
|
|
|
|
* Report an error we got from the remote server.
|
|
|
|
*
|
|
|
|
* elevel: error level to use (typically ERROR, but might be less)
|
|
|
|
* res: PGresult containing the error
|
2014-02-04 06:30:02 +04:00
|
|
|
* conn: connection we did the query on
|
2013-02-21 14:26:23 +04:00
|
|
|
* clear: if true, PQclear the result (otherwise caller will handle it)
|
|
|
|
* sql: NULL, or text of remote command we tried to execute
|
2013-03-10 22:14:53 +04:00
|
|
|
*
|
|
|
|
* Note: callers that choose not to throw ERROR for a remote error are
|
|
|
|
* responsible for making sure that the associated ConnCacheEntry gets
|
|
|
|
* marked with have_error = true.
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
|
|
|
void
|
2014-02-04 06:30:02 +04:00
|
|
|
pgfdw_report_error(int elevel, PGresult *res, PGconn *conn,
|
|
|
|
bool clear, const char *sql)
|
2013-02-21 14:26:23 +04:00
|
|
|
{
|
|
|
|
/* If requested, PGresult must be released before leaving this function. */
|
|
|
|
PG_TRY();
|
|
|
|
{
|
|
|
|
char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
|
|
|
|
char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
|
|
|
|
char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
|
|
|
|
char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
|
|
|
|
char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
|
|
|
|
int sqlstate;
|
|
|
|
|
|
|
|
if (diag_sqlstate)
|
|
|
|
sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
|
|
|
|
diag_sqlstate[1],
|
|
|
|
diag_sqlstate[2],
|
|
|
|
diag_sqlstate[3],
|
|
|
|
diag_sqlstate[4]);
|
|
|
|
else
|
|
|
|
sqlstate = ERRCODE_CONNECTION_FAILURE;
|
|
|
|
|
2014-02-04 06:30:02 +04:00
|
|
|
/*
|
|
|
|
* If we don't get a message from the PGresult, try the PGconn. This
|
|
|
|
* is needed because for connection-level failures, PQexec may just
|
|
|
|
* return NULL, not a PGresult at all.
|
|
|
|
*/
|
|
|
|
if (message_primary == NULL)
|
2017-02-27 16:30:06 +03:00
|
|
|
message_primary = pchomp(PQerrorMessage(conn));
|
2014-02-04 06:30:02 +04:00
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
ereport(elevel,
|
|
|
|
(errcode(sqlstate),
|
2021-12-03 11:35:29 +03:00
|
|
|
(message_primary != NULL && message_primary[0] != '\0') ?
|
|
|
|
errmsg_internal("%s", message_primary) :
|
2016-12-22 02:47:54 +03:00
|
|
|
errmsg("could not obtain message string for remote error"),
|
2013-02-21 14:26:23 +04:00
|
|
|
message_detail ? errdetail_internal("%s", message_detail) : 0,
|
|
|
|
message_hint ? errhint("%s", message_hint) : 0,
|
|
|
|
message_context ? errcontext("%s", message_context) : 0,
|
2018-03-23 00:33:10 +03:00
|
|
|
sql ? errcontext("remote SQL command: %s", sql) : 0));
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
2019-11-01 13:09:52 +03:00
|
|
|
PG_FINALLY();
|
2013-02-21 14:26:23 +04:00
|
|
|
{
|
|
|
|
if (clear)
|
|
|
|
PQclear(res);
|
|
|
|
}
|
|
|
|
PG_END_TRY();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pgfdw_xact_callback --- cleanup at main-transaction end.
|
In security-restricted operations, block enqueue of at-commit user code.
Specifically, this blocks DECLARE ... WITH HOLD and firing of deferred
triggers within index expressions and materialized view queries. An
attacker having permission to create non-temp objects in at least one
schema could execute arbitrary SQL functions under the identity of the
bootstrap superuser. One can work around the vulnerability by disabling
autovacuum and not manually running ANALYZE, CLUSTER, REINDEX, CREATE
INDEX, VACUUM FULL, or REFRESH MATERIALIZED VIEW. (Don't restore from
pg_dump, since it runs some of those commands.) Plain VACUUM (without
FULL) is safe, and all commands are fine when a trusted user owns the
target object. Performance may degrade quickly under this workaround,
however. Back-patch to 9.5 (all supported versions).
Reviewed by Robert Haas. Reported by Etienne Stalmans.
Security: CVE-2020-25695
2020-11-09 18:32:09 +03:00
|
|
|
*
|
|
|
|
* This runs just late enough that it must not enter user-defined code
|
|
|
|
* locally. (Entering such code on the remote side is fine. Its remote
|
|
|
|
* COMMIT TRANSACTION may run deferred triggers.)
|
2013-02-21 14:26:23 +04:00
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgfdw_xact_callback(XactEvent event, void *arg)
|
|
|
|
{
|
|
|
|
HASH_SEQ_STATUS scan;
|
|
|
|
ConnCacheEntry *entry;
|
2022-02-24 08:30:00 +03:00
|
|
|
List *pending_entries = NIL;
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/* Quick exit if no connections were touched in this transaction. */
|
|
|
|
if (!xact_got_connection)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Scan all connection cache entries to find open remote transactions, and
|
|
|
|
* close them.
|
|
|
|
*/
|
|
|
|
hash_seq_init(&scan, ConnectionHash);
|
|
|
|
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
|
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
|
2014-02-04 06:30:02 +04:00
|
|
|
/* Ignore cache entry if no open connection right now */
|
|
|
|
if (entry->conn == NULL)
|
2013-02-21 14:26:23 +04:00
|
|
|
continue;
|
|
|
|
|
2014-02-04 06:30:02 +04:00
|
|
|
/* If it has an open remote transaction, try to close it */
|
|
|
|
if (entry->xact_depth > 0)
|
2013-02-21 14:26:23 +04:00
|
|
|
{
|
2014-02-04 06:30:02 +04:00
|
|
|
elog(DEBUG3, "closing remote transaction on connection %p",
|
|
|
|
entry->conn);
|
|
|
|
|
|
|
|
switch (event)
|
|
|
|
{
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 22:02:14 +03:00
|
|
|
case XACT_EVENT_PARALLEL_PRE_COMMIT:
|
2014-02-04 06:30:02 +04:00
|
|
|
case XACT_EVENT_PRE_COMMIT:
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If abort cleanup previously failed for this connection,
|
|
|
|
* we can't issue any more commands against it.
|
|
|
|
*/
|
|
|
|
pgfdw_reject_incomplete_xact_state_change(entry);
|
|
|
|
|
2014-02-04 06:30:02 +04:00
|
|
|
/* Commit all remote transactions during pre-commit */
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = true;
|
2022-02-24 08:30:00 +03:00
|
|
|
if (entry->parallel_commit)
|
|
|
|
{
|
|
|
|
do_sql_command_begin(entry->conn, "COMMIT TRANSACTION");
|
|
|
|
pending_entries = lappend(pending_entries, entry);
|
|
|
|
continue;
|
|
|
|
}
|
2014-02-04 06:30:02 +04:00
|
|
|
do_sql_command(entry->conn, "COMMIT TRANSACTION");
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = false;
|
2014-02-04 06:30:02 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If there were any errors in subtransactions, and we
|
|
|
|
* made prepared statements, do a DEALLOCATE ALL to make
|
|
|
|
* sure we get rid of all prepared statements. This is
|
|
|
|
* annoying and not terribly bulletproof, but it's
|
|
|
|
* probably not worth trying harder.
|
|
|
|
*
|
|
|
|
* DEALLOCATE ALL only exists in 8.3 and later, so this
|
|
|
|
* constrains how old a server postgres_fdw can
|
|
|
|
* communicate with. We intentionally ignore errors in
|
|
|
|
* the DEALLOCATE, so that we can hobble along to some
|
|
|
|
* extent with older servers (leaking prepared statements
|
|
|
|
* as we go; but we don't really support update operations
|
|
|
|
* pre-8.3 anyway).
|
|
|
|
*/
|
2013-03-10 22:14:53 +04:00
|
|
|
if (entry->have_prep_stmt && entry->have_error)
|
|
|
|
{
|
|
|
|
res = PQexec(entry->conn, "DEALLOCATE ALL");
|
|
|
|
PQclear(res);
|
|
|
|
}
|
|
|
|
entry->have_prep_stmt = false;
|
|
|
|
entry->have_error = false;
|
2014-02-04 06:30:02 +04:00
|
|
|
break;
|
|
|
|
case XACT_EVENT_PRE_PREPARE:
|
|
|
|
|
|
|
|
/*
|
2019-11-08 11:00:30 +03:00
|
|
|
* We disallow any remote transactions, since it's not
|
|
|
|
* very reasonable to hold them open until the prepared
|
|
|
|
* transaction is committed. For the moment, throw error
|
|
|
|
* unconditionally; later we might allow read-only cases.
|
|
|
|
* Note that the error will cause us to come right back
|
|
|
|
* here with event == XACT_EVENT_ABORT, so we'll clean up
|
|
|
|
* the connection state at that point.
|
2014-02-04 06:30:02 +04:00
|
|
|
*/
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2019-11-08 11:00:30 +03:00
|
|
|
errmsg("cannot PREPARE a transaction that has operated on postgres_fdw foreign tables")));
|
2014-02-04 06:30:02 +04:00
|
|
|
break;
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 22:02:14 +03:00
|
|
|
case XACT_EVENT_PARALLEL_COMMIT:
|
2014-02-04 06:30:02 +04:00
|
|
|
case XACT_EVENT_COMMIT:
|
|
|
|
case XACT_EVENT_PREPARE:
|
|
|
|
/* Pre-commit should have closed the open transaction */
|
|
|
|
elog(ERROR, "missed cleaning up connection during pre-commit");
|
|
|
|
break;
|
Create an infrastructure for parallel computation in PostgreSQL.
This does four basic things. First, it provides convenience routines
to coordinate the startup and shutdown of parallel workers. Second,
it synchronizes various pieces of state (e.g. GUCs, combo CID
mappings, transaction snapshot) from the parallel group leader to the
worker processes. Third, it prohibits various operations that would
result in unsafe changes to that state while parallelism is active.
Finally, it propagates events that would result in an ErrorResponse,
NoticeResponse, or NotifyResponse message being sent to the client
from the parallel workers back to the master, from which they can then
be sent on to the client.
Robert Haas, Amit Kapila, Noah Misch, Rushabh Lathia, Jeevan Chalke.
Suggestions and review from Andres Freund, Heikki Linnakangas, Noah
Misch, Simon Riggs, Euler Taveira, and Jim Nasby.
2015-04-30 22:02:14 +03:00
|
|
|
case XACT_EVENT_PARALLEL_ABORT:
|
2014-02-04 06:30:02 +04:00
|
|
|
case XACT_EVENT_ABORT:
|
2022-03-25 09:30:00 +03:00
|
|
|
/* Rollback all remote transactions during abort */
|
|
|
|
pgfdw_abort_cleanup(entry, true);
|
2014-02-04 06:30:02 +04:00
|
|
|
break;
|
|
|
|
}
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Reset state to show we're out of a transaction */
|
2022-02-24 08:30:00 +03:00
|
|
|
pgfdw_reset_xact_state(entry, true);
|
|
|
|
}
|
2013-02-21 14:26:23 +04:00
|
|
|
|
2022-02-24 08:30:00 +03:00
|
|
|
/* If there are any pending connections, finish cleaning them up */
|
|
|
|
if (pending_entries)
|
|
|
|
{
|
|
|
|
Assert(event == XACT_EVENT_PARALLEL_PRE_COMMIT ||
|
|
|
|
event == XACT_EVENT_PRE_COMMIT);
|
|
|
|
pgfdw_finish_pre_commit_cleanup(pending_entries);
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Regardless of the event type, we can now mark ourselves as out of the
|
|
|
|
* transaction. (Note: if we are here during PRE_COMMIT or PRE_PREPARE,
|
|
|
|
* this saves a useless scan of the hashtable during COMMIT or PREPARE.)
|
|
|
|
*/
|
|
|
|
xact_got_connection = false;
|
|
|
|
|
|
|
|
/* Also reset cursor numbering for next transaction */
|
|
|
|
cursor_number = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pgfdw_subxact_callback --- cleanup at subtransaction end.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
|
|
|
|
SubTransactionId parentSubid, void *arg)
|
|
|
|
{
|
|
|
|
HASH_SEQ_STATUS scan;
|
|
|
|
ConnCacheEntry *entry;
|
|
|
|
int curlevel;
|
2022-02-24 08:30:00 +03:00
|
|
|
List *pending_entries = NIL;
|
2013-02-21 14:26:23 +04:00
|
|
|
|
|
|
|
/* Nothing to do at subxact start, nor after commit. */
|
|
|
|
if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB ||
|
|
|
|
event == SUBXACT_EVENT_ABORT_SUB))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Quick exit if no connections were touched in this transaction. */
|
|
|
|
if (!xact_got_connection)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Scan all connection cache entries to find open remote subtransactions
|
|
|
|
* of the current level, and close them.
|
|
|
|
*/
|
|
|
|
curlevel = GetCurrentTransactionNestLevel();
|
|
|
|
hash_seq_init(&scan, ConnectionHash);
|
|
|
|
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
|
|
|
|
{
|
|
|
|
char sql[100];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We only care about connections with open remote subtransactions of
|
|
|
|
* the current level.
|
|
|
|
*/
|
|
|
|
if (entry->conn == NULL || entry->xact_depth < curlevel)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (entry->xact_depth > curlevel)
|
|
|
|
elog(ERROR, "missed cleaning up remote subtransaction at level %d",
|
|
|
|
entry->xact_depth);
|
|
|
|
|
|
|
|
if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
|
|
|
|
{
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
/*
|
|
|
|
* If abort cleanup previously failed for this connection, we
|
|
|
|
* can't issue any more commands against it.
|
|
|
|
*/
|
|
|
|
pgfdw_reject_incomplete_xact_state_change(entry);
|
|
|
|
|
2013-02-21 14:26:23 +04:00
|
|
|
/* Commit all remote subtransactions during pre-commit */
|
|
|
|
snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = true;
|
2022-02-24 08:30:00 +03:00
|
|
|
if (entry->parallel_commit)
|
|
|
|
{
|
|
|
|
do_sql_command_begin(entry->conn, sql);
|
|
|
|
pending_entries = lappend(pending_entries, entry);
|
|
|
|
continue;
|
|
|
|
}
|
2013-03-12 05:31:28 +04:00
|
|
|
do_sql_command(entry->conn, sql);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
entry->changing_xact_state = false;
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
2021-09-22 17:47:36 +03:00
|
|
|
else
|
2013-02-21 14:26:23 +04:00
|
|
|
{
|
2021-09-22 17:47:36 +03:00
|
|
|
/* Rollback all remote subtransactions during abort */
|
2022-03-25 09:30:00 +03:00
|
|
|
pgfdw_abort_cleanup(entry, false);
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* OK, we're outta that level of subtransaction */
|
2022-02-24 08:30:00 +03:00
|
|
|
pgfdw_reset_xact_state(entry, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If there are any pending connections, finish cleaning them up */
|
|
|
|
if (pending_entries)
|
|
|
|
{
|
|
|
|
Assert(event == SUBXACT_EVENT_PRE_COMMIT_SUB);
|
|
|
|
pgfdw_finish_pre_subcommit_cleanup(pending_entries, curlevel);
|
2013-02-21 14:26:23 +04:00
|
|
|
}
|
|
|
|
}
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
2017-07-21 19:51:38 +03:00
|
|
|
/*
|
|
|
|
* Connection invalidation callback function
|
|
|
|
*
|
|
|
|
* After a change to a pg_foreign_server or pg_user_mapping catalog entry,
|
2020-12-28 13:56:13 +03:00
|
|
|
* close connections depending on that entry immediately if current transaction
|
|
|
|
* has not used those connections yet. Otherwise, mark those connections as
|
|
|
|
* invalid and then make pgfdw_xact_callback() close them at the end of current
|
|
|
|
* transaction, since they cannot be closed in the midst of the transaction
|
|
|
|
* using them. Closed connections will be remade at the next opportunity if
|
|
|
|
* necessary.
|
2017-07-21 19:51:38 +03:00
|
|
|
*
|
|
|
|
* Although most cache invalidation callbacks blow away all the related stuff
|
|
|
|
* regardless of the given hashvalue, connections are expensive enough that
|
|
|
|
* it's worth trying to avoid that.
|
|
|
|
*
|
|
|
|
* NB: We could avoid unnecessary disconnection more strictly by examining
|
|
|
|
* individual option values, but it seems too much effort for the gain.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
|
|
|
|
{
|
|
|
|
HASH_SEQ_STATUS scan;
|
|
|
|
ConnCacheEntry *entry;
|
|
|
|
|
|
|
|
Assert(cacheid == FOREIGNSERVEROID || cacheid == USERMAPPINGOID);
|
|
|
|
|
|
|
|
/* ConnectionHash must exist already, if we're registered */
|
|
|
|
hash_seq_init(&scan, ConnectionHash);
|
|
|
|
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
|
|
|
|
{
|
|
|
|
/* Ignore invalid entries */
|
|
|
|
if (entry->conn == NULL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* hashvalue == 0 means a cache reset, must clear all state */
|
|
|
|
if (hashvalue == 0 ||
|
|
|
|
(cacheid == FOREIGNSERVEROID &&
|
|
|
|
entry->server_hashvalue == hashvalue) ||
|
|
|
|
(cacheid == USERMAPPINGOID &&
|
|
|
|
entry->mapping_hashvalue == hashvalue))
|
2020-12-28 13:56:13 +03:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Close the connection immediately if it's not used yet in this
|
|
|
|
* transaction. Otherwise mark it as invalid so that
|
|
|
|
* pgfdw_xact_callback() can close it at the end of this
|
|
|
|
* transaction.
|
|
|
|
*/
|
|
|
|
if (entry->xact_depth == 0)
|
|
|
|
{
|
|
|
|
elog(DEBUG3, "discarding connection %p", entry->conn);
|
|
|
|
disconnect_pg_server(entry);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
entry->invalidated = true;
|
|
|
|
}
|
2017-07-21 19:51:38 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
/*
|
|
|
|
* Raise an error if the given connection cache entry is marked as being
|
|
|
|
* in the middle of an xact state change. This should be called at which no
|
|
|
|
* such change is expected to be in progress; if one is found to be in
|
|
|
|
* progress, it means that we aborted in the middle of a previous state change
|
|
|
|
* and now don't know what the remote transaction state actually is.
|
|
|
|
* Such connections can't safely be further used. Re-establishing the
|
|
|
|
* connection would change the snapshot and roll back any writes already
|
|
|
|
* performed, so that's not an option, either. Thus, we must abort.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
|
|
|
|
{
|
|
|
|
ForeignServer *server;
|
|
|
|
|
2017-07-21 19:51:38 +03:00
|
|
|
/* nothing to do for inactive entries and entries of sane state */
|
|
|
|
if (entry->conn == NULL || !entry->changing_xact_state)
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
return;
|
|
|
|
|
2017-07-21 19:51:38 +03:00
|
|
|
/* make sure this entry is inactive */
|
|
|
|
disconnect_pg_server(entry);
|
|
|
|
|
|
|
|
/* find server name to be shown in the message below */
|
2021-01-15 04:30:19 +03:00
|
|
|
server = GetForeignServer(entry->serverid);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CONNECTION_EXCEPTION),
|
|
|
|
errmsg("connection to server \"%s\" was lost",
|
|
|
|
server->servername)));
|
|
|
|
}
|
|
|
|
|
2022-02-24 08:30:00 +03:00
|
|
|
/*
|
|
|
|
* Reset state to show we're out of a (sub)transaction.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel)
|
|
|
|
{
|
|
|
|
if (toplevel)
|
|
|
|
{
|
|
|
|
/* Reset state to show we're out of a transaction */
|
|
|
|
entry->xact_depth = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the connection isn't in a good idle state, it is marked as
|
|
|
|
* invalid or keep_connections option of its server is disabled, then
|
|
|
|
* discard it to recover. Next GetConnection will open a new
|
|
|
|
* connection.
|
|
|
|
*/
|
|
|
|
if (PQstatus(entry->conn) != CONNECTION_OK ||
|
|
|
|
PQtransactionStatus(entry->conn) != PQTRANS_IDLE ||
|
|
|
|
entry->changing_xact_state ||
|
|
|
|
entry->invalidated ||
|
|
|
|
!entry->keep_connections)
|
|
|
|
{
|
|
|
|
elog(DEBUG3, "discarding connection %p", entry->conn);
|
|
|
|
disconnect_pg_server(entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Reset state to show we're out of a subtransaction */
|
|
|
|
entry->xact_depth--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
/*
|
|
|
|
* Cancel the currently-in-progress query (whose query text we do not have)
|
|
|
|
* and ignore the result. Returns true if we successfully cancel the query
|
|
|
|
* and discard any pending result, and false if not.
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
*
|
2021-10-13 13:00:00 +03:00
|
|
|
* It's not a huge problem if we throw an ERROR here, but if we get into error
|
|
|
|
* recursion trouble, we'll end up slamming the connection shut, which will
|
|
|
|
* necessitate failing the entire toplevel transaction even if subtransactions
|
|
|
|
* were used. Try to use WARNING where we can.
|
|
|
|
*
|
Add support for asynchronous execution.
This implements asynchronous execution, which runs multiple parts of a
non-parallel-aware Append concurrently rather than serially to improve
performance when possible. Currently, the only node type that can be
run concurrently is a ForeignScan that is an immediate child of such an
Append. In the case where such ForeignScans access data on different
remote servers, this would run those ForeignScans concurrently, and
overlap the remote operations to be performed simultaneously, so it'll
improve the performance especially when the operations involve
time-consuming ones such as remote join and remote aggregation.
We may extend this to other node types such as joins or aggregates over
ForeignScans in the future.
This also adds the support for postgres_fdw, which is enabled by the
table-level/server-level option "async_capable". The default is false.
Robert Haas, Kyotaro Horiguchi, Thomas Munro, and myself. This commit
is mostly based on the patch proposed by Robert Haas, but also uses
stuff from the patch proposed by Kyotaro Horiguchi and from the patch
proposed by Thomas Munro. Reviewed by Kyotaro Horiguchi, Konstantin
Knizhnik, Andrey Lepikhov, Movead Li, Thomas Munro, Justin Pryzby, and
others.
Discussion: https://postgr.es/m/CA%2BTgmoaXQEt4tZ03FtQhnzeDEMzBck%2BLrni0UWHVVgOTnA6C1w%40mail.gmail.com
Discussion: https://postgr.es/m/CA%2BhUKGLBRyu0rHrDCMC4%3DRn3252gogyp1SjOgG8SEKKZv%3DFwfQ%40mail.gmail.com
Discussion: https://postgr.es/m/20200228.170650.667613673625155850.horikyota.ntt%40gmail.com
2021-03-31 12:45:00 +03:00
|
|
|
* XXX: if the query was one sent by fetch_more_data_begin(), we could get the
|
|
|
|
* query text from the pendingAreq saved in the per-connection state, then
|
|
|
|
* report the query using it.
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
pgfdw_cancel_query(PGconn *conn)
|
|
|
|
{
|
|
|
|
PGcancel *cancel;
|
|
|
|
char errbuf[256];
|
|
|
|
PGresult *result = NULL;
|
|
|
|
TimestampTz endtime;
|
2021-12-08 17:31:46 +03:00
|
|
|
bool timed_out;
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If it takes too long to cancel the query and discard the result, assume
|
|
|
|
* the connection is dead.
|
|
|
|
*/
|
|
|
|
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Issue cancel request. Unfortunately, there's no good way to limit the
|
|
|
|
* amount of time that we might block inside PQgetCancel().
|
|
|
|
*/
|
|
|
|
if ((cancel = PQgetCancel(conn)))
|
|
|
|
{
|
|
|
|
if (!PQcancel(cancel, errbuf, sizeof(errbuf)))
|
|
|
|
{
|
|
|
|
ereport(WARNING,
|
|
|
|
(errcode(ERRCODE_CONNECTION_FAILURE),
|
|
|
|
errmsg("could not send cancel request: %s",
|
|
|
|
errbuf)));
|
|
|
|
PQfreeCancel(cancel);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
PQfreeCancel(cancel);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get and discard the result of the query. */
|
2021-12-08 17:31:46 +03:00
|
|
|
if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
|
|
|
|
{
|
|
|
|
if (timed_out)
|
|
|
|
ereport(WARNING,
|
|
|
|
(errmsg("could not get result of cancel request due to timeout")));
|
|
|
|
else
|
|
|
|
ereport(WARNING,
|
|
|
|
(errcode(ERRCODE_CONNECTION_FAILURE),
|
|
|
|
errmsg("could not get result of cancel request: %s",
|
|
|
|
pchomp(PQerrorMessage(conn)))));
|
|
|
|
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
return false;
|
2021-12-08 17:31:46 +03:00
|
|
|
}
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
PQclear(result);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Submit a query during (sub)abort cleanup and wait up to 30 seconds for the
|
|
|
|
* result. If the query is executed without error, the return value is true.
|
|
|
|
* If the query is executed successfully but returns an error, the return
|
|
|
|
* value is true if and only if ignore_errors is set. If the query can't be
|
|
|
|
* sent or times out, the return value is false.
|
2021-10-13 13:00:00 +03:00
|
|
|
*
|
|
|
|
* It's not a huge problem if we throw an ERROR here, but if we get into error
|
|
|
|
* recursion trouble, we'll end up slamming the connection shut, which will
|
|
|
|
* necessitate failing the entire toplevel transaction even if subtransactions
|
|
|
|
* were used. Try to use WARNING where we can.
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
pgfdw_exec_cleanup_query(PGconn *conn, const char *query, bool ignore_errors)
|
|
|
|
{
|
|
|
|
PGresult *result = NULL;
|
|
|
|
TimestampTz endtime;
|
2021-12-08 17:31:46 +03:00
|
|
|
bool timed_out;
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If it takes too long to execute a cleanup query, assume the connection
|
|
|
|
* is dead. It's fairly likely that this is why we aborted in the first
|
|
|
|
* place (e.g. statement timeout, user cancel), so the timeout shouldn't
|
|
|
|
* be too long.
|
|
|
|
*/
|
|
|
|
endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Submit a query. Since we don't use non-blocking mode, this also can
|
|
|
|
* block. But its risk is relatively small, so we ignore that for now.
|
|
|
|
*/
|
|
|
|
if (!PQsendQuery(conn, query))
|
|
|
|
{
|
|
|
|
pgfdw_report_error(WARNING, NULL, conn, false, query);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get the result of the query. */
|
2021-12-08 17:31:46 +03:00
|
|
|
if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
|
|
|
|
{
|
|
|
|
if (timed_out)
|
|
|
|
ereport(WARNING,
|
|
|
|
(errmsg("could not get query result due to timeout"),
|
|
|
|
query ? errcontext("remote SQL command: %s", query) : 0));
|
|
|
|
else
|
|
|
|
pgfdw_report_error(WARNING, NULL, conn, false, query);
|
|
|
|
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
return false;
|
2021-12-08 17:31:46 +03:00
|
|
|
}
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
|
|
|
/* Issue a warning if not successful. */
|
|
|
|
if (PQresultStatus(result) != PGRES_COMMAND_OK)
|
|
|
|
{
|
|
|
|
pgfdw_report_error(WARNING, result, conn, true, query);
|
|
|
|
return ignore_errors;
|
|
|
|
}
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PQclear(result);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get, during abort cleanup, the result of a query that is in progress. This
|
|
|
|
* might be a query that is being interrupted by transaction abort, or it might
|
|
|
|
* be a query that was initiated as part of transaction abort to get the remote
|
|
|
|
* side back to the appropriate state.
|
|
|
|
*
|
|
|
|
* endtime is the time at which we should give up and assume the remote
|
2021-12-08 17:31:46 +03:00
|
|
|
* side is dead. Returns true if the timeout expired or connection trouble
|
|
|
|
* occurred, false otherwise. Sets *result except in case of a timeout.
|
|
|
|
* Sets timed_out to true only when the timeout expired.
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
*/
|
|
|
|
static bool
|
2021-12-08 17:31:46 +03:00
|
|
|
pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result,
|
|
|
|
bool *timed_out)
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
{
|
2021-12-08 17:31:46 +03:00
|
|
|
volatile bool failed = false;
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PGresult *volatile last_res = NULL;
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issue during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
2021-12-08 17:31:46 +03:00
|
|
|
*timed_out = false;
|
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
/* In what follows, do not leak any PGresults on an error. */
|
|
|
|
PG_TRY();
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
{
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
for (;;)
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
{
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PGresult *res;
|
|
|
|
|
|
|
|
while (PQisBusy(conn))
|
|
|
|
{
|
|
|
|
int wc;
|
|
|
|
TimestampTz now = GetCurrentTimestamp();
|
|
|
|
long cur_timeout;
|
|
|
|
|
|
|
|
/* If timeout has expired, give up, else get sleep time. */
|
Fix and simplify some usages of TimestampDifference().
Introduce TimestampDifferenceMilliseconds() to simplify callers
that would rather have the difference in milliseconds, instead of
the select()-oriented seconds-and-microseconds format. This gets
rid of at least one integer division per call, and it eliminates
some apparently-easy-to-mess-up arithmetic.
Two of these call sites were in fact wrong:
* pg_prewarm's autoprewarm_main() forgot to multiply the seconds
by 1000, thus ending up with a delay 1000X shorter than intended.
That doesn't quite make it a busy-wait, but close.
* postgres_fdw's pgfdw_get_cleanup_result() thought it needed to compute
microseconds not milliseconds, thus ending up with a delay 1000X longer
than intended. Somebody along the way had noticed this problem but
misdiagnosed the cause, and imposed an ad-hoc 60-second limit rather
than fixing the units. This was relatively harmless in context, because
we don't care that much about exactly how long this delay is; still,
it's wrong.
There are a few more callers of TimestampDifference() that don't
have a direct need for seconds-and-microseconds, but can't use
TimestampDifferenceMilliseconds() either because they do need
microsecond precision or because they might possibly deal with
intervals long enough to overflow 32-bit milliseconds. It might be
worth inventing another API to improve that, but that seems outside
the scope of this patch; so those callers are untouched here.
Given the fact that we are fixing some bugs, and the likelihood
that future patches might want to back-patch code that uses this
new API, back-patch to all supported branches.
Alexey Kondratov and Tom Lane
Discussion: https://postgr.es/m/3b1c053a21c07c1ed5e00be3b2b855ef@postgrespro.ru
2020-11-11 06:51:18 +03:00
|
|
|
cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
|
|
|
|
if (cur_timeout <= 0)
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
{
|
2021-12-08 17:31:46 +03:00
|
|
|
*timed_out = true;
|
|
|
|
failed = true;
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sleep until there's something to do */
|
|
|
|
wc = WaitLatchOrSocket(MyLatch,
|
Add WL_EXIT_ON_PM_DEATH pseudo-event.
Users of the WaitEventSet and WaitLatch() APIs can now choose between
asking for WL_POSTMASTER_DEATH and then handling it explicitly, or asking
for WL_EXIT_ON_PM_DEATH to trigger immediate exit on postmaster death.
This reduces code duplication, since almost all callers want the latter.
Repair all code that was previously ignoring postmaster death completely,
or requesting the event but ignoring it, or requesting the event but then
doing an unconditional PostmasterIsAlive() call every time through its
event loop (which is an expensive syscall on platforms for which we don't
have USE_POSTMASTER_DEATH_SIGNAL support).
Assert that callers of WaitLatchXXX() under the postmaster remember to
ask for either WL_POSTMASTER_DEATH or WL_EXIT_ON_PM_DEATH, to prevent
future bugs.
The only process that doesn't handle postmaster death is syslogger. It
waits until all backends holding the write end of the syslog pipe
(including the postmaster) have closed it by exiting, to be sure to
capture any parting messages. By using the WaitEventSet API directly
it avoids the new assertion, and as a by-product it may be slightly
more efficient on platforms that have epoll().
Author: Thomas Munro
Reviewed-by: Kyotaro Horiguchi, Heikki Linnakangas, Tom Lane
Discussion: https://postgr.es/m/CAEepm%3D1TCviRykkUb69ppWLr_V697rzd1j3eZsRMmbXvETfqbQ%40mail.gmail.com,
https://postgr.es/m/CAEepm=2LqHzizbe7muD7-2yHUbTOoF7Q+qkSD5Q41kuhttRTwA@mail.gmail.com
2018-11-23 10:16:41 +03:00
|
|
|
WL_LATCH_SET | WL_SOCKET_READABLE |
|
|
|
|
WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PQsocket(conn),
|
|
|
|
cur_timeout, PG_WAIT_EXTENSION);
|
|
|
|
ResetLatch(MyLatch);
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
CHECK_FOR_INTERRUPTS();
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
/* Data available in socket? */
|
|
|
|
if (wc & WL_SOCKET_READABLE)
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
{
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
if (!PQconsumeInput(conn))
|
|
|
|
{
|
2021-12-08 17:31:46 +03:00
|
|
|
/* connection trouble */
|
|
|
|
failed = true;
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
goto exit;
|
|
|
|
}
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
res = PQgetResult(conn);
|
|
|
|
if (res == NULL)
|
|
|
|
break; /* query is complete */
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PQclear(last_res);
|
|
|
|
last_res = res;
|
|
|
|
}
|
|
|
|
exit: ;
|
|
|
|
}
|
|
|
|
PG_CATCH();
|
|
|
|
{
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
PQclear(last_res);
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PG_RE_THROW();
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
}
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PG_END_TRY();
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
|
2021-12-08 17:31:46 +03:00
|
|
|
if (failed)
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 22:03:39 +03:00
|
|
|
PQclear(last_res);
|
|
|
|
else
|
|
|
|
*result = last_res;
|
2021-12-08 17:31:46 +03:00
|
|
|
return failed;
|
postgres_fdw: Allow cancellation of transaction control commands.
Commit f039eaac7131ef2a4cf63a10cf98486f8bcd09d2, later back-patched
with commit 1b812afb0eafe125b820cc3b95e7ca03821aa675, allowed many of
the queries issued by postgres_fdw to fetch remote data to respond to
cancel interrupts in a timely fashion. However, it didn't do anything
about the transaction control commands, which remained
noninterruptible.
Improve the situation by changing do_sql_command() to retrieve query
results using pgfdw_get_result(), which uses the asynchronous
interface to libpq so that it can check for interrupts every time
libpq returns control. Since this might result in a situation
where we can no longer be sure that the remote transaction state
matches the local transaction state, add a facility to force all
levels of the local transaction to abort if we've lost track of
the remote state; without this, an apparently-successful commit of
the local transaction might fail to commit changes made on the
remote side. Also, add a 60-second timeout for queries issued during
transaction abort; if that expires, give up and mark the state of
the connection as unknown. Drop all such connections when we exit
the local transaction. Together, these changes mean that if we're
aborting the local toplevel transaction anyway, we can just drop the
remote connection in lieu of waiting (possibly for a very long time)
for it to complete an abort.
This still leaves quite a bit of room for improvement. PQcancel()
has no asynchronous interface, so if we get stuck sending the cancel
request we'll still hang. Also, PQsetnonblocking() is not used, which
means we could block uninterruptibly when sending a query. There
might be some other optimizations possible as well. Nonetheless,
this allows us to escape a wait for an unresponsive remote server
quickly in many more cases than previously.
Report by Suraj Kharage. Patch by me and Rafia Sabih. Review
and testing by Amit Kapila and Tushar Ahuja.
Discussion: http://postgr.es/m/CAF1DzPU8Kx+fMXEbFoP289xtm3bz3t+ZfxhmKavr98Bh-C0TqQ@mail.gmail.com
2017-06-07 22:14:55 +03:00
|
|
|
}
|
2021-01-18 09:11:08 +03:00
|
|
|
|
2021-09-22 17:47:36 +03:00
|
|
|
/*
|
2022-03-25 09:30:00 +03:00
|
|
|
* Abort remote transaction or subtransaction.
|
2021-09-22 17:47:36 +03:00
|
|
|
*
|
|
|
|
* "toplevel" should be set to true if toplevel (main) transaction is
|
|
|
|
* rollbacked, false otherwise.
|
|
|
|
*
|
|
|
|
* Set entry->changing_xact_state to false on success, true on failure.
|
|
|
|
*/
|
|
|
|
static void
|
2022-03-25 09:30:00 +03:00
|
|
|
pgfdw_abort_cleanup(ConnCacheEntry *entry, bool toplevel)
|
2021-09-22 17:47:36 +03:00
|
|
|
{
|
2022-03-25 09:30:00 +03:00
|
|
|
char sql[100];
|
|
|
|
|
2021-09-22 17:47:36 +03:00
|
|
|
/*
|
|
|
|
* Don't try to clean up the connection if we're already in error
|
|
|
|
* recursion trouble.
|
|
|
|
*/
|
|
|
|
if (in_error_recursion_trouble())
|
|
|
|
entry->changing_xact_state = true;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If connection is already unsalvageable, don't touch it further.
|
|
|
|
*/
|
|
|
|
if (entry->changing_xact_state)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mark this connection as in the process of changing transaction state.
|
|
|
|
*/
|
|
|
|
entry->changing_xact_state = true;
|
|
|
|
|
|
|
|
/* Assume we might have lost track of prepared statements */
|
|
|
|
entry->have_error = true;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If a command has been submitted to the remote server by using an
|
|
|
|
* asynchronous execution function, the command might not have yet
|
|
|
|
* completed. Check to see if a command is still being processed by the
|
|
|
|
* remote server, and if so, request cancellation of the command.
|
|
|
|
*/
|
|
|
|
if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
|
|
|
|
!pgfdw_cancel_query(entry->conn))
|
|
|
|
return; /* Unable to cancel running query */
|
|
|
|
|
2022-03-25 09:30:00 +03:00
|
|
|
if (toplevel)
|
|
|
|
snprintf(sql, sizeof(sql), "ABORT TRANSACTION");
|
|
|
|
else
|
|
|
|
snprintf(sql, sizeof(sql),
|
|
|
|
"ROLLBACK TO SAVEPOINT s%d; RELEASE SAVEPOINT s%d",
|
|
|
|
entry->xact_depth, entry->xact_depth);
|
2021-09-22 17:47:36 +03:00
|
|
|
if (!pgfdw_exec_cleanup_query(entry->conn, sql, false))
|
2022-03-25 09:30:00 +03:00
|
|
|
return; /* Unable to abort remote (sub)transaction */
|
2021-09-22 17:47:36 +03:00
|
|
|
|
|
|
|
if (toplevel)
|
|
|
|
{
|
|
|
|
if (entry->have_prep_stmt && entry->have_error &&
|
|
|
|
!pgfdw_exec_cleanup_query(entry->conn,
|
|
|
|
"DEALLOCATE ALL",
|
|
|
|
true))
|
|
|
|
return; /* Trouble clearing prepared statements */
|
|
|
|
|
|
|
|
entry->have_prep_stmt = false;
|
|
|
|
entry->have_error = false;
|
|
|
|
}
|
|
|
|
|
2022-01-21 11:45:00 +03:00
|
|
|
/*
|
|
|
|
* If pendingAreq of the per-connection state is not NULL, it means that
|
|
|
|
* an asynchronous fetch begun by fetch_more_data_begin() was not done
|
|
|
|
* successfully and thus the per-connection state was not reset in
|
|
|
|
* fetch_more_data(); in that case reset the per-connection state here.
|
|
|
|
*/
|
|
|
|
if (entry->state.pendingAreq)
|
|
|
|
memset(&entry->state, 0, sizeof(entry->state));
|
|
|
|
|
2021-09-22 17:47:36 +03:00
|
|
|
/* Disarm changing_xact_state if it all worked */
|
|
|
|
entry->changing_xact_state = false;
|
|
|
|
}
|
|
|
|
|
2022-02-24 08:30:00 +03:00
|
|
|
/*
|
|
|
|
* Finish pre-commit cleanup of connections on each of which we've sent a
|
|
|
|
* COMMIT command to the remote server.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgfdw_finish_pre_commit_cleanup(List *pending_entries)
|
|
|
|
{
|
|
|
|
ConnCacheEntry *entry;
|
|
|
|
List *pending_deallocs = NIL;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
Assert(pending_entries);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the result of the COMMIT command for each of the pending entries
|
|
|
|
*/
|
|
|
|
foreach(lc, pending_entries)
|
|
|
|
{
|
|
|
|
entry = (ConnCacheEntry *) lfirst(lc);
|
|
|
|
|
|
|
|
Assert(entry->changing_xact_state);
|
|
|
|
/*
|
|
|
|
* We might already have received the result on the socket, so pass
|
|
|
|
* consume_input=true to try to consume it first
|
|
|
|
*/
|
|
|
|
do_sql_command_end(entry->conn, "COMMIT TRANSACTION", true);
|
|
|
|
entry->changing_xact_state = false;
|
|
|
|
|
|
|
|
/* Do a DEALLOCATE ALL in parallel if needed */
|
|
|
|
if (entry->have_prep_stmt && entry->have_error)
|
|
|
|
{
|
|
|
|
/* Ignore errors (see notes in pgfdw_xact_callback) */
|
|
|
|
if (PQsendQuery(entry->conn, "DEALLOCATE ALL"))
|
|
|
|
{
|
|
|
|
pending_deallocs = lappend(pending_deallocs, entry);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
entry->have_prep_stmt = false;
|
|
|
|
entry->have_error = false;
|
|
|
|
|
|
|
|
pgfdw_reset_xact_state(entry, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* No further work if no pending entries */
|
|
|
|
if (!pending_deallocs)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the result of the DEALLOCATE command for each of the pending
|
|
|
|
* entries
|
|
|
|
*/
|
|
|
|
foreach(lc, pending_deallocs)
|
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
|
|
|
|
entry = (ConnCacheEntry *) lfirst(lc);
|
|
|
|
|
|
|
|
/* Ignore errors (see notes in pgfdw_xact_callback) */
|
|
|
|
while ((res = PQgetResult(entry->conn)) != NULL)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
/* Stop if the connection is lost (else we'll loop infinitely) */
|
|
|
|
if (PQstatus(entry->conn) == CONNECTION_BAD)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
entry->have_prep_stmt = false;
|
|
|
|
entry->have_error = false;
|
|
|
|
|
|
|
|
pgfdw_reset_xact_state(entry, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Finish pre-subcommit cleanup of connections on each of which we've sent a
|
|
|
|
* RELEASE command to the remote server.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgfdw_finish_pre_subcommit_cleanup(List *pending_entries, int curlevel)
|
|
|
|
{
|
|
|
|
ConnCacheEntry *entry;
|
|
|
|
char sql[100];
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
Assert(pending_entries);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the result of the RELEASE command for each of the pending entries
|
|
|
|
*/
|
|
|
|
snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
|
|
|
|
foreach(lc, pending_entries)
|
|
|
|
{
|
|
|
|
entry = (ConnCacheEntry *) lfirst(lc);
|
|
|
|
|
|
|
|
Assert(entry->changing_xact_state);
|
|
|
|
/*
|
|
|
|
* We might already have received the result on the socket, so pass
|
|
|
|
* consume_input=true to try to consume it first
|
|
|
|
*/
|
|
|
|
do_sql_command_end(entry->conn, sql, true);
|
|
|
|
entry->changing_xact_state = false;
|
|
|
|
|
|
|
|
pgfdw_reset_xact_state(entry, false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-18 09:11:08 +03:00
|
|
|
/*
|
|
|
|
* List active foreign server connections.
|
|
|
|
*
|
|
|
|
* This function takes no input parameter and returns setof record made of
|
|
|
|
* following values:
|
|
|
|
* - server_name - server name of active connection. In case the foreign server
|
|
|
|
* is dropped but still the connection is active, then the server name will
|
|
|
|
* be NULL in output.
|
|
|
|
* - valid - true/false representing whether the connection is valid or not.
|
|
|
|
* Note that the connections can get invalidated in pgfdw_inval_callback.
|
|
|
|
*
|
|
|
|
* No records are returned when there are no cached connections at all.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
postgres_fdw_get_connections(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
#define POSTGRES_FDW_GET_CONNECTIONS_COLS 2
|
|
|
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
|
|
|
HASH_SEQ_STATUS scan;
|
|
|
|
ConnCacheEntry *entry;
|
|
|
|
|
Simplify SRFs using materialize mode in contrib/ modules
9e98583 introduced a helper to centralize building their needed state
(tuplestore, tuple descriptors, etc.), checking for any errors. This
commit updates all places of contrib/ that can be switched to use
SetSingleFuncCall() as a drop-in replacement, resulting in the removal
of a lot of boilerplate code in all the modules updated by this commit.
Per analysis, some places remain as they are:
- pg_logdir_ls() in adminpack/ uses historically TYPEFUNC_RECORD as
return type, and I suspect that changing it may cause issues at run-time
with some of its past versions, down to 1.0.
- dblink/ uses a wrapper function doing exactly the work of
SetSingleFuncCall(). Here the switch should be possible, but rather
invasive so it does not seem the extra backpatch maintenance cost.
- tablefunc/, similarly, uses multiple helper functions with portions of
SetSingleFuncCall() spread across the code paths of this module.
Author: Melanie Plageman
Discussion: https://postgr.es/m/CAAKRu_bvDPJoL9mH6eYwvBpPtTGQwbDzfJbCM-OjkSZDu5yTPg@mail.gmail.com
2022-03-08 04:12:22 +03:00
|
|
|
SetSingleFuncCall(fcinfo, 0);
|
2021-01-18 09:11:08 +03:00
|
|
|
|
|
|
|
/* If cache doesn't exist, we return no records */
|
|
|
|
if (!ConnectionHash)
|
|
|
|
PG_RETURN_VOID();
|
|
|
|
|
|
|
|
hash_seq_init(&scan, ConnectionHash);
|
|
|
|
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
|
|
|
|
{
|
|
|
|
ForeignServer *server;
|
|
|
|
Datum values[POSTGRES_FDW_GET_CONNECTIONS_COLS];
|
|
|
|
bool nulls[POSTGRES_FDW_GET_CONNECTIONS_COLS];
|
|
|
|
|
|
|
|
/* We only look for open remote connections */
|
|
|
|
if (!entry->conn)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
server = GetForeignServerExtended(entry->serverid, FSV_MISSING_OK);
|
|
|
|
|
|
|
|
MemSet(values, 0, sizeof(values));
|
|
|
|
MemSet(nulls, 0, sizeof(nulls));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The foreign server may have been dropped in current explicit
|
|
|
|
* transaction. It is not possible to drop the server from another
|
|
|
|
* session when the connection associated with it is in use in the
|
|
|
|
* current transaction, if tried so, the drop query in another session
|
|
|
|
* blocks until the current transaction finishes.
|
|
|
|
*
|
|
|
|
* Even though the server is dropped in the current transaction, the
|
|
|
|
* cache can still have associated active connection entry, say we
|
|
|
|
* call such connections dangling. Since we can not fetch the server
|
2021-01-25 21:54:46 +03:00
|
|
|
* name from system catalogs for dangling connections, instead we show
|
|
|
|
* NULL value for server name in output.
|
2021-01-18 09:11:08 +03:00
|
|
|
*
|
|
|
|
* We could have done better by storing the server name in the cache
|
|
|
|
* entry instead of server oid so that it could be used in the output.
|
|
|
|
* But the server name in each cache entry requires 64 bytes of
|
|
|
|
* memory, which is huge, when there are many cached connections and
|
|
|
|
* the use case i.e. dropping the foreign server within the explicit
|
|
|
|
* current transaction seems rare. So, we chose to show NULL value for
|
|
|
|
* server name in output.
|
|
|
|
*
|
|
|
|
* Such dangling connections get closed either in next use or at the
|
|
|
|
* end of current explicit transaction in pgfdw_xact_callback.
|
|
|
|
*/
|
|
|
|
if (!server)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If the server has been dropped in the current explicit
|
|
|
|
* transaction, then this entry would have been invalidated in
|
2021-01-25 21:54:46 +03:00
|
|
|
* pgfdw_inval_callback at the end of drop server command. Note
|
2021-01-18 09:11:08 +03:00
|
|
|
* that this connection would not have been closed in
|
|
|
|
* pgfdw_inval_callback because it is still being used in the
|
|
|
|
* current explicit transaction. So, assert that here.
|
|
|
|
*/
|
|
|
|
Assert(entry->conn && entry->xact_depth > 0 && entry->invalidated);
|
|
|
|
|
|
|
|
/* Show null, if no server name was found */
|
|
|
|
nulls[0] = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
values[0] = CStringGetTextDatum(server->servername);
|
|
|
|
|
|
|
|
values[1] = BoolGetDatum(!entry->invalidated);
|
|
|
|
|
Simplify SRFs using materialize mode in contrib/ modules
9e98583 introduced a helper to centralize building their needed state
(tuplestore, tuple descriptors, etc.), checking for any errors. This
commit updates all places of contrib/ that can be switched to use
SetSingleFuncCall() as a drop-in replacement, resulting in the removal
of a lot of boilerplate code in all the modules updated by this commit.
Per analysis, some places remain as they are:
- pg_logdir_ls() in adminpack/ uses historically TYPEFUNC_RECORD as
return type, and I suspect that changing it may cause issues at run-time
with some of its past versions, down to 1.0.
- dblink/ uses a wrapper function doing exactly the work of
SetSingleFuncCall(). Here the switch should be possible, but rather
invasive so it does not seem the extra backpatch maintenance cost.
- tablefunc/, similarly, uses multiple helper functions with portions of
SetSingleFuncCall() spread across the code paths of this module.
Author: Melanie Plageman
Discussion: https://postgr.es/m/CAAKRu_bvDPJoL9mH6eYwvBpPtTGQwbDzfJbCM-OjkSZDu5yTPg@mail.gmail.com
2022-03-08 04:12:22 +03:00
|
|
|
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
|
2021-01-18 09:11:08 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
PG_RETURN_VOID();
|
|
|
|
}
|
2021-01-25 21:54:46 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Disconnect the specified cached connections.
|
|
|
|
*
|
|
|
|
* This function discards the open connections that are established by
|
|
|
|
* postgres_fdw from the local session to the foreign server with
|
|
|
|
* the given name. Note that there can be multiple connections to
|
|
|
|
* the given server using different user mappings. If the connections
|
|
|
|
* are used in the current local transaction, they are not disconnected
|
|
|
|
* and warning messages are reported. This function returns true
|
|
|
|
* if it disconnects at least one connection, otherwise false. If no
|
|
|
|
* foreign server with the given name is found, an error is reported.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
postgres_fdw_disconnect(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
ForeignServer *server;
|
|
|
|
char *servername;
|
|
|
|
|
|
|
|
servername = text_to_cstring(PG_GETARG_TEXT_PP(0));
|
|
|
|
server = GetForeignServerByName(servername, false);
|
|
|
|
|
|
|
|
PG_RETURN_BOOL(disconnect_cached_connections(server->serverid));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disconnect all the cached connections.
|
|
|
|
*
|
|
|
|
* This function discards all the open connections that are established by
|
|
|
|
* postgres_fdw from the local session to the foreign servers.
|
|
|
|
* If the connections are used in the current local transaction, they are
|
|
|
|
* not disconnected and warning messages are reported. This function
|
|
|
|
* returns true if it disconnects at least one connection, otherwise false.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
postgres_fdw_disconnect_all(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
PG_RETURN_BOOL(disconnect_cached_connections(InvalidOid));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Workhorse to disconnect cached connections.
|
|
|
|
*
|
|
|
|
* This function scans all the connection cache entries and disconnects
|
|
|
|
* the open connections whose foreign server OID matches with
|
|
|
|
* the specified one. If InvalidOid is specified, it disconnects all
|
|
|
|
* the cached connections.
|
|
|
|
*
|
|
|
|
* This function emits a warning for each connection that's used in
|
|
|
|
* the current transaction and doesn't close it. It returns true if
|
|
|
|
* it disconnects at least one connection, otherwise false.
|
|
|
|
*
|
|
|
|
* Note that this function disconnects even the connections that are
|
|
|
|
* established by other users in the same local session using different
|
|
|
|
* user mappings. This leads even non-superuser to be able to close
|
|
|
|
* the connections established by superusers in the same local session.
|
|
|
|
*
|
|
|
|
* XXX As of now we don't see any security risk doing this. But we should
|
|
|
|
* set some restrictions on that, for example, prevent non-superuser
|
|
|
|
* from closing the connections established by superusers even
|
|
|
|
* in the same session?
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
disconnect_cached_connections(Oid serverid)
|
|
|
|
{
|
|
|
|
HASH_SEQ_STATUS scan;
|
|
|
|
ConnCacheEntry *entry;
|
|
|
|
bool all = !OidIsValid(serverid);
|
|
|
|
bool result = false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Connection cache hashtable has not been initialized yet in this
|
|
|
|
* session, so return false.
|
|
|
|
*/
|
|
|
|
if (!ConnectionHash)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
hash_seq_init(&scan, ConnectionHash);
|
|
|
|
while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
|
|
|
|
{
|
|
|
|
/* Ignore cache entry if no open connection right now. */
|
|
|
|
if (!entry->conn)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (all || entry->serverid == serverid)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Emit a warning because the connection to close is used in the
|
|
|
|
* current transaction and cannot be disconnected right now.
|
|
|
|
*/
|
|
|
|
if (entry->xact_depth > 0)
|
|
|
|
{
|
|
|
|
ForeignServer *server;
|
|
|
|
|
|
|
|
server = GetForeignServerExtended(entry->serverid,
|
|
|
|
FSV_MISSING_OK);
|
|
|
|
|
|
|
|
if (!server)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If the foreign server was dropped while its connection
|
|
|
|
* was used in the current transaction, the connection
|
|
|
|
* must have been marked as invalid by
|
|
|
|
* pgfdw_inval_callback at the end of DROP SERVER command.
|
|
|
|
*/
|
|
|
|
Assert(entry->invalidated);
|
|
|
|
|
|
|
|
ereport(WARNING,
|
|
|
|
(errmsg("cannot close dropped server connection because it is still in use")));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
ereport(WARNING,
|
|
|
|
(errmsg("cannot close connection for server \"%s\" because it is still in use",
|
|
|
|
server->servername)));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
elog(DEBUG3, "discarding connection %p", entry->conn);
|
|
|
|
disconnect_pg_server(entry);
|
|
|
|
result = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|