Don't leak descriptors into subprograms.

Open long-lived data and WAL file descriptors with O_CLOEXEC.  This flag
was introduced by SUSv4 (POSIX.1-2008), and by now all of our target
Unix systems have it.  Our open() implementation for Windows already had
that behavior, so provide a dummy O_CLOEXEC flag on that platform.

For now, callers of open() and the "thin" wrappers in fd.c that deal in
raw descriptors need to pass in O_CLOEXEC explicitly if desired.  This
commit does that for WAL files, and automatically for everything
accessed via VFDs including SMgrRelation and BufFile.  (With more
discussion we might decide to turn it on automatically for the thin
open()-wrappers too to avoid risk of missing places that need it, but
these are typically used for short-lived descriptors where we don't
expect to fork/exec, and it's remotely possible that extensions could be
using these APIs and passing descriptors to subprograms deliberately, so
that hasn't been done here.)

Do the same for sockets and the postmaster pipe with FD_CLOEXEC.  (Later
commits might use modern interfaces to remove these extra fcntl() calls
and more where possible, but we'll need them as a fallback for a couple
of systems, so do it that way in this initial commit.)

With this change, subprograms executed for archiving, copying etc will
no longer have access to the server's descriptors, other than the ones
that we decide to pass down.

Reviewed-by: Andres Freund <andres@anarazel.de> (earlier version)
Discussion: https://postgr.es/m/CA%2BhUKGKb6FsAdQWcRL35KJsftv%2B9zXqQbzwkfRf1i0J2e57%2BhQ%40mail.gmail.com
This commit is contained in:
Thomas Munro 2023-03-03 10:28:47 +13:00
parent 6b661b01f4
commit 1da569ca1f
5 changed files with 36 additions and 3 deletions

View File

@ -2936,7 +2936,8 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
* Try to use existent file (checkpoint maker may have created it already)
*/
*added = false;
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method));
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
get_sync_bit(sync_method));
if (fd < 0)
{
if (errno != ENOENT)
@ -3097,7 +3098,8 @@ XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
return fd;
/* Now open original target segment (might not be file I just made) */
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method));
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
get_sync_bit(sync_method));
if (fd < 0)
ereport(ERROR,
(errcode_for_file_access(),
@ -3328,7 +3330,8 @@ XLogFileOpen(XLogSegNo segno, TimeLineID tli)
XLogFilePath(path, tli, segno, wal_segment_size);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method));
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
get_sync_bit(sync_method));
if (fd < 0)
ereport(PANIC,
(errcode_for_file_access(),

View File

@ -200,6 +200,13 @@ pq_init(void)
(errmsg("could not set socket to nonblocking mode: %m")));
#endif
#ifndef WIN32
/* Don't give the socket to any subprograms we execute. */
if (fcntl(MyProcPort->sock, F_SETFD, FD_CLOEXEC) < 0)
elog(FATAL, "fcntl(F_SETFD) failed on socket: %m");
#endif
FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, FeBeWaitSetNEvents);
socket_pos = AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE,
MyProcPort->sock, NULL, NULL);

View File

@ -1515,6 +1515,14 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
/* Close excess kernel FDs. */
ReleaseLruFiles();
/*
* Descriptors managed by VFDs are implicitly marked O_CLOEXEC. The
* client shouldn't be expected to know which kernel descriptors are
* currently open, so it wouldn't make sense for them to be inherited by
* executed subprograms.
*/
fileFlags |= O_CLOEXEC;
vfdP->fd = BasicOpenFilePerm(fileName, fileFlags, fileMode);
if (vfdP->fd < 0)

View File

@ -163,6 +163,14 @@ InitPostmasterChild(void)
/* Request a signal if the postmaster dies, if possible. */
PostmasterDeathSignalInit();
/* Don't give the pipe to subprograms that we execute. */
#ifndef WIN32
if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFD, FD_CLOEXEC) < 0)
ereport(FATAL,
(errcode_for_socket_access(),
errmsg_internal("could not set postmaster death monitoring pipe to FD_CLOEXEC mode: %m")));
#endif
}
/*

View File

@ -353,6 +353,13 @@ extern int _pglstat64(const char *name, struct stat *buf);
*/
#define O_DSYNC 0x0080
/*
* Our open() replacement does not create inheritable handles, so it is safe to
* ignore O_CLOEXEC. (If we were using Windows' own open(), it might be
* necessary to convert this to _O_NOINHERIT.)
*/
#define O_CLOEXEC 0
/*
* Supplement to <errno.h>.
*