Avoid "could not reattach" by providing space for concurrent allocation.
We've long had reports of intermittent "could not reattach to shared memory" errors on Windows. Buildfarm member dory fails that way when PGSharedMemoryReAttach() execution overlaps with creation of a thread for the process's "default thread pool". Fix that by providing a second region to receive asynchronous allocations that would otherwise intrude into UsedShmemSegAddr. In pgwin32_ReserveSharedMemoryRegion(), stop trying to free reservations landing at incorrect addresses; the caller's next step has been to terminate the affected process. Back-patch to 9.4 (all supported versions). Reviewed by Tom Lane. He also did much of the prerequisite research; see commit bcbf2346d69f6006f126044864dd9383d50d87b4. Discussion: https://postgr.es/m/20190402135442.GA1173872@rfd.leadboat.com
This commit is contained in:
parent
68e745ed0d
commit
e45a8ff871
@ -17,6 +17,28 @@
|
|||||||
#include "storage/ipc.h"
|
#include "storage/ipc.h"
|
||||||
#include "storage/pg_shmem.h"
|
#include "storage/pg_shmem.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Early in a process's life, Windows asynchronously creates threads for the
|
||||||
|
* process's "default thread pool"
|
||||||
|
* (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
|
||||||
|
* Occasionally, thread creation allocates a stack after
|
||||||
|
* PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
|
||||||
|
* mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
|
||||||
|
* if the allocator preferred the just-released region for allocating the new
|
||||||
|
* thread stack. We observed such failures in some Windows Server 2016
|
||||||
|
* configurations. To give the system another region to prefer, reserve and
|
||||||
|
* release an additional, protective region immediately before reserving or
|
||||||
|
* releasing shared memory. The idea is that, if the allocator handed out
|
||||||
|
* REGION1 pages before REGION2 pages at one occasion, it will do so whenever
|
||||||
|
* both regions are free. Windows Server 2016 exhibits that behavior, and a
|
||||||
|
* system behaving differently would have less need to protect
|
||||||
|
* UsedShmemSegAddr. The protective region must be at least large enough for
|
||||||
|
* one thread stack. However, ten times as much is less than 2% of the 32-bit
|
||||||
|
* address space and is negligible relative to the 64-bit address space.
|
||||||
|
*/
|
||||||
|
#define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
|
||||||
|
void *ShmemProtectiveRegion = NULL;
|
||||||
|
|
||||||
HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;
|
HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;
|
||||||
void *UsedShmemSegAddr = NULL;
|
void *UsedShmemSegAddr = NULL;
|
||||||
static Size UsedShmemSegSize = 0;
|
static Size UsedShmemSegSize = 0;
|
||||||
@ -192,6 +214,12 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
|
|||||||
Size orig_size = size;
|
Size orig_size = size;
|
||||||
DWORD flProtect = PAGE_READWRITE;
|
DWORD flProtect = PAGE_READWRITE;
|
||||||
|
|
||||||
|
ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
|
||||||
|
MEM_RESERVE, PAGE_NOACCESS);
|
||||||
|
if (ShmemProtectiveRegion == NULL)
|
||||||
|
elog(FATAL, "could not reserve memory region: error code %lu",
|
||||||
|
GetLastError());
|
||||||
|
|
||||||
/* Room for a header? */
|
/* Room for a header? */
|
||||||
Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
|
Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
|
||||||
|
|
||||||
@ -370,9 +398,9 @@ retry:
|
|||||||
* an already existing shared memory segment, using the handle inherited from
|
* an already existing shared memory segment, using the handle inherited from
|
||||||
* the postmaster.
|
* the postmaster.
|
||||||
*
|
*
|
||||||
* UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
|
* ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
|
||||||
* routine. The caller must have already restored them to the postmaster's
|
* parameters to this routine. The caller must have already restored them to
|
||||||
* values.
|
* the postmaster's values.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
PGSharedMemoryReAttach(void)
|
PGSharedMemoryReAttach(void)
|
||||||
@ -380,12 +408,16 @@ PGSharedMemoryReAttach(void)
|
|||||||
PGShmemHeader *hdr;
|
PGShmemHeader *hdr;
|
||||||
void *origUsedShmemSegAddr = UsedShmemSegAddr;
|
void *origUsedShmemSegAddr = UsedShmemSegAddr;
|
||||||
|
|
||||||
|
Assert(ShmemProtectiveRegion != NULL);
|
||||||
Assert(UsedShmemSegAddr != NULL);
|
Assert(UsedShmemSegAddr != NULL);
|
||||||
Assert(IsUnderPostmaster);
|
Assert(IsUnderPostmaster);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Release memory region reservation that was made by the postmaster
|
* Release memory region reservations made by the postmaster
|
||||||
*/
|
*/
|
||||||
|
if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
|
||||||
|
elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
|
||||||
|
ShmemProtectiveRegion, GetLastError());
|
||||||
if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
|
if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
|
||||||
elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
|
elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
|
||||||
UsedShmemSegAddr, GetLastError());
|
UsedShmemSegAddr, GetLastError());
|
||||||
@ -414,13 +446,14 @@ PGSharedMemoryReAttach(void)
|
|||||||
* The child process startup logic might or might not call PGSharedMemoryDetach
|
* The child process startup logic might or might not call PGSharedMemoryDetach
|
||||||
* after this; make sure that it will be a no-op if called.
|
* after this; make sure that it will be a no-op if called.
|
||||||
*
|
*
|
||||||
* UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
|
* ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
|
||||||
* routine. The caller must have already restored them to the postmaster's
|
* parameters to this routine. The caller must have already restored them to
|
||||||
* values.
|
* the postmaster's values.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
PGSharedMemoryNoReAttach(void)
|
PGSharedMemoryNoReAttach(void)
|
||||||
{
|
{
|
||||||
|
Assert(ShmemProtectiveRegion != NULL);
|
||||||
Assert(UsedShmemSegAddr != NULL);
|
Assert(UsedShmemSegAddr != NULL);
|
||||||
Assert(IsUnderPostmaster);
|
Assert(IsUnderPostmaster);
|
||||||
|
|
||||||
@ -447,12 +480,25 @@ PGSharedMemoryNoReAttach(void)
|
|||||||
* Rather, this is for subprocesses that have inherited an attachment and want
|
* Rather, this is for subprocesses that have inherited an attachment and want
|
||||||
* to get rid of it.
|
* to get rid of it.
|
||||||
*
|
*
|
||||||
* UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
|
* ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
|
||||||
* routine.
|
* parameters to this routine.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
PGSharedMemoryDetach(void)
|
PGSharedMemoryDetach(void)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Releasing the protective region liberates an unimportant quantity of
|
||||||
|
* address space, but be tidy.
|
||||||
|
*/
|
||||||
|
if (ShmemProtectiveRegion != NULL)
|
||||||
|
{
|
||||||
|
if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
|
||||||
|
elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
|
||||||
|
ShmemProtectiveRegion, GetLastError());
|
||||||
|
|
||||||
|
ShmemProtectiveRegion = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* Unmap the view, if it's mapped */
|
/* Unmap the view, if it's mapped */
|
||||||
if (UsedShmemSegAddr != NULL)
|
if (UsedShmemSegAddr != NULL)
|
||||||
{
|
{
|
||||||
@ -510,19 +556,22 @@ pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
|
|||||||
{
|
{
|
||||||
void *address;
|
void *address;
|
||||||
|
|
||||||
|
Assert(ShmemProtectiveRegion != NULL);
|
||||||
Assert(UsedShmemSegAddr != NULL);
|
Assert(UsedShmemSegAddr != NULL);
|
||||||
Assert(UsedShmemSegSize != 0);
|
Assert(UsedShmemSegSize != 0);
|
||||||
|
|
||||||
address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
|
/* ShmemProtectiveRegion */
|
||||||
MEM_RESERVE, PAGE_READWRITE);
|
address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
|
||||||
|
PROTECTIVE_REGION_SIZE,
|
||||||
|
MEM_RESERVE, PAGE_NOACCESS);
|
||||||
if (address == NULL)
|
if (address == NULL)
|
||||||
{
|
{
|
||||||
/* Don't use FATAL since we're running in the postmaster */
|
/* Don't use FATAL since we're running in the postmaster */
|
||||||
elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
|
elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
|
||||||
UsedShmemSegAddr, hChild, GetLastError());
|
ShmemProtectiveRegion, hChild, GetLastError());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (address != UsedShmemSegAddr)
|
if (address != ShmemProtectiveRegion)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Should never happen - in theory if allocation granularity causes
|
* Should never happen - in theory if allocation granularity causes
|
||||||
@ -530,9 +579,24 @@ pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
|
|||||||
*
|
*
|
||||||
* Don't use FATAL since we're running in the postmaster.
|
* Don't use FATAL since we're running in the postmaster.
|
||||||
*/
|
*/
|
||||||
|
elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
|
||||||
|
address, ShmemProtectiveRegion);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* UsedShmemSegAddr */
|
||||||
|
address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
|
||||||
|
MEM_RESERVE, PAGE_READWRITE);
|
||||||
|
if (address == NULL)
|
||||||
|
{
|
||||||
|
elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
|
||||||
|
UsedShmemSegAddr, hChild, GetLastError());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (address != UsedShmemSegAddr)
|
||||||
|
{
|
||||||
elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
|
elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
|
||||||
address, UsedShmemSegAddr);
|
address, UsedShmemSegAddr);
|
||||||
VirtualFreeEx(hChild, address, 0, MEM_RELEASE);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -493,6 +493,7 @@ typedef struct
|
|||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
unsigned long UsedShmemSegID;
|
unsigned long UsedShmemSegID;
|
||||||
#else
|
#else
|
||||||
|
void *ShmemProtectiveRegion;
|
||||||
HANDLE UsedShmemSegID;
|
HANDLE UsedShmemSegID;
|
||||||
#endif
|
#endif
|
||||||
void *UsedShmemSegAddr;
|
void *UsedShmemSegAddr;
|
||||||
@ -5997,6 +5998,9 @@ save_backend_variables(BackendParameters *param, Port *port,
|
|||||||
param->MyCancelKey = MyCancelKey;
|
param->MyCancelKey = MyCancelKey;
|
||||||
param->MyPMChildSlot = MyPMChildSlot;
|
param->MyPMChildSlot = MyPMChildSlot;
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
param->ShmemProtectiveRegion = ShmemProtectiveRegion;
|
||||||
|
#endif
|
||||||
param->UsedShmemSegID = UsedShmemSegID;
|
param->UsedShmemSegID = UsedShmemSegID;
|
||||||
param->UsedShmemSegAddr = UsedShmemSegAddr;
|
param->UsedShmemSegAddr = UsedShmemSegAddr;
|
||||||
|
|
||||||
@ -6230,6 +6234,9 @@ restore_backend_variables(BackendParameters *param, Port *port)
|
|||||||
MyCancelKey = param->MyCancelKey;
|
MyCancelKey = param->MyCancelKey;
|
||||||
MyPMChildSlot = param->MyPMChildSlot;
|
MyPMChildSlot = param->MyPMChildSlot;
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
ShmemProtectiveRegion = param->ShmemProtectiveRegion;
|
||||||
|
#endif
|
||||||
UsedShmemSegID = param->UsedShmemSegID;
|
UsedShmemSegID = param->UsedShmemSegID;
|
||||||
UsedShmemSegAddr = param->UsedShmemSegAddr;
|
UsedShmemSegAddr = param->UsedShmemSegAddr;
|
||||||
|
|
||||||
|
@ -56,6 +56,7 @@ typedef enum
|
|||||||
extern unsigned long UsedShmemSegID;
|
extern unsigned long UsedShmemSegID;
|
||||||
#else
|
#else
|
||||||
extern HANDLE UsedShmemSegID;
|
extern HANDLE UsedShmemSegID;
|
||||||
|
extern void *ShmemProtectiveRegion;
|
||||||
#endif
|
#endif
|
||||||
extern void *UsedShmemSegAddr;
|
extern void *UsedShmemSegAddr;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user