Allow dynamic allocation of shared memory segments.
Patch by myself and Amit Kapila. Design help from Noah Misch. Review by Andres Freund.
This commit is contained in:
parent
f566515192
commit
0ac5e5a7e1
177
configure
vendored
177
configure
vendored
@ -8384,6 +8384,180 @@ if test "$ac_res" != no; then
|
|||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:$LINENO: checking for library containing shm_open" >&5
|
||||||
|
$as_echo_n "checking for library containing shm_open... " >&6; }
|
||||||
|
if test "${ac_cv_search_shm_open+set}" = set; then
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
ac_func_search_save_LIBS=$LIBS
|
||||||
|
cat >conftest.$ac_ext <<_ACEOF
|
||||||
|
/* confdefs.h. */
|
||||||
|
_ACEOF
|
||||||
|
cat confdefs.h >>conftest.$ac_ext
|
||||||
|
cat >>conftest.$ac_ext <<_ACEOF
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
/* Override any GCC internal prototype to avoid an error.
|
||||||
|
Use char because int might match the return type of a GCC
|
||||||
|
builtin and then its argument prototype would still apply. */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
char shm_open ();
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return shm_open ();
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
for ac_lib in '' rt; do
|
||||||
|
if test -z "$ac_lib"; then
|
||||||
|
ac_res="none required"
|
||||||
|
else
|
||||||
|
ac_res=-l$ac_lib
|
||||||
|
LIBS="-l$ac_lib $ac_func_search_save_LIBS"
|
||||||
|
fi
|
||||||
|
rm -f conftest.$ac_objext conftest$ac_exeext
|
||||||
|
if { (ac_try="$ac_link"
|
||||||
|
case "(($ac_try" in
|
||||||
|
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||||
|
*) ac_try_echo=$ac_try;;
|
||||||
|
esac
|
||||||
|
eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
|
||||||
|
$as_echo "$ac_try_echo") >&5
|
||||||
|
(eval "$ac_link") 2>conftest.er1
|
||||||
|
ac_status=$?
|
||||||
|
grep -v '^ *+' conftest.er1 >conftest.err
|
||||||
|
rm -f conftest.er1
|
||||||
|
cat conftest.err >&5
|
||||||
|
$as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||||
|
(exit $ac_status); } && {
|
||||||
|
test -z "$ac_c_werror_flag" ||
|
||||||
|
test ! -s conftest.err
|
||||||
|
} && test -s conftest$ac_exeext && {
|
||||||
|
test "$cross_compiling" = yes ||
|
||||||
|
$as_test_x conftest$ac_exeext
|
||||||
|
}; then
|
||||||
|
ac_cv_search_shm_open=$ac_res
|
||||||
|
else
|
||||||
|
$as_echo "$as_me: failed program was:" >&5
|
||||||
|
sed 's/^/| /' conftest.$ac_ext >&5
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -rf conftest.dSYM
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
|
||||||
|
conftest$ac_exeext
|
||||||
|
if test "${ac_cv_search_shm_open+set}" = set; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if test "${ac_cv_search_shm_open+set}" = set; then
|
||||||
|
:
|
||||||
|
else
|
||||||
|
ac_cv_search_shm_open=no
|
||||||
|
fi
|
||||||
|
rm conftest.$ac_ext
|
||||||
|
LIBS=$ac_func_search_save_LIBS
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:$LINENO: result: $ac_cv_search_shm_open" >&5
|
||||||
|
$as_echo "$ac_cv_search_shm_open" >&6; }
|
||||||
|
ac_res=$ac_cv_search_shm_open
|
||||||
|
if test "$ac_res" != no; then
|
||||||
|
test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:$LINENO: checking for library containing shm_unlink" >&5
|
||||||
|
$as_echo_n "checking for library containing shm_unlink... " >&6; }
|
||||||
|
if test "${ac_cv_search_shm_unlink+set}" = set; then
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
ac_func_search_save_LIBS=$LIBS
|
||||||
|
cat >conftest.$ac_ext <<_ACEOF
|
||||||
|
/* confdefs.h. */
|
||||||
|
_ACEOF
|
||||||
|
cat confdefs.h >>conftest.$ac_ext
|
||||||
|
cat >>conftest.$ac_ext <<_ACEOF
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
/* Override any GCC internal prototype to avoid an error.
|
||||||
|
Use char because int might match the return type of a GCC
|
||||||
|
builtin and then its argument prototype would still apply. */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
#endif
|
||||||
|
char shm_unlink ();
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
return shm_unlink ();
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
for ac_lib in '' rt; do
|
||||||
|
if test -z "$ac_lib"; then
|
||||||
|
ac_res="none required"
|
||||||
|
else
|
||||||
|
ac_res=-l$ac_lib
|
||||||
|
LIBS="-l$ac_lib $ac_func_search_save_LIBS"
|
||||||
|
fi
|
||||||
|
rm -f conftest.$ac_objext conftest$ac_exeext
|
||||||
|
if { (ac_try="$ac_link"
|
||||||
|
case "(($ac_try" in
|
||||||
|
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||||
|
*) ac_try_echo=$ac_try;;
|
||||||
|
esac
|
||||||
|
eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
|
||||||
|
$as_echo "$ac_try_echo") >&5
|
||||||
|
(eval "$ac_link") 2>conftest.er1
|
||||||
|
ac_status=$?
|
||||||
|
grep -v '^ *+' conftest.er1 >conftest.err
|
||||||
|
rm -f conftest.er1
|
||||||
|
cat conftest.err >&5
|
||||||
|
$as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||||
|
(exit $ac_status); } && {
|
||||||
|
test -z "$ac_c_werror_flag" ||
|
||||||
|
test ! -s conftest.err
|
||||||
|
} && test -s conftest$ac_exeext && {
|
||||||
|
test "$cross_compiling" = yes ||
|
||||||
|
$as_test_x conftest$ac_exeext
|
||||||
|
}; then
|
||||||
|
ac_cv_search_shm_unlink=$ac_res
|
||||||
|
else
|
||||||
|
$as_echo "$as_me: failed program was:" >&5
|
||||||
|
sed 's/^/| /' conftest.$ac_ext >&5
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -rf conftest.dSYM
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
|
||||||
|
conftest$ac_exeext
|
||||||
|
if test "${ac_cv_search_shm_unlink+set}" = set; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if test "${ac_cv_search_shm_unlink+set}" = set; then
|
||||||
|
:
|
||||||
|
else
|
||||||
|
ac_cv_search_shm_unlink=no
|
||||||
|
fi
|
||||||
|
rm conftest.$ac_ext
|
||||||
|
LIBS=$ac_func_search_save_LIBS
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:$LINENO: result: $ac_cv_search_shm_unlink" >&5
|
||||||
|
$as_echo "$ac_cv_search_shm_unlink" >&6; }
|
||||||
|
ac_res=$ac_cv_search_shm_unlink
|
||||||
|
if test "$ac_res" != no; then
|
||||||
|
test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
# Solaris:
|
# Solaris:
|
||||||
{ $as_echo "$as_me:$LINENO: checking for library containing fdatasync" >&5
|
{ $as_echo "$as_me:$LINENO: checking for library containing fdatasync" >&5
|
||||||
$as_echo_n "checking for library containing fdatasync... " >&6; }
|
$as_echo_n "checking for library containing fdatasync... " >&6; }
|
||||||
@ -19763,7 +19937,8 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
|
|
||||||
|
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
|
||||||
do
|
do
|
||||||
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
||||||
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
|
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
|
||||||
|
@ -883,6 +883,8 @@ case $host_os in
|
|||||||
esac
|
esac
|
||||||
AC_SEARCH_LIBS(getopt_long, [getopt gnugetopt])
|
AC_SEARCH_LIBS(getopt_long, [getopt gnugetopt])
|
||||||
AC_SEARCH_LIBS(crypt, crypt)
|
AC_SEARCH_LIBS(crypt, crypt)
|
||||||
|
AC_SEARCH_LIBS(shm_open, rt)
|
||||||
|
AC_SEARCH_LIBS(shm_unlink, rt)
|
||||||
# Solaris:
|
# Solaris:
|
||||||
AC_SEARCH_LIBS(fdatasync, [rt posix4])
|
AC_SEARCH_LIBS(fdatasync, [rt posix4])
|
||||||
# Required for thread_test.c on Solaris 2.5:
|
# Required for thread_test.c on Solaris 2.5:
|
||||||
@ -1230,7 +1232,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
|
|||||||
LIBS_including_readline="$LIBS"
|
LIBS_including_readline="$LIBS"
|
||||||
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
|
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
|
||||||
|
|
||||||
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
|
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
|
||||||
|
|
||||||
AC_REPLACE_FUNCS(fseeko)
|
AC_REPLACE_FUNCS(fseeko)
|
||||||
case $host_os in
|
case $host_os in
|
||||||
|
@ -1194,6 +1194,32 @@ include 'filename'
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="guc-dynamic-shared-memory-type" xreflabel="dynamic_shared_memory_type">
|
||||||
|
<term><varname>dynamic_shared_memory_type</varname> (<type>enum</type>)</term>
|
||||||
|
<indexterm>
|
||||||
|
<primary><varname>dynamic_shared_memory_type</> configuration parameter</primary>
|
||||||
|
</indexterm>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Specifies the dynamic shared memory implementation that the server
|
||||||
|
should use. Possible values are <literal>posix</> (for POSIX shared
|
||||||
|
memory allocated using <literal>shm_open</>), <literal>sysv</literal>
|
||||||
|
(for System V shared memory allocated via <literal>shmget</>),
|
||||||
|
<literal>windows</> (for Windows shared memory), <literal>mmap</>
|
||||||
|
(to simulate shared memory using memory-mapped files stored in the
|
||||||
|
data directory), and <literal>none</> (to disable this feature).
|
||||||
|
Not all values are supported on all platforms; the first supported
|
||||||
|
option is the default for that platform. The use of the
|
||||||
|
<literal>mmap</> option, which is not the default on any platform,
|
||||||
|
is generally discouraged because the operating system may write
|
||||||
|
modified pages back to disk repeatedly, increasing system I/O load;
|
||||||
|
however, it may be useful for debugging, when the
|
||||||
|
<literal>pg_dynshmem</> directory is stored on a RAM disk, or when
|
||||||
|
other shared memory facilities are not available.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
#include "portability/mem.h"
|
||||||
#include "storage/ipc.h"
|
#include "storage/ipc.h"
|
||||||
#include "storage/pg_shmem.h"
|
#include "storage/pg_shmem.h"
|
||||||
|
|
||||||
@ -36,31 +37,6 @@
|
|||||||
typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
|
typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
|
||||||
typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
|
typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
|
||||||
|
|
||||||
#define IPCProtection (0600) /* access/modify by user only */
|
|
||||||
|
|
||||||
#ifdef SHM_SHARE_MMU /* use intimate shared memory on Solaris */
|
|
||||||
#define PG_SHMAT_FLAGS SHM_SHARE_MMU
|
|
||||||
#else
|
|
||||||
#define PG_SHMAT_FLAGS 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
|
|
||||||
#ifndef MAP_ANONYMOUS
|
|
||||||
#define MAP_ANONYMOUS MAP_ANON
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
|
|
||||||
#ifndef MAP_HASSEMAPHORE
|
|
||||||
#define MAP_HASSEMAPHORE 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
|
|
||||||
|
|
||||||
/* Some really old systems don't define MAP_FAILED. */
|
|
||||||
#ifndef MAP_FAILED
|
|
||||||
#define MAP_FAILED ((void *) -1)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
unsigned long UsedShmemSegID = 0;
|
unsigned long UsedShmemSegID = 0;
|
||||||
void *UsedShmemSegAddr = NULL;
|
void *UsedShmemSegAddr = NULL;
|
||||||
|
@ -15,7 +15,7 @@ override CFLAGS+= -fno-inline
|
|||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
OBJS = ipc.o ipci.o pmsignal.o procarray.o procsignal.o shmem.o shmqueue.o \
|
OBJS = dsm_impl.o dsm.o ipc.o ipci.o pmsignal.o procarray.o procsignal.o \
|
||||||
sinval.o sinvaladt.o standby.o
|
shmem.o shmqueue.o sinval.o sinvaladt.o standby.o
|
||||||
|
|
||||||
include $(top_srcdir)/src/backend/common.mk
|
include $(top_srcdir)/src/backend/common.mk
|
||||||
|
972
src/backend/storage/ipc/dsm.c
Normal file
972
src/backend/storage/ipc/dsm.c
Normal file
@ -0,0 +1,972 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* dsm.c
|
||||||
|
* manage dynamic shared memory segments
|
||||||
|
*
|
||||||
|
* This file provides a set of services to make programming with dynamic
|
||||||
|
* shared memory segments more convenient. Unlike the low-level
|
||||||
|
* facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
|
||||||
|
* created using this module will be cleaned up automatically. Mappings
|
||||||
|
* will be removed when the resource owner under which they were created
|
||||||
|
* is cleaned up, unless dsm_keep_mapping() is used, in which case they
|
||||||
|
* have session lifespan. Segments will be removed when there are no
|
||||||
|
* remaining mappings, or at postmaster shutdown in any case. After a
|
||||||
|
* hard postmaster crash, remaining segments will be removed, if they
|
||||||
|
* still exist, at the next postmaster startup.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* src/backend/storage/ipc/dsm.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#ifndef WIN32
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#endif
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
|
#include "lib/ilist.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "storage/dsm.h"
|
||||||
|
#include "storage/ipc.h"
|
||||||
|
#include "storage/lwlock.h"
|
||||||
|
#include "utils/guc.h"
|
||||||
|
#include "utils/memutils.h"
|
||||||
|
#include "utils/resowner_private.h"
|
||||||
|
|
||||||
|
#define PG_DYNSHMEM_STATE_FILE PG_DYNSHMEM_DIR "/state"
|
||||||
|
#define PG_DYNSHMEM_NEW_STATE_FILE PG_DYNSHMEM_DIR "/state.new"
|
||||||
|
#define PG_DYNSHMEM_STATE_BUFSIZ 512
|
||||||
|
#define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There's no point in getting too cheap here, because the minimum allocation
|
||||||
|
* is one OS page, which is probably at least 4KB and could easily be as high
|
||||||
|
* as 64KB. Each slot currently costs sizeof(dsm_control_item), which is 8 bytes.
|
||||||
|
*/
|
||||||
|
#define PG_DYNSHMEM_FIXED_SLOTS 64
|
||||||
|
#define PG_DYNSHMEM_SLOTS_PER_BACKEND 2
|
||||||
|
|
||||||
|
#define INVALID_CONTROL_SLOT ((uint32) -1)
|
||||||
|
|
||||||
|
/* Backend-local state for a dynamic shared memory segment. */
|
||||||
|
struct dsm_segment
|
||||||
|
{
|
||||||
|
dlist_node node; /* List link in dsm_segment_list. */
|
||||||
|
ResourceOwner resowner; /* Resource owner. */
|
||||||
|
dsm_handle handle; /* Segment name. */
|
||||||
|
uint32 control_slot; /* Slot in control segment. */
|
||||||
|
void *impl_private; /* Implementation-specific private data. */
|
||||||
|
void *mapped_address; /* Mapping address, or NULL if unmapped. */
|
||||||
|
uint64 mapped_size; /* Size of our mapping. */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Shared-memory state for a dynamic shared memory segment. */
|
||||||
|
typedef struct dsm_control_item
|
||||||
|
{
|
||||||
|
dsm_handle handle;
|
||||||
|
uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */
|
||||||
|
} dsm_control_item;
|
||||||
|
|
||||||
|
/* Layout of the dynamic shared memory control segment. */
|
||||||
|
typedef struct dsm_control_header
|
||||||
|
{
|
||||||
|
uint32 magic;
|
||||||
|
uint32 nitems;
|
||||||
|
uint32 maxitems;
|
||||||
|
dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
|
||||||
|
} dsm_control_header;
|
||||||
|
|
||||||
|
static void dsm_cleanup_using_control_segment(void);
|
||||||
|
static void dsm_cleanup_for_mmap(void);
|
||||||
|
static bool dsm_read_state_file(dsm_handle *h);
|
||||||
|
static void dsm_write_state_file(dsm_handle h);
|
||||||
|
static void dsm_postmaster_shutdown(int code, Datum arg);
|
||||||
|
static void dsm_backend_shutdown(int code, Datum arg);
|
||||||
|
static dsm_segment *dsm_create_descriptor(void);
|
||||||
|
static bool dsm_control_segment_sane(dsm_control_header *control,
|
||||||
|
uint64 mapped_size);
|
||||||
|
static uint64 dsm_control_bytes_needed(uint32 nitems);
|
||||||
|
|
||||||
|
/* Has this backend initialized the dynamic shared memory system yet? */
|
||||||
|
static bool dsm_init_done = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* List of dynamic shared memory segments used by this backend.
|
||||||
|
*
|
||||||
|
* At process exit time, we must decrement the reference count of each
|
||||||
|
* segment we have attached; this list makes it possible to find all such
|
||||||
|
* segments.
|
||||||
|
*
|
||||||
|
* This list should always be empty in the postmaster. We could probably
|
||||||
|
* allow the postmaster to map dynamic shared memory segments before it
|
||||||
|
* begins to start child processes, provided that each process adjusted
|
||||||
|
* the reference counts for those segments in the control segment at
|
||||||
|
* startup time, but there's no obvious need for such a facility, which
|
||||||
|
* would also be complex to handle in the EXEC_BACKEND case. Once the
|
||||||
|
* postmaster has begun spawning children, there's an additional problem:
|
||||||
|
* each new mapping would require an update to the control segment,
|
||||||
|
* which requires locking, in which the postmaster must not be involved.
|
||||||
|
*/
|
||||||
|
static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Control segment information.
|
||||||
|
*
|
||||||
|
* Unlike ordinary shared memory segments, the control segment is not
|
||||||
|
* reference counted; instead, it lasts for the postmaster's entire
|
||||||
|
* life cycle. For simplicity, it doesn't have a dsm_segment object either.
|
||||||
|
*/
|
||||||
|
static dsm_handle dsm_control_handle;
|
||||||
|
static dsm_control_header *dsm_control;
|
||||||
|
static uint64 dsm_control_mapped_size = 0;
|
||||||
|
static void *dsm_control_impl_private = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start up the dynamic shared memory system.
|
||||||
|
*
|
||||||
|
* This is called just once during each cluster lifetime, at postmaster
|
||||||
|
* startup time.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
dsm_postmaster_startup(void)
|
||||||
|
{
|
||||||
|
void *dsm_control_address = NULL;
|
||||||
|
uint32 maxitems;
|
||||||
|
uint64 segsize;
|
||||||
|
|
||||||
|
Assert(!IsUnderPostmaster);
|
||||||
|
|
||||||
|
/* If dynamic shared memory is disabled, there's nothing to do. */
|
||||||
|
if (dynamic_shared_memory_type == DSM_IMPL_NONE)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for, and remove, shared memory segments left behind by a dead
|
||||||
|
* postmaster. This isn't necessary on Windows, which always removes them
|
||||||
|
* when the last reference is gone.
|
||||||
|
*/
|
||||||
|
switch (dynamic_shared_memory_type)
|
||||||
|
{
|
||||||
|
case DSM_IMPL_POSIX:
|
||||||
|
case DSM_IMPL_SYSV:
|
||||||
|
dsm_cleanup_using_control_segment();
|
||||||
|
break;
|
||||||
|
case DSM_IMPL_MMAP:
|
||||||
|
dsm_cleanup_for_mmap();
|
||||||
|
break;
|
||||||
|
case DSM_IMPL_WINDOWS:
|
||||||
|
/* Nothing to do. */
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
elog(ERROR, "unknown dynamic shared memory type: %d",
|
||||||
|
dynamic_shared_memory_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Determine size for new control segment. */
|
||||||
|
maxitems = PG_DYNSHMEM_FIXED_SLOTS
|
||||||
|
+ PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
|
||||||
|
elog(DEBUG2, "dynamic shared memory system will support %u segments",
|
||||||
|
maxitems);
|
||||||
|
segsize = dsm_control_bytes_needed(maxitems);
|
||||||
|
|
||||||
|
/* Loop until we find an unused identifier for the new control segment. */
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
Assert(dsm_control_address == NULL);
|
||||||
|
Assert(dsm_control_mapped_size == 0);
|
||||||
|
dsm_control_handle = random();
|
||||||
|
if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
|
||||||
|
&dsm_control_impl_private, &dsm_control_address,
|
||||||
|
&dsm_control_mapped_size, ERROR))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
dsm_control = dsm_control_address;
|
||||||
|
on_shmem_exit(dsm_postmaster_shutdown, 0);
|
||||||
|
elog(DEBUG2, "created dynamic shared memory control segment %u ("
|
||||||
|
UINT64_FORMAT " bytes)", dsm_control_handle, segsize);
|
||||||
|
dsm_write_state_file(dsm_control_handle);
|
||||||
|
|
||||||
|
/* Initialize control segment. */
|
||||||
|
dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
|
||||||
|
dsm_control->nitems = 0;
|
||||||
|
dsm_control->maxitems = maxitems;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine whether the control segment from the previous postmaster
|
||||||
|
* invocation still exists. If so, remove the dynamic shared memory
|
||||||
|
* segments to which it refers, and then the control segment itself.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
dsm_cleanup_using_control_segment(void)
|
||||||
|
{
|
||||||
|
void *mapped_address = NULL;
|
||||||
|
void *junk_mapped_address = NULL;
|
||||||
|
void *impl_private = NULL;
|
||||||
|
void *junk_impl_private = NULL;
|
||||||
|
uint64 mapped_size = 0;
|
||||||
|
uint64 junk_mapped_size = 0;
|
||||||
|
uint32 nitems;
|
||||||
|
uint32 i;
|
||||||
|
dsm_handle old_control_handle;
|
||||||
|
dsm_control_header *old_control;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read the state file. If it doesn't exist or is empty, there's nothing
|
||||||
|
* more to do.
|
||||||
|
*/
|
||||||
|
if (!dsm_read_state_file(&old_control_handle))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to attach the segment. If this fails, it probably just means that
|
||||||
|
* the operating system has been rebooted and the segment no longer exists,
|
||||||
|
* or an unrelated process has used the same shm ID. So just fall out
|
||||||
|
* quietly.
|
||||||
|
*/
|
||||||
|
if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
|
||||||
|
&mapped_address, &mapped_size, DEBUG1))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We've managed to reattach it, but the contents might not be sane.
|
||||||
|
* If they aren't, we disregard the segment after all.
|
||||||
|
*/
|
||||||
|
old_control = (dsm_control_header *) mapped_address;
|
||||||
|
if (!dsm_control_segment_sane(old_control, mapped_size))
|
||||||
|
{
|
||||||
|
dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
|
||||||
|
&mapped_address, &mapped_size, LOG);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* OK, the control segment looks basically valid, so we can use
|
||||||
|
* it to get a list of segments that need to be removed.
|
||||||
|
*/
|
||||||
|
nitems = old_control->nitems;
|
||||||
|
for (i = 0; i < nitems; ++i)
|
||||||
|
{
|
||||||
|
dsm_handle handle;
|
||||||
|
uint32 refcnt;
|
||||||
|
|
||||||
|
/* If the reference count is 0, the slot is actually unused. */
|
||||||
|
refcnt = old_control->item[i].refcnt;
|
||||||
|
if (refcnt == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Log debugging information. */
|
||||||
|
handle = old_control->item[i].handle;
|
||||||
|
elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
|
||||||
|
handle, refcnt);
|
||||||
|
|
||||||
|
/* Destroy the referenced segment. */
|
||||||
|
dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
|
||||||
|
&junk_mapped_address, &junk_mapped_size, LOG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destroy the old control segment, too. */
|
||||||
|
elog(DEBUG2,
|
||||||
|
"cleaning up dynamic shared memory control segment with ID %u",
|
||||||
|
old_control_handle);
|
||||||
|
dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
|
||||||
|
&mapped_address, &mapped_size, LOG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When we're using the mmap shared memory implementation, "shared memory"
|
||||||
|
* segments might even manage to survive an operating system reboot.
|
||||||
|
* But there's no guarantee as to exactly what will survive: some segments
|
||||||
|
* may survive, and others may not, and the contents of some may be out
|
||||||
|
* of date. In particular, the control segment may be out of date, so we
|
||||||
|
* can't rely on it to figure out what to remove. However, since we know
|
||||||
|
* what directory contains the files we used as shared memory, we can simply
|
||||||
|
* scan the directory and blow everything away that shouldn't be there.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
dsm_cleanup_for_mmap(void)
|
||||||
|
{
|
||||||
|
DIR *dir;
|
||||||
|
struct dirent *dent;
|
||||||
|
|
||||||
|
/* Open the directory; can't use AllocateDir in postmaster. */
|
||||||
|
if ((dir = opendir(PG_DYNSHMEM_DIR)) == NULL)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not open directory \"%s\": %m",
|
||||||
|
PG_DYNSHMEM_DIR)));
|
||||||
|
|
||||||
|
/* Scan for something with a name of the correct format. */
|
||||||
|
while ((dent = readdir(dir)) != NULL)
|
||||||
|
{
|
||||||
|
if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
|
||||||
|
strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
|
||||||
|
{
|
||||||
|
char buf[MAXPGPATH];
|
||||||
|
snprintf(buf, MAXPGPATH, PG_DYNSHMEM_DIR "/%s", dent->d_name);
|
||||||
|
|
||||||
|
elog(DEBUG2, "removing file \"%s\"", buf);
|
||||||
|
|
||||||
|
/* We found a matching file; so remove it. */
|
||||||
|
if (unlink(buf) != 0)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
save_errno = errno;
|
||||||
|
closedir(dir);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not remove file \"%s\": %m", buf)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Cleanup complete. */
|
||||||
|
closedir(dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read and parse the state file.
|
||||||
|
*
|
||||||
|
* If the state file is empty or the contents are garbled, it probably means
|
||||||
|
* that the operating system rebooted before the data written by the previous
|
||||||
|
* postmaster made it to disk. In that case, we can just ignore it; any shared
|
||||||
|
* memory from before the reboot should be gone anyway.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
dsm_read_state_file(dsm_handle *h)
|
||||||
|
{
|
||||||
|
int statefd;
|
||||||
|
char statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
|
||||||
|
int nbytes = 0;
|
||||||
|
char *endptr,
|
||||||
|
*s;
|
||||||
|
dsm_handle handle;
|
||||||
|
|
||||||
|
/* Read the state file to get the ID of the old control segment. */
|
||||||
|
statefd = open(PG_DYNSHMEM_STATE_FILE, O_RDONLY | PG_BINARY, 0);
|
||||||
|
if (statefd < 0)
|
||||||
|
{
|
||||||
|
if (errno == ENOENT)
|
||||||
|
return false;
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not open file \"%s\": %m",
|
||||||
|
PG_DYNSHMEM_STATE_FILE)));
|
||||||
|
}
|
||||||
|
nbytes = read(statefd, statebuf, PG_DYNSHMEM_STATE_BUFSIZ - 1);
|
||||||
|
if (nbytes < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not read file \"%s\": %m",
|
||||||
|
PG_DYNSHMEM_STATE_FILE)));
|
||||||
|
/* make sure buffer is NUL terminated */
|
||||||
|
statebuf[nbytes] = '\0';
|
||||||
|
close(statefd);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We expect to find the handle of the old control segment here,
|
||||||
|
* on a line by itself.
|
||||||
|
*/
|
||||||
|
handle = strtoul(statebuf, &endptr, 10);
|
||||||
|
for (s = endptr; *s == ' ' || *s == '\t'; ++s)
|
||||||
|
;
|
||||||
|
if (*s != '\n' && *s != '\0')
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Looks good. */
|
||||||
|
*h = handle;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Write our control segment handle to the state file, so that if the
|
||||||
|
* postmaster is killed without running its on_shmem_exit hooks, the
|
||||||
|
* next postmaster can clean things up after restart.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
dsm_write_state_file(dsm_handle h)
|
||||||
|
{
|
||||||
|
int statefd;
|
||||||
|
char statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
|
||||||
|
int nbytes;
|
||||||
|
|
||||||
|
/* Create or truncate the file. */
|
||||||
|
statefd = open(PG_DYNSHMEM_NEW_STATE_FILE,
|
||||||
|
O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, 0600);
|
||||||
|
if (statefd < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not create file \"%s\": %m",
|
||||||
|
PG_DYNSHMEM_NEW_STATE_FILE)));
|
||||||
|
|
||||||
|
/* Write contents. */
|
||||||
|
snprintf(statebuf, PG_DYNSHMEM_STATE_BUFSIZ, "%u\n", dsm_control_handle);
|
||||||
|
nbytes = strlen(statebuf);
|
||||||
|
if (write(statefd, statebuf, nbytes) != nbytes)
|
||||||
|
{
|
||||||
|
if (errno == 0)
|
||||||
|
errno = ENOSPC; /* if no error signalled, assume no space */
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not write file \"%s\": %m",
|
||||||
|
PG_DYNSHMEM_NEW_STATE_FILE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Close file. */
|
||||||
|
close(statefd);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Atomically rename file into place, so that no one ever sees a partially
|
||||||
|
* written state file.
|
||||||
|
*/
|
||||||
|
if (rename(PG_DYNSHMEM_NEW_STATE_FILE, PG_DYNSHMEM_STATE_FILE) < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not rename file \"%s\": %m",
|
||||||
|
PG_DYNSHMEM_NEW_STATE_FILE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At shutdown time, we iterate over the control segment and remove all
|
||||||
|
* remaining dynamic shared memory segments. We avoid throwing errors here;
|
||||||
|
* the postmaster is shutting down either way, and this is just non-critical
|
||||||
|
* resource cleanup.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
dsm_postmaster_shutdown(int code, Datum arg)
|
||||||
|
{
|
||||||
|
uint32 nitems;
|
||||||
|
uint32 i;
|
||||||
|
void *dsm_control_address;
|
||||||
|
void *junk_mapped_address = NULL;
|
||||||
|
void *junk_impl_private = NULL;
|
||||||
|
uint64 junk_mapped_size = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If some other backend exited uncleanly, it might have corrupted the
|
||||||
|
* control segment while it was dying. In that case, we warn and ignore
|
||||||
|
* the contents of the control segment. This may end up leaving behind
|
||||||
|
* stray shared memory segments, but there's not much we can do about
|
||||||
|
* that if the metadata is gone.
|
||||||
|
*/
|
||||||
|
nitems = dsm_control->nitems;
|
||||||
|
if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
|
||||||
|
{
|
||||||
|
ereport(LOG,
|
||||||
|
(errmsg("dynamic shared memory control segment is corrupt")));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remove any remaining segments. */
|
||||||
|
for (i = 0; i < nitems; ++i)
|
||||||
|
{
|
||||||
|
dsm_handle handle;
|
||||||
|
|
||||||
|
/* If the reference count is 0, the slot is actually unused. */
|
||||||
|
if (dsm_control->item[i].refcnt == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Log debugging information. */
|
||||||
|
handle = dsm_control->item[i].handle;
|
||||||
|
elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
|
||||||
|
handle);
|
||||||
|
|
||||||
|
/* Destroy the segment. */
|
||||||
|
dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
|
||||||
|
&junk_mapped_address, &junk_mapped_size, LOG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remove the control segment itself. */
|
||||||
|
elog(DEBUG2,
|
||||||
|
"cleaning up dynamic shared memory control segment with ID %u",
|
||||||
|
dsm_control_handle);
|
||||||
|
dsm_control_address = dsm_control;
|
||||||
|
dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
|
||||||
|
&dsm_control_impl_private, &dsm_control_address,
|
||||||
|
&dsm_control_mapped_size, LOG);
|
||||||
|
dsm_control = dsm_control_address;
|
||||||
|
|
||||||
|
/* And, finally, remove the state file. */
|
||||||
|
if (unlink(PG_DYNSHMEM_STATE_FILE) < 0)
|
||||||
|
ereport(LOG,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not unlink file \"%s\": %m",
|
||||||
|
PG_DYNSHMEM_STATE_FILE)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
|
||||||
|
* we must reread the state file and map the control segment; in other cases,
|
||||||
|
* we'll have inherited the postmaster's mapping and global variables.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
dsm_backend_startup(void)
|
||||||
|
{
|
||||||
|
/* If dynamic shared memory is disabled, reject this. */
|
||||||
|
if (dynamic_shared_memory_type == DSM_IMPL_NONE)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||||
|
errmsg("dynamic shared memory is disabled"),
|
||||||
|
errhint("Set dynamic_shared_memory_type to a value other than \"none\".")));
|
||||||
|
|
||||||
|
#ifdef EXEC_BACKEND
|
||||||
|
{
|
||||||
|
dsm_handle control_handle;
|
||||||
|
void *control_address = NULL;
|
||||||
|
|
||||||
|
/* Read the control segment information from the state file. */
|
||||||
|
if (!dsm_read_state_file(&control_handle))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INTERNAL_ERROR),
|
||||||
|
errmsg("could not parse dynamic shared memory state file")));
|
||||||
|
|
||||||
|
/* Attach control segment. */
|
||||||
|
dsm_impl_op(DSM_OP_ATTACH, control_handle, 0,
|
||||||
|
&dsm_control_impl_private, &control_address,
|
||||||
|
&dsm_control_mapped_size, ERROR);
|
||||||
|
dsm_control_handle = control_handle;
|
||||||
|
dsm_control = control_address;
|
||||||
|
/* If control segment doesn't look sane, something is badly wrong. */
|
||||||
|
if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
|
||||||
|
{
|
||||||
|
dsm_impl_op(DSM_OP_DETACH, control_handle, 0,
|
||||||
|
&dsm_control_impl_private, &control_address,
|
||||||
|
&dsm_control_mapped_size, WARNING);
|
||||||
|
ereport(FATAL,
|
||||||
|
(errcode(ERRCODE_INTERNAL_ERROR),
|
||||||
|
errmsg("dynamic shared memory control segment is not valid")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Arrange to detach segments on exit. */
|
||||||
|
on_shmem_exit(dsm_backend_shutdown, 0);
|
||||||
|
|
||||||
|
dsm_init_done = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a new dynamic shared memory segment.
|
||||||
|
*/
|
||||||
|
dsm_segment *
|
||||||
|
dsm_create(uint64 size)
|
||||||
|
{
|
||||||
|
dsm_segment *seg = dsm_create_descriptor();
|
||||||
|
uint32 i;
|
||||||
|
uint32 nitems;
|
||||||
|
|
||||||
|
/* Unsafe in postmaster (and pointless in a stand-alone backend). */
|
||||||
|
Assert(IsUnderPostmaster);
|
||||||
|
|
||||||
|
if (!dsm_init_done)
|
||||||
|
dsm_backend_startup();
|
||||||
|
|
||||||
|
/* Loop until we find an unused segment identifier. */
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
|
||||||
|
seg->handle = random();
|
||||||
|
if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
|
||||||
|
&seg->mapped_address, &seg->mapped_size, ERROR))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lock the control segment so we can register the new segment. */
|
||||||
|
LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
|
/* Search the control segment for an unused slot. */
|
||||||
|
nitems = dsm_control->nitems;
|
||||||
|
for (i = 0; i < nitems; ++i)
|
||||||
|
{
|
||||||
|
if (dsm_control->item[i].refcnt == 0)
|
||||||
|
{
|
||||||
|
dsm_control->item[i].handle = seg->handle;
|
||||||
|
/* refcnt of 1 triggers destruction, so start at 2 */
|
||||||
|
dsm_control->item[i].refcnt = 2;
|
||||||
|
seg->control_slot = i;
|
||||||
|
LWLockRelease(DynamicSharedMemoryControlLock);
|
||||||
|
return seg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify that we can support an additional mapping. */
|
||||||
|
if (nitems >= dsm_control->maxitems)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
|
||||||
|
errmsg("too many dynamic shared memory segments")));
|
||||||
|
|
||||||
|
/* Enter the handle into a new array slot. */
|
||||||
|
dsm_control->item[nitems].handle = seg->handle;
|
||||||
|
/* refcnt of 1 triggers destruction, so start at 2 */
|
||||||
|
dsm_control->item[nitems].refcnt = 2;
|
||||||
|
seg->control_slot = nitems;
|
||||||
|
dsm_control->nitems++;
|
||||||
|
LWLockRelease(DynamicSharedMemoryControlLock);
|
||||||
|
|
||||||
|
return seg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Attach a dynamic shared memory segment.
|
||||||
|
*
|
||||||
|
* See comments for dsm_segment_handle() for an explanation of how this
|
||||||
|
* is intended to be used.
|
||||||
|
*
|
||||||
|
* This function will return NULL if the segment isn't known to the system.
|
||||||
|
* This can happen if we're asked to attach the segment, but then everyone
|
||||||
|
* else detaches it (causing it to be destroyed) before we get around to
|
||||||
|
* attaching it.
|
||||||
|
*/
|
||||||
|
dsm_segment *
|
||||||
|
dsm_attach(dsm_handle h)
|
||||||
|
{
|
||||||
|
dsm_segment *seg;
|
||||||
|
dlist_iter iter;
|
||||||
|
uint32 i;
|
||||||
|
uint32 nitems;
|
||||||
|
|
||||||
|
/* Unsafe in postmaster (and pointless in a stand-alone backend). */
|
||||||
|
Assert(IsUnderPostmaster);
|
||||||
|
|
||||||
|
if (!dsm_init_done)
|
||||||
|
dsm_backend_startup();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since this is just a debugging cross-check, we could leave it out
|
||||||
|
* altogether, or include it only in assert-enabled builds. But since
|
||||||
|
* the list of attached segments should normally be very short, let's
|
||||||
|
* include it always for right now.
|
||||||
|
*
|
||||||
|
* If you're hitting this error, you probably want to attempt to
|
||||||
|
* find an existing mapping via dsm_find_mapping() before calling
|
||||||
|
* dsm_attach() to create a new one.
|
||||||
|
*/
|
||||||
|
dlist_foreach(iter, &dsm_segment_list)
|
||||||
|
{
|
||||||
|
seg = dlist_container(dsm_segment, node, iter.cur);
|
||||||
|
if (seg->handle == h)
|
||||||
|
elog(ERROR, "can't attach the same segment more than once");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create a new segment descriptor. */
|
||||||
|
seg = dsm_create_descriptor();
|
||||||
|
seg->handle = h;
|
||||||
|
|
||||||
|
/* Bump reference count for this segment in shared memory. */
|
||||||
|
LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
|
||||||
|
nitems = dsm_control->nitems;
|
||||||
|
for (i = 0; i < nitems; ++i)
|
||||||
|
{
|
||||||
|
/* If the reference count is 0, the slot is actually unused. */
|
||||||
|
if (dsm_control->item[i].refcnt == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the reference count is 1, the slot is still in use, but the
|
||||||
|
* segment is in the process of going away. Treat that as if we
|
||||||
|
* didn't find a match.
|
||||||
|
*/
|
||||||
|
if (dsm_control->item[i].refcnt == 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Otherwise, if the descriptor matches, we've found a match. */
|
||||||
|
if (dsm_control->item[i].handle == seg->handle)
|
||||||
|
{
|
||||||
|
dsm_control->item[i].refcnt++;
|
||||||
|
seg->control_slot = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LWLockRelease(DynamicSharedMemoryControlLock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we didn't find the handle we're looking for in the control
|
||||||
|
* segment, it probably means that everyone else who had it mapped,
|
||||||
|
* including the original creator, died before we got to this point.
|
||||||
|
* It's up to the caller to decide what to do about that.
|
||||||
|
*/
|
||||||
|
if (seg->control_slot == INVALID_CONTROL_SLOT)
|
||||||
|
{
|
||||||
|
dsm_detach(seg);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Here's where we actually try to map the segment. */
|
||||||
|
dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
|
||||||
|
&seg->mapped_address, &seg->mapped_size, ERROR);
|
||||||
|
|
||||||
|
return seg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At backend shutdown time, detach any segments that are still attached.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
dsm_backend_shutdown(int code, Datum arg)
|
||||||
|
{
|
||||||
|
while (!dlist_is_empty(&dsm_segment_list))
|
||||||
|
{
|
||||||
|
dsm_segment *seg;
|
||||||
|
|
||||||
|
seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
|
||||||
|
dsm_detach(seg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Resize an existing shared memory segment.
|
||||||
|
*
|
||||||
|
* This may cause the shared memory segment to be remapped at a different
|
||||||
|
* address. For the caller's convenience, we return the mapped address.
|
||||||
|
*/
|
||||||
|
void *
|
||||||
|
dsm_resize(dsm_segment *seg, uint64 size)
|
||||||
|
{
|
||||||
|
Assert(seg->control_slot != INVALID_CONTROL_SLOT);
|
||||||
|
dsm_impl_op(DSM_OP_RESIZE, seg->handle, size, &seg->impl_private,
|
||||||
|
&seg->mapped_address, &seg->mapped_size, ERROR);
|
||||||
|
return seg->mapped_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remap an existing shared memory segment.
|
||||||
|
*
|
||||||
|
* This is intended to be used when some other process has extended the
|
||||||
|
* mapping using dsm_resize(), but we've still only got the initial
|
||||||
|
* portion mapped. Since this might change the address at which the
|
||||||
|
* segment is mapped, we return the new mapped address.
|
||||||
|
*/
|
||||||
|
void *
|
||||||
|
dsm_remap(dsm_segment *seg)
|
||||||
|
{
|
||||||
|
dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
|
||||||
|
&seg->mapped_address, &seg->mapped_size, ERROR);
|
||||||
|
|
||||||
|
return seg->mapped_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Detach from a shared memory segment, destroying the segment if we
|
||||||
|
* remove the last reference.
|
||||||
|
*
|
||||||
|
* This function should never fail. It will often be invoked when aborting
|
||||||
|
* a transaction, and a further error won't serve any purpose. It's not a
|
||||||
|
* complete disaster if we fail to unmap or destroy the segment; it means a
|
||||||
|
* resource leak, but that doesn't necessarily preclude further operations.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
dsm_detach(dsm_segment *seg)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Try to remove the mapping, if one exists. Normally, there will be,
|
||||||
|
* but maybe not, if we failed partway through a create or attach
|
||||||
|
* operation. We remove the mapping before decrementing the reference
|
||||||
|
* count so that the process that sees a zero reference count can be
|
||||||
|
* certain that no remaining mappings exist. Even if this fails, we
|
||||||
|
* pretend that it works, because retrying is likely to fail in the
|
||||||
|
* same way.
|
||||||
|
*/
|
||||||
|
if (seg->mapped_address != NULL)
|
||||||
|
{
|
||||||
|
dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
|
||||||
|
&seg->mapped_address, &seg->mapped_size, WARNING);
|
||||||
|
seg->impl_private = NULL;
|
||||||
|
seg->mapped_address = NULL;
|
||||||
|
seg->mapped_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reduce reference count, if we previously increased it. */
|
||||||
|
if (seg->control_slot != INVALID_CONTROL_SLOT)
|
||||||
|
{
|
||||||
|
uint32 refcnt;
|
||||||
|
uint32 control_slot = seg->control_slot;
|
||||||
|
|
||||||
|
LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
|
||||||
|
Assert(dsm_control->item[control_slot].handle == seg->handle);
|
||||||
|
Assert(dsm_control->item[control_slot].refcnt > 1);
|
||||||
|
refcnt = --dsm_control->item[control_slot].refcnt;
|
||||||
|
seg->control_slot = INVALID_CONTROL_SLOT;
|
||||||
|
LWLockRelease(DynamicSharedMemoryControlLock);
|
||||||
|
|
||||||
|
/* If new reference count is 1, try to destroy the segment. */
|
||||||
|
if (refcnt == 1)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If we fail to destroy the segment here, or are killed before
|
||||||
|
* we finish doing so, the reference count will remain at 1, which
|
||||||
|
* will mean that nobody else can attach to the segment. At
|
||||||
|
* postmaster shutdown time, or when a new postmaster is started
|
||||||
|
* after a hard kill, another attempt will be made to remove the
|
||||||
|
* segment.
|
||||||
|
*
|
||||||
|
* The main case we're worried about here is being killed by
|
||||||
|
* a signal before we can finish removing the segment. In that
|
||||||
|
* case, it's important to be sure that the segment still gets
|
||||||
|
* removed. If we actually fail to remove the segment for some
|
||||||
|
* other reason, the postmaster may not have any better luck than
|
||||||
|
* we did. There's not much we can do about that, though.
|
||||||
|
*/
|
||||||
|
if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
|
||||||
|
&seg->mapped_address, &seg->mapped_size, WARNING))
|
||||||
|
{
|
||||||
|
LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
|
||||||
|
Assert(dsm_control->item[control_slot].handle == seg->handle);
|
||||||
|
Assert(dsm_control->item[control_slot].refcnt == 1);
|
||||||
|
dsm_control->item[control_slot].refcnt = 0;
|
||||||
|
LWLockRelease(DynamicSharedMemoryControlLock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Clean up our remaining backend-private data structures. */
|
||||||
|
if (seg->resowner != NULL)
|
||||||
|
ResourceOwnerForgetDSM(seg->resowner, seg);
|
||||||
|
dlist_delete(&seg->node);
|
||||||
|
pfree(seg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Keep a dynamic shared memory mapping until end of session.
|
||||||
|
*
|
||||||
|
* By default, mappings are owned by the current resource owner, which
|
||||||
|
* typically means they stick around for the duration of the current query
|
||||||
|
* only.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
dsm_keep_mapping(dsm_segment *seg)
|
||||||
|
{
|
||||||
|
if (seg->resowner != NULL)
|
||||||
|
{
|
||||||
|
ResourceOwnerForgetDSM(seg->resowner, seg);
|
||||||
|
seg->resowner = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find an existing mapping for a shared memory segment, if there is one.
|
||||||
|
*/
|
||||||
|
dsm_segment *
|
||||||
|
dsm_find_mapping(dsm_handle h)
|
||||||
|
{
|
||||||
|
dlist_iter iter;
|
||||||
|
dsm_segment *seg;
|
||||||
|
|
||||||
|
dlist_foreach(iter, &dsm_segment_list)
|
||||||
|
{
|
||||||
|
seg = dlist_container(dsm_segment, node, iter.cur);
|
||||||
|
if (seg->handle == h)
|
||||||
|
return seg;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the address at which a dynamic shared memory segment is mapped.
|
||||||
|
*/
|
||||||
|
void *
|
||||||
|
dsm_segment_address(dsm_segment *seg)
|
||||||
|
{
|
||||||
|
Assert(seg->mapped_address != NULL);
|
||||||
|
return seg->mapped_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the size of a mapping.
|
||||||
|
*/
|
||||||
|
uint64
|
||||||
|
dsm_segment_map_length(dsm_segment *seg)
|
||||||
|
{
|
||||||
|
Assert(seg->mapped_address != NULL);
|
||||||
|
return seg->mapped_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get a handle for a mapping.
|
||||||
|
*
|
||||||
|
* To establish communication via dynamic shared memory between two backends,
|
||||||
|
* one of them should first call dsm_create() to establish a new shared
|
||||||
|
* memory mapping. That process should then call dsm_segment_handle() to
|
||||||
|
* obtain a handle for the mapping, and pass that handle to the
|
||||||
|
* coordinating backend via some means (e.g. bgw_main_arg, or via the
|
||||||
|
* main shared memory segment). The recipient, once in position of the
|
||||||
|
* handle, should call dsm_attach().
|
||||||
|
*/
|
||||||
|
dsm_handle
|
||||||
|
dsm_segment_handle(dsm_segment *seg)
|
||||||
|
{
|
||||||
|
return seg->handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a segment descriptor.
|
||||||
|
*/
|
||||||
|
static dsm_segment *
|
||||||
|
dsm_create_descriptor(void)
|
||||||
|
{
|
||||||
|
dsm_segment *seg;
|
||||||
|
|
||||||
|
ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
|
||||||
|
|
||||||
|
seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
|
||||||
|
dlist_push_head(&dsm_segment_list, &seg->node);
|
||||||
|
|
||||||
|
/* seg->handle must be initialized by the caller */
|
||||||
|
seg->control_slot = INVALID_CONTROL_SLOT;
|
||||||
|
seg->impl_private = NULL;
|
||||||
|
seg->mapped_address = NULL;
|
||||||
|
seg->mapped_size = 0;
|
||||||
|
|
||||||
|
seg->resowner = CurrentResourceOwner;
|
||||||
|
ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
|
||||||
|
|
||||||
|
return seg;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sanity check a control segment.
|
||||||
|
*
|
||||||
|
* The goal here isn't to detect everything that could possibly be wrong with
|
||||||
|
* the control segment; there's not enough information for that. Rather, the
|
||||||
|
* goal is to make sure that someone can iterate over the items in the segment
|
||||||
|
* without overrunning the end of the mapping and crashing. We also check
|
||||||
|
* the magic number since, if that's messed up, this may not even be one of
|
||||||
|
* our segments at all.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
dsm_control_segment_sane(dsm_control_header *control, uint64 mapped_size)
|
||||||
|
{
|
||||||
|
if (mapped_size < offsetof(dsm_control_header, item))
|
||||||
|
return false; /* Mapped size too short to read header. */
|
||||||
|
if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
|
||||||
|
return false; /* Magic number doesn't match. */
|
||||||
|
if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
|
||||||
|
return false; /* Max item count won't fit in map. */
|
||||||
|
if (control->nitems > control->maxitems)
|
||||||
|
return false; /* Overfull. */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute the number of control-segment bytes needed to store a given
|
||||||
|
* number of items.
|
||||||
|
*/
|
||||||
|
static uint64
|
||||||
|
dsm_control_bytes_needed(uint32 nitems)
|
||||||
|
{
|
||||||
|
return offsetof(dsm_control_header, item)
|
||||||
|
+ sizeof(dsm_control_item) * (uint64) nitems;
|
||||||
|
}
|
990
src/backend/storage/ipc/dsm_impl.c
Normal file
990
src/backend/storage/ipc/dsm_impl.c
Normal file
@ -0,0 +1,990 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* dsm_impl.c
|
||||||
|
* manage dynamic shared memory segments
|
||||||
|
*
|
||||||
|
* This file provides low-level APIs for creating and destroying shared
|
||||||
|
* memory segments using several different possible techniques. We refer
|
||||||
|
* to these segments as dynamic because they can be created, altered, and
|
||||||
|
* destroyed at any point during the server life cycle. This is unlike
|
||||||
|
* the main shared memory segment, of which there is always exactly one
|
||||||
|
* and which is always mapped at a fixed address in every PostgreSQL
|
||||||
|
* background process.
|
||||||
|
*
|
||||||
|
* Because not all systems provide the same primitives in this area, nor
|
||||||
|
* do all primitives behave the same way on all systems, we provide
|
||||||
|
* several implementations of this facility. Many systems implement
|
||||||
|
* POSIX shared memory (shm_open etc.), which is well-suited to our needs
|
||||||
|
* in this area, with the exception that shared memory identifiers live
|
||||||
|
* in a flat system-wide namespace, raising the uncomfortable prospect of
|
||||||
|
* name collisions with other processes (including other copies of
|
||||||
|
* PostgreSQL) running on the same system. Some systems only support
|
||||||
|
* the older System V shared memory interface (shmget etc.) which is
|
||||||
|
* also usable; however, the default allocation limits are often quite
|
||||||
|
* small, and the namespace is even more restricted.
|
||||||
|
*
|
||||||
|
* We also provide an mmap-based shared memory implementation. This may
|
||||||
|
* be useful on systems that provide shared memory via a special-purpose
|
||||||
|
* filesystem; by opting for this implementation, the user can even
|
||||||
|
* control precisely where their shared memory segments are placed. It
|
||||||
|
* can also be used as a fallback for systems where shm_open and shmget
|
||||||
|
* are not available or can't be used for some reason. Of course,
|
||||||
|
* mapping a file residing on an actual spinning disk is a fairly poor
|
||||||
|
* approximation for shared memory because writeback may hurt performance
|
||||||
|
* substantially, but there should be few systems where we must make do
|
||||||
|
* with such poor tools.
|
||||||
|
*
|
||||||
|
* As ever, Windows requires its own implementation.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* src/backend/storage/ipc/dsm_impl.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#ifndef WIN32
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#endif
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#ifdef HAVE_SYS_IPC_H
|
||||||
|
#include <sys/ipc.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SYS_SHM_H
|
||||||
|
#include <sys/shm.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "portability/mem.h"
|
||||||
|
#include "storage/dsm_impl.h"
|
||||||
|
#include "storage/fd.h"
|
||||||
|
#include "utils/guc.h"
|
||||||
|
#include "utils/memutils.h"
|
||||||
|
|
||||||
|
#ifdef USE_DSM_POSIX
|
||||||
|
static bool dsm_impl_posix(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address,
|
||||||
|
uint64 *mapped_size, int elevel);
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_SYSV
|
||||||
|
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address,
|
||||||
|
uint64 *mapped_size, int elevel);
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_WINDOWS
|
||||||
|
static bool dsm_impl_windows(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address,
|
||||||
|
uint64 *mapped_size, int elevel);
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_MMAP
|
||||||
|
static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address,
|
||||||
|
uint64 *mapped_size, int elevel);
|
||||||
|
#endif
|
||||||
|
static int errcode_for_dynamic_shared_memory(void);
|
||||||
|
|
||||||
|
const struct config_enum_entry dynamic_shared_memory_options[] = {
|
||||||
|
#ifdef USE_DSM_POSIX
|
||||||
|
{ "posix", DSM_IMPL_POSIX, false},
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_SYSV
|
||||||
|
{ "sysv", DSM_IMPL_SYSV, false},
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_WINDOWS
|
||||||
|
{ "windows", DSM_IMPL_WINDOWS, false},
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_MMAP
|
||||||
|
{ "mmap", DSM_IMPL_MMAP, false},
|
||||||
|
#endif
|
||||||
|
{ "none", DSM_IMPL_NONE, false},
|
||||||
|
{NULL, 0, false}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Implementation selector. */
|
||||||
|
int dynamic_shared_memory_type;
|
||||||
|
|
||||||
|
/* Size of buffer to be used for zero-filling. */
|
||||||
|
#define ZBUFFER_SIZE 8192
|
||||||
|
|
||||||
|
/*------
|
||||||
|
* Perform a low-level shared memory operation in a platform-specific way,
|
||||||
|
* as dictated by the selected implementation. Each implementation is
|
||||||
|
* required to implement the following primitives.
|
||||||
|
*
|
||||||
|
* DSM_OP_CREATE. Create a segment whose size is the request_size and
|
||||||
|
* map it.
|
||||||
|
*
|
||||||
|
* DSM_OP_ATTACH. Map the segment, whose size must be the request_size.
|
||||||
|
* The segment may already be mapped; any existing mapping should be removed
|
||||||
|
* before creating a new one.
|
||||||
|
*
|
||||||
|
* DSM_OP_DETACH. Unmap the segment.
|
||||||
|
*
|
||||||
|
* DSM_OP_RESIZE. Resize the segment to the given request_size and
|
||||||
|
* remap the segment at that new size.
|
||||||
|
*
|
||||||
|
* DSM_OP_DESTROY. Unmap the segment, if it is mapped. Destroy the
|
||||||
|
* segment.
|
||||||
|
*
|
||||||
|
* Arguments:
|
||||||
|
* op: The operation to be performed.
|
||||||
|
* handle: The handle of an existing object, or for DSM_OP_CREATE, the
|
||||||
|
* a new handle the caller wants created.
|
||||||
|
* request_size: For DSM_OP_CREATE, the requested size. For DSM_OP_RESIZE,
|
||||||
|
* the new size. Otherwise, 0.
|
||||||
|
* impl_private: Private, implementation-specific data. Will be a pointer
|
||||||
|
* to NULL for the first operation on a shared memory segment within this
|
||||||
|
* backend; thereafter, it will point to the value to which it was set
|
||||||
|
* on the previous call.
|
||||||
|
* mapped_address: Pointer to start of current mapping; pointer to NULL
|
||||||
|
* if none. Updated with new mapping address.
|
||||||
|
* mapped_size: Pointer to size of current mapping; pointer to 0 if none.
|
||||||
|
* Updated with new mapped size.
|
||||||
|
* elevel: Level at which to log errors.
|
||||||
|
*
|
||||||
|
* Return value: true on success, false on failure. When false is returned,
|
||||||
|
* a message should first be logged at the specified elevel, except in the
|
||||||
|
* case where DSM_OP_CREATE experiences a name collision, which should
|
||||||
|
* silently return false.
|
||||||
|
*-----
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
dsm_impl_op(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address, uint64 *mapped_size,
|
||||||
|
int elevel)
|
||||||
|
{
|
||||||
|
Assert(op == DSM_OP_CREATE || op == DSM_OP_RESIZE || request_size == 0);
|
||||||
|
Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
|
||||||
|
(*mapped_address == NULL && *mapped_size == 0));
|
||||||
|
|
||||||
|
if (request_size > (size_t) -1)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
|
errmsg("requested shared memory size overflows size_t")));
|
||||||
|
|
||||||
|
switch (dynamic_shared_memory_type)
|
||||||
|
{
|
||||||
|
#ifdef USE_DSM_POSIX
|
||||||
|
case DSM_IMPL_POSIX:
|
||||||
|
return dsm_impl_posix(op, handle, request_size, impl_private,
|
||||||
|
mapped_address, mapped_size, elevel);
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_SYSV
|
||||||
|
case DSM_IMPL_SYSV:
|
||||||
|
return dsm_impl_sysv(op, handle, request_size, impl_private,
|
||||||
|
mapped_address, mapped_size, elevel);
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_WINDOWS
|
||||||
|
case DSM_IMPL_WINDOWS:
|
||||||
|
return dsm_impl_windows(op, handle, request_size, impl_private,
|
||||||
|
mapped_address, mapped_size, elevel);
|
||||||
|
#endif
|
||||||
|
#ifdef USE_DSM_MMAP
|
||||||
|
case DSM_IMPL_MMAP:
|
||||||
|
return dsm_impl_mmap(op, handle, request_size, impl_private,
|
||||||
|
mapped_address, mapped_size, elevel);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
elog(ERROR, "unexpected dynamic shared memory type: %d",
|
||||||
|
dynamic_shared_memory_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Does the current dynamic shared memory implementation support resizing
|
||||||
|
* segments? (The answer here could be platform-dependent in the future,
|
||||||
|
* since AIX allows shmctl(shmid, SHM_RESIZE, &buffer), though you apparently
|
||||||
|
* can't resize segments to anything larger than 256MB that way. For now,
|
||||||
|
* we keep it simple.)
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
dsm_impl_can_resize(void)
|
||||||
|
{
|
||||||
|
switch (dynamic_shared_memory_type)
|
||||||
|
{
|
||||||
|
case DSM_IMPL_NONE:
|
||||||
|
return false;
|
||||||
|
case DSM_IMPL_POSIX:
|
||||||
|
return true;
|
||||||
|
case DSM_IMPL_SYSV:
|
||||||
|
return false;
|
||||||
|
case DSM_IMPL_WINDOWS:
|
||||||
|
return false;
|
||||||
|
case DSM_IMPL_MMAP:
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
return false; /* should not happen */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USE_DSM_POSIX
|
||||||
|
/*
|
||||||
|
* Operating system primitives to support POSIX shared memory.
|
||||||
|
*
|
||||||
|
* POSIX shared memory segments are created and attached using shm_open()
|
||||||
|
* and shm_unlink(); other operations, such as sizing or mapping the
|
||||||
|
* segment, are performed as if the shared memory segments were files.
|
||||||
|
*
|
||||||
|
* Indeed, on some platforms, they may be implemented that way. While
|
||||||
|
* POSIX shared memory segments seem intended to exist in a flat namespace,
|
||||||
|
* some operating systems may implement them as files, even going so far
|
||||||
|
* to treat a request for /xyz as a request to create a file by that name
|
||||||
|
* in the root directory. Users of such broken platforms should select
|
||||||
|
* a different shared memory implementation.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
dsm_impl_posix(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address, uint64 *mapped_size,
|
||||||
|
int elevel)
|
||||||
|
{
|
||||||
|
char name[64];
|
||||||
|
int flags;
|
||||||
|
int fd;
|
||||||
|
char *address;
|
||||||
|
|
||||||
|
snprintf(name, 64, "/PostgreSQL.%u", handle);
|
||||||
|
|
||||||
|
/* Handle teardown cases. */
|
||||||
|
if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
|
||||||
|
{
|
||||||
|
if (*mapped_address != NULL
|
||||||
|
&& munmap(*mapped_address, *mapped_size) != 0)
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not unmap shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = NULL;
|
||||||
|
*mapped_size = 0;
|
||||||
|
if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not remove shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create new segment or open an existing one for attach or resize.
|
||||||
|
*
|
||||||
|
* Even though we're not going through fd.c, we should be safe against
|
||||||
|
* running out of file descriptors, because of NUM_RESERVED_FDS. We're
|
||||||
|
* only opening one extra descriptor here, and we'll close it before
|
||||||
|
* returning.
|
||||||
|
*/
|
||||||
|
flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
|
||||||
|
if ((fd = shm_open(name, flags, 0600)) == -1)
|
||||||
|
{
|
||||||
|
if (errno != EEXIST)
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not open shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're attaching the segment, determine the current size; if we are
|
||||||
|
* creating or resizing the segment, set the size to the requested value.
|
||||||
|
*/
|
||||||
|
if (op == DSM_OP_ATTACH)
|
||||||
|
{
|
||||||
|
struct stat st;
|
||||||
|
|
||||||
|
if (fstat(fd, &st) != 0)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
close(fd);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not stat shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
request_size = st.st_size;
|
||||||
|
}
|
||||||
|
else if (*mapped_size != request_size && ftruncate(fd, request_size))
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
close(fd);
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
shm_unlink(name);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not resize shared memory segment %s to " UINT64_FORMAT " bytes: %m",
|
||||||
|
name, request_size)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're reattaching or resizing, we must remove any existing mapping,
|
||||||
|
* unless we've already got the right thing mapped.
|
||||||
|
*/
|
||||||
|
if (*mapped_address != NULL)
|
||||||
|
{
|
||||||
|
if (*mapped_size == request_size)
|
||||||
|
return true;
|
||||||
|
if (munmap(*mapped_address, *mapped_size) != 0)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
close(fd);
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
shm_unlink(name);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not unmap shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = NULL;
|
||||||
|
*mapped_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Map it. */
|
||||||
|
address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
|
||||||
|
MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
|
||||||
|
if (address == MAP_FAILED)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
close(fd);
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
shm_unlink(name);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not map shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = address;
|
||||||
|
*mapped_size = request_size;
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_DSM_SYSV
|
||||||
|
/*
|
||||||
|
* Operating system primitives to support System V shared memory.
|
||||||
|
*
|
||||||
|
* System V shared memory segments are manipulated using shmget(), shmat(),
|
||||||
|
* shmdt(), and shmctl(). There's no portable way to resize such
|
||||||
|
* segments. As the default allocation limits for System V shared memory
|
||||||
|
* are usually quite low, the POSIX facilities may be preferable; but
|
||||||
|
* those are not supported everywhere.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
dsm_impl_sysv(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address, uint64 *mapped_size,
|
||||||
|
int elevel)
|
||||||
|
{
|
||||||
|
key_t key;
|
||||||
|
int ident;
|
||||||
|
char *address;
|
||||||
|
char name[64];
|
||||||
|
int *ident_cache;
|
||||||
|
|
||||||
|
/* Resize is not supported for System V shared memory. */
|
||||||
|
if (op == DSM_OP_RESIZE)
|
||||||
|
{
|
||||||
|
elog(elevel, "System V shared memory segments cannot be resized");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Since resize isn't supported, reattach is a no-op. */
|
||||||
|
if (op == DSM_OP_ATTACH && *mapped_address != NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* POSIX shared memory and mmap-based shared memory identify segments
|
||||||
|
* with names. To avoid needless error message variation, we use the
|
||||||
|
* handle as the name.
|
||||||
|
*/
|
||||||
|
snprintf(name, 64, "%u", handle);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The System V shared memory namespace is very restricted; names are
|
||||||
|
* of type key_t, which is expected to be some sort of integer data type,
|
||||||
|
* but not necessarily the same one as dsm_handle. Since we use
|
||||||
|
* dsm_handle to identify shared memory segments across processes, this
|
||||||
|
* might seem like a problem, but it's really not. If dsm_handle is
|
||||||
|
* bigger than key_t, the cast below might truncate away some bits from
|
||||||
|
* the handle the user-provided, but it'll truncate exactly the same bits
|
||||||
|
* away in exactly the same fashion every time we use that handle, which
|
||||||
|
* is all that really matters. Conversely, if dsm_handle is smaller than
|
||||||
|
* key_t, we won't use the full range of available key space, but that's
|
||||||
|
* no big deal either.
|
||||||
|
*
|
||||||
|
* We do make sure that the key isn't negative, because that might not
|
||||||
|
* be portable.
|
||||||
|
*/
|
||||||
|
key = (key_t) handle;
|
||||||
|
if (key < 1) /* avoid compiler warning if type is unsigned */
|
||||||
|
key = -key;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There's one special key, IPC_PRIVATE, which can't be used. If we end
|
||||||
|
* up with that value by chance during a create operation, just pretend
|
||||||
|
* it already exists, so that caller will retry. If we run into it
|
||||||
|
* anywhere else, the caller has passed a handle that doesn't correspond
|
||||||
|
* to anything we ever created, which should not happen.
|
||||||
|
*/
|
||||||
|
if (key == IPC_PRIVATE)
|
||||||
|
{
|
||||||
|
if (op != DSM_OP_CREATE)
|
||||||
|
elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
|
||||||
|
errno = EEXIST;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Before we can do anything with a shared memory segment, we have to
|
||||||
|
* map the shared memory key to a shared memory identifier using shmget().
|
||||||
|
* To avoid repeated lookups, we store the key using impl_private.
|
||||||
|
*/
|
||||||
|
if (*impl_private != NULL)
|
||||||
|
{
|
||||||
|
ident_cache = *impl_private;
|
||||||
|
ident = *ident_cache;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int flags = IPCProtection;
|
||||||
|
size_t segsize;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate the memory BEFORE acquiring the resource, so that we don't
|
||||||
|
* leak the resource if memory allocation fails.
|
||||||
|
*/
|
||||||
|
ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When using shmget to find an existing segment, we must pass the
|
||||||
|
* size as 0. Passing a non-zero size which is greater than the
|
||||||
|
* actual size will result in EINVAL.
|
||||||
|
*/
|
||||||
|
segsize = 0;
|
||||||
|
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
{
|
||||||
|
flags |= IPC_CREAT | IPC_EXCL;
|
||||||
|
segsize = request_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ident = shmget(key, segsize, flags)) == -1)
|
||||||
|
{
|
||||||
|
if (errno != EEXIST)
|
||||||
|
{
|
||||||
|
int save_errno = errno;
|
||||||
|
pfree(ident_cache);
|
||||||
|
errno = save_errno;
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not get shared memory segment: %m")));
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
*ident_cache = ident;
|
||||||
|
*impl_private = ident_cache;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle teardown cases. */
|
||||||
|
if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
|
||||||
|
{
|
||||||
|
pfree(ident_cache);
|
||||||
|
*impl_private = NULL;
|
||||||
|
if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not unmap shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = NULL;
|
||||||
|
*mapped_size = 0;
|
||||||
|
if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not remove shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we're attaching it, we must use IPC_STAT to determine the size. */
|
||||||
|
if (op == DSM_OP_ATTACH)
|
||||||
|
{
|
||||||
|
struct shmid_ds shm;
|
||||||
|
|
||||||
|
if (shmctl(ident, IPC_STAT, &shm) != 0)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
shmctl(ident, IPC_RMID, NULL);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not stat shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
request_size = shm.shm_segsz;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Map it. */
|
||||||
|
address = shmat(ident, NULL, PG_SHMAT_FLAGS);
|
||||||
|
if (address == (void *) -1)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
shmctl(ident, IPC_RMID, NULL);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not map shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = address;
|
||||||
|
*mapped_size = request_size;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_DSM_WINDOWS
|
||||||
|
/*
|
||||||
|
* Operating system primitives to support Windows shared memory.
|
||||||
|
*
|
||||||
|
* Windows shared memory implementation is done using file mapping
|
||||||
|
* which can be backed by either physical file or system paging file.
|
||||||
|
* Current implementation uses system paging file as other effects
|
||||||
|
* like performance are not clear for physical file and it is used in similar
|
||||||
|
* way for main shared memory in windows.
|
||||||
|
*
|
||||||
|
* A memory mapping object is a kernel object - they always get deleted when
|
||||||
|
* the last reference to them goes away, either explicitly via a CloseHandle or
|
||||||
|
* when the process containing the reference exits.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
dsm_impl_windows(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address,
|
||||||
|
uint64 *mapped_size, int elevel)
|
||||||
|
{
|
||||||
|
char *address;
|
||||||
|
HANDLE hmap;
|
||||||
|
char name[64];
|
||||||
|
MEMORY_BASIC_INFORMATION info;
|
||||||
|
|
||||||
|
/* Resize is not supported for Windows shared memory. */
|
||||||
|
if (op == DSM_OP_RESIZE)
|
||||||
|
{
|
||||||
|
elog(elevel, "Windows shared memory segments cannot be resized");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Since resize isn't supported, reattach is a no-op. */
|
||||||
|
if (op == DSM_OP_ATTACH && *mapped_address != NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Storing the shared memory segment in the Global\ namespace, can
|
||||||
|
* allow any process running in any session to access that file
|
||||||
|
* mapping object provided that the caller has the required access rights.
|
||||||
|
* But to avoid issues faced in main shared memory, we are using the naming
|
||||||
|
* convention similar to main shared memory. We can change here once
|
||||||
|
* issue mentioned in GetSharedMemName is resolved.
|
||||||
|
*/
|
||||||
|
snprintf(name, 64, "Global/PostgreSQL.%u", handle);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle teardown cases. Since Windows automatically destroys the object
|
||||||
|
* when no references reamin, we can treat it the same as detach.
|
||||||
|
*/
|
||||||
|
if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
|
||||||
|
{
|
||||||
|
if (*mapped_address != NULL
|
||||||
|
&& UnmapViewOfFile(*mapped_address) == 0)
|
||||||
|
{
|
||||||
|
_dosmaperr(GetLastError());
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not unmap shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (*impl_private != NULL
|
||||||
|
&& CloseHandle(*impl_private) == 0)
|
||||||
|
{
|
||||||
|
_dosmaperr(GetLastError());
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not remove shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
*impl_private = NULL;
|
||||||
|
*mapped_address = NULL;
|
||||||
|
*mapped_size = 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create new segment or open an existing one for attach. */
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
{
|
||||||
|
DWORD size_high = (DWORD) (request_size >> 32);
|
||||||
|
DWORD size_low = (DWORD) request_size;
|
||||||
|
hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
|
||||||
|
NULL, /* Default security attrs */
|
||||||
|
PAGE_READWRITE, /* Memory is read/write */
|
||||||
|
size_high, /* Upper 32 bits of size */
|
||||||
|
size_low, /* Lower 32 bits of size */
|
||||||
|
name);
|
||||||
|
_dosmaperr(GetLastError());
|
||||||
|
if (errno == EEXIST)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* On Windows, when the segment already exists, a handle for the
|
||||||
|
* existing segment is returned. We must close it before
|
||||||
|
* returning. We don't do _dosmaperr here, so errno won't be
|
||||||
|
* modified.
|
||||||
|
*/
|
||||||
|
CloseHandle(hmap);
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not open shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
|
||||||
|
FALSE, /* do not inherit the name */
|
||||||
|
name); /* name of mapping object */
|
||||||
|
_dosmaperr(GetLastError());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!hmap)
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not open shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Map it. */
|
||||||
|
address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
|
||||||
|
0, 0, 0);
|
||||||
|
if (!address)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
_dosmaperr(GetLastError());
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
CloseHandle(hmap);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not map shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* VirtualQuery gives size in page_size units, which is 4K for Windows.
|
||||||
|
* We need size only when we are attaching, but it's better to get the
|
||||||
|
* size when creating new segment to keep size consistent both for
|
||||||
|
* DSM_OP_CREATE and DSM_OP_ATTACH.
|
||||||
|
*/
|
||||||
|
if (VirtualQuery(address, &info, sizeof(info)) == 0)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
_dosmaperr(GetLastError());
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
UnmapViewOfFile(address);
|
||||||
|
CloseHandle(hmap);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not stat shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
*mapped_address = address;
|
||||||
|
*mapped_size = info.RegionSize;
|
||||||
|
*impl_private = hmap;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_DSM_MMAP
|
||||||
|
/*
|
||||||
|
* Operating system primitives to support mmap-based shared memory.
|
||||||
|
*
|
||||||
|
* Calling this "shared memory" is somewhat of a misnomer, because what
|
||||||
|
* we're really doing is creating a bunch of files and mapping them into
|
||||||
|
* our address space. The operating system may feel obliged to
|
||||||
|
* synchronize the contents to disk even if nothing is being paged out,
|
||||||
|
* which will not serve us well. The user can relocate the pg_dynshmem
|
||||||
|
* directory to a ramdisk to avoid this problem, if available.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
dsm_impl_mmap(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address, uint64 *mapped_size,
|
||||||
|
int elevel)
|
||||||
|
{
|
||||||
|
char name[64];
|
||||||
|
int flags;
|
||||||
|
int fd;
|
||||||
|
char *address;
|
||||||
|
|
||||||
|
snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
|
||||||
|
handle);
|
||||||
|
|
||||||
|
/* Handle teardown cases. */
|
||||||
|
if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
|
||||||
|
{
|
||||||
|
if (*mapped_address != NULL
|
||||||
|
&& munmap(*mapped_address, *mapped_size) != 0)
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not unmap shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = NULL;
|
||||||
|
*mapped_size = 0;
|
||||||
|
if (op == DSM_OP_DESTROY && unlink(name) != 0)
|
||||||
|
{
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not remove shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create new segment or open an existing one for attach or resize. */
|
||||||
|
flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
|
||||||
|
if ((fd = OpenTransientFile(name, flags, 0600)) == -1)
|
||||||
|
{
|
||||||
|
if (errno != EEXIST)
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not open shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're attaching the segment, determine the current size; if we are
|
||||||
|
* creating or resizing the segment, set the size to the requested value.
|
||||||
|
*/
|
||||||
|
if (op == DSM_OP_ATTACH)
|
||||||
|
{
|
||||||
|
struct stat st;
|
||||||
|
|
||||||
|
if (fstat(fd, &st) != 0)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
CloseTransientFile(fd);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not stat shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
request_size = st.st_size;
|
||||||
|
}
|
||||||
|
else if (*mapped_size > request_size && ftruncate(fd, request_size))
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
close(fd);
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
shm_unlink(name);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not resize shared memory segment %s to " UINT64_FORMAT " bytes: %m",
|
||||||
|
name, request_size)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else if (*mapped_size < request_size)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Allocate a buffer full of zeros.
|
||||||
|
*
|
||||||
|
* Note: palloc zbuffer, instead of just using a local char array,
|
||||||
|
* to ensure it is reasonably well-aligned; this may save a few
|
||||||
|
* cycles transferring data to the kernel.
|
||||||
|
*/
|
||||||
|
char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
|
||||||
|
uint32 remaining = request_size;
|
||||||
|
bool success = true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Zero-fill the file. We have to do this the hard way to ensure
|
||||||
|
* that all the file space has really been allocated, so that we
|
||||||
|
* don't later seg fault when accessing the memory mapping. This
|
||||||
|
* is pretty pessimal.
|
||||||
|
*/
|
||||||
|
while (success && remaining > 0)
|
||||||
|
{
|
||||||
|
uint64 goal = remaining;
|
||||||
|
|
||||||
|
if (goal > ZBUFFER_SIZE)
|
||||||
|
goal = ZBUFFER_SIZE;
|
||||||
|
if (write(fd, zbuffer, goal) == goal)
|
||||||
|
remaining -= goal;
|
||||||
|
else
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!success)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
CloseTransientFile(fd);
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
unlink(name);
|
||||||
|
errno = save_errno ? save_errno : ENOSPC;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not resize shared memory segment %s to " UINT64_FORMAT " bytes: %m",
|
||||||
|
name, request_size)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we're reattaching or resizing, we must remove any existing mapping,
|
||||||
|
* unless we've already got the right thing mapped.
|
||||||
|
*/
|
||||||
|
if (*mapped_address != NULL)
|
||||||
|
{
|
||||||
|
if (*mapped_size == request_size)
|
||||||
|
return true;
|
||||||
|
if (munmap(*mapped_address, *mapped_size) != 0)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
CloseTransientFile(fd);
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
unlink(name);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not unmap shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = NULL;
|
||||||
|
*mapped_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Map it. */
|
||||||
|
address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
|
||||||
|
MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
|
||||||
|
if (address == MAP_FAILED)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/* Back out what's already been done. */
|
||||||
|
save_errno = errno;
|
||||||
|
CloseTransientFile(fd);
|
||||||
|
if (op == DSM_OP_CREATE)
|
||||||
|
unlink(name);
|
||||||
|
errno = save_errno;
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode_for_dynamic_shared_memory(),
|
||||||
|
errmsg("could not map shared memory segment \"%s\": %m",
|
||||||
|
name)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*mapped_address = address;
|
||||||
|
*mapped_size = request_size;
|
||||||
|
CloseTransientFile(fd);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int
|
||||||
|
errcode_for_dynamic_shared_memory()
|
||||||
|
{
|
||||||
|
if (errno == EFBIG || errno == ENOMEM)
|
||||||
|
return errcode(ERRCODE_OUT_OF_MEMORY);
|
||||||
|
else
|
||||||
|
return errcode_for_file_access();
|
||||||
|
}
|
@ -30,6 +30,7 @@
|
|||||||
#include "replication/walreceiver.h"
|
#include "replication/walreceiver.h"
|
||||||
#include "replication/walsender.h"
|
#include "replication/walsender.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
|
#include "storage/dsm.h"
|
||||||
#include "storage/ipc.h"
|
#include "storage/ipc.h"
|
||||||
#include "storage/pg_shmem.h"
|
#include "storage/pg_shmem.h"
|
||||||
#include "storage/pmsignal.h"
|
#include "storage/pmsignal.h"
|
||||||
@ -249,6 +250,10 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
|
|||||||
ShmemBackendArrayAllocation();
|
ShmemBackendArrayAllocation();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Initialize dynamic shared memory facilities. */
|
||||||
|
if (!IsUnderPostmaster)
|
||||||
|
dsm_postmaster_startup();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now give loadable modules a chance to set up their shmem allocations
|
* Now give loadable modules a chance to set up their shmem allocations
|
||||||
*/
|
*/
|
||||||
|
@ -61,6 +61,7 @@
|
|||||||
#include "replication/walreceiver.h"
|
#include "replication/walreceiver.h"
|
||||||
#include "replication/walsender.h"
|
#include "replication/walsender.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
|
#include "storage/dsm_impl.h"
|
||||||
#include "storage/standby.h"
|
#include "storage/standby.h"
|
||||||
#include "storage/fd.h"
|
#include "storage/fd.h"
|
||||||
#include "storage/proc.h"
|
#include "storage/proc.h"
|
||||||
@ -385,6 +386,7 @@ static const struct config_enum_entry synchronous_commit_options[] = {
|
|||||||
*/
|
*/
|
||||||
extern const struct config_enum_entry wal_level_options[];
|
extern const struct config_enum_entry wal_level_options[];
|
||||||
extern const struct config_enum_entry sync_method_options[];
|
extern const struct config_enum_entry sync_method_options[];
|
||||||
|
extern const struct config_enum_entry dynamic_shared_memory_options[];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GUC option variables that are exported from this module
|
* GUC option variables that are exported from this module
|
||||||
@ -3335,6 +3337,16 @@ static struct config_enum ConfigureNamesEnum[] =
|
|||||||
NULL, NULL, NULL
|
NULL, NULL, NULL
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
{"dynamic_shared_memory_type", PGC_POSTMASTER, RESOURCES_MEM,
|
||||||
|
gettext_noop("Selects the dynamic shared memory implementation used."),
|
||||||
|
NULL
|
||||||
|
},
|
||||||
|
&dynamic_shared_memory_type,
|
||||||
|
DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE, dynamic_shared_memory_options,
|
||||||
|
NULL, NULL, NULL
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
{"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
|
{"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
|
||||||
gettext_noop("Selects the method used for forcing WAL updates to disk."),
|
gettext_noop("Selects the method used for forcing WAL updates to disk."),
|
||||||
|
@ -123,6 +123,13 @@
|
|||||||
#work_mem = 1MB # min 64kB
|
#work_mem = 1MB # min 64kB
|
||||||
#maintenance_work_mem = 16MB # min 1MB
|
#maintenance_work_mem = 16MB # min 1MB
|
||||||
#max_stack_depth = 2MB # min 100kB
|
#max_stack_depth = 2MB # min 100kB
|
||||||
|
#dynamic_shared_memory_type = posix # the default is the first option
|
||||||
|
# supported by the operating system:
|
||||||
|
# posix
|
||||||
|
# sysv
|
||||||
|
# windows
|
||||||
|
# mmap
|
||||||
|
# use none to disable dynamic shared memory
|
||||||
|
|
||||||
# - Disk -
|
# - Disk -
|
||||||
|
|
||||||
|
@ -98,6 +98,11 @@ typedef struct ResourceOwnerData
|
|||||||
int nfiles; /* number of owned temporary files */
|
int nfiles; /* number of owned temporary files */
|
||||||
File *files; /* dynamically allocated array */
|
File *files; /* dynamically allocated array */
|
||||||
int maxfiles; /* currently allocated array size */
|
int maxfiles; /* currently allocated array size */
|
||||||
|
|
||||||
|
/* We have built-in support for remembering dynamic shmem segments */
|
||||||
|
int ndsms; /* number of owned shmem segments */
|
||||||
|
dsm_segment **dsms; /* dynamically allocated array */
|
||||||
|
int maxdsms; /* currently allocated array size */
|
||||||
} ResourceOwnerData;
|
} ResourceOwnerData;
|
||||||
|
|
||||||
|
|
||||||
@ -132,6 +137,7 @@ static void PrintPlanCacheLeakWarning(CachedPlan *plan);
|
|||||||
static void PrintTupleDescLeakWarning(TupleDesc tupdesc);
|
static void PrintTupleDescLeakWarning(TupleDesc tupdesc);
|
||||||
static void PrintSnapshotLeakWarning(Snapshot snapshot);
|
static void PrintSnapshotLeakWarning(Snapshot snapshot);
|
||||||
static void PrintFileLeakWarning(File file);
|
static void PrintFileLeakWarning(File file);
|
||||||
|
static void PrintDSMLeakWarning(dsm_segment *seg);
|
||||||
|
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
@ -271,6 +277,21 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
|
|||||||
PrintRelCacheLeakWarning(owner->relrefs[owner->nrelrefs - 1]);
|
PrintRelCacheLeakWarning(owner->relrefs[owner->nrelrefs - 1]);
|
||||||
RelationClose(owner->relrefs[owner->nrelrefs - 1]);
|
RelationClose(owner->relrefs[owner->nrelrefs - 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Release dynamic shared memory segments. Note that dsm_detach()
|
||||||
|
* will remove the segment from my list, so I just have to iterate
|
||||||
|
* until there are none.
|
||||||
|
*
|
||||||
|
* As in the preceding cases, warn if there are leftover at commit
|
||||||
|
* time.
|
||||||
|
*/
|
||||||
|
while (owner->ndsms > 0)
|
||||||
|
{
|
||||||
|
if (isCommit)
|
||||||
|
PrintDSMLeakWarning(owner->dsms[owner->ndsms - 1]);
|
||||||
|
dsm_detach(owner->dsms[owner->ndsms - 1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (phase == RESOURCE_RELEASE_LOCKS)
|
else if (phase == RESOURCE_RELEASE_LOCKS)
|
||||||
{
|
{
|
||||||
@ -402,6 +423,7 @@ ResourceOwnerDelete(ResourceOwner owner)
|
|||||||
Assert(owner->ncatrefs == 0);
|
Assert(owner->ncatrefs == 0);
|
||||||
Assert(owner->ncatlistrefs == 0);
|
Assert(owner->ncatlistrefs == 0);
|
||||||
Assert(owner->nrelrefs == 0);
|
Assert(owner->nrelrefs == 0);
|
||||||
|
Assert(owner->ndsms == 0);
|
||||||
Assert(owner->nplanrefs == 0);
|
Assert(owner->nplanrefs == 0);
|
||||||
Assert(owner->ntupdescs == 0);
|
Assert(owner->ntupdescs == 0);
|
||||||
Assert(owner->nsnapshots == 0);
|
Assert(owner->nsnapshots == 0);
|
||||||
@ -438,6 +460,8 @@ ResourceOwnerDelete(ResourceOwner owner)
|
|||||||
pfree(owner->snapshots);
|
pfree(owner->snapshots);
|
||||||
if (owner->files)
|
if (owner->files)
|
||||||
pfree(owner->files);
|
pfree(owner->files);
|
||||||
|
if (owner->dsms)
|
||||||
|
pfree(owner->dsms);
|
||||||
|
|
||||||
pfree(owner);
|
pfree(owner);
|
||||||
}
|
}
|
||||||
@ -1230,3 +1254,88 @@ PrintFileLeakWarning(File file)
|
|||||||
"temporary file leak: File %d still referenced",
|
"temporary file leak: File %d still referenced",
|
||||||
file);
|
file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure there is room for at least one more entry in a ResourceOwner's
|
||||||
|
* dynamic shmem segment reference array.
|
||||||
|
*
|
||||||
|
* This is separate from actually inserting an entry because if we run out
|
||||||
|
* of memory, it's critical to do so *before* acquiring the resource.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ResourceOwnerEnlargeDSMs(ResourceOwner owner)
|
||||||
|
{
|
||||||
|
int newmax;
|
||||||
|
|
||||||
|
if (owner->ndsms < owner->maxdsms)
|
||||||
|
return; /* nothing to do */
|
||||||
|
|
||||||
|
if (owner->dsms == NULL)
|
||||||
|
{
|
||||||
|
newmax = 16;
|
||||||
|
owner->dsms = (dsm_segment **)
|
||||||
|
MemoryContextAlloc(TopMemoryContext,
|
||||||
|
newmax * sizeof(dsm_segment *));
|
||||||
|
owner->maxdsms = newmax;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
newmax = owner->maxdsms * 2;
|
||||||
|
owner->dsms = (dsm_segment **)
|
||||||
|
repalloc(owner->dsms, newmax * sizeof(dsm_segment *));
|
||||||
|
owner->maxdsms = newmax;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remember that a dynamic shmem segment is owned by a ResourceOwner
|
||||||
|
*
|
||||||
|
* Caller must have previously done ResourceOwnerEnlargeDSMs()
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
|
||||||
|
{
|
||||||
|
Assert(owner->ndsms < owner->maxdsms);
|
||||||
|
owner->dsms[owner->ndsms] = seg;
|
||||||
|
owner->ndsms++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Forget that a dynamic shmem segment is owned by a ResourceOwner
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
|
||||||
|
{
|
||||||
|
dsm_segment **dsms = owner->dsms;
|
||||||
|
int ns1 = owner->ndsms - 1;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = ns1; i >= 0; i--)
|
||||||
|
{
|
||||||
|
if (dsms[i] == seg)
|
||||||
|
{
|
||||||
|
while (i < ns1)
|
||||||
|
{
|
||||||
|
dsms[i] = dsms[i + 1];
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
owner->ndsms = ns1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elog(ERROR,
|
||||||
|
"dynamic shared memory segment %u is not owned by resource owner %s",
|
||||||
|
dsm_segment_handle(seg), owner->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Debugging subroutine
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
PrintDSMLeakWarning(dsm_segment *seg)
|
||||||
|
{
|
||||||
|
elog(WARNING,
|
||||||
|
"dynamic shared memory leak: segment %u still referenced",
|
||||||
|
dsm_segment_handle(seg));
|
||||||
|
}
|
||||||
|
@ -182,6 +182,7 @@ const char *subdirs[] = {
|
|||||||
"pg_xlog",
|
"pg_xlog",
|
||||||
"pg_xlog/archive_status",
|
"pg_xlog/archive_status",
|
||||||
"pg_clog",
|
"pg_clog",
|
||||||
|
"pg_dynshmem",
|
||||||
"pg_notify",
|
"pg_notify",
|
||||||
"pg_serial",
|
"pg_serial",
|
||||||
"pg_snapshots",
|
"pg_snapshots",
|
||||||
|
@ -424,6 +424,9 @@
|
|||||||
/* Define to 1 if you have the `setsid' function. */
|
/* Define to 1 if you have the `setsid' function. */
|
||||||
#undef HAVE_SETSID
|
#undef HAVE_SETSID
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `shm_open' function. */
|
||||||
|
#undef HAVE_SHM_OPEN
|
||||||
|
|
||||||
/* Define to 1 if you have the `sigprocmask' function. */
|
/* Define to 1 if you have the `sigprocmask' function. */
|
||||||
#undef HAVE_SIGPROCMASK
|
#undef HAVE_SIGPROCMASK
|
||||||
|
|
||||||
|
40
src/include/portability/mem.h
Normal file
40
src/include/portability/mem.h
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* mem.h
|
||||||
|
* portability definitions for various memory operations
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001-2013, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* src/include/portability/mem.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef MEM_H
|
||||||
|
#define MEM_H
|
||||||
|
|
||||||
|
#define IPCProtection (0600) /* access/modify by user only */
|
||||||
|
|
||||||
|
#ifdef SHM_SHARE_MMU /* use intimate shared memory on Solaris */
|
||||||
|
#define PG_SHMAT_FLAGS SHM_SHARE_MMU
|
||||||
|
#else
|
||||||
|
#define PG_SHMAT_FLAGS 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
|
||||||
|
#ifndef MAP_ANONYMOUS
|
||||||
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
|
||||||
|
#ifndef MAP_HASSEMAPHORE
|
||||||
|
#define MAP_HASSEMAPHORE 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
|
||||||
|
|
||||||
|
/* Some really old systems don't define MAP_FAILED. */
|
||||||
|
#ifndef MAP_FAILED
|
||||||
|
#define MAP_FAILED ((void *) -1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* MEM_H */
|
39
src/include/storage/dsm.h
Normal file
39
src/include/storage/dsm.h
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* dsm.h
|
||||||
|
* manage dynamic shared memory segments
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/storage/dsm.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef DSM_H
|
||||||
|
#define DSM_H
|
||||||
|
|
||||||
|
#include "storage/dsm_impl.h"
|
||||||
|
|
||||||
|
typedef struct dsm_segment dsm_segment;
|
||||||
|
|
||||||
|
/* Initialization function. */
|
||||||
|
extern void dsm_postmaster_startup(void);
|
||||||
|
|
||||||
|
/* Functions that create, update, or remove mappings. */
|
||||||
|
extern dsm_segment *dsm_create(uint64 size);
|
||||||
|
extern dsm_segment *dsm_attach(dsm_handle h);
|
||||||
|
extern void *dsm_resize(dsm_segment *seg, uint64 size);
|
||||||
|
extern void *dsm_remap(dsm_segment *seg);
|
||||||
|
extern void dsm_detach(dsm_segment *seg);
|
||||||
|
|
||||||
|
/* Resource management functions. */
|
||||||
|
extern void dsm_keep_mapping(dsm_segment *seg);
|
||||||
|
extern dsm_segment *dsm_find_mapping(dsm_handle h);
|
||||||
|
|
||||||
|
/* Informational functions. */
|
||||||
|
extern void *dsm_segment_address(dsm_segment *seg);
|
||||||
|
extern uint64 dsm_segment_map_length(dsm_segment *seg);
|
||||||
|
extern dsm_handle dsm_segment_handle(dsm_segment *seg);
|
||||||
|
|
||||||
|
#endif /* DSM_H */
|
75
src/include/storage/dsm_impl.h
Normal file
75
src/include/storage/dsm_impl.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* dsm_impl.h
|
||||||
|
* low-level dynamic shared memory primitives
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/storage/dsm_impl.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef DSM_IMPL_H
|
||||||
|
#define DSM_IMPL_H
|
||||||
|
|
||||||
|
/* Dynamic shared memory implementations. */
|
||||||
|
#define DSM_IMPL_NONE 0
|
||||||
|
#define DSM_IMPL_POSIX 1
|
||||||
|
#define DSM_IMPL_SYSV 2
|
||||||
|
#define DSM_IMPL_WINDOWS 3
|
||||||
|
#define DSM_IMPL_MMAP 4
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine which dynamic shared memory implementations will be supported
|
||||||
|
* on this platform, and which one will be the default.
*
* On WIN32 only the Windows implementation is enabled, and it is the
* default.  Everywhere else the System V and mmap implementations are
* always enabled; the POSIX implementation is enabled, and becomes the
* default, when HAVE_SHM_OPEN is defined.  If POSIX did not claim the
* default, System V does.
|
||||||
|
*/
|
||||||
|
#ifdef WIN32
|
||||||
|
#define USE_DSM_WINDOWS
|
||||||
|
#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_WINDOWS
|
||||||
|
#else /* !WIN32 */
|
||||||
|
#ifdef HAVE_SHM_OPEN
|
||||||
|
#define USE_DSM_POSIX
|
||||||
|
#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_POSIX
|
||||||
|
#endif /* HAVE_SHM_OPEN */
|
||||||
|
#define USE_DSM_SYSV
|
||||||
|
#ifndef DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE /* POSIX did not set a default */
|
||||||
|
#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE DSM_IMPL_SYSV
|
||||||
|
#endif /* !DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE */
|
||||||
|
#define USE_DSM_MMAP
|
||||||
|
#endif /* WIN32 */
|
||||||
|
|
||||||
|
/* GUC. */
|
||||||
|
extern int dynamic_shared_memory_type;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Directory for on-disk state.
|
||||||
|
*
|
||||||
|
* This is used by all implementations for crash recovery and by the mmap
|
||||||
|
* implementation for storage.
|
||||||
|
*/
|
||||||
|
#define PG_DYNSHMEM_DIR "pg_dynshmem"
|
||||||
|
#define PG_DYNSHMEM_MMAP_FILE_PREFIX "mmap."
|
||||||
|
|
||||||
|
/* A "name" for a dynamic shared memory segment. */
|
||||||
|
typedef uint32 dsm_handle;
|
||||||
|
|
||||||
|
/* All the shared-memory operations we know about. */
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
DSM_OP_CREATE,
|
||||||
|
DSM_OP_ATTACH,
|
||||||
|
DSM_OP_DETACH,
|
||||||
|
DSM_OP_RESIZE,
|
||||||
|
DSM_OP_DESTROY
|
||||||
|
} dsm_op;
|
||||||
|
|
||||||
|
/* Create, attach to, detach from, resize, or destroy a segment. */
|
||||||
|
extern bool dsm_impl_op(dsm_op op, dsm_handle handle, uint64 request_size,
|
||||||
|
void **impl_private, void **mapped_address, uint64 *mapped_size,
|
||||||
|
int elevel);
|
||||||
|
|
||||||
|
/* Some implementations cannot resize segments. Can this one? */
|
||||||
|
extern bool dsm_impl_can_resize(void);
|
||||||
|
|
||||||
|
#endif /* DSM_IMPL_H */
|
@ -80,6 +80,7 @@ typedef enum LWLockId
|
|||||||
OldSerXidLock,
|
OldSerXidLock,
|
||||||
SyncRepLock,
|
SyncRepLock,
|
||||||
BackgroundWorkerLock,
|
BackgroundWorkerLock,
|
||||||
|
DynamicSharedMemoryControlLock,
|
||||||
/* Individual lock IDs end here */
|
/* Individual lock IDs end here */
|
||||||
FirstBufMappingLock,
|
FirstBufMappingLock,
|
||||||
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
|
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#ifndef RESOWNER_PRIVATE_H
|
#ifndef RESOWNER_PRIVATE_H
|
||||||
#define RESOWNER_PRIVATE_H
|
#define RESOWNER_PRIVATE_H
|
||||||
|
|
||||||
|
#include "storage/dsm.h"
|
||||||
#include "storage/fd.h"
|
#include "storage/fd.h"
|
||||||
#include "storage/lock.h"
|
#include "storage/lock.h"
|
||||||
#include "utils/catcache.h"
|
#include "utils/catcache.h"
|
||||||
@ -80,4 +81,11 @@ extern void ResourceOwnerRememberFile(ResourceOwner owner,
|
|||||||
extern void ResourceOwnerForgetFile(ResourceOwner owner,
|
extern void ResourceOwnerForgetFile(ResourceOwner owner,
|
||||||
File file);
|
File file);
|
||||||
|
|
||||||
|
/* support for dynamic shared memory management */
|
||||||
|
extern void ResourceOwnerEnlargeDSMs(ResourceOwner owner);
|
||||||
|
extern void ResourceOwnerRememberDSM(ResourceOwner owner,
|
||||||
|
dsm_segment *);
|
||||||
|
extern void ResourceOwnerForgetDSM(ResourceOwner owner,
|
||||||
|
dsm_segment *);
|
||||||
|
|
||||||
#endif /* RESOWNER_PRIVATE_H */
|
#endif /* RESOWNER_PRIVATE_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user