Add huge_page_size setting for use on Linux.
This allows the huge page size to be set explicitly. The default is 0, meaning it will use the system default, as before. Author: Odin Ugedal <odin@ugedal.com> Discussion: https://postgr.es/m/20200608154639.20254-1-odin%40ugedal.com
This commit is contained in:
parent
d66b23b032
commit
d2bddc2500
@ -1582,6 +1582,33 @@ include_dir 'conf.d'
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="guc-huge-page-size" xreflabel="huge_page_size">
|
||||
<term><varname>huge_page_size</varname> (<type>integer</type>)
|
||||
<indexterm>
|
||||
<primary><varname>huge_page_size</varname> configuration parameter</primary>
|
||||
</indexterm>
|
||||
</term>
|
||||
<listitem>
|
||||
<para>
|
||||
Controls the size of huge pages, when they are enabled with
|
||||
<xref linkend="guc-huge-pages"/>.
|
||||
The default is zero (<literal>0</literal>).
|
||||
When set to <literal>0</literal>, the default huge page size on the
|
||||
system will be used.
|
||||
</para>
|
||||
<para>
|
||||
Some commonly available page sizes on modern 64-bit server architectures include:
|
||||
<literal>2MB</literal> and <literal>1GB</literal> (Intel and AMD), <literal>16MB</literal> and
|
||||
<literal>16GB</literal> (IBM POWER), and <literal>64kB</literal>, <literal>2MB</literal>,
|
||||
<literal>32MB</literal> and <literal>1GB</literal> (ARM). For more information
|
||||
about usage and support, see <xref linkend="linux-huge-pages"/>.
|
||||
</para>
|
||||
<para>
|
||||
Non-default settings are currently supported only on Linux.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="guc-temp-buffers" xreflabel="temp_buffers">
|
||||
<term><varname>temp_buffers</varname> (<type>integer</type>)
|
||||
<indexterm>
|
||||
|
@ -1391,13 +1391,14 @@ export PG_OOM_ADJUST_VALUE=0
|
||||
using large values of <xref linkend="guc-shared-buffers"/>. To use this
|
||||
feature in <productname>PostgreSQL</productname> you need a kernel
|
||||
with <varname>CONFIG_HUGETLBFS=y</varname> and
|
||||
<varname>CONFIG_HUGETLB_PAGE=y</varname>. You will also have to adjust
|
||||
the kernel setting <varname>vm.nr_hugepages</varname>. To estimate the
|
||||
number of huge pages needed, start <productname>PostgreSQL</productname>
|
||||
without huge pages enabled and check the
|
||||
postmaster's anonymous shared memory segment size, as well as the system's
|
||||
huge page size, using the <filename>/proc</filename> file system. This might
|
||||
look like:
|
||||
<varname>CONFIG_HUGETLB_PAGE=y</varname>. You will also have to configure
|
||||
the operating system to provide enough huge pages of the desired size.
|
||||
To estimate the number of huge pages needed, start
|
||||
<productname>PostgreSQL</productname> without huge pages enabled and check
|
||||
the postmaster's anonymous shared memory segment size, as well as the
|
||||
system's default and supported huge page sizes, using the
|
||||
<filename>/proc</filename> and <filename>/sys</filename> file systems.
|
||||
This might look like:
|
||||
<programlisting>
|
||||
$ <userinput>head -1 $PGDATA/postmaster.pid</userinput>
|
||||
4170
|
||||
@ -1405,27 +1406,40 @@ $ <userinput>pmap 4170 | awk '/rw-s/ && /zero/ {print $2}'</userinput>
|
||||
6490428K
|
||||
$ <userinput>grep ^Hugepagesize /proc/meminfo</userinput>
|
||||
Hugepagesize: 2048 kB
|
||||
$ <userinput>ls /sys/kernel/mm/hugepages</userinput>
|
||||
hugepages-1048576kB hugepages-2048kB
|
||||
</programlisting>
|
||||
|
||||
In this example the default is 2MB, but you can also explicitly request
|
||||
either 2MB or 1GB with <xref linkend="guc-huge-page-size"/>.
|
||||
|
||||
Assuming <literal>2MB</literal> huge pages,
|
||||
<literal>6490428</literal> / <literal>2048</literal> gives approximately
|
||||
<literal>3169.154</literal>, so in this example we need at
|
||||
least <literal>3170</literal> huge pages, which we can set with:
|
||||
least <literal>3170</literal> huge pages. A larger setting would be
|
||||
appropriate if other programs on the machine also need huge pages.
|
||||
We can set this with:
|
||||
<programlisting>
|
||||
$ <userinput>sysctl -w vm.nr_hugepages=3170</userinput>
|
||||
# <userinput>sysctl -w vm.nr_hugepages=3170</userinput>
|
||||
</programlisting>
|
||||
A larger setting would be appropriate if other programs on the machine
|
||||
also need huge pages. Don't forget to add this setting
|
||||
to <filename>/etc/sysctl.conf</filename> so that it will be reapplied
|
||||
after reboots.
|
||||
Don't forget to add this setting to <filename>/etc/sysctl.conf</filename>
|
||||
so that it is reapplied after reboots. For non-default huge page sizes,
|
||||
we can instead use:
|
||||
<programlisting>
|
||||
# <userinput>echo 3170 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages</userinput>
|
||||
</programlisting>
|
||||
It is also possible to provide these settings at boot time using
|
||||
kernel parameters such as <literal>hugepagesz=2M hugepages=3170</literal>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Sometimes the kernel is not able to allocate the desired number of huge
|
||||
pages immediately, so it might be necessary to repeat the command or to
|
||||
reboot. (Immediately after a reboot, most of the machine's memory
|
||||
should be available to convert into huge pages.) To verify the huge
|
||||
page allocation situation, use:
|
||||
pages immediately due to fragmentation, so it might be necessary
|
||||
to repeat the command or to reboot. (Immediately after a reboot, most of
|
||||
the machine's memory should be available to convert into huge pages.)
|
||||
To verify the huge page allocation situation for a given size, use:
|
||||
<programlisting>
|
||||
$ <userinput>grep Huge /proc/meminfo</userinput>
|
||||
$ <userinput>cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages</userinput>
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
@ -1438,8 +1452,9 @@ $ <userinput>grep Huge /proc/meminfo</userinput>
|
||||
|
||||
<para>
|
||||
The default behavior for huge pages in
|
||||
<productname>PostgreSQL</productname> is to use them when possible and
|
||||
to fall back to normal pages when failing. To enforce the use of huge
|
||||
<productname>PostgreSQL</productname> is to use them when possible, with
|
||||
the system's default huge page size, and
|
||||
to fall back to normal pages on failure. To enforce the use of huge
|
||||
pages, you can set <xref linkend="guc-huge-pages"/>
|
||||
to <literal>on</literal> in <filename>postgresql.conf</filename>.
|
||||
Note that with this setting <productname>PostgreSQL</productname> will fail to
|
||||
|
@ -32,6 +32,7 @@
|
||||
#endif
|
||||
|
||||
#include "miscadmin.h"
|
||||
#include "port/pg_bitutils.h"
|
||||
#include "portability/mem.h"
|
||||
#include "storage/dsm.h"
|
||||
#include "storage/fd.h"
|
||||
@ -448,7 +449,7 @@ PGSharedMemoryAttach(IpcMemoryId shmId,
|
||||
#ifdef MAP_HUGETLB
|
||||
|
||||
/*
|
||||
* Identify the huge page size to use.
|
||||
* Identify the huge page size to use, and compute the related mmap flags.
|
||||
*
|
||||
* Some Linux kernel versions have a bug causing mmap() to fail on requests
|
||||
* that are not a multiple of the hugepage size. Versions without that bug
|
||||
@ -464,25 +465,13 @@ PGSharedMemoryAttach(IpcMemoryId shmId,
|
||||
* hugepage sizes, we might want to think about more invasive strategies,
|
||||
* such as increasing shared_buffers to absorb the extra space.
|
||||
*
|
||||
* Returns the (real or assumed) page size into *hugepagesize,
|
||||
* Returns the (real, assumed or config provided) page size into *hugepagesize,
|
||||
* and the hugepage-related mmap flags to use into *mmap_flags.
|
||||
*
|
||||
* Currently *mmap_flags is always just MAP_HUGETLB. Someday, on systems
|
||||
* that support it, we might OR in additional bits to specify a particular
|
||||
* non-default huge page size.
|
||||
*/
|
||||
static void
|
||||
GetHugePageSize(Size *hugepagesize, int *mmap_flags)
|
||||
{
|
||||
/*
|
||||
* If we fail to find out the system's default huge page size, assume it
|
||||
* is 2MB. This will work fine when the actual size is less. If it's
|
||||
* more, we might get mmap() or munmap() failures due to unaligned
|
||||
* requests; but at this writing, there are no reports of any non-Linux
|
||||
* systems being picky about that.
|
||||
*/
|
||||
*hugepagesize = 2 * 1024 * 1024;
|
||||
*mmap_flags = MAP_HUGETLB;
|
||||
Size default_hugepagesize = 0;
|
||||
|
||||
/*
|
||||
* System-dependent code to find out the default huge page size.
|
||||
@ -491,6 +480,7 @@ GetHugePageSize(Size *hugepagesize, int *mmap_flags)
|
||||
* nnnn kB". Ignore any failures, falling back to the preset default.
|
||||
*/
|
||||
#ifdef __linux__
|
||||
|
||||
{
|
||||
FILE *fp = AllocateFile("/proc/meminfo", "r");
|
||||
char buf[128];
|
||||
@ -505,7 +495,7 @@ GetHugePageSize(Size *hugepagesize, int *mmap_flags)
|
||||
{
|
||||
if (ch == 'k')
|
||||
{
|
||||
*hugepagesize = sz * (Size) 1024;
|
||||
default_hugepagesize = sz * (Size) 1024;
|
||||
break;
|
||||
}
|
||||
/* We could accept other units besides kB, if needed */
|
||||
@ -515,6 +505,44 @@ GetHugePageSize(Size *hugepagesize, int *mmap_flags)
|
||||
}
|
||||
}
|
||||
#endif /* __linux__ */
|
||||
|
||||
if (huge_page_size != 0)
|
||||
{
|
||||
/* If huge page size is requested explicitly, use that. */
|
||||
*hugepagesize = (Size) huge_page_size * 1024;
|
||||
}
|
||||
else if (default_hugepagesize != 0)
|
||||
{
|
||||
/* Otherwise use the system default, if we have it. */
|
||||
*hugepagesize = default_hugepagesize;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* If we fail to find out the system's default huge page size, and no
|
||||
* huge page size is requested explicitly, assume it is 2MB. This will
|
||||
* work fine when the actual size is less. If it's more, we might get
|
||||
* mmap() or munmap() failures due to unaligned requests; but at this
|
||||
* writing, there are no reports of any non-Linux systems being picky
|
||||
* about that.
|
||||
*/
|
||||
*hugepagesize = 2 * 1024 * 1024;
|
||||
}
|
||||
|
||||
*mmap_flags = MAP_HUGETLB;
|
||||
|
||||
/*
|
||||
* On recent enough Linux, also include the explicit page size, if
|
||||
* necessary.
|
||||
*/
|
||||
#if defined(MAP_HUGE_MASK) && defined(MAP_HUGE_SHIFT)
|
||||
if (*hugepagesize != default_hugepagesize)
|
||||
{
|
||||
int shift = pg_ceil_log2_64(*hugepagesize);
|
||||
|
||||
*mmap_flags |= (shift & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* MAP_HUGETLB */
|
||||
@ -583,7 +611,7 @@ CreateAnonymousSegment(Size *size)
|
||||
"(currently %zu bytes), reduce PostgreSQL's shared "
|
||||
"memory usage, perhaps by reducing shared_buffers or "
|
||||
"max_connections.",
|
||||
*size) : 0));
|
||||
allocsize) : 0));
|
||||
}
|
||||
|
||||
*size = allocsize;
|
||||
|
@ -20,11 +20,14 @@
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#ifndef WIN32
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
#include <sys/stat.h>
|
||||
#ifdef HAVE_SYSLOG
|
||||
#include <syslog.h>
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
|
||||
#include "access/commit_ts.h"
|
||||
#include "access/gin.h"
|
||||
@ -198,6 +201,7 @@ static bool check_max_wal_senders(int *newval, void **extra, GucSource source);
|
||||
static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource source);
|
||||
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
|
||||
static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
|
||||
static bool check_huge_page_size(int *newval, void **extra, GucSource source);
|
||||
static void assign_pgstat_temp_directory(const char *newval, void *extra);
|
||||
static bool check_application_name(char **newval, void **extra, GucSource source);
|
||||
static void assign_application_name(const char *newval, void *extra);
|
||||
@ -576,6 +580,7 @@ int ssl_renegotiation_limit;
|
||||
* need to be duplicated in all the different implementations of pg_shmem.c.
|
||||
*/
|
||||
int huge_pages;
|
||||
int huge_page_size;
|
||||
|
||||
/*
|
||||
* These variables are all dummies that don't do anything, except in some
|
||||
@ -3381,6 +3386,17 @@ static struct config_int ConfigureNamesInt[] =
|
||||
NULL, assign_tcp_user_timeout, show_tcp_user_timeout
|
||||
},
|
||||
|
||||
{
|
||||
{"huge_page_size", PGC_POSTMASTER, RESOURCES_MEM,
|
||||
gettext_noop("The size of huge page that should be requested."),
|
||||
NULL,
|
||||
GUC_UNIT_KB
|
||||
},
|
||||
&huge_page_size,
|
||||
0, 0, INT_MAX,
|
||||
check_huge_page_size, NULL, NULL
|
||||
},
|
||||
|
||||
/* End-of-list marker */
|
||||
{
|
||||
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
|
||||
@ -11565,6 +11581,20 @@ check_maintenance_io_concurrency(int *newval, void **extra, GucSource source)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * GUC check hook for huge_page_size.
 *
 * When MAP_HUGE_MASK and MAP_HUGE_SHIFT are not both defined at compile
 * time, any nonzero *newval is rejected: an error detail is set with
 * GUC_check_errdetail() and false is returned.  In every other case the
 * value is accepted and true is returned.  The extra and source arguments
 * are not used by this function.
 */
static bool
|
||||
check_huge_page_size(int *newval, void **extra, GucSource source)
|
||||
{
|
||||
#if !(defined(MAP_HUGE_MASK) && defined(MAP_HUGE_SHIFT))
|
||||
/* Recent enough Linux only, for now. See GetHugePageSize(). */
|
||||
if (*newval != 0)
|
||||
{
|
||||
GUC_check_errdetail("huge_page_size must be 0 on this platform.");
|
||||
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
assign_pgstat_temp_directory(const char *newval, void *extra)
|
||||
{
|
||||
|
@ -122,6 +122,8 @@
|
||||
# (change requires restart)
|
||||
#huge_pages = try # on, off, or try
|
||||
# (change requires restart)
|
||||
#huge_page_size = 0 # zero for system default
|
||||
# (change requires restart)
|
||||
#temp_buffers = 8MB # min 800kB
|
||||
#max_prepared_transactions = 0 # zero disables the feature
|
||||
# (change requires restart)
|
||||
|
@ -44,6 +44,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */
|
||||
/* GUC variables */
|
||||
extern int shared_memory_type;
|
||||
extern int huge_pages;
|
||||
extern int huge_page_size;
|
||||
|
||||
/* Possible values for huge_pages */
|
||||
typedef enum
|
||||
|
Loading…
x
Reference in New Issue
Block a user