2000-06-11 15:40:09 +04:00
|
|
|
# Macros to detect C compiler features
|
2010-09-21 00:08:53 +04:00
|
|
|
# config/c-compiler.m4
|
2000-08-29 13:36:51 +04:00
|
|
|
|
|
|
|
|
|
|
|
# PGAC_C_SIGNED
# -------------
# Probe whether the C compiler accepts the "signed" keyword in
# declarations of char, short and int variables.  If the probe fails to
# compile, "signed" is defined to nothing so that code using the keyword
# still builds.  The result is cached in pgac_cv_c_signed.
AC_DEFUN([PGAC_C_SIGNED],
[AC_CACHE_CHECK([for signed types], [pgac_cv_c_signed],
  [AC_COMPILE_IFELSE(
    [AC_LANG_PROGRAM([],
      [signed char c; signed short s; signed int i;])],
    [pgac_cv_c_signed=yes],
    [pgac_cv_c_signed=no])])
AS_IF([test x"$pgac_cv_c_signed" = xno],
  [AC_DEFINE([signed], [], [Define to empty if the C compiler does not understand signed types.])])
])# PGAC_C_SIGNED
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-11-23 17:34:03 +03:00
|
|
|
# PGAC_PRINTF_ARCHETYPE
# ---------------------
# Set the format archetype used by gcc to check printf type functions.  We
# prefer "gnu_printf", which includes what glibc uses, such as %m for error
# strings and %lld for 64 bit long longs.  GCC 4.4 introduced it.  It makes a
# dramatic difference on Windows.
#
# The probe declares a function carrying
# __attribute__((format(gnu_printf, 2, 3))) and compiles it with
# ac_c_werror_flag set to yes; the previous value of that flag is restored
# afterwards.  If the probe compiles, the archetype is gnu_printf,
# otherwise plain printf.  The chosen name is cached in
# pgac_cv_printf_archetype and published as PG_PRINTF_ATTRIBUTE.
AC_DEFUN([PGAC_PRINTF_ARCHETYPE],
[AC_CACHE_CHECK([for printf format archetype], [pgac_cv_printf_archetype],
[ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
[extern int
pgac_write(int ignore, const char *fmt,...)
__attribute__((format(gnu_printf, 2, 3)));], [])],
                  [pgac_cv_printf_archetype=gnu_printf],
                  [pgac_cv_printf_archetype=printf])
ac_c_werror_flag=$ac_save_c_werror_flag])
AC_DEFINE_UNQUOTED([PG_PRINTF_ATTRIBUTE], [$pgac_cv_printf_archetype],
                   [Define to gnu_printf if compiler supports it, else printf.])
])# PGAC_PRINTF_ARCHETYPE
|
|
|
|
|
2010-02-13 05:34:16 +03:00
|
|
|
|
2000-06-11 15:40:09 +04:00
|
|
|
# PGAC_TYPE_64BIT_INT(TYPE)
# -------------------------
# Check if TYPE is a working 64 bit integer type. Set HAVE_TYPE_64 to
# yes or no respectively, and define HAVE_TYPE_64 if yes.
#
# The probe program is compiled and run.  It checks that sizeof(TYPE) is
# 8 and that a multiply/add/divide round trip gives the expected value.
# When cross-compiling the program cannot be run, so only the size is
# checked at compile time and the arithmetic is trusted.
#
# The probe reports its result through the return value of main() and
# declares main as returning int.  It therefore needs no headers and
# does not rely on implicit function declarations or implicit int,
# which newer compilers reject (that would make the probe fail and
# wrongly report "no").
AC_DEFUN([PGAC_TYPE_64BIT_INT],
[define([Ac_define], [translit([have_$1_64], [a-z *], [A-Z_P])])dnl
define([Ac_cachevar], [translit([pgac_cv_type_$1_64], [ *], [_p])])dnl
AC_CACHE_CHECK([whether $1 is 64 bits], [Ac_cachevar],
[AC_RUN_IFELSE([AC_LANG_SOURCE(
[typedef $1 ac_int64;

/*
 * These are globals to discourage the compiler from folding all the
 * arithmetic tests down to compile-time constants.
 */
ac_int64 a = 20000001;
ac_int64 b = 40000005;

int does_int64_work(void)
{
  ac_int64 c,d;

  if (sizeof(ac_int64) != 8)
    return 0;			/* definitely not the right size */

  /* Do perfunctory checks to see if 64-bit arithmetic seems to work */
  c = a * b;
  d = (c + b) / b;
  if (d != a+1)
    return 0;
  return 1;
}

int
main(void)
{
  /* exit status 0 means the type works */
  return (! does_int64_work());
}])],
[Ac_cachevar=yes],
[Ac_cachevar=no],
[# If cross-compiling, check the size reported by the compiler and
# trust that the arithmetic works.
AC_COMPILE_IFELSE([AC_LANG_BOOL_COMPILE_TRY([], [sizeof($1) == 8])],
                  Ac_cachevar=yes,
                  Ac_cachevar=no)])])

Ac_define=$Ac_cachevar
if test x"$Ac_cachevar" = xyes ; then
  AC_DEFINE(Ac_define, 1, [Define to 1 if `]$1[' works and is 64 bits.])
fi
undefine([Ac_define])dnl
undefine([Ac_cachevar])dnl
])# PGAC_TYPE_64BIT_INT
|
|
|
|
|
|
|
|
|
Add, optional, support for 128bit integers.
We will, for the foreseeable future, not expose 128 bit datatypes to
SQL. But being able to use 128bit math will allow us, in a later patch,
to use 128bit accumulators for some aggregates; leading to noticeable
speedups over using numeric.
So far we only detect a gcc/clang extension that supports 128bit math,
but no 128bit literals, and no *printf support. We might want to expand
this in the future to further compilers, if there are any that
provide similar support.
Discussion: 544BB5F1.50709@proxel.se
Author: Andreas Karlsson, with significant editorializing by me
Reviewed-By: Peter Geoghegan, Oskari Saarenmaa
2015-03-20 12:26:17 +03:00
|
|
|
# PGAC_TYPE_128BIT_INT
# --------------------
# Check if __int128 is a working 128 bit integer type, and if so
# define PG_INT128_TYPE to that typename.  This currently only detects
# a GCC/clang extension, but support for different environments may be
# added in the future.
#
# For the moment we only test for support for 128bit math; support for
# 128bit literals and snprintf is not required.
#
# The probe is compiled and linked, not run.  The result is cached in
# pgac_cv__128bit_int.
AC_DEFUN([PGAC_TYPE_128BIT_INT],
[AC_CACHE_CHECK([for __int128], [pgac_cv__128bit_int],
[AC_LINK_IFELSE([AC_LANG_PROGRAM([
/*
 * These are globals to discourage the compiler from folding all the
 * arithmetic tests down to compile-time constants.  We do not have
 * convenient support for 64bit literals at this point...
 *
 * The seeds are chosen so that shifting left by 12 yields round decimal
 * values: 48828125 << 12 = 200000000000, 97656250 << 12 = 400000000000.
 */
__int128 a = 48828125;
__int128 b = 97656250;
],[
__int128 c,d;
a = (a << 12) + 1; /* 200000000001 */
b = (b << 12) + 5; /* 400000000005 */
/* use the most relevant arithmetic ops */
c = a * b;
d = (c + b) / b;
/* return different values, to prevent optimizations */
if (d != a+1)
  return 0;
return 1;
])],
[pgac_cv__128bit_int=yes],
[pgac_cv__128bit_int=no])])
if test x"$pgac_cv__128bit_int" = xyes ; then
  AC_DEFINE(PG_INT128_TYPE, __int128, [Define to the name of a signed 128-bit integer type.])
fi])# PGAC_TYPE_128BIT_INT
|
|
|
|
|
2000-06-11 15:40:09 +04:00
|
|
|
|
2003-04-25 01:16:45 +04:00
|
|
|
# PGAC_C_FUNCNAME_SUPPORT
# -----------------------
# Find out how the C compiler spells "name of the current function".
# __func__ (C99) is tried first; if a program using it compiles,
# HAVE_FUNCNAME__FUNC is defined.  Only when that fails is __FUNCTION__
# (gcc) tried, defining HAVE_FUNCNAME__FUNCTION on success.  At most one
# of the two symbols is therefore defined.
AC_DEFUN([PGAC_C_FUNCNAME_SUPPORT],
[AC_CACHE_CHECK([for __func__], [pgac_cv_funcname_func_support],
  [AC_COMPILE_IFELSE(
    [AC_LANG_PROGRAM([#include <stdio.h>],
      [printf("%s\n", __func__);])],
    [pgac_cv_funcname_func_support=yes],
    [pgac_cv_funcname_func_support=no])])
AS_IF([test x"$pgac_cv_funcname_func_support" = xyes],
  [AC_DEFINE([HAVE_FUNCNAME__FUNC], [1],
    [Define to 1 if your compiler understands __func__.])],
  [AC_CACHE_CHECK([for __FUNCTION__], [pgac_cv_funcname_function_support],
    [AC_COMPILE_IFELSE(
      [AC_LANG_PROGRAM([#include <stdio.h>],
        [printf("%s\n", __FUNCTION__);])],
      [pgac_cv_funcname_function_support=yes],
      [pgac_cv_funcname_function_support=no])])
   AS_IF([test x"$pgac_cv_funcname_function_support" = xyes],
     [AC_DEFINE([HAVE_FUNCNAME__FUNCTION], [1],
       [Define to 1 if your compiler understands __FUNCTION__.])])])
])# PGAC_C_FUNCNAME_SUPPORT
|
2003-10-25 19:32:11 +04:00
|
|
|
|
2008-02-17 19:36:43 +03:00
|
|
|
|
|
|
|
|
2012-09-30 22:38:31 +04:00
|
|
|
# PGAC_C_STATIC_ASSERT
|
Improve handling of ereport(ERROR) and elog(ERROR).
In commit 71450d7fd6c7cf7b3e38ac56e363bff6a681973c, we added code to inform
suitably-intelligent compilers that ereport() doesn't return if the elevel
is ERROR or higher. This patch extends that to elog(), and also fixes a
double-evaluation hazard that the previous commit created in ereport(),
as well as reducing the emitted code size.
The elog() improvement requires the compiler to support __VA_ARGS__, which
should be available in just about anything nowadays since it's required by
C99. But our minimum language baseline is still C89, so add a configure
test for that.
The previous commit assumed that ereport's elevel could be evaluated twice,
which isn't terribly safe --- there are already counterexamples in xlog.c.
On compilers that have __builtin_constant_p, we can use that to protect the
second test, since there's no possible optimization gain if the compiler
doesn't know the value of elevel. Otherwise, use a local variable inside
the macros to prevent double evaluation. The local-variable solution is
inferior because (a) it leads to useless code being emitted when elevel
isn't constant, and (b) it increases the optimization level needed for the
compiler to recognize that subsequent code is unreachable. But it seems
better than not teaching non-gcc compilers about unreachability at all.
Lastly, if the compiler has __builtin_unreachable(), we can use that
instead of abort(), resulting in a noticeable code savings since no
function call is actually emitted. However, it seems wise to do this only
in non-assert builds. In an assert build, continue to use abort(), so that
the behavior will be predictable and debuggable if the "impossible"
happens.
These changes involve making the ereport and elog macros emit do-while
statement blocks not just expressions, which forces small changes in
a few call sites.
Andres Freund, Tom Lane, Heikki Linnakangas
2013-01-14 03:39:20 +04:00
|
|
|
# --------------------
# Determine whether the C compiler accepts _Static_assert(), and define
# HAVE__STATIC_ASSERT when it does.
#
# The probe deliberately uses the form ({ _Static_assert(...) }): we need
# gcc-style compound expressions to be able to wrap the assertion into
# macros, so that exact syntax is what gets tested.  The probe is
# compiled and linked; the result is cached in pgac_cv__static_assert.
AC_DEFUN([PGAC_C_STATIC_ASSERT],
[AC_CACHE_CHECK([for _Static_assert], [pgac_cv__static_assert],
  [AC_LINK_IFELSE(
    [AC_LANG_PROGRAM([],
      [({ _Static_assert(1, "foo"); })])],
    [pgac_cv__static_assert=yes],
    [pgac_cv__static_assert=no])])
AS_IF([test x"$pgac_cv__static_assert" = xyes],
  [AC_DEFINE([HAVE__STATIC_ASSERT], [1],
    [Define to 1 if your compiler understands _Static_assert.])])
])# PGAC_C_STATIC_ASSERT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# PGAC_C_TYPES_COMPATIBLE
# -----------------------
# Determine whether the C compiler provides
# __builtin_types_compatible_p, and define
# HAVE__BUILTIN_TYPES_COMPATIBLE_P when it does.
#
# The probe combines the builtin with __typeof__, using the result as an
# array dimension.  It is unlikely that a compiler would offer the
# former without the latter, but both are exercised anyway.  The result
# is cached in pgac_cv__types_compatible.
AC_DEFUN([PGAC_C_TYPES_COMPATIBLE],
[AC_CACHE_CHECK([for __builtin_types_compatible_p], [pgac_cv__types_compatible],
  [AC_COMPILE_IFELSE(
    [AC_LANG_PROGRAM([],
      [[ int x; static int y[__builtin_types_compatible_p(__typeof__(x), int)]; ]])],
    [pgac_cv__types_compatible=yes],
    [pgac_cv__types_compatible=no])])
AS_IF([test x"$pgac_cv__types_compatible" = xyes],
  [AC_DEFINE([HAVE__BUILTIN_TYPES_COMPATIBLE_P], [1],
    [Define to 1 if your compiler understands __builtin_types_compatible_p.])])
])# PGAC_C_TYPES_COMPATIBLE
|
|
|
|
|
|
|
|
|
|
|
|
|
2015-02-10 11:54:40 +03:00
|
|
|
# PGAC_C_BUILTIN_BSWAP32
# ----------------------
# Determine whether the C compiler provides __builtin_bswap32(), and
# define HAVE__BUILTIN_BSWAP32 when it does.  The probe compiles a
# file-scope static initialised from a call to the builtin on a constant.
# The result is cached in pgac_cv__builtin_bswap32.
AC_DEFUN([PGAC_C_BUILTIN_BSWAP32],
[AC_CACHE_CHECK([for __builtin_bswap32], [pgac_cv__builtin_bswap32],
  [AC_COMPILE_IFELSE(
    [AC_LANG_SOURCE(
      [static unsigned long int x = __builtin_bswap32(0xaabbccdd);])],
    [pgac_cv__builtin_bswap32=yes],
    [pgac_cv__builtin_bswap32=no])])
AS_IF([test x"$pgac_cv__builtin_bswap32" = xyes],
  [AC_DEFINE([HAVE__BUILTIN_BSWAP32], [1],
    [Define to 1 if your compiler understands __builtin_bswap32.])])
])# PGAC_C_BUILTIN_BSWAP32
|
|
|
|
|
|
|
|
|
|
|
|
|
2015-10-08 20:01:36 +03:00
|
|
|
# PGAC_C_BUILTIN_BSWAP64
# ----------------------
# Determine whether the C compiler provides __builtin_bswap64(), and
# define HAVE__BUILTIN_BSWAP64 when it does.  The probe compiles a
# file-scope static initialised from a call to the builtin on a constant.
# The result is cached in pgac_cv__builtin_bswap64.
AC_DEFUN([PGAC_C_BUILTIN_BSWAP64],
[AC_CACHE_CHECK([for __builtin_bswap64], [pgac_cv__builtin_bswap64],
  [AC_COMPILE_IFELSE(
    [AC_LANG_SOURCE(
      [static unsigned long int x = __builtin_bswap64(0xaabbccddeeff0011);])],
    [pgac_cv__builtin_bswap64=yes],
    [pgac_cv__builtin_bswap64=no])])
AS_IF([test x"$pgac_cv__builtin_bswap64" = xyes],
  [AC_DEFINE([HAVE__BUILTIN_BSWAP64], [1],
    [Define to 1 if your compiler understands __builtin_bswap64.])])
])# PGAC_C_BUILTIN_BSWAP64
|
|
|
|
|
|
|
|
|
|
|
|
|
Improve handling of ereport(ERROR) and elog(ERROR).
In commit 71450d7fd6c7cf7b3e38ac56e363bff6a681973c, we added code to inform
suitably-intelligent compilers that ereport() doesn't return if the elevel
is ERROR or higher. This patch extends that to elog(), and also fixes a
double-evaluation hazard that the previous commit created in ereport(),
as well as reducing the emitted code size.
The elog() improvement requires the compiler to support __VA_ARGS__, which
should be available in just about anything nowadays since it's required by
C99. But our minimum language baseline is still C89, so add a configure
test for that.
The previous commit assumed that ereport's elevel could be evaluated twice,
which isn't terribly safe --- there are already counterexamples in xlog.c.
On compilers that have __builtin_constant_p, we can use that to protect the
second test, since there's no possible optimization gain if the compiler
doesn't know the value of elevel. Otherwise, use a local variable inside
the macros to prevent double evaluation. The local-variable solution is
inferior because (a) it leads to useless code being emitted when elevel
isn't constant, and (b) it increases the optimization level needed for the
compiler to recognize that subsequent code is unreachable. But it seems
better than not teaching non-gcc compilers about unreachability at all.
Lastly, if the compiler has __builtin_unreachable(), we can use that
instead of abort(), resulting in a noticeable code savings since no
function call is actually emitted. However, it seems wise to do this only
in non-assert builds. In an assert build, continue to use abort(), so that
the behavior will be predictable and debuggable if the "impossible"
happens.
These changes involve making the ereport and elog macros emit do-while
statement blocks not just expressions, which forces small changes in
a few call sites.
Andres Freund, Tom Lane, Heikki Linnakangas
2013-01-14 03:39:20 +04:00
|
|
|
# PGAC_C_BUILTIN_CONSTANT_P
# -------------------------
# Determine whether the C compiler provides __builtin_constant_p(), and
# define HAVE__BUILTIN_CONSTANT_P when it does.  The probe uses the
# builtin inside the dimension of a file-scope array.  The result is
# cached in pgac_cv__builtin_constant_p.
AC_DEFUN([PGAC_C_BUILTIN_CONSTANT_P],
[AC_CACHE_CHECK([for __builtin_constant_p], [pgac_cv__builtin_constant_p],
  [AC_COMPILE_IFELSE(
    [AC_LANG_SOURCE(
      [[static int x; static int y[__builtin_constant_p(x) ? x : 1];]])],
    [pgac_cv__builtin_constant_p=yes],
    [pgac_cv__builtin_constant_p=no])])
AS_IF([test x"$pgac_cv__builtin_constant_p" = xyes],
  [AC_DEFINE([HAVE__BUILTIN_CONSTANT_P], [1],
    [Define to 1 if your compiler understands __builtin_constant_p.])])
])# PGAC_C_BUILTIN_CONSTANT_P
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# PGAC_C_BUILTIN_UNREACHABLE
# --------------------------
# Determine whether the C compiler provides __builtin_unreachable(), and
# define HAVE__BUILTIN_UNREACHABLE when it does.  The probe is compiled
# and linked; the result is cached in pgac_cv__builtin_unreachable.
#
# NB: Do not be tempted to put a for(;;); or similar in front of the
# __builtin_unreachable() call.  Some compilers would remove the call
# before linking, so an unsupported builtin would yield only a warning
# instead of an error.
AC_DEFUN([PGAC_C_BUILTIN_UNREACHABLE],
[AC_CACHE_CHECK([for __builtin_unreachable], [pgac_cv__builtin_unreachable],
  [AC_LINK_IFELSE(
    [AC_LANG_PROGRAM([],
      [__builtin_unreachable();])],
    [pgac_cv__builtin_unreachable=yes],
    [pgac_cv__builtin_unreachable=no])])
AS_IF([test x"$pgac_cv__builtin_unreachable" = xyes],
  [AC_DEFINE([HAVE__BUILTIN_UNREACHABLE], [1],
    [Define to 1 if your compiler understands __builtin_unreachable.])])
])# PGAC_C_BUILTIN_UNREACHABLE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# PGAC_C_VA_ARGS
# --------------
# Determine whether the C compiler supports C99-style variadic macros,
# and define HAVE__VA_ARGS when it does.  The probe defines a macro
# that forwards __VA_ARGS__ to fprintf() and invokes it.  The result is
# cached in pgac_cv__va_args.
AC_DEFUN([PGAC_C_VA_ARGS],
[AC_CACHE_CHECK([for __VA_ARGS__], [pgac_cv__va_args],
  [AC_COMPILE_IFELSE(
    [AC_LANG_PROGRAM([#include <stdio.h>],
[#define debug(...) fprintf(stderr, __VA_ARGS__)
debug("%s", "blarg");
])],
    [pgac_cv__va_args=yes],
    [pgac_cv__va_args=no])])
AS_IF([test x"$pgac_cv__va_args" = xyes],
  [AC_DEFINE([HAVE__VA_ARGS], [1],
    [Define to 1 if your compiler understands __VA_ARGS__ in macros.])])
])# PGAC_C_VA_ARGS
|
|
|
|
|
|
|
|
|
|
|
|
|
2004-10-20 06:12:07 +04:00
|
|
|
# PGAC_PROG_CC_CFLAGS_OPT(OPTION)
# -------------------------------
# Test whether the compiler accepts OPTION on its command line, and
# append OPTION to CFLAGS when it does.
#
# An empty program is compiled with OPTION temporarily added to CFLAGS
# and with ac_c_werror_flag set to yes.  Both CFLAGS and
# ac_c_werror_flag are put back to their previous values after the
# probe.  The result is cached in a variable whose name is derived
# from OPTION (pgac_cv_prog_cc_cflags_OPTION, made shell-safe).
AC_DEFUN([PGAC_PROG_CC_CFLAGS_OPT],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_prog_cc_cflags_$1])])dnl
AC_CACHE_CHECK([whether $CC supports $1], [Ac_cachevar],
[ac_save_c_werror_flag=$ac_c_werror_flag
pgac_save_CFLAGS=$CFLAGS
ac_c_werror_flag=yes
CFLAGS="$pgac_save_CFLAGS $1"
_AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
  [Ac_cachevar=yes],
  [Ac_cachevar=no])
CFLAGS="$pgac_save_CFLAGS"
ac_c_werror_flag=$ac_save_c_werror_flag])
if test x"$Ac_cachevar" = x"yes"; then
  CFLAGS="$CFLAGS $1"
fi
undefine([Ac_cachevar])dnl
])# PGAC_PROG_CC_CFLAGS_OPT
|
2008-05-19 00:13:12 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
2013-04-30 09:59:26 +04:00
|
|
|
# PGAC_PROG_CC_VAR_OPT(VARIABLE, OPTION)
# --------------------------------------
# Given a variable name and a string, check if the compiler supports
# the string as a command-line option. If it does, add the string to
# the given variable.
#
# An empty program is compiled with OPTION temporarily added to CFLAGS
# and with ac_c_werror_flag set to yes; both are restored afterwards,
# so CFLAGS itself is left unchanged by this macro.  The cache variable
# name is derived from OPTION only (pgac_cv_prog_cc_cflags_OPTION, made
# shell-safe), not from VARIABLE.
AC_DEFUN([PGAC_PROG_CC_VAR_OPT],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_prog_cc_cflags_$2])])dnl
AC_CACHE_CHECK([whether $CC supports $2], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS $2"
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
_AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
                   [Ac_cachevar=yes],
                   [Ac_cachevar=no])
ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="$pgac_save_CFLAGS"])
if test x"$Ac_cachevar" = x"yes"; then
  $1="${$1} $2"
fi
undefine([Ac_cachevar])dnl
])# PGAC_PROG_CC_VAR_OPT
|
|
|
|
|
|
|
|
|
|
|
|
|
2008-05-19 00:13:12 +04:00
|
|
|
# PGAC_PROG_CC_LDFLAGS_OPT(OPTION, FUNCTION)
# ------------------------------------------
# Test whether the compiler accepts OPTION on its command line when
# linking, and append OPTION to LDFLAGS when it does.
#
# For reasons you'd really rather not know about, a plain link test is
# not enough: the probe takes the address of FUNCTION, so it verifies
# that we can link to that particular function.  In fact, we must also
# check that the resulting program actually runs :-(
#
# When cross-compiling the program cannot be run; the result is then
# "assuming no" and OPTION is not added.  LDFLAGS is restored to its
# previous value after the probe.
AC_DEFUN([PGAC_PROG_CC_LDFLAGS_OPT],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_prog_cc_ldflags_$1])])dnl
AC_CACHE_CHECK([whether $CC supports $1], [Ac_cachevar],
[pgac_save_LDFLAGS=$LDFLAGS
LDFLAGS="$pgac_save_LDFLAGS $1"
AC_RUN_IFELSE(
  [AC_LANG_PROGRAM([extern void $2 (); void (*fptr) () = $2;], [])],
  [Ac_cachevar=yes],
  [Ac_cachevar=no],
  [Ac_cachevar="assuming no"])
LDFLAGS="$pgac_save_LDFLAGS"])
if test x"$Ac_cachevar" = x"yes"; then
  LDFLAGS="$LDFLAGS $1"
fi
undefine([Ac_cachevar])dnl
])# PGAC_PROG_CC_LDFLAGS_OPT
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both an atomics-using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides an automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics-on-top-of-spinlocks
implementation was actually slightly faster than the old purely
spinlock-using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics, 32 bit compare and exchange needs to be
implemented. If available and more efficient, native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically on top of these.
The implementations for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HP-UX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
|
|
|
|
# PGAC_HAVE_GCC__SYNC_CHAR_TAS
|
|
|
|
# ----------------------------
|
|
|
|
# Check if the C compiler understands __sync_lock_test_and_set(char),
|
|
|
|
# and define HAVE_GCC__SYNC_CHAR_TAS (to 1) if so.
|
|
|
|
#
|
|
|
|
# NB: There are platforms where test_and_set is available but compare_and_swap
|
|
|
|
# is not, so test this separately.
|
|
|
|
# NB: Some platforms only do 32bit tas, others only do 8bit tas. Test both.
#
# The probe is a link test (AC_LINK_IFELSE) of a program that calls
# __sync_lock_test_and_set() and __sync_lock_release() on a char variable.
# The outcome ("yes" or "no") is cached in pgac_cv_gcc_sync_char_tas.
|
|
|
|
AC_DEFUN([PGAC_HAVE_GCC__SYNC_CHAR_TAS],
|
|
|
|
[AC_CACHE_CHECK(for builtin __sync char locking functions, pgac_cv_gcc_sync_char_tas,
|
2015-07-02 19:21:23 +03:00
|
|
|
[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[char lock = 0;
|
|
|
|
__sync_lock_test_and_set(&lock, 1);
|
2015-07-02 19:21:23 +03:00
|
|
|
__sync_lock_release(&lock);])],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[pgac_cv_gcc_sync_char_tas="yes"],
|
|
|
|
[pgac_cv_gcc_sync_char_tas="no"])])
|
|
|
|
if test x"$pgac_cv_gcc_sync_char_tas" = x"yes"; then
|
|
|
|
AC_DEFINE(HAVE_GCC__SYNC_CHAR_TAS, 1, [Define to 1 if you have __sync_lock_test_and_set(char *) and friends.])
|
|
|
|
fi])# PGAC_HAVE_GCC__SYNC_CHAR_TAS
|
|
|
|
|
|
|
|
# PGAC_HAVE_GCC__SYNC_INT32_TAS
|
|
|
|
# -----------------------------
|
|
|
|
# Check if the C compiler understands __sync_lock_test_and_set(),
|
|
|
|
# and define HAVE_GCC__SYNC_INT32_TAS (to 1) if so.
#
# The probe is a link test (AC_LINK_IFELSE) of a program that calls
# __sync_lock_test_and_set() and __sync_lock_release() on a plain int
# variable. The outcome ("yes" or "no") is cached in
# pgac_cv_gcc_sync_int32_tas.
|
|
|
|
AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT32_TAS],
|
|
|
|
[AC_CACHE_CHECK(for builtin __sync int32 locking functions, pgac_cv_gcc_sync_int32_tas,
|
2015-07-02 19:21:23 +03:00
|
|
|
[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[int lock = 0;
|
|
|
|
__sync_lock_test_and_set(&lock, 1);
|
2015-07-02 19:21:23 +03:00
|
|
|
__sync_lock_release(&lock);])],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[pgac_cv_gcc_sync_int32_tas="yes"],
|
|
|
|
[pgac_cv_gcc_sync_int32_tas="no"])])
|
|
|
|
if test x"$pgac_cv_gcc_sync_int32_tas" = x"yes"; then
|
|
|
|
AC_DEFINE(HAVE_GCC__SYNC_INT32_TAS, 1, [Define to 1 if you have __sync_lock_test_and_set(int *) and friends.])
|
|
|
|
fi])# PGAC_HAVE_GCC__SYNC_INT32_TAS
|
|
|
|
|
|
|
|
# PGAC_HAVE_GCC__SYNC_INT32_CAS
|
|
|
|
# -----------------------------
|
|
|
|
# Check if the C compiler understands __sync_compare_and_swap() for 32bit
|
|
|
|
# types, and define HAVE_GCC__SYNC_INT32_CAS (to 1) if so.
#
# The probe is a link test (AC_LINK_IFELSE) of a program that calls
# __sync_val_compare_and_swap() on a plain int variable. The outcome
# ("yes" or "no") is cached in pgac_cv_gcc_sync_int32_cas.
|
|
|
|
AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT32_CAS],
|
|
|
|
[AC_CACHE_CHECK(for builtin __sync int32 atomic operations, pgac_cv_gcc_sync_int32_cas,
|
2015-07-02 19:21:23 +03:00
|
|
|
[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[int val = 0;
|
2015-07-02 19:21:23 +03:00
|
|
|
__sync_val_compare_and_swap(&val, 0, 37);])],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[pgac_cv_gcc_sync_int32_cas="yes"],
|
|
|
|
[pgac_cv_gcc_sync_int32_cas="no"])])
|
|
|
|
if test x"$pgac_cv_gcc_sync_int32_cas" = x"yes"; then
|
|
|
|
AC_DEFINE(HAVE_GCC__SYNC_INT32_CAS, 1, [Define to 1 if you have __sync_compare_and_swap(int *, int, int).])
|
|
|
|
fi])# PGAC_HAVE_GCC__SYNC_INT32_CAS
|
|
|
|
|
|
|
|
# PGAC_HAVE_GCC__SYNC_INT64_CAS
|
|
|
|
# -----------------------------
|
|
|
|
# Check if the C compiler understands __sync_compare_and_swap() for 64bit
|
|
|
|
# types, and define HAVE_GCC__SYNC_INT64_CAS (to 1) if so.
#
# The probe is a link test (AC_LINK_IFELSE) of a program that calls
# __sync_val_compare_and_swap() on a PG_INT64_TYPE variable. The outcome
# ("yes" or "no") is cached in pgac_cv_gcc_sync_int64_cas.
#
# NOTE(review): the test program's prologue is empty, so PG_INT64_TYPE is
# presumably supplied by an earlier configure check -- confirm that this
# macro is only invoked after PG_INT64_TYPE has been defined.
|
|
|
|
AC_DEFUN([PGAC_HAVE_GCC__SYNC_INT64_CAS],
|
|
|
|
[AC_CACHE_CHECK(for builtin __sync int64 atomic operations, pgac_cv_gcc_sync_int64_cas,
|
2015-07-02 19:21:23 +03:00
|
|
|
[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[PG_INT64_TYPE lock = 0;
|
2015-07-02 19:21:23 +03:00
|
|
|
__sync_val_compare_and_swap(&lock, 0, (PG_INT64_TYPE) 37);])],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[pgac_cv_gcc_sync_int64_cas="yes"],
|
|
|
|
[pgac_cv_gcc_sync_int64_cas="no"])])
|
|
|
|
if test x"$pgac_cv_gcc_sync_int64_cas" = x"yes"; then
|
|
|
|
AC_DEFINE(HAVE_GCC__SYNC_INT64_CAS, 1, [Define to 1 if you have __sync_compare_and_swap(int64 *, int64, int64).])
|
|
|
|
fi])# PGAC_HAVE_GCC__SYNC_INT64_CAS
|
|
|
|
|
|
|
|
# PGAC_HAVE_GCC__ATOMIC_INT32_CAS
|
|
|
|
# -------------------------
|
|
|
|
# Check if the C compiler understands __atomic_compare_exchange_n() for 32bit
|
|
|
|
# types, and define HAVE_GCC__ATOMIC_INT32_CAS if so.
|
|
|
|
AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT32_CAS],
|
|
|
|
[AC_CACHE_CHECK(for builtin __atomic int32 atomic operations, pgac_cv_gcc_atomic_int32_cas,
|
2015-07-02 19:21:23 +03:00
|
|
|
[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both, a atomics using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario the new API
provides a automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics ontop spinlocks
implementation was actually slightly faster than the old purely
spinlock using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics 32bit compare and exchange needs to be
implemented. If available and more efficient native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically ontop of these.
The implementation for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HUPX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[int val = 0;
|
|
|
|
int expect = 0;
|
2015-07-02 19:21:23 +03:00
|
|
|
__atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);])],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both an atomics-using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario, the new API
provides an automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics-on-top-of-spinlocks
implementation was actually slightly faster than the old purely
spinlock-using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics, 32 bit compare-and-exchange needs to be
implemented. If available and more efficient, native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically on top of these.
The implementations for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HP-UX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[pgac_cv_gcc_atomic_int32_cas="yes"],
|
|
|
|
[pgac_cv_gcc_atomic_int32_cas="no"])])
|
|
|
|
if test x"$pgac_cv_gcc_atomic_int32_cas" = x"yes"; then
|
|
|
|
AC_DEFINE(HAVE_GCC__ATOMIC_INT32_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int *, int *, int).])
|
|
|
|
fi])# PGAC_HAVE_GCC__ATOMIC_INT32_CAS
|
|
|
|
|
|
|
|
# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
|
|
|
|
# -------------------------------
|
|
|
|
# Check if the C compiler understands __atomic_compare_exchange_n() for 64bit
|
|
|
|
# types, and define HAVE_GCC__ATOMIC_INT64_CAS if so.
|
|
|
|
AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT64_CAS],
|
|
|
|
[AC_CACHE_CHECK(for builtin __atomic int64 atomic operations, pgac_cv_gcc_atomic_int64_cas,
|
2015-07-02 19:21:23 +03:00
|
|
|
[AC_LINK_IFELSE([AC_LANG_PROGRAM([],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both an atomics-using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario, the new API
provides an automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics-on-top-of-spinlocks
implementation was actually slightly faster than the old purely
spinlock-using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics, 32 bit compare-and-exchange needs to be
implemented. If available and more efficient, native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically on top of these.
The implementations for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HP-UX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[PG_INT64_TYPE val = 0;
|
|
|
|
PG_INT64_TYPE expect = 0;
|
2015-07-02 19:21:23 +03:00
|
|
|
__atomic_compare_exchange_n(&val, &expect, 37, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);])],
|
Add a basic atomic ops API abstracting away platform/architecture details.
Several upcoming performance/scalability improvements require atomic
operations. This new API avoids the need to splatter compiler and
architecture dependent code over all the locations employing atomic
ops.
For several of the potential usages it'd be problematic to maintain
both an atomics-using implementation and one using spinlocks or
similar. In all likelihood one of the implementations would not get
tested regularly under concurrency. To avoid that scenario, the new API
provides an automatic fallback of atomic operations to spinlocks. All
properties of atomic operations are maintained. This fallback -
obviously - isn't as fast as just using atomic ops, but it's not bad
either. For one of the future users the atomics-on-top-of-spinlocks
implementation was actually slightly faster than the old purely
spinlock-using implementation. That's important because it reduces the
fear of regressing older platforms when improving the scalability for
new ones.
The API, loosely modeled after the C11 atomics support, currently
provides 'atomic flags' and 32 bit unsigned integers. If the platform
efficiently supports atomic 64 bit unsigned integers those are also
provided.
To implement atomics support for a platform/architecture/compiler for
a type of atomics, 32 bit compare-and-exchange needs to be
implemented. If available and more efficient, native support for flags,
32 bit atomic addition, and corresponding 64 bit operations may also
be provided. Additional useful atomic operations are implemented
generically on top of these.
The implementations for various versions of gcc, msvc and sun studio have
been tested. Additional existing stub implementations for
* Intel icc
* HP-UX acc
* IBM xlc
are included but have never been tested. These will likely require
fixes based on buildfarm and user feedback.
As atomic operations also require barriers for some operations the
existing barrier support has been moved into the atomics code.
Author: Andres Freund with contributions from Oskari Saarenmaa
Reviewed-By: Amit Kapila, Robert Haas, Heikki Linnakangas and Álvaro Herrera
Discussion: CA+TgmoYBW+ux5-8Ja=Mcyuy8=VXAnVRHp3Kess6Pn3DMXAPAEA@mail.gmail.com,
20131015123303.GH5300@awork2.anarazel.de,
20131028205522.GI20248@awork2.anarazel.de
2014-09-26 01:49:05 +04:00
|
|
|
[pgac_cv_gcc_atomic_int64_cas="yes"],
|
|
|
|
[pgac_cv_gcc_atomic_int64_cas="no"])])
|
|
|
|
if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then
|
|
|
|
AC_DEFINE(HAVE_GCC__ATOMIC_INT64_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int64 *, int64 *, int64).])
|
|
|
|
fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
|
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special
instructions, crc32b and crc32q, for calculating CRC-32C. They greatly
speed up CRC calculation.
Whether the instructions can be used or not depends on the compiler and the
target architecture. If generation of SSE 4.2 instructions is allowed for
the target (-msse4.2 flag on gcc and clang), use them. If they are not
allowed by default, but the compiler supports the -msse4.2 flag to enable
them, compile just the CRC-32C function with -msse4.2 flag, and check at
runtime whether the processor we're running on supports it. If it doesn't,
fall back to the slicing-by-8 algorithm. (With the common defaults on
current operating systems, the runtime-check variant is what you get in
practice.)
Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
2015-04-14 17:05:03 +03:00
|
|
|
|
|
|
|
# PGAC_SSE42_CRC32_INTRINSICS
|
|
|
|
# ---------------------------
|
2015-04-14 23:58:16 +03:00
|
|
|
# Check if the compiler supports the x86 CRC instructions added in SSE 4.2,
|
|
|
|
# using the _mm_crc32_u8 and _mm_crc32_u32 intrinsic functions. (We don't
|
|
|
|
# test the 8-byte variant, _mm_crc32_u64, but it is assumed to be present if
|
|
|
|
# the other ones are, on x86-64 platforms.)
|
|
|
|
#
|
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special
instructions, crc32b and crc32q, for calculating CRC-32C. They greatly
speed up CRC calculation.
Whether the instructions can be used or not depends on the compiler and the
target architecture. If generation of SSE 4.2 instructions is allowed for
the target (-msse4.2 flag on gcc and clang), use them. If they are not
allowed by default, but the compiler supports the -msse4.2 flag to enable
them, compile just the CRC-32C function with -msse4.2 flag, and check at
runtime whether the processor we're running on supports it. If it doesn't,
fall back to the slicing-by-8 algorithm. (With the common defaults on
current operating systems, the runtime-check variant is what you get in
practice.)
Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
2015-04-14 17:05:03 +03:00
|
|
|
# An optional compiler flag can be passed as argument (e.g. -msse4.2). If the
|
|
|
|
# intrinsics are supported, sets pgac_sse42_crc32_intrinsics to yes and CFLAGS_SSE42 to that flag.
|
|
|
|
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
|
|
|
|
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics_$1])])dnl
|
2015-04-14 23:58:16 +03:00
|
|
|
AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32 with CFLAGS=$1], [Ac_cachevar],
|
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special
instructions, crc32b and crc32q, for calculating CRC-32C. They greatly
speed up CRC calculation.
Whether the instructions can be used or not depends on the compiler and the
target architecture. If generation of SSE 4.2 instructions is allowed for
the target (-msse4.2 flag on gcc and clang), use them. If they are not
allowed by default, but the compiler supports the -msse4.2 flag to enable
them, compile just the CRC-32C function with -msse4.2 flag, and check at
runtime whether the processor we're running on supports it. If it doesn't,
fall back to the slicing-by-8 algorithm. (With the common defaults on
current operating systems, the runtime-check variant is what you get in
practice.)
Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
2015-04-14 17:05:03 +03:00
|
|
|
[pgac_save_CFLAGS=$CFLAGS
|
|
|
|
CFLAGS="$pgac_save_CFLAGS $1"
|
2015-07-02 19:21:23 +03:00
|
|
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h>],
|
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special
instructions, crc32b and crc32q, for calculating CRC-32C. They greatly
speed up CRC calculation.
Whether the instructions can be used or not depends on the compiler and the
target architecture. If generation of SSE 4.2 instructions is allowed for
the target (-msse4.2 flag on gcc and clang), use them. If they are not
allowed by default, but the compiler supports the -msse4.2 flag to enable
them, compile just the CRC-32C function with -msse4.2 flag, and check at
runtime whether the processor we're running on supports it. If it doesn't,
fall back to the slicing-by-8 algorithm. (With the common defaults on
current operating systems, the runtime-check variant is what you get in
practice.)
Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
2015-04-14 17:05:03 +03:00
|
|
|
[unsigned int crc = 0;
|
|
|
|
crc = _mm_crc32_u8(crc, 0);
|
2015-08-17 12:15:46 +03:00
|
|
|
crc = _mm_crc32_u32(crc, 0);
|
|
|
|
/* return computed value, to prevent the above being optimized away */
|
|
|
|
return crc == 0;])],
|
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special
instructions, crc32b and crc32q, for calculating CRC-32C. They greatly
speed up CRC calculation.
Whether the instructions can be used or not depends on the compiler and the
target architecture. If generation of SSE 4.2 instructions is allowed for
the target (-msse4.2 flag on gcc and clang), use them. If they are not
allowed by default, but the compiler supports the -msse4.2 flag to enable
them, compile just the CRC-32C function with -msse4.2 flag, and check at
runtime whether the processor we're running on supports it. If it doesn't,
fall back to the slicing-by-8 algorithm. (With the common defaults on
current operating systems, the runtime-check variant is what you get in
practice.)
Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
2015-04-14 17:05:03 +03:00
|
|
|
[Ac_cachevar=yes],
|
|
|
|
[Ac_cachevar=no])
|
|
|
|
CFLAGS="$pgac_save_CFLAGS"])
|
|
|
|
if test x"$Ac_cachevar" = x"yes"; then
|
|
|
|
CFLAGS_SSE42="$1"
|
|
|
|
pgac_sse42_crc32_intrinsics=yes
|
|
|
|
fi
|
|
|
|
undefine([Ac_cachevar])dnl
|
|
|
|
])# PGAC_SSE42_CRC32_INTRINSICS
|