diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index b518851441..ba5c40db01 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -667,3 +667,37 @@ if test x"$Ac_cachevar" = x"yes"; then fi undefine([Ac_cachevar])dnl ])# PGAC_SSE42_CRC32_INTRINSICS + + +# PGAC_ARMV8_CRC32C_INTRINSICS +# ----------------------- +# Check if the compiler supports the CRC32C instructions using the __crc32cb, +# __crc32ch, __crc32cw, and __crc32cd intrinsic functions. These instructions +# were first introduced in ARMv8 in the optional CRC Extension, and became +# mandatory in ARMv8.1. +# +# An optional compiler flag can be passed as argument (e.g. +# -march=armv8-a+crc). If the intrinsics are supported, sets +# pgac_armv8_crc32c_intrinsics, and CFLAGS_ARMV8_CRC32C. +AC_DEFUN([PGAC_ARMV8_CRC32C_INTRINSICS], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_armv8_crc32c_intrinsics_$1])])dnl +AC_CACHE_CHECK([for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=$1], [Ac_cachevar], +[pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS $1" +AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <arm_acle.h>], + [unsigned int crc = 0; + crc = __crc32cb(crc, 0); + crc = __crc32ch(crc, 0); + crc = __crc32cw(crc, 0); + crc = __crc32cd(crc, 0); + /* return computed value, to prevent the above being optimized away */ + return crc == 0;])], + [Ac_cachevar=yes], + [Ac_cachevar=no]) +CFLAGS="$pgac_save_CFLAGS"]) +if test x"$Ac_cachevar" = x"yes"; then + CFLAGS_ARMV8_CRC32C="$1" + pgac_armv8_crc32c_intrinsics=yes +fi +undefine([Ac_cachevar])dnl +])# PGAC_ARMV8_CRC32C_INTRINSICS diff --git a/configure b/configure index 5c56f21282..56f18dfbc2 100755 --- a/configure +++ b/configure @@ -646,6 +646,7 @@ MSGMERGE MSGFMT_FLAGS MSGFMT PG_CRC32C_OBJS +CFLAGS_ARMV8_CRC32C CFLAGS_SSE42 have_win32_dbghelp HAVE_IPV6 @@ -17254,28 +17255,175 @@ if ac_fn_c_try_compile "$LINENO"; then : fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +# Check for ARMv8 CRC Extension intrinsics to do CRC calculations. 
+# +# First check if __crc32c* intrinsics can be used with the default compiler +# flags. If not, check if adding -march=armv8-a+crc flag helps. +# CFLAGS_ARMV8_CRC32C is set if the extra flag is required. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=" >&5 +$as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=... " >&6; } +if ${pgac_cv_armv8_crc32c_intrinsics_+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS " +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <arm_acle.h> +int +main () +{ +unsigned int crc = 0; + crc = __crc32cb(crc, 0); + crc = __crc32ch(crc, 0); + crc = __crc32cw(crc, 0); + crc = __crc32cd(crc, 0); + /* return computed value, to prevent the above being optimized away */ + return crc == 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_armv8_crc32c_intrinsics_=yes +else + pgac_cv_armv8_crc32c_intrinsics_=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_armv8_crc32c_intrinsics_" >&5 +$as_echo "$pgac_cv_armv8_crc32c_intrinsics_" >&6; } +if test x"$pgac_cv_armv8_crc32c_intrinsics_" = x"yes"; then + CFLAGS_ARMV8_CRC32C="" + pgac_armv8_crc32c_intrinsics=yes +fi + +if test x"$pgac_armv8_crc32c_intrinsics" != x"yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc" >&5 +$as_echo_n "checking for __crc32cb, __crc32ch, __crc32cw, and __crc32cd with CFLAGS=-march=armv8-a+crc... " >&6; } +if ${pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -march=armv8-a+crc" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <arm_acle.h> +int +main () +{ +unsigned int crc = 0; + crc = __crc32cb(crc, 0); + crc = __crc32ch(crc, 0); + crc = __crc32cw(crc, 0); + crc = __crc32cd(crc, 0); + /* return computed value, to prevent the above being optimized away */ + return crc == 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc=yes +else + pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc" >&5 +$as_echo "$pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc" >&6; } +if test x"$pgac_cv_armv8_crc32c_intrinsics__march_armv8_apcrc" = x"yes"; then + CFLAGS_ARMV8_CRC32C="-march=armv8-a+crc" + pgac_armv8_crc32c_intrinsics=yes +fi + +fi + + +# In order to detect at runtime, if the ARM CRC Extension is available, +# we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have +# everything we need for that. +for ac_func in getauxval +do : + ac_fn_c_check_func "$LINENO" "getauxval" "ac_cv_func_getauxval" +if test "x$ac_cv_func_getauxval" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_GETAUXVAL 1 +_ACEOF + +fi +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include <sys/auxv.h> +#include <asm/hwcap.h> + +int +main () +{ + +#ifndef AT_HWCAP +#error AT_HWCAP not defined +#endif +#ifndef HWCAP_CRC32 +#error HWCAP_CRC32 not defined +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + HAVE_HWCAP_CRC32=1 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + # Select CRC-32C implementation. # -# If we are targeting a processor that has SSE 4.2 instructions, we can use the -# special CRC instructions for calculating CRC-32C. 
If we're not targeting such -# a processor, but we can nevertheless produce code that uses the SSE -# intrinsics, perhaps with some extra CFLAGS, compile both implementations and -# select which one to use at runtime, depending on whether SSE 4.2 is supported -# by the processor we're running on. +# If we are targeting a processor that has Intel SSE 4.2 instructions, we can +# use the special CRC instructions for calculating CRC-32C. If we're not +# targeting such a processor, but we can nevertheless produce code that uses +# the SSE intrinsics, perhaps with some extra CFLAGS, compile both +# implementations and select which one to use at runtime, depending on whether +# SSE 4.2 is supported by the processor we're running on. +# +# Similarly, if we are targeting an ARM processor that has the CRC +# instructions that are part of the ARMv8 CRC Extension, use them. And if +# we're not targeting such a processor, but can nevertheless produce code that +# uses the CRC instructions, compile both, and select at runtime. # # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 # in the template or configure command line. -if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then +if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x""; then + # Use Intel SSE 4.2 if available. if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then USE_SSE42_CRC32C=1 else - # the CPUID instruction is needed for the runtime check. + # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for + # the runtime check. 
if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 else - # fall back to slicing-by-8 algorithm which doesn't require any special - # CPU support. - USE_SLICING_BY_8_CRC32C=1 + # Use ARM CRC Extension if available. + if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARMV8_CRC32C" = x""; then + USE_ARMV8_CRC32C=1 + else + # ARM CRC Extension, with runtime check? The getauxval() function and + # HWCAP_CRC32 are needed for the runtime check. + if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then + USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 + else + # fall back to slicing-by-8 algorithm, which doesn't require any + # special CPU support. + USE_SLICING_BY_8_CRC32C=1 + fi + fi fi fi fi @@ -17295,16 +17443,34 @@ else $as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 $as_echo "SSE 4.2 with runtime check" >&6; } else + if test x"$USE_ARMV8_CRC32C" = x"1"; then + +$as_echo "#define USE_ARMV8_CRC32C 1" >>confdefs.h + + PG_CRC32C_OBJS="pg_crc32c_armv8.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions" >&5 +$as_echo "ARMv8 CRC instructions" >&6; } + else + if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then + +$as_echo "#define USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h + + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ARMv8 CRC instructions with runtime check" >&5 +$as_echo "ARMv8 CRC instructions with runtime check" >&6; } + else $as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h - 
PG_CRC32C_OBJS="pg_crc32c_sb8.o" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5 + PG_CRC32C_OBJS="pg_crc32c_sb8.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5 $as_echo "slicing-by-8" >&6; } + fi + fi fi fi diff --git a/configure.in b/configure.in index 1d28f0f982..da02a56ec6 100644 --- a/configure.in +++ b/configure.in @@ -2003,28 +2003,73 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [ #endif ])], [SSE4_2_TARGETED=1]) +# Check for ARMv8 CRC Extension intrinsics to do CRC calculations. +# +# First check if __crc32c* intrinsics can be used with the default compiler +# flags. If not, check if adding -march=armv8-a+crc flag helps. +# CFLAGS_ARMV8_CRC32C is set if the extra flag is required. +PGAC_ARMV8_CRC32C_INTRINSICS([]) +if test x"$pgac_armv8_crc32c_intrinsics" != x"yes"; then + PGAC_ARMV8_CRC32C_INTRINSICS([-march=armv8-a+crc]) +fi +AC_SUBST(CFLAGS_ARMV8_CRC32C) + +# In order to detect at runtime, if the ARM CRC Extension is available, +# we will do "getauxval(AT_HWCAP) & HWCAP_CRC32". Check if we have +# everything we need for that. +AC_CHECK_FUNCS([getauxval]) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ +#include <sys/auxv.h> +#include <asm/hwcap.h> +], [ +#ifndef AT_HWCAP +#error AT_HWCAP not defined +#endif +#ifndef HWCAP_CRC32 +#error HWCAP_CRC32 not defined +#endif +])], [HAVE_HWCAP_CRC32=1]) + # Select CRC-32C implementation. # -# If we are targeting a processor that has SSE 4.2 instructions, we can use the -# special CRC instructions for calculating CRC-32C. If we're not targeting such -# a processor, but we can nevertheless produce code that uses the SSE -# intrinsics, perhaps with some extra CFLAGS, compile both implementations and -# select which one to use at runtime, depending on whether SSE 4.2 is supported -# by the processor we're running on. +# If we are targeting a processor that has Intel SSE 4.2 instructions, we can +# use the special CRC instructions for calculating CRC-32C. 
If we're not +# targeting such a processor, but we can nevertheless produce code that uses +# the SSE intrinsics, perhaps with some extra CFLAGS, compile both +# implementations and select which one to use at runtime, depending on whether +# SSE 4.2 is supported by the processor we're running on. +# +# Similarly, if we are targeting an ARM processor that has the CRC +# instructions that are part of the ARMv8 CRC Extension, use them. And if +# we're not targeting such a processor, but can nevertheless produce code that +# uses the CRC instructions, compile both, and select at runtime. # # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 # in the template or configure command line. -if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then +if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_ARMV8_CRC32C" = x"" && test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x""; then + # Use Intel SSE 4.2 if available. if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then USE_SSE42_CRC32C=1 else - # the CPUID instruction is needed for the runtime check. + # Intel SSE 4.2, with runtime check? The CPUID instruction is needed for + # the runtime check. if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 else - # fall back to slicing-by-8 algorithm which doesn't require any special - # CPU support. - USE_SLICING_BY_8_CRC32C=1 + # Use ARM CRC Extension if available. + if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$CFLAGS_ARMV8_CRC32C" = x""; then + USE_ARMV8_CRC32C=1 + else + # ARM CRC Extension, with runtime check? The getauxval() function and + # HWCAP_CRC32 are needed for the runtime check. 
+ if test x"$pgac_armv8_crc32c_intrinsics" = x"yes" && test x"$ac_cv_func_getauxval" = x"yes" && test x"$HAVE_HWCAP_CRC32" = x"1"; then + USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK=1 + else + # fall back to slicing-by-8 algorithm, which doesn't require any + # special CPU support. + USE_SLICING_BY_8_CRC32C=1 + fi + fi fi fi fi @@ -2038,12 +2083,24 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then else if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.]) - PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_sse42_choose.o" AC_MSG_RESULT(SSE 4.2 with runtime check) else - AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).]) - PG_CRC32C_OBJS="pg_crc32c_sb8.o" - AC_MSG_RESULT(slicing-by-8) + if test x"$USE_ARMV8_CRC32C" = x"1"; then + AC_DEFINE(USE_ARMV8_CRC32C, 1, [Define to 1 to use ARMv8 CRC Extension.]) + PG_CRC32C_OBJS="pg_crc32c_armv8.o" + AC_MSG_RESULT(ARMv8 CRC instructions) + else + if test x"$USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then + AC_DEFINE(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use ARMv8 CRC Extension with a runtime check.]) + PG_CRC32C_OBJS="pg_crc32c_armv8.o pg_crc32c_sb8.o pg_crc32c_armv8_choose.o" + AC_MSG_RESULT(ARMv8 CRC instructions with runtime check) + else + AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use software CRC-32C implementation (slicing-by-8).]) + PG_CRC32C_OBJS="pg_crc32c_sb8.o" + AC_MSG_RESULT(slicing-by-8) + fi + fi fi fi AC_SUBST(PG_CRC32C_OBJS) diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 2dac3ff897..19c9c1e11e 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -260,6 +260,7 @@ CXX = @CXX@ CFLAGS = @CFLAGS@ CFLAGS_VECTOR = @CFLAGS_VECTOR@ CFLAGS_SSE42 = @CFLAGS_SSE42@ +CFLAGS_ARMV8_CRC32C = 
@CFLAGS_ARMV8_CRC32C@ CXXFLAGS = @CXXFLAGS@ LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index eb4b43fed0..f3620231a7 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -239,6 +239,9 @@ /* Define to 1 if you have the `getaddrinfo' function. */ #undef HAVE_GETADDRINFO +/* Define to 1 if you have the `getauxval' function. */ +#undef HAVE_GETAUXVAL + /* Define to 1 if you have the `gethostbyname_r' function. */ #undef HAVE_GETHOSTBYNAME_R @@ -842,6 +845,12 @@ /* Define to 1 if your <sys/time.h> declares `struct tm'. */ #undef TM_IN_SYS_TIME +/* Define to 1 to use ARMv8 CRC Extension. */ +#undef USE_ARMV8_CRC32C + +/* Define to 1 to use ARMv8 CRC Extension with a runtime check. */ +#undef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK + /* Define to 1 to build with assertion checks. (--enable-cassert) */ #undef USE_ASSERT_CHECKING diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index ae2701e958..9a26295c8e 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -42,26 +42,42 @@ typedef uint32 pg_crc32c; #define EQ_CRC32C(c1, c2) ((c1) == (c2)) #if defined(USE_SSE42_CRC32C) -/* Use SSE4.2 instructions. */ +/* Use Intel SSE4.2 instructions. */ #define COMP_CRC32C(crc, data, len) \ ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); -#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) +#elif defined(USE_ARMV8_CRC32C) +/* Use ARMv8 CRC Extension instructions. 
*/ + +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) + /* - * Use SSE4.2 instructions, but perform a runtime check first to check that - * they are available. + * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first + * to check that they are available. */ #define COMP_CRC32C(crc, data, len) \ ((crc) = pg_comp_crc32c((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) -extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); +#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +#endif +#ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); +#endif + #else /* * Use slicing-by-8 algorithm. 
diff --git a/src/port/Makefile b/src/port/Makefile
index 81f01b25bb..f328b705e4 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -65,6 +65,10 @@ thread.o: CFLAGS+=$(PTHREAD_CFLAGS)
 pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
 pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42)
 
+# pg_crc32c_armv8.o and its _srv.o version need CFLAGS_ARMV8_CRC32C
+pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
+pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_ARMV8_CRC32C)
+
 #
 # Server versions of object files
 #
diff --git a/src/port/pg_crc32c_armv8.c b/src/port/pg_crc32c_armv8.c
new file mode 100644
index 0000000000..12b8bc1f64
--- /dev/null
+++ b/src/port/pg_crc32c_armv8.c
@@ -0,0 +1,81 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_armv8.c
+ *	  Compute CRC-32C checksum using ARMv8 CRC Extension instructions
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_crc32c_armv8.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "port/pg_crc32c.h"
+
+#include <arm_acle.h>
+
+/*
+ * Fold "len" bytes starting at "data" into the running CRC-32C value "crc"
+ * using the ARMv8 CRC Extension intrinsics, and return the updated value.
+ * The final bit inversion is not done here; see FIN_CRC32C().
+ */
+pg_crc32c
+pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len)
+{
+	const unsigned char *p = data;
+	const unsigned char *pend = p + len;
+
+	/*
+	 * ARMv8 doesn't require alignment, but aligned memory access is
+	 * significantly faster. Process leading bytes so that the loop below
+	 * starts with a pointer aligned to eight bytes.
+	 *
+	 * Each step must check that the whole 1, 2 or 4 byte unit it is about to
+	 * read lies within the input; otherwise a short input would be read past
+	 * its end and the stray bytes folded into the CRC.
+	 */
+	if (!PointerIsAligned(p, uint16) && p + 1 <= pend)
+	{
+		crc = __crc32cb(crc, *p);
+		p += 1;
+	}
+	if (!PointerIsAligned(p, uint32) && p + 2 <= pend)
+	{
+		crc = __crc32ch(crc, *(uint16 *) p);
+		p += 2;
+	}
+	if (!PointerIsAligned(p, uint64) && p + 4 <= pend)
+	{
+		crc = __crc32cw(crc, *(uint32 *) p);
+		p += 4;
+	}
+
+	/* Process eight bytes at a time, as far as we can. */
+	while (p + 8 <= pend)
+	{
+		crc = __crc32cd(crc, *(uint64 *) p);
+		p += 8;
+	}
+
+	/* Process remaining 0-7 bytes. */
+	if (p + 4 <= pend)
+	{
+		crc = __crc32cw(crc, *(uint32 *) p);
+		p += 4;
+	}
+	if (p + 2 <= pend)
+	{
+		crc = __crc32ch(crc, *(uint16 *) p);
+		p += 2;
+	}
+	if (p < pend)
+	{
+		crc = __crc32cb(crc, *p);
+	}
+
+	return crc;
+}
diff --git a/src/port/pg_crc32c_armv8_choose.c b/src/port/pg_crc32c_armv8_choose.c
new file mode 100644
index 0000000000..f21a8243e9
--- /dev/null
+++ b/src/port/pg_crc32c_armv8_choose.c
@@ -0,0 +1,59 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_armv8_choose.c
+ *	  Choose between ARMv8 and software CRC-32C implementation.
+ *
+ * On first call, checks if the CPU we're running on supports the ARMv8
+ * CRC Extension. If it does, use the special instructions for CRC-32C
+ * computation. Otherwise, fall back to the pure software implementation
+ * (slicing-by-8).
+ *
+ * XXX: The glibc-specific getauxval() function, with the HWCAP_CRC32
+ * flag, is used to determine if the CRC Extension is available on the
+ * current platform. Is there a more portable way to determine that?
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_crc32c_armv8_choose.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+#include "port/pg_crc32c.h"
+
+/*
+ * Report whether the capability bits returned by getauxval(AT_HWCAP)
+ * include HWCAP_CRC32.
+ */
+static bool
+pg_crc32c_armv8_available(void)
+{
+	unsigned long auxv = getauxval(AT_HWCAP);
+
+	return (auxv & HWCAP_CRC32) != 0;
+}
+
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */ +static pg_crc32c +pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) +{ + if (pg_crc32c_armv8_available()) + pg_comp_crc32c = pg_comp_crc32c_armv8; + else + pg_comp_crc32c = pg_comp_crc32c_sb8; + + return pg_comp_crc32c(crc, data, len); +} + +pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; diff --git a/src/port/pg_crc32c_choose.c b/src/port/pg_crc32c_sse42_choose.c similarity index 78% rename from src/port/pg_crc32c_choose.c rename to src/port/pg_crc32c_sse42_choose.c index 40bee67b0a..c2d1242d91 100644 --- a/src/port/pg_crc32c_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -1,18 +1,19 @@ /*------------------------------------------------------------------------- * - * pg_crc32c_choose.c - * Choose which CRC-32C implementation to use, at runtime. + * pg_crc32c_sse42_choose.c + * Choose between Intel SSE 4.2 and software CRC-32C implementation. * - * Try to the special CRC instructions introduced in Intel SSE 4.2, - * if available on the platform we're running on, but fall back to the - * slicing-by-8 implementation otherwise. + * On first call, checks if the CPU we're running on supports Intel SSE + * 4.2. If it does, use the special SSE instructions for CRC-32C + * computation. Otherwise, fall back to the pure software implementation + * (slicing-by-8). 
* * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * src/port/pg_crc32c_choose.c + * src/port/pg_crc32c_sse42_choose.c * *------------------------------------------------------------------------- */ diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 71f72b16fc..41d720880a 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -101,7 +101,7 @@ sub mkvcbuild if ($vsVersion >= '9.00') { - push(@pgportfiles, 'pg_crc32c_choose.c'); + push(@pgportfiles, 'pg_crc32c_sse42_choose.c'); push(@pgportfiles, 'pg_crc32c_sse42.c'); push(@pgportfiles, 'pg_crc32c_sb8.c'); }