Add POPCNT support for MSVC x86_64 builds

02a6a54ec added code to make use of the POPCNT instruction when available
for many of our common platforms.  Here we do the same for MSVC for x86_64
machines.

MSVC's intrinsic functions for popcnt appear to differ from GCC's in that
they always emit the popcnt instruction, whereas with GCC the behavior
depends on whether the source file was compiled with -mpopcnt.  For this
reason, the MSVC intrinsics have been lumped into the pg_popcount*_asm
functions.  However, doing that sort of invalidates the names of those
functions, so let's rename them to pg_popcount*_fast().

Author: David Rowley
Reviewed-by: John Naylor
Discussion: https://postgr.es/m/CAApHDvqL3cbbK%3DGzNcwzsNR9Gi%2BaUvTudKkC4XgnQfXirJ_oRQ%40mail.gmail.com
This commit is contained in:
David Rowley 2021-08-09 15:23:48 +12:00
parent d8a75b1308
commit 2e281249af

View File

@ -103,6 +103,16 @@ const uint8 pg_number_of_ones[256] = {
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
}; };
/*
* With MSVC on x86_64 builds, try using native popcnt instructions via the
* __popcnt and __popcnt64 intrinsics.  Unlike GCC's __builtin_popcount*
* intrinsic functions, these always emit popcnt instructions.
*/
#if defined(_MSC_VER) && defined(_M_AMD64)
#define HAVE_X86_64_POPCNTQ
#endif
/* /*
* On x86_64, we can use the hardware popcount instruction, but only if * On x86_64, we can use the hardware popcount instruction, but only if
* we can verify that the CPU supports it via the cpuid instruction. * we can verify that the CPU supports it via the cpuid instruction.
@ -112,28 +122,28 @@ const uint8 pg_number_of_ones[256] = {
*/ */
#ifdef HAVE_X86_64_POPCNTQ #ifdef HAVE_X86_64_POPCNTQ
#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) #if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
#define USE_POPCNT_ASM 1 #define TRY_POPCNT_FAST 1
#endif #endif
#endif #endif
static int pg_popcount32_slow(uint32 word); static int pg_popcount32_slow(uint32 word);
static int pg_popcount64_slow(uint64 word); static int pg_popcount64_slow(uint64 word);
#ifdef USE_POPCNT_ASM #ifdef TRY_POPCNT_FAST
static bool pg_popcount_available(void); static bool pg_popcount_available(void);
static int pg_popcount32_choose(uint32 word); static int pg_popcount32_choose(uint32 word);
static int pg_popcount64_choose(uint64 word); static int pg_popcount64_choose(uint64 word);
static int pg_popcount32_asm(uint32 word); static int pg_popcount32_fast(uint32 word);
static int pg_popcount64_asm(uint64 word); static int pg_popcount64_fast(uint64 word);
int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
#else #else
int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
#endif /* USE_POPCNT_ASM */ #endif /* TRY_POPCNT_FAST */
#ifdef USE_POPCNT_ASM #ifdef TRY_POPCNT_FAST
/* /*
* Return true if CPUID indicates that the POPCNT instruction is available. * Return true if CPUID indicates that the POPCNT instruction is available.
@ -165,8 +175,8 @@ pg_popcount32_choose(uint32 word)
{ {
if (pg_popcount_available()) if (pg_popcount_available())
{ {
pg_popcount32 = pg_popcount32_asm; pg_popcount32 = pg_popcount32_fast;
pg_popcount64 = pg_popcount64_asm; pg_popcount64 = pg_popcount64_fast;
} }
else else
{ {
@ -182,8 +192,8 @@ pg_popcount64_choose(uint64 word)
{ {
if (pg_popcount_available()) if (pg_popcount_available())
{ {
pg_popcount32 = pg_popcount32_asm; pg_popcount32 = pg_popcount32_fast;
pg_popcount64 = pg_popcount64_asm; pg_popcount64 = pg_popcount64_fast;
} }
else else
{ {
@ -195,32 +205,40 @@ pg_popcount64_choose(uint64 word)
} }
/* /*
* pg_popcount32_asm * pg_popcount32_fast
* Return the number of 1 bits set in word * Return the number of 1 bits set in word
*/ */
static int static int
pg_popcount32_asm(uint32 word) pg_popcount32_fast(uint32 word)
{ {
#ifdef _MSC_VER
return __popcnt(word);
#else
uint32 res; uint32 res;
__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc"); __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
return (int) res; return (int) res;
#endif
} }
/* /*
* pg_popcount64_asm * pg_popcount64_fast
* Return the number of 1 bits set in word * Return the number of 1 bits set in word
*/ */
static int static int
pg_popcount64_asm(uint64 word) pg_popcount64_fast(uint64 word)
{ {
#ifdef _MSC_VER
return __popcnt64(word);
#else
uint64 res; uint64 res;
__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc"); __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
return (int) res; return (int) res;
#endif
} }
#endif /* USE_POPCNT_ASM */ #endif /* TRY_POPCNT_FAST */
/* /*