runtime cpuflag for Intel MULX, cleaned in tfm.c

This commit is contained in:
Takashi Kojo 2015-04-06 10:34:07 +09:00
parent 3a372aebe3
commit d8a6d9cffc
2 changed files with 82 additions and 83 deletions

View File

@ -33,6 +33,84 @@
/******************************************************************/ /******************************************************************/
/* fp_montgomery_reduce.c asm or generic */ /* fp_montgomery_reduce.c asm or generic */
/* Runtime CPU feature detection for the Intel MULX code paths: on Intel CPUs,
 * query cpuid leaf 7 (sub-leaf 0) and cache the result in cpuid_flags.
 * Also, let's set up a macro for proper linkage w/o ABI conflicts
 */
#if defined(HAVE_INTEL_MULX)
#ifndef _MSC_VER
/* GCC/Clang: run CPUID with EAX=leaf and ECX=sub, storing the resulting
 * EAX, EBX, ECX, EDX into reg[0], reg[1], reg[2], reg[3]. */
#define cpuid(reg, leaf, sub)\
    __asm__ __volatile__ ("cpuid":\
    "=a" ((reg)[0]), "=b" ((reg)[1]), "=c" ((reg)[2]), "=d" ((reg)[3]) :\
    "a" (leaf), "c"(sub));
#define XASM_LINK(f) asm(f)
#else
#include <intrin.h>
/* MSVC: callers invoke cpuid(reg, leaf, sub), so the macro must accept the
 * same three arguments as the GCC variant. __cpuidex is used (rather than
 * __cpuid) because it lets the caller select the sub-leaf. */
#define cpuid(reg, leaf, sub) __cpuidex((int*)(reg), (int)(leaf), (int)(sub))
#define XASM_LINK(f)
#endif /* _MSC_VER */
/* Indices into the reg[] array filled by cpuid(): reg[0..3] = EAX..EDX. */
#define EAX 0
#define EBX 1
#define ECX 2
#define EDX 3

/* Feature bits stored in cpuid_flags. */
#define CPUID_AVX1   0x1
#define CPUID_AVX2   0x2
#define CPUID_RDRAND 0x4
#define CPUID_RDSEED 0x8

/* Nonzero when the corresponding feature bit is set in cpuid_flags. */
#define IS_INTEL_AVX1   (cpuid_flags & CPUID_AVX1)
#define IS_INTEL_AVX2   (cpuid_flags & CPUID_AVX2)
#define IS_INTEL_RDRAND (cpuid_flags & CPUID_RDRAND)
#define IS_INTEL_RDSEED (cpuid_flags & CPUID_RDSEED)

/* Expands to nothing here. */
#define SET_FLAGS

static word32 cpuid_check = 0;  /* set to 1 once set_cpuid_flags() has run */
static word32 cpuid_flags = 0;  /* OR of the CPUID_* bits detected so far */
/* Read a single CPUID feature bit, on "GenuineIntel" processors only.
 *
 * leaf  CPUID leaf (value loaded into EAX)
 * sub   CPUID sub-leaf (value loaded into ECX)
 * num   which output register to inspect: EAX, EBX, ECX or EDX (0..3)
 * bit   bit position inside that register (0..31)
 *
 * Returns 1 if the bit is set, 0 if it is clear, the CPU is not Intel,
 * the leaf is not supported, or num/bit are out of range.
 */
static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
    unsigned int reg[4] = { 0, 0, 0, 0 };
    unsigned int max_basic_leaf;

    /* reg[] holds four 32-bit registers: refuse selectors that would index
     * past the array or shift by the full width of the type. */
    if (num > EDX || bit > 31) {
        return 0;
    }

    /* Leaf 0: EAX = highest basic leaf, EBX:EDX:ECX = vendor string. */
    cpuid(reg, 0, 0);
    max_basic_leaf = reg[EAX];

    if (memcmp(&reg[EBX], "Genu", 4) != 0 ||
        memcmp(&reg[EDX], "ineI", 4) != 0 ||
        memcmp(&reg[ECX], "ntel", 4) != 0) {
        return 0;
    }

    /* Querying a basic leaf above the supported maximum returns the data of
     * the highest basic leaf instead, which could yield a false positive.
     * Only basic-range leaf numbers are compared; higher ranges
     * (hypervisor/extended) pass through unchanged. */
    if (leaf < 0x40000000U && leaf > max_basic_leaf) {
        return 0;
    }

    cpuid(reg, leaf, sub);
    return (reg[num] >> bit) & 0x1;
}
/* One-time CPU feature probe: fills cpuid_flags and marks cpuid_check.
 *
 * Returns 0 when the probe was performed by this call, 1 when it had
 * already been done earlier.
 *
 * NOTE(review): the bit tested (leaf 7, EBX bit 5) is recorded as AVX2 and
 * is what gates the MULX code paths; MULX itself is presumably reported by a
 * different bit -- confirm this proxy is intended.
 */
INLINE static int set_cpuid_flags(void) {
    if (cpuid_check != 0) {
        return 1;   /* already probed */
    }

    if (cpuid_flag(7, 0, EBX, 5)) {
        cpuid_flags |= CPUID_AVX2;
    }
    cpuid_check = 1;

    return 0;
}
#define RETURN return
/* Dispatch to the MULX implementation when the CPU supports it.
 *
 * func  call expression for the accelerated routine
 * ret   statement executed after func (callers pass `return`); it must not
 *       be `break` or `continue`, since those would bind to the wrapper loop
 *
 * Probes the CPU on first use, then runs `func; ret;` if the feature flag is
 * set. Wrapped in do { } while (0) so the macro behaves as one statement,
 * e.g. when used as the body of an unbraced if/else. Invoke it with a
 * trailing semicolon.
 */
#define IF_HAVE_INTEL_MULX(func, ret) \
    do { \
        if (cpuid_check == 0) { set_cpuid_flags(); } \
        if (IS_INTEL_AVX2) { func; ret; } \
    } while (0)
#else
/* HAVE_INTEL_MULX is not defined: the dispatch macro expands to nothing. */
#define IF_HAVE_INTEL_MULX(func, ret)
#endif
#if defined(TFM_X86) && !defined(TFM_SSE2) #if defined(TFM_X86) && !defined(TFM_SSE2)
/* x86-32 code */ /* x86-32 code */

View File

@ -403,72 +403,6 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c)
/* generic PxQ multiplier */ /* generic PxQ multiplier */
#if defined(HAVE_INTEL_MULX) #if defined(HAVE_INTEL_MULX)
/* Runtime CPU feature detection for the Intel MULX code paths: on Intel CPUs,
 * query cpuid leaf 7 (sub-leaf 0) and cache the result in cpuid_flags.
 * Also, let's set up a macro for proper linkage w/o ABI conflicts
 */
#ifndef _MSC_VER
/* GCC/Clang: run CPUID with EAX=leaf and ECX=sub, storing the resulting
 * EAX, EBX, ECX, EDX into reg[0], reg[1], reg[2], reg[3]. */
#define cpuid(reg, leaf, sub)\
    __asm__ __volatile__ ("cpuid":\
    "=a" ((reg)[0]), "=b" ((reg)[1]), "=c" ((reg)[2]), "=d" ((reg)[3]) :\
    "a" (leaf), "c"(sub));
#define XASM_LINK(f) asm(f)
#else
#include <intrin.h>
/* MSVC: callers invoke cpuid(reg, leaf, sub), so the macro must accept the
 * same three arguments as the GCC variant. __cpuidex is used (rather than
 * __cpuid) because it lets the caller select the sub-leaf. */
#define cpuid(reg, leaf, sub) __cpuidex((int*)(reg), (int)(leaf), (int)(sub))
#define XASM_LINK(f)
#endif /* _MSC_VER */
/* Indices into the reg[] array filled by cpuid(): reg[0..3] = EAX..EDX. */
#define EAX 0
#define EBX 1
#define ECX 2
#define EDX 3

/* Feature bits stored in cpuid_flags. */
#define CPUID_AVX1   0x1
#define CPUID_AVX2   0x2
#define CPUID_RDRAND 0x4
#define CPUID_RDSEED 0x8

/* Nonzero when the corresponding feature bit is set in cpuid_flags. */
#define IS_INTEL_AVX1   (cpuid_flags & CPUID_AVX1)
#define IS_INTEL_AVX2   (cpuid_flags & CPUID_AVX2)
#define IS_INTEL_RDRAND (cpuid_flags & CPUID_RDRAND)
#define IS_INTEL_RDSEED (cpuid_flags & CPUID_RDSEED)

/* Runs the one-time CPU probe if it has not been done yet. */
#define SET_FLAGS if (cpuid_check == 0) set_cpuid_flags()

static word32 cpuid_check = 0;  /* set to 1 once set_cpuid_flags() has run */
static word32 cpuid_flags = 0;  /* OR of the CPUID_* bits detected so far */
/* Read a single CPUID feature bit, on "GenuineIntel" processors only.
 *
 * leaf  CPUID leaf (value loaded into EAX)
 * sub   CPUID sub-leaf (value loaded into ECX)
 * num   which output register to inspect: EAX, EBX, ECX or EDX (0..3)
 * bit   bit position inside that register (0..31)
 *
 * Returns 1 if the bit is set, 0 if it is clear, the CPU is not Intel,
 * the leaf is not supported, or num/bit are out of range.
 */
static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
    unsigned int reg[4] = { 0, 0, 0, 0 };
    unsigned int max_basic_leaf;

    /* reg[] holds four 32-bit registers: refuse selectors that would index
     * past the array or shift by the full width of the type. */
    if (num > EDX || bit > 31) {
        return 0;
    }

    /* Leaf 0: EAX = highest basic leaf, EBX:EDX:ECX = vendor string. */
    cpuid(reg, 0, 0);
    max_basic_leaf = reg[EAX];

    if (memcmp(&reg[EBX], "Genu", 4) != 0 ||
        memcmp(&reg[EDX], "ineI", 4) != 0 ||
        memcmp(&reg[ECX], "ntel", 4) != 0) {
        return 0;
    }

    /* Querying a basic leaf above the supported maximum returns the data of
     * the highest basic leaf instead, which could yield a false positive.
     * Only basic-range leaf numbers are compared; higher ranges
     * (hypervisor/extended) pass through unchanged. */
    if (leaf < 0x40000000U && leaf > max_basic_leaf) {
        return 0;
    }

    cpuid(reg, leaf, sub);
    return (reg[num] >> bit) & 0x1;
}
/* One-time CPU feature probe: fills cpuid_flags and marks cpuid_check.
 *
 * Returns 0 when the probe was performed by this call, 1 when it had
 * already been done earlier.
 *
 * NOTE(review): the bit tested (leaf 7, EBX bit 5) is recorded as AVX2 and
 * is what gates the MULX code paths; MULX itself is presumably reported by a
 * different bit -- confirm this proxy is intended.
 */
INLINE static int set_cpuid_flags(void) {
    if (cpuid_check != 0) {
        return 1;   /* already probed */
    }

    if (cpuid_flag(7, 0, EBX, 5)) {
        cpuid_flags |= CPUID_AVX2;
    }
    cpuid_check = 1;

    return 0;
}
INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C) INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
{ {
@ -496,7 +430,6 @@ INLINE static void fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
fp_clamp(dst); fp_clamp(dst);
fp_copy(dst, C); fp_copy(dst, C);
} }
#endif #endif
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
@ -505,13 +438,7 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
fp_digit c0, c1, c2, *tmpx, *tmpy; fp_digit c0, c1, c2, *tmpx, *tmpy;
fp_int tmp, *dst; fp_int tmp, *dst;
#if defined(HAVE_INTEL_MULX) IF_HAVE_INTEL_MULX(fp_mul_comba_mulx(A, B, C), return) ;
SET_FLAGS ;
if(IS_INTEL_AVX2) {
fp_mul_comba_mulx(A, B, C) ;
return ;
}
#endif
COMBA_START; COMBA_START;
COMBA_CLEAR; COMBA_CLEAR;
@ -1725,15 +1652,9 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
{ {
fp_digit c[FP_SIZE], *_c, *tmpm, mu = 0; fp_digit c[FP_SIZE], *_c, *tmpm, mu = 0;
int oldused, x, y, pa; int oldused, x, y, pa;
#ifdef HAVE_INTEL_MULX IF_HAVE_INTEL_MULX(fp_montgomery_reduce_mulx(a, m, mp), return) ;
SET_FLAGS ;
if(IS_INTEL_AVX2) {
fp_montgomery_reduce_mulx(a, m, mp) ;
return ;
}
#endif
/* bail if too large */ /* bail if too large */
if (m->used > (FP_SIZE/2)) { if (m->used > (FP_SIZE/2)) {
(void)mu; /* shut up compiler */ (void)mu; /* shut up compiler */