add cycles per byte to gcc + x86_64 benchmarks

This commit is contained in:
toddouska 2015-01-26 16:33:30 -08:00
parent 466d8a970c
commit a682d53f67

View File

@ -70,6 +70,20 @@
#define fopen wolfSSL_fopen #define fopen wolfSSL_fopen
#endif #endif
#if defined(__GNUC__) && defined(__x86_64__) && !defined(NO_ASM)
    /* Cycle counting is only enabled for GNU-compatible compilers targeting
     * x86_64 with inline assembly allowed. */
    #define HAVE_GET_CYCLES
    /* Defined later in this file; (void) makes this a real prototype. */
    static INLINE word64 get_intel_cycles(void);
    /* Holds the starting counter value between BEGIN_INTEL_CYCLES and
     * END_INTEL_CYCLES, and the elapsed cycle count afterwards. */
    static word64 total_cycles;
    /* These macros carry their own trailing ';' because the call sites use
     * them as bare statements without one. */
    #define BEGIN_INTEL_CYCLES total_cycles = get_intel_cycles();
    #define END_INTEL_CYCLES   total_cycles = get_intel_cycles() - total_cycles;
    /* Expects numBlocks and plain to be visible at the expansion site.
     * The division is done in double so the 64-bit count is not rounded to
     * float precision before it is printed. */
    #define SHOW_INTEL_CYCLES  printf(" Cycles per byte = %5.2f", \
                               (double)total_cycles / (numBlocks*sizeof(plain)));
#else
    /* No cycle counter available: the hooks expand to nothing. */
    #define BEGIN_INTEL_CYCLES
    #define END_INTEL_CYCLES
    #define SHOW_INTEL_CYCLES
#endif
#if defined(USE_CERT_BUFFERS_1024) || defined(USE_CERT_BUFFERS_2048) #if defined(USE_CERT_BUFFERS_1024) || defined(USE_CERT_BUFFERS_2048)
/* include test cert and key buffers for use with NO_FILESYSTEM */ /* include test cert and key buffers for use with NO_FILESYSTEM */
#if defined(WOLFSSL_MDK_ARM) #if defined(WOLFSSL_MDK_ARM)
@ -282,7 +296,7 @@ enum BenchmarkBounds {
static const char blockType[] = "kB"; /* used in printf output */ static const char blockType[] = "kB"; /* used in printf output */
#else #else
enum BenchmarkBounds { enum BenchmarkBounds {
numBlocks = 5, /* how many megs to test (en/de)cryption */ numBlocks = 50, /* how many megs to test (en/de)cryption */
ntimes = 100, ntimes = 100,
genTimes = 100, genTimes = 100,
agreeTimes = 100 agreeTimes = 100
@ -344,10 +358,12 @@ void bench_aes(int show)
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_AesCbcEncrypt(&enc, plain, cipher, sizeof(plain)); wc_AesCbcEncrypt(&enc, plain, cipher, sizeof(plain));
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
@ -356,9 +372,12 @@ void bench_aes(int show)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
if (show) if (show) {
printf("AES %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("AES %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
}
#ifdef HAVE_CAVIUM #ifdef HAVE_CAVIUM
wc_AesFreeCavium(&enc); wc_AesFreeCavium(&enc);
#endif #endif
@ -381,11 +400,13 @@ void bench_aesgcm(void)
wc_AesGcmSetKey(&enc, key, 16); wc_AesGcmSetKey(&enc, key, 16);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_AesGcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12, wc_AesGcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
tag, 16, additional, 13); tag, 16, additional, 13);
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
@ -394,8 +415,10 @@ void bench_aesgcm(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("AES-GCM %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("AES-GCM %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -408,10 +431,12 @@ void bench_aesctr(void)
wc_AesSetKeyDirect(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION); wc_AesSetKeyDirect(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_AesCtrEncrypt(&enc, plain, cipher, sizeof(plain)); wc_AesCtrEncrypt(&enc, plain, cipher, sizeof(plain));
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
@ -420,8 +445,10 @@ void bench_aesctr(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("AES-CTR %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("AES-CTR %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -436,11 +463,13 @@ void bench_aesccm(void)
wc_AesCcmSetKey(&enc, key, 16); wc_AesCcmSetKey(&enc, key, 16);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_AesCcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12, wc_AesCcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
tag, 16, additional, 13); tag, 16, additional, 13);
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
@ -449,8 +478,10 @@ void bench_aesccm(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("AES-CCM %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("AES-CCM %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -471,11 +502,13 @@ void bench_poly1305()
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_Poly1305Update(&enc, plain, sizeof(plain)); wc_Poly1305Update(&enc, plain, sizeof(plain));
wc_Poly1305Final(&enc, mac); wc_Poly1305Final(&enc, mac);
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
@ -484,8 +517,10 @@ void bench_poly1305()
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("POLY1305 %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("POLY1305 %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif /* HAVE_POLY1305 */ #endif /* HAVE_POLY1305 */
@ -503,10 +538,12 @@ void bench_camellia(void)
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_CamelliaCbcEncrypt(&cam, plain, cipher, sizeof(plain)); wc_CamelliaCbcEncrypt(&cam, plain, cipher, sizeof(plain));
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
@ -515,8 +552,10 @@ void bench_camellia(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("Camellia %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("Camellia %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -538,10 +577,12 @@ void bench_des(void)
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_Des3_CbcEncrypt(&enc, plain, cipher, sizeof(plain)); wc_Des3_CbcEncrypt(&enc, plain, cipher, sizeof(plain));
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
@ -550,8 +591,10 @@ void bench_des(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("3DES %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("3DES %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
#ifdef HAVE_CAVIUM #ifdef HAVE_CAVIUM
wc_Des3_FreeCavium(&enc); wc_Des3_FreeCavium(&enc);
#endif #endif
@ -573,10 +616,12 @@ void bench_arc4(void)
wc_Arc4SetKey(&enc, key, 16); wc_Arc4SetKey(&enc, key, 16);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_Arc4Process(&enc, cipher, plain, sizeof(plain)); wc_Arc4Process(&enc, cipher, plain, sizeof(plain));
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -584,8 +629,10 @@ void bench_arc4(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("ARC4 %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("ARC4 %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
#ifdef HAVE_CAVIUM #ifdef HAVE_CAVIUM
wc_Arc4FreeCavium(&enc); wc_Arc4FreeCavium(&enc);
#endif #endif
@ -602,10 +649,12 @@ void bench_hc128(void)
wc_Hc128_SetKey(&enc, key, iv); wc_Hc128_SetKey(&enc, key, iv);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_Hc128_Process(&enc, cipher, plain, sizeof(plain)); wc_Hc128_Process(&enc, cipher, plain, sizeof(plain));
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -613,8 +662,10 @@ void bench_hc128(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("HC128 %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("HC128 %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif /* HAVE_HC128 */ #endif /* HAVE_HC128 */
@ -628,10 +679,12 @@ void bench_rabbit(void)
wc_RabbitSetKey(&enc, key, iv); wc_RabbitSetKey(&enc, key, iv);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_RabbitProcess(&enc, cipher, plain, sizeof(plain)); wc_RabbitProcess(&enc, cipher, plain, sizeof(plain));
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -639,8 +692,10 @@ void bench_rabbit(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("RABBIT %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("RABBIT %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif /* NO_RABBIT */ #endif /* NO_RABBIT */
@ -654,11 +709,14 @@ void bench_chacha(void)
wc_Chacha_SetKey(&enc, key, 16); wc_Chacha_SetKey(&enc, key, 16);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for (i = 0; i < numBlocks; i++) { for (i = 0; i < numBlocks; i++) {
wc_Chacha_SetIV(&enc, iv, 0); wc_Chacha_SetIV(&enc, iv, 0);
wc_Chacha_Process(&enc, cipher, plain, sizeof(plain)); wc_Chacha_Process(&enc, cipher, plain, sizeof(plain));
} }
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -666,7 +724,9 @@ void bench_chacha(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("CHACHA %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, blockType, total, persec); printf("CHACHA %d %s took %5.3f seconds, %7.3f MB/s", numBlocks, blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif /* HAVE_CHACHA*/ #endif /* HAVE_CHACHA*/
@ -682,12 +742,14 @@ void bench_md5(void)
wc_InitMd5(&hash); wc_InitMd5(&hash);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_Md5Update(&hash, plain, sizeof(plain)); wc_Md5Update(&hash, plain, sizeof(plain));
wc_Md5Final(&hash, digest); wc_Md5Final(&hash, digest);
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -695,8 +757,10 @@ void bench_md5(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("MD5 %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("MD5 %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif /* NO_MD5 */ #endif /* NO_MD5 */
@ -715,12 +779,14 @@ void bench_sha(void)
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_ShaUpdate(&hash, plain, sizeof(plain)); wc_ShaUpdate(&hash, plain, sizeof(plain));
wc_ShaFinal(&hash, digest); wc_ShaFinal(&hash, digest);
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -728,8 +794,10 @@ void bench_sha(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("SHA %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("SHA %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif /* NO_SHA */ #endif /* NO_SHA */
@ -741,14 +809,15 @@ void bench_sha256(void)
byte digest[SHA256_DIGEST_SIZE]; byte digest[SHA256_DIGEST_SIZE];
double start, total, persec; double start, total, persec;
int i, ret; int i, ret;
ret = wc_InitSha256(&hash); ret = wc_InitSha256(&hash);
if (ret != 0) { if (ret != 0) {
printf("InitSha256 failed, ret = %d\n", ret); printf("InitSha256 failed, ret = %d\n", ret);
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) { for(i = 0; i < numBlocks; i++) {
ret = wc_Sha256Update(&hash, plain, sizeof(plain)); ret = wc_Sha256Update(&hash, plain, sizeof(plain));
if (ret != 0) { if (ret != 0) {
@ -756,13 +825,14 @@ void bench_sha256(void)
return; return;
} }
} }
ret = wc_Sha256Final(&hash, digest); ret = wc_Sha256Final(&hash, digest);
if (ret != 0) { if (ret != 0) {
printf("Sha256Final failed, ret = %d\n", ret); printf("Sha256Final failed, ret = %d\n", ret);
return; return;
} }
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -770,8 +840,10 @@ void bench_sha256(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("SHA-256 %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("SHA-256 %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -789,6 +861,7 @@ void bench_sha384(void)
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) { for(i = 0; i < numBlocks; i++) {
ret = wc_Sha384Update(&hash, plain, sizeof(plain)); ret = wc_Sha384Update(&hash, plain, sizeof(plain));
@ -804,6 +877,7 @@ void bench_sha384(void)
return; return;
} }
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -811,8 +885,10 @@ void bench_sha384(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("SHA-384 %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("SHA-384 %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -830,6 +906,7 @@ void bench_sha512(void)
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) { for(i = 0; i < numBlocks; i++) {
ret = wc_Sha512Update(&hash, plain, sizeof(plain)); ret = wc_Sha512Update(&hash, plain, sizeof(plain));
@ -845,6 +922,7 @@ void bench_sha512(void)
return; return;
} }
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -852,8 +930,10 @@ void bench_sha512(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("SHA-512 %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("SHA-512 %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -867,12 +947,14 @@ void bench_ripemd(void)
wc_InitRipeMd(&hash); wc_InitRipeMd(&hash);
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) for(i = 0; i < numBlocks; i++)
wc_RipeMdUpdate(&hash, plain, sizeof(plain)); wc_RipeMdUpdate(&hash, plain, sizeof(plain));
wc_RipeMdFinal(&hash, digest); wc_RipeMdFinal(&hash, digest);
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -880,8 +962,10 @@ void bench_ripemd(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("RIPEMD %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("RIPEMD %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -900,6 +984,7 @@ void bench_blake2(void)
return; return;
} }
start = current_time(1); start = current_time(1);
BEGIN_INTEL_CYCLES
for(i = 0; i < numBlocks; i++) { for(i = 0; i < numBlocks; i++) {
ret = wc_Blake2bUpdate(&b2b, plain, sizeof(plain)); ret = wc_Blake2bUpdate(&b2b, plain, sizeof(plain));
@ -915,6 +1000,7 @@ void bench_blake2(void)
return; return;
} }
END_INTEL_CYCLES
total = current_time(0) - start; total = current_time(0) - start;
persec = 1 / total * numBlocks; persec = 1 / total * numBlocks;
#ifdef BENCH_EMBEDDED #ifdef BENCH_EMBEDDED
@ -922,8 +1008,10 @@ void bench_blake2(void)
persec = persec / 1024; persec = persec / 1024;
#endif #endif
printf("BLAKE2b %d %s took %5.3f seconds, %7.3f MB/s\n", numBlocks, printf("BLAKE2b %d %s took %5.3f seconds, %7.3f MB/s", numBlocks,
blockType, total, persec); blockType, total, persec);
SHOW_INTEL_CYCLES
printf("\n");
} }
#endif #endif
@ -1591,3 +1679,19 @@ void bench_eccKeyAgree(void)
} }
#endif /* _WIN32 */ #endif /* _WIN32 */
#ifdef HAVE_GET_CYCLES
/* Read the processor time-stamp counter.
 *
 * CPUID (leaf 0) is executed immediately before RDTSC; CPUID is documented
 * by Intel as a serializing instruction, so earlier instructions retire
 * before the counter is sampled. The "memory" clobber additionally stops the
 * compiler from moving memory accesses across the measurement point.
 *
 * Returns the 64-bit counter value assembled from EDX (high half) and
 * EAX (low half).
 */
static INLINE word64 get_intel_cycles(void)
{
    unsigned int lo_c, hi_c;
    __asm__ __volatile__ (
        "cpuid\n\t"
        "rdtsc"
            : "=a"(lo_c), "=d"(hi_c)        /* out */
            : "a"(0)                        /* in */
            : "%ebx", "%ecx", "memory");    /* clobber */
    return ((word64)lo_c) | (((word64)hi_c) << 32);
}
#endif /* HAVE_GET_CYCLES */