From bf7f7f0f603d53ebc47cb61c3392a07af4046e66 Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Wed, 27 Feb 2013 10:17:24 +0100 Subject: [PATCH 01/13] winpr/sysinfo: added IsProcessorFeaturePresent and IsProcessorFeaturePresentEx These functions can be used to check if an processor feature is supported. IsProcessorFeaturePresentEx is a extended version which is not available in the windows API and allows to query additional features. Currently it works on the following platforms: - i386/amd64 when compiling with gcc - ARM on linux --- libfreerdp/primitives/primitives.c | 6 +- winpr/include/winpr/sysinfo.h | 58 +++- winpr/libwinpr/sysinfo/sysinfo.c | 311 +++++++++++++++++- winpr/libwinpr/sysinfo/test/CMakeLists.txt | 4 +- winpr/libwinpr/sysinfo/test/TestCPUFeatures.c | 45 +++ 5 files changed, 415 insertions(+), 9 deletions(-) create mode 100644 winpr/libwinpr/sysinfo/test/TestCPUFeatures.c diff --git a/libfreerdp/primitives/primitives.c b/libfreerdp/primitives/primitives.c index 99c71e37f..245a6ce3b 100644 --- a/libfreerdp/primitives/primitives.c +++ b/libfreerdp/primitives/primitives.c @@ -247,7 +247,7 @@ static const flagpair_t x86_flags[] = { { PRIM_X86_MMX_AVAILABLE, "MMX" }, { PRIM_X86_3DNOW_AVAILABLE, "3DNow" }, - { PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" }, + { PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" }, { PRIM_X86_SSE_AVAILABLE, "SSE" }, { PRIM_X86_SSE2_AVAILABLE, "SSE2" }, { PRIM_X86_SSE3_AVAILABLE, "SSE3" }, @@ -256,7 +256,7 @@ static const flagpair_t x86_flags[] = { PRIM_X86_SSE42_AVAILABLE, "SSE4.2" }, { PRIM_X86_AVX_AVAILABLE, "AVX" }, { PRIM_X86_FMA_AVAILABLE, "FMA" }, - { PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" }, + { PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" }, { PRIM_X86_AVX2_AVAILABLE, "AVX2" }, }; @@ -268,7 +268,7 @@ static const flagpair_t arm_flags[] = { PRIM_ARM_VFP4_AVAILABLE, "VFP4" }, { PRIM_ARM_FPA_AVAILABLE, "FPA" }, { PRIM_ARM_FPE_AVAILABLE, "FPE" }, - { PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" }, + { PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" }, { PRIM_ARM_NEON_AVAILABLE, "NEON" }, }; diff --git a/winpr/include/winpr/sysinfo.h b/winpr/include/winpr/sysinfo.h index 2b37ab89d..001232ab0 100644 --- a/winpr/include/winpr/sysinfo.h +++ b/winpr/include/winpr/sysinfo.h @@ -69,14 +69,13 @@ #define PF_FLOATING_POINT_PRECISION_ERRATA 0 #define PF_FLOATING_POINT_EMULATED 1 #define PF_COMPARE_EXCHANGE_DOUBLE 2 -#define PF_MMX_INSTRUCTIONS_AVAILABLE 3 +#define PF_MMX_INSTRUCTIONS_AVAILABLE 3 #define PF_PPC_MOVEMEM_64BIT_OK 4 -#define PF_ALPHA_BYTE_INSTRUCTIONS 5 -#define PF_XMMI_INSTRUCTIONS_AVAILABLE 6 +#define PF_XMMI_INSTRUCTIONS_AVAILABLE 6 //sse #define PF_3DNOW_INSTRUCTIONS_AVAILABLE 7 #define PF_RDTSC_INSTRUCTION_AVAILABLE 8 #define PF_PAE_ENABLED 9 -#define PF_XMMI64_INSTRUCTIONS_AVAILABLE 10 +#define PF_XMMI64_INSTRUCTIONS_AVAILABLE 10 //sse2 #define PF_SSE_DAZ_MODE_AVAILABLE 11 #define PF_NX_ENABLED 12 #define PF_SSE3_INSTRUCTIONS_AVAILABLE 13 @@ -95,6 +94,54 @@ #define PF_ARM_EXTERNAL_CACHE_AVAILABLE 26 #define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE 27 +#define PF_ARM_V4 0x80000001 +#define PF_ARM_V5 0x80000002 +#define PF_ARM_V6 0x80000003 +#define PF_ARM_V7 0x80000004 +#define PF_ARM_THUMB 0x80000005 +#define PF_ARM_JAZELLE 0x80000006 +#define PF_ARM_DSP 0x80000007 +#define PF_ARM_MOVE_CP 0x80000008 +#define PF_ARM_VFP10 0x80000009 +#define PF_ARM_MPU 0x8000000A +#define PF_ARM_WRITE_BUFFER 0x8000000B +#define PF_ARM_MBX 0x8000000C +#define PF_ARM_L2CACHE 0x8000000D +#define PF_ARM_PHYSICALLY_TAGGED_CACHE 0x8000000E +#define PF_ARM_VFP_SINGLE_PRECISION 0x8000000F +#define PF_ARM_VFP_DOUBLE_PRECISION 0x80000010 +#define PF_ARM_ITCM 0x80000011 +#define PF_ARM_DTCM 0x80000012 +#define PF_ARM_UNIFIED_CACHE 0x80000013 +#define PF_ARM_WRITE_BACK_CACHE 0x80000014 +#define PF_ARM_CACHE_CAN_BE_LOCKED_DOWN 0x80000015 +#define PF_ARM_L2CACHE_MEMORY_MAPPED 0x80000016 +#define PF_ARM_L2CACHE_COPROC 0x80000017 +#define PF_ARM_THUMB2 0x80000018 +#define PF_ARM_T2EE 0x80000019 +#define PF_ARM_VFP3 0x8000001A +#define PF_ARM_NEON 0x8000001B +#define PF_ARM_UNALIGNED_ACCESS 0x8000001C + +#define PF_ARM_INTEL_XSCALE 0x80010001 +#define PF_ARM_INTEL_PMU 0x80010002 +#define PF_ARM_INTEL_WMMX 0x80010003 + +// extended flags +#define PF_EX_3DNOW_PREFETCH 1 +#define PF_EX_SSSE3 2 +#define PF_EX_SSE41 3 +#define PF_EX_SSE42 4 +#define PF_EX_AVX 5 +#define PF_EX_FMA 6 +#define PF_EX_AVX_AES 7 +#define PF_EX_AVX2 8 +#define PF_EX_ARM_VFP1 9 +#define PF_EX_ARM_VFP3D16 10 +#define PF_EX_ARM_VFP4 11 +#define PF_EX_ARM_IDIVA 12 +#define PF_EX_ARM_IDIVT 13 + typedef struct _SYSTEM_INFO { union @@ -243,7 +290,10 @@ WINPR_API VOID GetSystemTimeAsFileTime(LPFILETIME lpSystemTimeAsFileTime); WINPR_API DWORD GetTickCount(void); +WINPR_API BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature); #endif +WINPR_API BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature); + #endif /* WINPR_SYSINFO_H */ diff --git a/winpr/libwinpr/sysinfo/sysinfo.c b/winpr/libwinpr/sysinfo/sysinfo.c index 6f574fe7e..0dddf6aca 100644 --- a/winpr/libwinpr/sysinfo/sysinfo.c +++ b/winpr/libwinpr/sysinfo/sysinfo.c @@ -3,6 +3,7 @@ * System Information * * Copyright 2012 Marc-Andre Moreau + * Copyright 2013 Bernhard Miklautz * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +23,13 @@ #endif #include +#include + +#if defined(__linux__) && defined(__GNUC__) +#include +#include +#include +#endif /** * api-ms-win-core-sysinfo-l1-1-1.dll: @@ -109,7 +117,7 @@ DWORD GetNumberOfProcessors() { DWORD numCPUs = 1; - /* TODO: Android and iOS */ + /* TODO: iOS */ #if defined(__linux__) || defined(__sun) || defined(_AIX) numCPUs = (DWORD) sysconf(_SC_NPROCESSORS_ONLN); @@ -331,4 +339,305 @@ DWORD GetTickCount(void) return ticks; } +/* If x86 and gcc*/ +#ifdef _M_IX86_AMD64 +#ifdef __GNUC__ + +#ifdef __AVX__ +#define xgetbv(_func_, _lo_, _hi_) \ + __asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_)) #endif + +#define D_BIT_MMX (1<<23) +#define D_BIT_SSE (1<<25) +#define D_BIT_SSE2 (1<<26) +#define D_BIT_3DN (1<<30) +#define C_BIT_SSE3 (1<<0) +#define C_BIT_3DNP (1<<8) +#define C_BIT_SSSE3 (1<<9) +#define C_BIT_SSE41 (1<<19) +#define C_BIT_SSE42 (1<<20) +#define C_BIT_XGETBV (1<<27) +#define C_BIT_AVX (1<<28) +#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX) +#define E_BIT_XMM (1<<1) +#define E_BIT_YMM (1<<2) +#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM) +#define C_BIT_FMA (1<<11) +#define C_BIT_AVX_AES (1<<24) + +static void cpuid( + unsigned info, + unsigned *eax, + unsigned *ebx, + unsigned *ecx, + unsigned *edx) +{ + *eax = *ebx = *ecx = *edx = 0; + + __asm volatile + ( + /* The EBX (or RBX register on x86_64) is used for the PIC base address + * and must not be corrupted by our inline assembly. + */ +#ifdef _M_IX86 + "mov %%ebx, %%esi;" + "cpuid;" + "xchg %%ebx, %%esi;" +#else + "mov %%rbx, %%rsi;" + "cpuid;" + "xchg %%rbx, %%rsi;" +#endif + : "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx) + : "0" (info) + ); +} +#endif // __GNUC__ +#elif defined(_M_ARM) +#if defined(__linux__) +// HWCAP flags from linux kernel - uapi/asm/hwcap.h +#define HWCAP_SWP (1 << 0) +#define HWCAP_HALF (1 << 1) +#define HWCAP_THUMB (1 << 2) +#define HWCAP_26BIT (1 << 3) /* Play it safe */ +#define HWCAP_FAST_MULT (1 << 4) +#define HWCAP_FPA (1 << 5) +#define HWCAP_VFP (1 << 6) +#define HWCAP_EDSP (1 << 7) +#define HWCAP_JAVA (1 << 8) +#define HWCAP_IWMMXT (1 << 9) +#define HWCAP_CRUNCH (1 << 10) +#define HWCAP_THUMBEE (1 << 11) +#define HWCAP_NEON (1 << 12) +#define HWCAP_VFPv3 (1 << 13) +#define HWCAP_VFPv3D16 (1 << 14) /* also set for VFPv4-D16 */ +#define HWCAP_TLS (1 << 15) +#define HWCAP_VFPv4 (1 << 16) +#define HWCAP_IDIVA (1 << 17) +#define HWCAP_IDIVT (1 << 18) +#define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */ +#define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) + +// From linux kernel uapi/linux/auxvec.h +#define AT_HWCAP 16 + +static unsigned GetARMCPUCaps(void){ + unsigned caps = 0; + + int fd = open ("/proc/self/auxv", O_RDONLY); + + if (fd == -1) + return 0; + + static struct + { + unsigned a_type; /* Entry type */ + unsigned a_val; /* Integer value */ + } auxvec; + + while (1){ + int num; + num = read(fd, (char *)&auxvec, sizeof(auxvec)); + if (num < 1 || (auxvec.a_type == 0 && auxvec.a_val == 0)) + break; + if (auxvec.a_type == AT_HWCAP) + { + caps = auxvec.a_val; + } + } + close(fd); + return caps; +} + +#endif // defined(__linux__) +#endif // _M_IX86_AMD64 + +BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) +{ + BOOL ret = FALSE; +#ifdef _M_ARM +#ifdef __linux__ + unsigned caps; + caps = GetARMCPUCaps(); + + switch (ProcessorFeature) + { + case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: + case PF_ARM_NEON: + if (caps & HWCAP_NEON) + ret = TRUE; + break; + case PF_ARM_THUMB: + if (caps & HWCAP_THUMB) + ret = TRUE; + case PF_ARM_VFP_32_REGISTERS_AVAILABLE: + if (caps & HWCAP_VFPD32) + ret = TRUE; + case PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE: + if ((caps & HWCAP_IDIVA) || (caps & HWCAP_IDIVT)) + ret = TRUE; + case PF_ARM_VFP3: + if (caps & HWCAP_VFPv3) + ret = TRUE; + break; + case PF_ARM_JAZELLE: + if (caps & HWCAP_JAVA) + ret = TRUE; + break; + case PF_ARM_DSP: + if (caps & HWCAP_EDSP) + ret = TRUE; + break; + case PF_ARM_MPU: + if (caps & HWCAP_EDSP) + ret = TRUE; + break; + case PF_ARM_THUMB2: + if ((caps & HWCAP_IDIVT) || (caps & HWCAP_VFPv4)) + ret = TRUE; + break; + case PF_ARM_T2EE: + if (caps & HWCAP_THUMBEE) + ret = TRUE; + break; + case PF_ARM_INTEL_WMMX: + if (caps & HWCAP_IWMMXT) + ret = TRUE; + break; + default: + break; + } +#endif +#elif defined(_M_IX86_AMD64) +#ifdef __GNUC__ + unsigned a, b, c, d; + + cpuid(1, &a, &b, &c, &d); + + switch (ProcessorFeature) + { + case PF_MMX_INSTRUCTIONS_AVAILABLE: + if (d & D_BIT_MMX) + ret = TRUE; + break; + case PF_XMMI_INSTRUCTIONS_AVAILABLE: + if (d & D_BIT_SSE) + ret = TRUE; + break; + case PF_XMMI64_INSTRUCTIONS_AVAILABLE: + if (d & D_BIT_SSE2) + ret = TRUE; + break; + case PF_3DNOW_INSTRUCTIONS_AVAILABLE: + if (d & D_BIT_3DN) + ret = TRUE; + break; + case PF_SSE3_INSTRUCTIONS_AVAILABLE: + if (c & C_BIT_SSE3) + ret = TRUE; + break; + default: + break; + } + +#endif // __GNUC__ +#endif + return ret; +} +#endif _WIN32 + +BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature) +{ + BOOL ret = FALSE; +#ifdef _M_ARM +#ifdef __linux__ + unsigned caps; + caps = GetARMCPUCaps(); + + switch (ProcessorFeature) + { + case PF_EX_ARM_VFP1: + if (caps & HWCAP_VFP) + ret = TRUE; + break; + case PF_EX_ARM_VFP3D16: + if (caps & HWCAP_VFPv3D16) + ret = TRUE; + break; + case PF_EX_ARM_VFP4: + if (caps & HWCAP_VFPv4) + ret = TRUE; + break; + case PF_EX_ARM_IDIVA: + if (caps & HWCAP_IDIVA) + ret = TRUE; + break; + case PF_EX_ARM_IDIVT: + if (caps & HWCAP_IDIVT) + ret = TRUE; + break; + } +#endif // __linux__ +#elif defined(_M_IX86_AMD64) + unsigned a, b, c, d; + cpuid(1, &a, &b, &c, &d); + + switch (ProcessorFeature) + { + case PF_EX_3DNOW_PREFETCH: + if (c & C_BIT_3DNP) + ret = TRUE; + break; + case PF_EX_SSSE3: + if (c & C_BIT_SSSE3) + ret = TRUE; + break; + case PF_EX_SSE41: + if (c & C_BIT_SSE41) + ret = TRUE; + break; + case PF_EX_SSE42: + if (c & C_BIT_SSE42) + ret = TRUE; + break; +#ifdef __AVX__ + case PF_EX_AVX: + case PF_EX_FMA: + case PF_EX_AVX_AES: + { + if ((c & C_BITS_AVX) != C_BITS_AVX) + ret = FALSE; + + int e, f; + xgetbv(0, e, f); + + if ((e & E_BITS_AVX) == E_BITS_AVX) + { + switch (ProcessorFeature) + { + case: PF_EX_AVX: + ret = TRUE; + break; + case: PF_EX_FMA: + if (c & C_BIT_FMA) + ret = TRUE; + break; + case: PF_EX_AVX_AES: + if (c & C_BIT_AVX_AES) + ret = TRUE; + break; + { + ret = TRUE; + break; + } + } + } + break; +#endif //__AVX__ + default: + break; + } +#endif + return ret; +} diff --git a/winpr/libwinpr/sysinfo/test/CMakeLists.txt b/winpr/libwinpr/sysinfo/test/CMakeLists.txt index 79ba944c3..3eb8101ca 100644 --- a/winpr/libwinpr/sysinfo/test/CMakeLists.txt +++ b/winpr/libwinpr/sysinfo/test/CMakeLists.txt @@ -5,7 +5,9 @@ set(MODULE_PREFIX "TEST_SYSINFO") set(${MODULE_PREFIX}_DRIVER ${MODULE_NAME}.c) set(${MODULE_PREFIX}_TESTS - TestGetNativeSystemInfo.c) + TestGetNativeSystemInfo.c + TestCPUFeatures.c + ) create_test_sourcelist(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_DRIVER} diff --git a/winpr/libwinpr/sysinfo/test/TestCPUFeatures.c b/winpr/libwinpr/sysinfo/test/TestCPUFeatures.c new file mode 100644 index 000000000..e7c7e83ea --- /dev/null +++ b/winpr/libwinpr/sysinfo/test/TestCPUFeatures.c @@ -0,0 +1,45 @@ + +#include +#include +#include + +int TestCPUFeatures(int argc, char* argv[]) +{ + printf("Base CPU Flags:\n"); +#ifdef _M_IX86_AMD64 + printf("\tPF_MMX_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE) ? "yes" : "no"); + printf("\tPF_XMMI_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE) ? "yes" : "no"); + printf("\tPF_XMMI64_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) ? "yes" : "no"); + printf("\tPF_3DNOW_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE) ? "yes" : "no"); + printf("\tPF_SSE3_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE) ? "yes" : "no"); + printf("\n"); + printf("Extended CPU Flags (not found in windows API):\n"); + printf("\tPF_EX_3DNOW_PREFETCH: %s\n", IsProcessorFeaturePresentEx(PF_EX_3DNOW_PREFETCH) ? "yes" : "no"); + printf("\tPF_EX_SSSE3: %s\n", IsProcessorFeaturePresentEx(PF_EX_SSSE3) ? "yes" : "no"); + printf("\tPF_EX_SSE41: %s\n", IsProcessorFeaturePresentEx(PF_EX_SSE41) ? "yes" : "no"); + printf("\tPF_EX_SSE42: %s\n", IsProcessorFeaturePresentEx(PF_EX_SSE42) ? "yes" : "no"); + printf("\tPF_EX_AVX: %s\n", IsProcessorFeaturePresentEx(PF_EX_AVX) ? "yes" : "no"); + printf("\tPF_EX_FMA: %s\n", IsProcessorFeaturePresentEx(PF_EX_FMA) ? "yes" : "no"); + printf("\tPF_EX_AVX_AES: %s\n", IsProcessorFeaturePresentEx(PF_EX_AVX_AES) ? "yes" : "no"); +#elif defined(_M_ARM) + printf("\tPF_ARM_NEON_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? "yes" : "no"); + printf("\tPF_ARM_THUMB: %s\n", IsProcessorFeaturePresent(PF_ARM_THUMB) ? "yes" : "no"); + printf("\tPF_ARM_VFP_32_REGISTERS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) ? "yes" : "no"); + printf("\tPF_ARM_DIVIDE_INSTRUCTION_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE) ? "yes" : "no"); + printf("\tPF_ARM_VFP3: %s\n", IsProcessorFeaturePresent(PF_ARM_VFP3) ? "yes" : "no"); + printf("\tPF_ARM_THUMB: %s\n", IsProcessorFeaturePresent(PF_ARM_THUMB) ? "yes" : "no"); + printf("\tPF_ARM_JAZELLE: %s\n", IsProcessorFeaturePresent(PF_ARM_JAZELLE) ? "yes" : "no"); + printf("\tPF_ARM_DSP: %s\n", IsProcessorFeaturePresent(PF_ARM_DSP) ? "yes" : "no"); + printf("\tPF_ARM_THUMB2: %s\n", IsProcessorFeaturePresent(PF_ARM_THUMB2) ? "yes" : "no"); + printf("\tPF_ARM_T2EE: %s\n", IsProcessorFeaturePresent(PF_ARM_T2EE) ? "yes" : "no"); + printf("\tPF_ARM_INTEL_WMMX: %s\n", IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX) ? "yes" : "no"); + printf("Extended CPU Flags (not found in windows API):\n"); + printf("\tPF_EX_ARM_VFP1: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_VFP1) ? "yes" : "no"); + printf("\tPF_EX_ARM_VFP3D16: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_VFP3D16) ? "yes" : "no"); + printf("\tPF_EX_ARM_VFP4: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_VFP4) ? "yes" : "no"); + printf("\tPF_EX_ARM_IDIVA: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_IDIVA) ? "yes" : "no"); + printf("\tPF_EX_ARM_IDIVT: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_IDIVT) ? "yes" : "no"); +#endif + printf("\n"); + return 0; +} From 23a7ef6c475281eb78a0e1eebf309bede34c815d Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Wed, 27 Feb 2013 10:59:06 +0100 Subject: [PATCH 02/13] codec/rfx: use sysinfo to detect sse2/neon This also moves (sse2) detection code out of the client into the decoder. --- client/X11/xfreerdp.c | 52 ------------------------------------- include/freerdp/codec/rfx.h | 1 - libfreerdp/codec/rfx.c | 7 ----- libfreerdp/codec/rfx_neon.c | 36 +------------------------ libfreerdp/codec/rfx_sse2.c | 5 ++++ 5 files changed, 6 insertions(+), 95 deletions(-) diff --git a/client/X11/xfreerdp.c b/client/X11/xfreerdp.c index dc7aba819..acab01abc 100644 --- a/client/X11/xfreerdp.c +++ b/client/X11/xfreerdp.c @@ -762,46 +762,6 @@ BOOL xf_pre_connect(freerdp* instance) return TRUE; } -void cpuid(unsigned info, unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) -{ -#ifdef __GNUC__ -#if defined(__i386__) || defined(__x86_64__) - __asm volatile - ( - /* The EBX (or RBX register on x86_64) is used for the PIC base address - and must not be corrupted by our inline assembly. */ -#if defined(__i386__) - "mov %%ebx, %%esi;" - "cpuid;" - "xchg %%ebx, %%esi;" -#else - "mov %%rbx, %%rsi;" - "cpuid;" - "xchg %%rbx, %%rsi;" -#endif - : "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx) - : "0" (info) - ); -#endif -#endif -} - -UINT32 xf_detect_cpu() -{ - unsigned int eax, ebx, ecx, edx = 0; - UINT32 cpu_opt = 0; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (edx & (1<<26)) - { - DEBUG_MSG("SSE2 detected"); - cpu_opt |= CPU_SSE2; - } - - return cpu_opt; -} - /** * Callback given to freerdp_connect() to perform post-connection operations. * It will be called only if the connection was initialized properly, and will continue the initialization based on the @@ -809,9 +769,6 @@ UINT32 xf_detect_cpu() */ BOOL xf_post_connect(freerdp* instance) { -#ifdef WITH_SSE2 - UINT32 cpu; -#endif xfInfo* xfi; XGCValues gcv; rdpCache* cache; @@ -866,15 +823,6 @@ BOOL xf_post_connect(freerdp* instance) } } -#ifdef WITH_SSE2 - /* detect only if needed */ - cpu = xf_detect_cpu(); - if (rfx_context) - rfx_context_set_cpu_opt(rfx_context, cpu); - if (nsc_context) - nsc_context_set_cpu_opt(nsc_context, cpu); -#endif - xfi->width = instance->settings->DesktopWidth; xfi->height = instance->settings->DesktopHeight; diff --git a/include/freerdp/codec/rfx.h b/include/freerdp/codec/rfx.h index e2e9b6c85..2e99607b2 100644 --- a/include/freerdp/codec/rfx.h +++ b/include/freerdp/codec/rfx.h @@ -113,7 +113,6 @@ typedef struct _RFX_CONTEXT RFX_CONTEXT; FREERDP_API RFX_CONTEXT* rfx_context_new(void); FREERDP_API void rfx_context_free(RFX_CONTEXT* context); -FREERDP_API void rfx_context_set_cpu_opt(RFX_CONTEXT* context, UINT32 cpu_opt); FREERDP_API void rfx_context_set_pixel_format(RFX_CONTEXT* context, RDP_PIXEL_FORMAT pixel_format); FREERDP_API void rfx_context_reset(RFX_CONTEXT* context); diff --git a/libfreerdp/codec/rfx.c b/libfreerdp/codec/rfx.c index 3f30cbd22..f105e648b 100644 --- a/libfreerdp/codec/rfx.c +++ b/libfreerdp/codec/rfx.c @@ -238,13 +238,6 @@ RFX_CONTEXT* rfx_context_new(void) return context; } -void rfx_context_set_cpu_opt(RFX_CONTEXT* context, UINT32 cpu_opt) -{ - /* enable SIMD CPU acceleration if detected */ - if (cpu_opt & CPU_SSE2) - RFX_INIT_SIMD(context); -} - void rfx_context_free(RFX_CONTEXT* context) { free(context->quants); diff --git a/libfreerdp/codec/rfx_neon.c b/libfreerdp/codec/rfx_neon.c index 40683b91b..fec028ba9 100644 --- a/libfreerdp/codec/rfx_neon.c +++ b/libfreerdp/codec/rfx_neon.c @@ -252,43 +252,9 @@ void rfx_dwt_2d_decode_NEON(INT16 * buffer, INT16 * dwt_buffer) rfx_dwt_2d_decode_block_NEON(buffer, dwt_buffer, 32); } -int isNeonSupported() -{ -#if ANDROID - if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) - { - DEBUG_RFX("NEON optimization disabled - No ARM CPU found"); - return 0; - } - - UINT64 features = android_getCpuFeatures(); - - if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7)) - { - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - { - DEBUG_RFX("NEON optimization enabled!"); - return FALSE; - } - DEBUG_RFX("NEON optimization disabled - CPU not NEON capable"); - } - else - { - DEBUG_RFX("NEON optimization disabled - No ARMv7 CPU found"); - } - - return FALSE; -#elif defined(__APPLE) - /* assume NEON support on iOS devices */ - return TRUE; -#else - return FALSE; -#endif -} - void rfx_init_neon(RFX_CONTEXT * context) { - if (isNeonSupported()) + if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { DEBUG_RFX("Using NEON optimizations"); diff --git a/libfreerdp/codec/rfx_sse2.c b/libfreerdp/codec/rfx_sse2.c index ac7e1f21f..fccd1e866 100644 --- a/libfreerdp/codec/rfx_sse2.c +++ b/libfreerdp/codec/rfx_sse2.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -490,6 +491,10 @@ static void rfx_dwt_2d_encode_sse2(INT16* buffer, INT16* dwt_buffer) void rfx_init_sse2(RFX_CONTEXT* context) { + + if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + return; + DEBUG_RFX("Using SSE2 optimizations"); IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_sse2"); From 84a3dbc1158d8aac72af9b5fc3e268d69ceb269c Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Wed, 27 Feb 2013 15:58:06 +0100 Subject: [PATCH 03/13] primitives: make use of winprs processor feature detection - Removed the cpu flag detection - Fixed and updated the tests --- include/freerdp/primitives.h | 9 - libfreerdp/primitives/README.txt | 14 +- libfreerdp/primitives/prim_add.c | 3 +- libfreerdp/primitives/prim_add.h | 2 +- libfreerdp/primitives/prim_add_opt.c | 8 +- libfreerdp/primitives/prim_alphaComp.c | 4 +- libfreerdp/primitives/prim_alphaComp.h | 2 +- libfreerdp/primitives/prim_alphaComp_opt.c | 7 +- libfreerdp/primitives/prim_andor.c | 3 +- libfreerdp/primitives/prim_andor.h | 2 +- libfreerdp/primitives/prim_andor_opt.c | 7 +- libfreerdp/primitives/prim_colors.c | 4 +- libfreerdp/primitives/prim_colors.h | 2 +- libfreerdp/primitives/prim_colors_opt.c | 7 +- libfreerdp/primitives/prim_copy.c | 1 - libfreerdp/primitives/prim_internal.h | 18 -- libfreerdp/primitives/prim_set.c | 3 +- libfreerdp/primitives/prim_set.h | 2 +- libfreerdp/primitives/prim_set_opt.c | 5 +- libfreerdp/primitives/prim_shift.c | 3 +- libfreerdp/primitives/prim_shift.h | 2 +- libfreerdp/primitives/prim_shift_opt.c | 7 +- libfreerdp/primitives/prim_sign.c | 3 +- libfreerdp/primitives/prim_sign.h | 2 +- libfreerdp/primitives/prim_sign_opt.c | 7 +- libfreerdp/primitives/primitives.c | 276 +------------------- libfreerdp/primitives/test/CMakeLists.txt | 19 +- libfreerdp/primitives/test/prim_test.c | 88 ++++++- libfreerdp/primitives/test/prim_test.h | 67 ++--- libfreerdp/primitives/test/test_add.c | 7 +- libfreerdp/primitives/test/test_alphaComp.c | 9 +- libfreerdp/primitives/test/test_andor.c | 13 +- libfreerdp/primitives/test/test_colors.c | 13 +- libfreerdp/primitives/test/test_copy.c | 4 +- libfreerdp/primitives/test/test_set.c | 21 +- libfreerdp/primitives/test/test_shift.c | 16 +- libfreerdp/primitives/test/test_sign.c | 9 +- 37 files changed, 200 insertions(+), 469 deletions(-) diff --git a/include/freerdp/primitives.h b/include/freerdp/primitives.h index 8bcd14c7b..24bac4748 100644 --- a/include/freerdp/primitives.h +++ b/include/freerdp/primitives.h @@ -190,9 +190,6 @@ typedef struct __yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3; __RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3; __RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R; - - /* internal use for CPU flags and such. */ - void *hints; } primitives_t; #ifdef __cplusplus @@ -202,12 +199,6 @@ extern "C" { /* Prototypes for the externally-visible entrypoints. */ FREERDP_API void primitives_init(void); FREERDP_API primitives_t *primitives_get(void); -FREERDP_API UINT32 primitives_get_flags( - const primitives_t *prims); -FREERDP_API void primitives_flags_str( - const primitives_t *prims, - char *str, - size_t len); FREERDP_API void primitives_deinit(void); #ifdef __cplusplus diff --git a/libfreerdp/primitives/README.txt b/libfreerdp/primitives/README.txt index 369102c0d..81c7e9754 100644 --- a/libfreerdp/primitives/README.txt +++ b/libfreerdp/primitives/README.txt @@ -62,10 +62,7 @@ New Optimizations ----------------- As the need arises, new optimizations can be added to the library, including NEON, AVX, and perhaps OpenCL or other SIMD implementations. -The initialization routine is free to do any quick run-time test to -determine which features are available before hooking the operation's -function pointer, or it can simply look at the processor features list -from the hints passed to the initialization routine. +The CPU feature detection is done in winpr/sysinfo. Adding Entrypoints @@ -85,15 +82,6 @@ be added. The template functions can frequently be used to extend the operations without writing a lot of new code. - -Flags ------ -The entrypoint primitives_get_flags() returns a bitfield of processor flags -(as defined in primitives.h) and primitives_flag_str() returns a string -related to those processor flags, for debugging and information. The -bitfield can be used elsewhere in the code as needed. - - Cache Management ---------------- I haven't found a lot of speed improvement by attempting prefetch, and diff --git a/libfreerdp/primitives/prim_add.c b/libfreerdp/primitives/prim_add.c index 258bcc6ea..4d5525bdc 100644 --- a/libfreerdp/primitives/prim_add.c +++ b/libfreerdp/primitives/prim_add.c @@ -46,12 +46,11 @@ pstatus_t general_add_16s( /* ------------------------------------------------------------------------- */ void primitives_init_add( - const primitives_hints_t *hints, primitives_t *prims) { prims->add_16s = general_add_16s; - primitives_init_add_opt(hints, prims); + primitives_init_add_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_add.h b/libfreerdp/primitives/prim_add.h index 4ad460279..f1e143cd1 100644 --- a/libfreerdp/primitives/prim_add.h +++ b/libfreerdp/primitives/prim_add.h @@ -24,7 +24,7 @@ pstatus_t general_add_16s(const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, INT32 len); -void primitives_init_add_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_add_opt(primitives_t *prims); #endif /* !__PRIM_ADD_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_add_opt.c b/libfreerdp/primitives/prim_add_opt.c index 2de0b8fc6..88a4fbc93 100644 --- a/libfreerdp/primitives/prim_add_opt.c +++ b/libfreerdp/primitives/prim_add_opt.c @@ -20,6 +20,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -45,18 +46,15 @@ SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s, /* ------------------------------------------------------------------------- */ void primitives_init_add_opt( - const primitives_hints_t *hints, primitives_t *prims) { #ifdef WITH_IPP prims->add_16s = (__add_16s_t) ippsAdd_16s; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */ + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { prims->add_16s = sse3_add_16s; } #endif } - - diff --git a/libfreerdp/primitives/prim_alphaComp.c b/libfreerdp/primitives/prim_alphaComp.c index 24f916770..e48ce9d61 100644 --- a/libfreerdp/primitives/prim_alphaComp.c +++ b/libfreerdp/primitives/prim_alphaComp.c @@ -102,11 +102,11 @@ pstatus_t general_alphaComp_argb( } /* ------------------------------------------------------------------------- */ -void primitives_init_alphaComp(const primitives_hints_t* hints, primitives_t* prims) +void primitives_init_alphaComp(primitives_t* prims) { prims->alphaComp_argb = general_alphaComp_argb; - primitives_init_alphaComp_opt(hints, prims); + primitives_init_alphaComp_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_alphaComp.h b/libfreerdp/primitives/prim_alphaComp.h index 50591162d..a16335711 100644 --- a/libfreerdp/primitives/prim_alphaComp.h +++ b/libfreerdp/primitives/prim_alphaComp.h @@ -24,7 +24,7 @@ pstatus_t general_alphaComp_argb(const BYTE *pSrc1, INT32 src1Step, const BYTE *pSrc2, INT32 src2Step, BYTE *pDst, INT32 dstStep, INT32 width, INT32 height); -void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims); +void primitives_init_alphaComp_opt(primitives_t* prims); #endif /* !__PRIM_ALPHACOMP_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_alphaComp_opt.c b/libfreerdp/primitives/prim_alphaComp_opt.c index 5550fcbc1..52e33fbc9 100644 --- a/libfreerdp/primitives/prim_alphaComp_opt.c +++ b/libfreerdp/primitives/prim_alphaComp_opt.c @@ -26,6 +26,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -210,13 +211,13 @@ pstatus_t ipp_alphaComp_argb( #endif /* ------------------------------------------------------------------------- */ -void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims) +void primitives_init_alphaComp_opt(primitives_t* prims) { #ifdef WITH_IPP prims->alphaComp_argb = ipp_alphaComp_argb; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */ + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { prims->alphaComp_argb = sse2_alphaComp_argb; } diff --git a/libfreerdp/primitives/prim_andor.c b/libfreerdp/primitives/prim_andor.c index 0b8092ff2..4c1923f46 100644 --- a/libfreerdp/primitives/prim_andor.c +++ b/libfreerdp/primitives/prim_andor.c @@ -61,14 +61,13 @@ pstatus_t general_orC_32u( /* ------------------------------------------------------------------------- */ void primitives_init_andor( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ prims->andC_32u = general_andC_32u; prims->orC_32u = general_orC_32u; - primitives_init_andor_opt(hints, prims); + primitives_init_andor_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_andor.h b/libfreerdp/primitives/prim_andor.h index 6a2e7ac46..9762f22aa 100644 --- a/libfreerdp/primitives/prim_andor.h +++ b/libfreerdp/primitives/prim_andor.h @@ -25,7 +25,7 @@ pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len); pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len); -void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_andor_opt(primitives_t *prims); #endif /* !__PRIM_ANDOR_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_andor_opt.c b/libfreerdp/primitives/prim_andor_opt.c index e0ce1ea5b..8d74f30fa 100644 --- a/libfreerdp/primitives/prim_andor_opt.c +++ b/libfreerdp/primitives/prim_andor_opt.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -45,14 +46,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u, /* ------------------------------------------------------------------------- */ -void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_andor_opt(primitives_t *prims) { #if defined(WITH_IPP) prims->andC_32u = (__andC_32u_t) ippsAndC_32u; prims->orC_32u = (__orC_32u_t) ippsOrC_32u; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->andC_32u = sse3_andC_32u; prims->orC_32u = sse3_orC_32u; diff --git a/libfreerdp/primitives/prim_colors.c b/libfreerdp/primitives/prim_colors.c index 179e569b7..aae98e2d8 100644 --- a/libfreerdp/primitives/prim_colors.c +++ b/libfreerdp/primitives/prim_colors.c @@ -215,13 +215,13 @@ pstatus_t general_RGBToRGB_16s8u_P3AC4R( } /* ------------------------------------------------------------------------- */ -void primitives_init_colors(const primitives_hints_t* hints, primitives_t* prims) +void primitives_init_colors(primitives_t* prims) { prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3; prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3; - primitives_init_colors_opt(hints, prims); + primitives_init_colors_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_colors.h b/libfreerdp/primitives/prim_colors.h index 70f478547..15b76d997 100644 --- a/libfreerdp/primitives/prim_colors.h +++ b/libfreerdp/primitives/prim_colors.h @@ -26,7 +26,7 @@ pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, IN pstatus_t general_RGBToYCbCr_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, INT16 *pDst[3], INT32 dstStep, const prim_size_t *roi); pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi); -void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims); +void primitives_init_colors_opt(primitives_t* prims); #endif /* !__PRIM_COLORS_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_colors_opt.c b/libfreerdp/primitives/prim_colors_opt.c index cfc87414e..3dcd8895b 100644 --- a/libfreerdp/primitives/prim_colors_opt.c +++ b/libfreerdp/primitives/prim_colors_opt.c @@ -23,6 +23,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -542,17 +543,17 @@ pstatus_t neon_yCbCrToRGB_16s16s_P3P3( */ /* ------------------------------------------------------------------------- */ -void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims) +void primitives_init_colors_opt(primitives_t* prims) { #if defined(WITH_SSE2) - if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3; prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3; } #elif defined(WITH_NEON) - if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE) + if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3; } diff --git a/libfreerdp/primitives/prim_copy.c b/libfreerdp/primitives/prim_copy.c index 4198f2d8f..95755a09c 100644 --- a/libfreerdp/primitives/prim_copy.c +++ b/libfreerdp/primitives/prim_copy.c @@ -148,7 +148,6 @@ static pstatus_t ippiCopy_8u_AC4r( /* ------------------------------------------------------------------------- */ void primitives_init_copy( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h index 001ab8562..06418fba2 100644 --- a/libfreerdp/primitives/prim_internal.h +++ b/libfreerdp/primitives/prim_internal.h @@ -34,61 +34,43 @@ ? _mm_lddqu_si128((__m128i *) (_ptr_)) \ : _mm_load_si128((__m128i *) (_ptr_))) -/* This structure can (eventually) be used to provide hints to the - * initialization routines, e.g. whether SSE2 or NEON or IPP instructions - * or calls are available. - */ -typedef struct -{ - UINT32 x86_flags; - UINT32 arm_flags; -} primitives_hints_t; - /* Function prototypes for all the init/deinit routines. */ extern void primitives_init_copy( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_copy( primitives_t *prims); extern void primitives_init_set( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_set( primitives_t *prims); extern void primitives_init_add( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_add( primitives_t *prims); extern void primitives_init_andor( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_andor( primitives_t *prims); extern void primitives_init_shift( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_shift( primitives_t *prims); extern void primitives_init_sign( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_sign( primitives_t *prims); extern void primitives_init_alphaComp( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_alphaComp( primitives_t *prims); extern void primitives_init_colors( - const primitives_hints_t *hints, primitives_t *prims); extern void primitives_deinit_colors( primitives_t *prims); diff --git a/libfreerdp/primitives/prim_set.c b/libfreerdp/primitives/prim_set.c index 9176c8722..967df7b33 100644 --- a/libfreerdp/primitives/prim_set.c +++ b/libfreerdp/primitives/prim_set.c @@ -111,7 +111,6 @@ pstatus_t general_set_32u( /* ------------------------------------------------------------------------- */ void primitives_init_set( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ @@ -120,7 +119,7 @@ void primitives_init_set( prims->set_32u = general_set_32u; prims->zero = general_zero; - primitives_init_set_opt(hints, prims); + primitives_init_set_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_set.h b/libfreerdp/primitives/prim_set.h index e4504dc2c..0e2be1ea7 100644 --- a/libfreerdp/primitives/prim_set.h +++ b/libfreerdp/primitives/prim_set.h @@ -28,7 +28,7 @@ pstatus_t general_set_32s(INT32 val, INT32 *pDst, INT32 len); pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, INT32 len); -void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_set_opt(primitives_t *prims); #endif /* !__PRIM_SET_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_set_opt.c b/libfreerdp/primitives/prim_set_opt.c index 0523434ff..08b0f7e5f 100644 --- a/libfreerdp/primitives/prim_set_opt.c +++ b/libfreerdp/primitives/prim_set_opt.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef WITH_SSE2 # include @@ -198,7 +199,7 @@ pstatus_t ipp_wrapper_set_32u( #endif /* ------------------------------------------------------------------------- */ -void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_set_opt(primitives_t *prims) { /* Pick tuned versions if possible. */ #ifdef WITH_IPP @@ -207,7 +208,7 @@ void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prim prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u; prims->zero = (__zero_t) ippsZero_8u; #elif defined(WITH_SSE2) - if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { prims->set_8u = sse2_set_8u; prims->set_32s = sse2_set_32s; diff --git a/libfreerdp/primitives/prim_shift.c b/libfreerdp/primitives/prim_shift.c index bd26dc0a0..e89b2e086 100644 --- a/libfreerdp/primitives/prim_shift.c +++ b/libfreerdp/primitives/prim_shift.c @@ -104,7 +104,6 @@ pstatus_t general_shiftC_16u( /* ------------------------------------------------------------------------- */ void primitives_init_shift( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ @@ -117,7 +116,7 @@ void primitives_init_shift( prims->shiftC_16s = general_shiftC_16s; prims->shiftC_16u = general_shiftC_16u; - primitives_init_shift_opt(hints, prims); + primitives_init_shift_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_shift.h b/libfreerdp/primitives/prim_shift.h index cad054013..a26a5037c 100644 --- a/libfreerdp/primitives/prim_shift.h +++ b/libfreerdp/primitives/prim_shift.h @@ -29,7 +29,7 @@ pstatus_t general_rShiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 pstatus_t general_shiftC_16s(const INT16 *pSrc, INT32 val, INT16 *pDst, INT32 len); pstatus_t general_shiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 len); -void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_shift_opt(primitives_t *prims); #endif /* !__PRIM_SHIFT_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_shift_opt.c b/libfreerdp/primitives/prim_shift_opt.c index 0e57da269..9cdb33db7 100644 --- a/libfreerdp/primitives/prim_shift_opt.c +++ b/libfreerdp/primitives/prim_shift_opt.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -58,7 +59,7 @@ SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u, */ /* ------------------------------------------------------------------------- */ -void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_shift_opt(primitives_t *prims) { #if defined(WITH_IPP) prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s; @@ -66,8 +67,8 @@ void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *pr prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u; prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u; #elif defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->lShiftC_16s = sse2_lShiftC_16s; prims->rShiftC_16s = sse2_rShiftC_16s; diff --git a/libfreerdp/primitives/prim_sign.c b/libfreerdp/primitives/prim_sign.c index d7d2eb018..8b2bfa974 100644 --- a/libfreerdp/primitives/prim_sign.c +++ b/libfreerdp/primitives/prim_sign.c @@ -42,13 +42,12 @@ pstatus_t general_sign_16s( /* ------------------------------------------------------------------------- */ void primitives_init_sign( - const primitives_hints_t *hints, primitives_t *prims) { /* Start with the default. */ prims->sign_16s = general_sign_16s; - primitives_init_sign_opt(hints, prims); + primitives_init_sign_opt(prims); } /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/prim_sign.h b/libfreerdp/primitives/prim_sign.h index 3592990ec..f43eca24c 100644 --- a/libfreerdp/primitives/prim_sign.h +++ b/libfreerdp/primitives/prim_sign.h @@ -24,7 +24,7 @@ pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, INT32 len); -void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims); +void primitives_init_sign_opt(primitives_t *prims); #endif /* !__PRIM_SIGN_H_INCLUDED__ */ diff --git a/libfreerdp/primitives/prim_sign_opt.c b/libfreerdp/primitives/prim_sign_opt.c index 81842b9bd..643a75b3b 100644 --- a/libfreerdp/primitives/prim_sign_opt.c +++ b/libfreerdp/primitives/prim_sign_opt.c @@ -19,6 +19,7 @@ #include #include +#include #ifdef WITH_SSE2 #include @@ -134,13 +135,13 @@ pstatus_t ssse3_sign_16s( #endif /* WITH_SSE2 */ /* ------------------------------------------------------------------------- */ -void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims) +void primitives_init_sign_opt(primitives_t *prims) { /* Pick tuned versions if possible. */ /* I didn't spot an IPP version of this. */ #if defined(WITH_SSE2) - if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE) - && (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->sign_16s = ssse3_sign_16s; } diff --git a/libfreerdp/primitives/primitives.c b/libfreerdp/primitives/primitives.c index 245a6ce3b..2bb05d626 100644 --- a/libfreerdp/primitives/primitives.c +++ b/libfreerdp/primitives/primitives.c @@ -22,173 +22,16 @@ #include #include -#include - #include #include "prim_internal.h" -#ifdef __ANDROID__ -#include "cpu-features.h" -#endif - /* Singleton pointer used throughout the program when requested. */ static primitives_t* pPrimitives = NULL; -#define D_BIT_MMX (1<<23) -#define D_BIT_SSE (1<<25) -#define D_BIT_SSE2 (1<<26) -#define D_BIT_3DN (1<<30) -#define C_BIT_SSE3 (1<<0) -#define C_BIT_3DNP (1<<8) -#define C_BIT_SSSE3 (1<<9) -#define C_BIT_SSE41 (1<<19) -#define C_BIT_SSE42 (1<<20) -#define C_BIT_XGETBV (1<<27) -#define C_BIT_AVX (1<<28) -#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX) -#define E_BIT_XMM (1<<1) -#define E_BIT_YMM (1<<2) -#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM) -#define C_BIT_FMA (1<<11) -#define C_BIT_AVX_AES (1<<24) - -/* If x86 */ -#if defined(_M_IX86_AMD64) - -/* If GCC */ -#ifdef __GNUC__ - -#ifdef __AVX__ -#define xgetbv(_func_, _lo_, _hi_) \ - __asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_)) -#endif - -static void cpuid( - unsigned info, - unsigned *eax, - unsigned *ebx, - unsigned *ecx, - unsigned *edx) -{ - *eax = *ebx = *ecx = *edx = 0; - - __asm volatile - ( - /* The EBX (or RBX register on x86_64) is used for the PIC base address - * and must not be corrupted by our inline assembly. - */ -#ifdef _M_IX86 - "mov %%ebx, %%esi;" - "cpuid;" - "xchg %%ebx, %%esi;" -#else - "mov %%rbx, %%rsi;" - "cpuid;" - "xchg %%rbx, %%rsi;" -#endif - : "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx) - : "0" (info) - ); -} - -static void set_hints(primitives_hints_t* hints) -{ - unsigned a, b, c, d; - - cpuid(1, &a, &b, &c, &d); - - if (d & D_BIT_MMX) - hints->x86_flags |= PRIM_X86_MMX_AVAILABLE; - if (d & D_BIT_SSE) - hints->x86_flags |= PRIM_X86_SSE_AVAILABLE; - if (d & D_BIT_SSE2) - hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE; - if (d & D_BIT_3DN) - hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE; - if (c & C_BIT_3DNP) - hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE; - if (c & C_BIT_SSE3) - hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE; - if (c & C_BIT_SSSE3) - hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE; - if (c & C_BIT_SSE41) - hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE; - if (c & C_BIT_SSE42) - hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE; - -#ifdef __AVX__ - if ((c & C_BITS_AVX) == C_BITS_AVX) - { - int e, f; - xgetbv(0, e, f); - - if ((e & E_BITS_AVX) == E_BITS_AVX) - { - hints->x86_flags |= PRIM_X86_AVX_AVAILABLE; - - if (c & C_BIT_FMA) - hints->x86_flags |= PRIM_X86_FMA_AVAILABLE; - if (c & C_BIT_AVX_AES) - hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE; - } - } - /* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */ -#endif -} - -#else - -static void set_hints(primitives_hints_t* hints) -{ - /* x86 non-GCC: TODO */ -} - -#endif /* __GNUC__ */ - -/* ------------------------------------------------------------------------- */ - -#elif defined(_M_ARM) - -static UINT32 getNeonSupport(void) -{ -#ifdef __ANDROID__ - if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0; - - UINT64 features = android_getCpuFeatures(); - - if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7)) - { - if (features & ANDROID_CPU_ARM_FEATURE_NEON) - { - return PRIM_ARM_NEON_AVAILABLE; - } - } -#elif defined(__APPLE) - /* assume NEON support on iOS devices */ - return PRIM_ARM_NEON_AVAILABLE; -#endif - return 0; -} - -static void set_hints(primitives_hints_t* hints) -{ - /* ARM: TODO */ - hints->arm_flags |= getNeonSupport(); -} - -#else -static void set_hints( - primitives_hints_t *hints) -{ -} -#endif /* x86 else ARM else */ - /* ------------------------------------------------------------------------- */ void primitives_init(void) { - primitives_hints_t* hints; - if (pPrimitives == NULL) { pPrimitives = calloc(1, sizeof(primitives_t)); @@ -197,19 +40,15 @@ void primitives_init(void) return; } - hints = calloc(1, sizeof(primitives_hints_t)); - set_hints(hints); - pPrimitives->hints = (void *) hints; - /* Now call each section's initialization routine. */ - primitives_init_add(hints, pPrimitives); - primitives_init_andor(hints, pPrimitives); - primitives_init_alphaComp(hints, pPrimitives); - primitives_init_copy(hints, pPrimitives); - primitives_init_set(hints, pPrimitives); - primitives_init_shift(hints, pPrimitives); - primitives_init_sign(hints, pPrimitives); - primitives_init_colors(hints, pPrimitives); + primitives_init_add(pPrimitives); + primitives_init_andor(pPrimitives); + primitives_init_alphaComp(pPrimitives); + primitives_init_copy(pPrimitives); + primitives_init_set(pPrimitives); + primitives_init_shift(pPrimitives); + primitives_init_sign(pPrimitives); + primitives_init_colors(pPrimitives); } /* ------------------------------------------------------------------------- */ @@ -221,102 +60,6 @@ primitives_t* primitives_get(void) return pPrimitives; } -/* ------------------------------------------------------------------------- */ -UINT32 primitives_get_flags(const primitives_t* prims) -{ - primitives_hints_t* hints = (primitives_hints_t*) (prims->hints); - -#if defined(_M_IX86_AMD64) - return hints->x86_flags; -#elif defined(_M_ARM) - return hints->arm_flags; -#else - return 0; -#endif -} - -/* ------------------------------------------------------------------------- */ - -typedef struct -{ - UINT32 flag; - const char *str; -} flagpair_t; - -static const flagpair_t x86_flags[] = -{ - { PRIM_X86_MMX_AVAILABLE, "MMX" }, - { PRIM_X86_3DNOW_AVAILABLE, "3DNow" }, - { PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" }, - { PRIM_X86_SSE_AVAILABLE, "SSE" }, - { PRIM_X86_SSE2_AVAILABLE, "SSE2" }, - { PRIM_X86_SSE3_AVAILABLE, "SSE3" }, - { PRIM_X86_SSSE3_AVAILABLE, "SSSE3" }, - { PRIM_X86_SSE41_AVAILABLE, "SSE4.1" }, - { PRIM_X86_SSE42_AVAILABLE, "SSE4.2" }, - { PRIM_X86_AVX_AVAILABLE, "AVX" }, - { PRIM_X86_FMA_AVAILABLE, "FMA" }, - { PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" }, - { PRIM_X86_AVX2_AVAILABLE, "AVX2" }, -}; - -static const flagpair_t arm_flags[] = -{ - { PRIM_ARM_VFP1_AVAILABLE, "VFP1" }, - { PRIM_ARM_VFP2_AVAILABLE, "VFP2" }, - { PRIM_ARM_VFP3_AVAILABLE, "VFP3" }, - { PRIM_ARM_VFP4_AVAILABLE, "VFP4" }, - { PRIM_ARM_FPA_AVAILABLE, "FPA" }, - { PRIM_ARM_FPE_AVAILABLE, "FPE" }, - { PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" }, - { PRIM_ARM_NEON_AVAILABLE, "NEON" }, -}; - -void primitives_flags_str(const primitives_t* prims, char* str, size_t len) -{ - int i; - primitives_hints_t* hints; - - *str = '\0'; - --len; /* for the '/0' */ - - hints = (primitives_hints_t*) (prims->hints); - - for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i) - { - if (hints->x86_flags & x86_flags[i].flag) - { - int slen = strlen(x86_flags[i].str) + 1; - - if (len < slen) - break; - - if (*str != '\0') - strcat(str, " "); - - strcat(str, x86_flags[i].str); - len -= slen; - } - } - - for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i) - { - if (hints->arm_flags & arm_flags[i].flag) - { - int slen = strlen(arm_flags[i].str) + 1; - - if (len < slen) - break; - - if (*str != '\0') - strcat(str, " "); - - strcat(str, arm_flags[i].str); - len -= slen; - } - } -} - /* ------------------------------------------------------------------------- */ void primitives_deinit(void) { @@ -333,9 +76,6 @@ void primitives_deinit(void) primitives_deinit_sign(pPrimitives); primitives_deinit_colors(pPrimitives); - if (pPrimitives->hints != NULL) - free((void*) (pPrimitives->hints)); - free((void*) pPrimitives); pPrimitives = NULL; } diff --git a/libfreerdp/primitives/test/CMakeLists.txt b/libfreerdp/primitives/test/CMakeLists.txt index 7030ac7f8..738329152 100644 --- a/libfreerdp/primitives/test/CMakeLists.txt +++ b/libfreerdp/primitives/test/CMakeLists.txt @@ -31,28 +31,11 @@ set(PRIMITIVE_TEST_CFILES test_set.c test_shift.c test_sign.c - ../prim_add.c - ../prim_andor.c - ../prim_alphaComp.c - ../prim_colors.c - ../prim_copy.c - ../prim_set.c - ../prim_shift.c - ../prim_sign.c - ../prim_add_opt.c - ../prim_alphaComp_opt.c - ../prim_andor_opt.c - ../prim_colors_opt.c - ../prim_set_opt.c - ../prim_shift_opt.c - ../prim_sign_opt.c - ../primitives.c ) set(PRIMITIVE_TEST_HEADERS measure.h prim_test.h - ../prim_internal.h ) set(PRIMITIVE_TEST_SRCS @@ -138,7 +121,7 @@ endif() set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS}) -target_link_libraries(prim_test rt) +target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo) if(NOT TESTING_OUTPUT_DIRECTORY) set(TESTING_OUTPUT_DIRECTORY .) endif() diff --git a/libfreerdp/primitives/test/prim_test.c b/libfreerdp/primitives/test/prim_test.c index 172d6ffd4..7e88efbc9 100644 --- a/libfreerdp/primitives/test/prim_test.c +++ b/libfreerdp/primitives/test/prim_test.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #ifdef HAVE_UNISTD_H #include @@ -32,6 +34,88 @@ int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }; int Quiet = 0; + + +/* ------------------------------------------------------------------------- */ +typedef struct +{ + UINT32 flag; + const char *str; +} flagpair_t; + +static const flagpair_t flags[] = +#ifdef _M_IX86_AMD64 +{ + { PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" }, + { PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" }, + { PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" }, + { PF_XMMI64_INSTRUCTIONS_AVAILABLE, "SSE2" }, + { PF_SSE3_INSTRUCTIONS_AVAILABLE, "SSE3" }, +#elif defined(_M_ARM) + { PF_ARM_VFP3, "VFP3" }, + { PF_ARM_INTEL_WMMX, "IWMMXT" }, + { PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, "NEON" }, +#endif +}; + +static const flagpair_t flags_extended[] = +{ +#ifdef _M_IX86_AMD64 + { PF_EX_3DNOW_PREFETCH, "3DNow-PF" }, + { PF_EX_SSSE3, "SSSE3" }, + { PF_EX_SSE41, "SSE4.1" }, + { PF_EX_SSE42, "SSE4.2" }, + { PF_EX_AVX, "AVX" }, + { PF_EX_FMA, "FMA" }, + { PF_EX_AVX_AES, "AVX-AES" }, + { PF_EX_AVX2, "AVX2" }, +#elif defined(_M_ARM) + { PF_EX_ARM_VFP1, "VFP1"}, + { PF_EX_ARM_VFP4, "VFP4" }, +#endif +}; + +void primitives_flags_str(char* str, size_t len) +{ + int i; + + *str = '\0'; + --len; /* for the '/0' */ + + for (i = 0; i < sizeof(flags) / sizeof(flagpair_t); ++i) + { + if (IsProcessorFeaturePresent(flags[i].flag)) + { + int slen = strlen(flags[i].str) + 1; + + if (len < slen) + break; + + if (*str != '\0') + strcat(str, " "); + + strcat(str, flags[i].str); + len -= slen; + } + } + for (i = 0; i < sizeof(flags_extended) / sizeof(flagpair_t); ++i) + { + if (IsProcessorFeaturePresent(flags_extended[i].flag)) + { + int slen = strlen(flags_extended[i].str) + 1; + + if (len < slen) + break; + + if (*str != '\0') + strcat(str, " "); + + strcat(str, flags_extended[i].str); + len -= slen; + } + } +} + /* ------------------------------------------------------------------------- */ static void get_random_data_lrand( void *buffer, @@ -198,7 +282,7 @@ static const test_t testTypeList[] = int main(int argc, char** argv) { int i; - char hints[256]; + char hints[1024]; UINT32 testSet = 0; UINT32 testTypes = 0; int results = SUCCESS; @@ -253,7 +337,7 @@ int main(int argc, char** argv) primitives_init(); - primitives_flags_str(primitives_get(), hints, sizeof(hints)); + primitives_flags_str(hints, sizeof(hints)); printf("Hints: %s\n", hints); /* COPY */ diff --git a/libfreerdp/primitives/test/prim_test.h b/libfreerdp/primitives/test/prim_test.h index 9c4d3d872..fa61025f0 100644 --- a/libfreerdp/primitives/test/prim_test.h +++ b/libfreerdp/primitives/test/prim_test.h @@ -29,6 +29,7 @@ #include #include +#include #ifdef WITH_IPP #include @@ -121,8 +122,8 @@ extern int test_or_32u_speed(void); } \ } while (0) -#if defined(i386) && defined(WITH_SSE2) -#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \ +#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON)) +#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \ do { \ for (s=0; s 0.0) _floatprint(resultNormal[s], sN); \ - if (resultSSENeon[s] > 0.0) \ + if (resultOpt[s] > 0.0) \ { \ - _floatprint(resultSSENeon[s], sSN); \ + _floatprint(resultOpt[s], sSN); \ if (resultNormal[s] > 0.0) \ { \ sprintf(sSNp, "%d%%", \ - (int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \ + (int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \ } \ } \ if (resultIPP[s] > 0.0) \ @@ -244,7 +229,7 @@ static void _name_( \ printf("%8d: %15s %15s %5s %15s %5s\n", \ size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \ } \ - free(resultNormal); free(resultSSENeon); free(resultIPP); \ + free(resultNormal); free(resultOpt); free(resultIPP); \ } #endif // !__PRIMTEST_H_INCLUDED__ diff --git a/libfreerdp/primitives/test/test_add.c b/libfreerdp/primitives/test/test_add.c index bcdd643b7..083c37907 100644 --- a/libfreerdp/primitives/test/test_add.c +++ b/libfreerdp/primitives/test/test_add.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" #define FUNC_TEST_SIZE 65536 @@ -35,7 +36,6 @@ int test_add16s_func(void) int failed = 0; int i; char testStr[256]; - UINT32 pflags = primitives_get_flags(primitives_get()); testStr[0] = '\0'; get_random_data(src1, sizeof(src1)); @@ -44,7 +44,7 @@ int test_add16s_func(void) memset(d2, 0, sizeof(d2)); general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE3_AVAILABLE) + if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE3"); /* Aligned */ @@ -91,8 +91,7 @@ int test_add16s_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst, TRUE, general_add_16s(src1, src2, dst, size), - TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsAdd_16s(src1, src2, dst, size)); int test_add16s_speed(void) diff --git a/libfreerdp/primitives/test/test_alphaComp.c b/libfreerdp/primitives/test/test_alphaComp.c index a39157acd..2d8285f27 100644 --- a/libfreerdp/primitives/test/test_alphaComp.c +++ b/libfreerdp/primitives/test/test_alphaComp.c @@ -15,6 +15,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif +#include #include "prim_test.h" @@ -110,7 +111,6 @@ int test_alphaComp_func(void) UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]); UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]); int error = 0; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; UINT32 *ptr; int i, x, y; @@ -133,7 +133,7 @@ int test_alphaComp_func(void) (const BYTE *) src2, 4*SRC2_WIDTH, (BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH, @@ -166,7 +166,7 @@ int test_alphaComp_func(void) error = 1; } #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y); if (colordist(c0, c2) > TOLERANCE) @@ -207,8 +207,7 @@ STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4, TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, size, size), TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, - size, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, size, size)); diff --git a/libfreerdp/primitives/test/test_andor.c b/libfreerdp/primitives/test/test_andor.c index 54e1ead9f..6e8b3d8a3 100644 --- a/libfreerdp/primitives/test/test_andor.c +++ b/libfreerdp/primitives/test/test_andor.c @@ -15,6 +15,7 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif +#include #include "prim_test.h" @@ -39,7 +40,6 @@ int test_and_32u_func(void) UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]); int failed = 0; int i; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; testStr[0] = '\0'; @@ -56,7 +56,7 @@ int test_and_32u_func(void) } } #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE3_AVAILABLE) + if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE3"); /* Aligned */ @@ -92,8 +92,7 @@ int test_and_32u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst, TRUE, general_andC_32u(src1, constant, dst, size), - TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsAndC_32u(src1, constant, dst, size)) int test_and_32u_speed(void) @@ -113,7 +112,6 @@ int test_or_32u_func(void) UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]); int failed = 0; int i; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; testStr[0] = '\0'; @@ -130,7 +128,7 @@ int test_or_32u_func(void) } } #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE3_AVAILABLE) + if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE3"); /* Aligned */ @@ -166,8 +164,7 @@ int test_or_32u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst, TRUE, general_orC_32u(src1, constant, dst, size), - TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsOrC_32u(src1, constant, dst, size)) int test_or_32u_speed(void) diff --git a/libfreerdp/primitives/test/test_colors.c b/libfreerdp/primitives/test/test_colors.c index d86d76e12..e5192c035 100644 --- a/libfreerdp/primitives/test/test_colors.c +++ b/libfreerdp/primitives/test/test_colors.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int RGB_TRIAL_ITERATIONS = 1000; @@ -38,7 +39,6 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void) UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]); int i; int failed = 0; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; INT16 *ptrs[3]; prim_size_t roi = { 64, 64 }; @@ -62,7 +62,7 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void) general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, (BYTE *) out1, 64*4, &roi); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, @@ -90,8 +90,7 @@ STD_SPEED_TEST( (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64), TRUE, sse2_RGBToRGB_16s8u_P3AC4R( (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64), - PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, FALSE, dst=dst); int test_RGBToRGB_16s8u_P3AC4R_speed(void) @@ -131,7 +130,6 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void) INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]); int i; int failed = 0; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; const INT16 *in[3]; INT16 *out1[3]; @@ -168,7 +166,7 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void) general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi); @@ -193,8 +191,7 @@ STD_SPEED_TEST( ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst, TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), - PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, FALSE, dst=dst); int test_yCbCrToRGB_16s16s_P3P3_speed(void) diff --git a/libfreerdp/primitives/test/test_copy.c b/libfreerdp/primitives/test/test_copy.c index d92af5300..5f2ed9103 100644 --- a/libfreerdp/primitives/test/test_copy.c +++ b/libfreerdp/primitives/test/test_copy.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int MEMCPY_PRETEST_ITERATIONS = 1000000; @@ -70,8 +71,7 @@ int test_copy8u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst, TRUE, memcpy(dst, src1, size), - FALSE, NULL, 0, - FALSE, NULL, 0, + FALSE, NULL, 0, FALSE, TRUE, ippsCopy_8u(src1, dst, size)); int test_copy8u_speed(void) diff --git a/libfreerdp/primitives/test/test_set.c b/libfreerdp/primitives/test/test_set.c index 597f76678..0343674f5 100644 --- a/libfreerdp/primitives/test/test_set.c +++ b/libfreerdp/primitives/test/test_set.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int MEMSET8_PRETEST_ITERATIONS = 100000000; @@ -40,12 +41,11 @@ int test_set8u_func(void) int failed = 0; int off; char testStr[256]; - UINT32 pflags = primitives_get_flags(primitives_get()); testStr[0] = '\0'; #ifdef _M_IX86_AMD64 /* Test SSE under various alignments */ - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) @@ -101,8 +101,7 @@ int test_set8u_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst, TRUE, memset(dst, constant, size), - FALSE, NULL, 0, - FALSE, NULL, 0, + FALSE, NULL, 0, FALSE, TRUE, ippsSet_8u(constant, dst, size)); int test_set8u_speed(void) @@ -116,17 +115,15 @@ int test_set8u_speed(void) /* ------------------------------------------------------------------------- */ int test_set32s_func(void) { - primitives_t* prims = primitives_get(); INT32 ALIGN(dest[512]); int failed = 0; int off; char testStr[256]; - UINT32 pflags = primitives_get_flags(prims); testStr[0] = '\0'; #ifdef _M_IX86_AMD64 /* Test SSE under various alignments */ - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { @@ -179,17 +176,15 @@ int test_set32s_func(void) /* ------------------------------------------------------------------------- */ int test_set32u_func(void) { - primitives_t* prims = primitives_get(); UINT32 ALIGN(dest[512]); int failed = 0; int off; char testStr[256]; - UINT32 pflags = primitives_get_flags(prims); testStr[0] = '\0'; #ifdef _M_IX86_AMD64 /* Test SSE under various alignments */ - if (pflags & PRIM_X86_SSE2_AVAILABLE) + if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { @@ -251,8 +246,7 @@ static inline void memset32u_naive( /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst, TRUE, memset32u_naive(constant, dst, size), - TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ipp_wrapper_set_32u(constant, dst, size)); int test_set32u_speed(void) @@ -280,8 +274,7 @@ static inline void memset32s_naive( /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst, TRUE, memset32s_naive(constant, dst, size), - TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsSet_32s(constant, dst, size)); int test_set32s_speed(void) diff --git a/libfreerdp/primitives/test/test_shift.c b/libfreerdp/primitives/test/test_shift.c index 1b8a5c12d..d72407e13 100644 --- a/libfreerdp/primitives/test/test_shift.c +++ b/libfreerdp/primitives/test/test_shift.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" #define FUNC_TEST_SIZE 65536 @@ -55,12 +56,11 @@ int _name_(void) \ ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \ int failed = 0; \ int i; \ - UINT32 pflags = primitives_get_flags(primitives_get()); \ char testStr[256]; \ testStr[0] = '\0'; \ get_random_data(src, sizeof(src)); \ _f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \ - if (pflags & PRIM_X86_SSE3_AVAILABLE) \ + if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \ { \ strcat(testStr, " SSE3"); \ /* Aligned */ \ @@ -109,23 +109,19 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u, /* ========================================================================= */ STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst, TRUE, general_lShiftC_16s(src1, constant, dst, size), - TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsLShiftC_16s(src1, constant, dst, size)); STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst, TRUE, general_lShiftC_16u(src1, constant, dst, size), - TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsLShiftC_16u(src1, constant, dst, size)); STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst, TRUE, general_rShiftC_16s(src1, constant, dst, size), - TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsRShiftC_16s(src1, constant, dst, size)); STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst, TRUE, general_rShiftC_16u(src1, constant, dst, size), - TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, TRUE, ippsRShiftC_16u(src1, constant, dst, size)); /* ------------------------------------------------------------------------- */ diff --git a/libfreerdp/primitives/test/test_sign.c b/libfreerdp/primitives/test/test_sign.c index 012303544..99f5a60d4 100644 --- a/libfreerdp/primitives/test/test_sign.c +++ b/libfreerdp/primitives/test/test_sign.c @@ -16,6 +16,7 @@ #include "config.h" #endif +#include #include "prim_test.h" static const int SIGN_PRETEST_ITERATIONS = 100000; @@ -30,7 +31,6 @@ int test_sign16s_func(void) INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]); int failed = 0; int i; - UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; /* Test when we can reach 16-byte alignment */ @@ -38,7 +38,7 @@ int test_sign16s_func(void) get_random_data(src, sizeof(src)); general_sign_16s(src+1, d1+1, 65535); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSSE3_AVAILABLE) + if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)) { strcat(testStr, " SSSE3"); ssse3_sign_16s(src+1, d2+1, 65535); @@ -58,7 +58,7 @@ int test_sign16s_func(void) get_random_data(src, sizeof(src)); general_sign_16s(src+1, d1+2, 65535); #ifdef _M_IX86_AMD64 - if (pflags & PRIM_X86_SSSE3_AVAILABLE) + if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)) { ssse3_sign_16s(src+1, d2+2, 65535); for (i=2; i<65535; ++i) @@ -79,8 +79,7 @@ int test_sign16s_func(void) /* ------------------------------------------------------------------------- */ STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst, TRUE, general_sign_16s(src1, dst, size), - TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE, - FALSE, dst=dst, 0, + TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE, FALSE, dst=dst); int test_sign16s_speed(void) From 589d32dc560b4fe80a125448b81a69acbb323ad0 Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Wed, 27 Feb 2013 16:04:45 +0100 Subject: [PATCH 04/13] codec/rfx,primitives: removed cpu-detect.[ch] from build sysinfo cpu detection is used on android as well. --- libfreerdp/codec/CMakeLists.txt | 7 ------- libfreerdp/codec/rfx_neon.c | 5 +---- libfreerdp/primitives/CMakeLists.txt | 9 --------- 3 files changed, 1 insertion(+), 20 deletions(-) diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index f69801897..4bf327a64 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -69,13 +69,6 @@ if(WITH_SSE2) endif() if(WITH_NEON) - if(ANDROID) - set(ANDROID_CPU_FEATURES_PATH "${ANDROID_NDK}/sources/android/cpufeatures") - include_directories(${ANDROID_CPU_FEATURES_PATH}) - set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} - ${ANDROID_CPU_FEATURES_PATH}/cpu-features.c - ${ANDROID_CPU_FEATURES_PATH}/cpu-features.h) - endif() set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} ${${MODULE_PREFIX}_NEON_SRCS}) set_source_files_properties(${${MODULE_PREFIX}_NEON_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon -mfloat-abi=${ARM_FP_ABI} -Wno-unused-variable") endif() diff --git a/libfreerdp/codec/rfx_neon.c b/libfreerdp/codec/rfx_neon.c index fec028ba9..c78dc6c14 100644 --- a/libfreerdp/codec/rfx_neon.c +++ b/libfreerdp/codec/rfx_neon.c @@ -27,14 +27,11 @@ #include #include #include +#include #include "rfx_types.h" #include "rfx_neon.h" -#if ANDROID -#include "cpu-features.h" -#endif - /* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */ static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/libfreerdp/primitives/CMakeLists.txt b/libfreerdp/primitives/CMakeLists.txt index e8d1a7bfb..780ae94e1 100644 --- a/libfreerdp/primitives/CMakeLists.txt +++ b/libfreerdp/primitives/CMakeLists.txt @@ -63,15 +63,6 @@ elseif(WITH_NEON) # TODO: Add MSVC equivalent endif() -# required for android cpu detection -if(ANDROID) - set(ANDROID_CPU_FEATURES_PATH "${ANDROID_NDK}/sources/android/cpufeatures") - include_directories(${ANDROID_CPU_FEATURES_PATH}) - set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} - ${ANDROID_CPU_FEATURES_PATH}/cpu-features.c - ${ANDROID_CPU_FEATURES_PATH}/cpu-features.h) -endif() - set_property(SOURCE ${${MODULE_PREFIX}_OPT_SRCS} PROPERTY COMPILE_FLAGS ${OPTIMIZATION}) set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} ${${MODULE_PREFIX}_OPT_SRCS}) From 081be8b01fec33d8b6ada4f346e45b3a77aad145 Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Thu, 28 Feb 2013 10:49:39 +0100 Subject: [PATCH 05/13] primitives: updated tests, cleanup and build fixes --- libfreerdp/primitives/test/CMakeLists.txt | 19 +++++++++++++- libfreerdp/primitives/test/prim_test.c | 2 +- libfreerdp/primitives/test/prim_test.h | 5 ++-- libfreerdp/primitives/test/test_add.c | 10 ++++++-- libfreerdp/primitives/test/test_alphaComp.c | 9 +++++-- libfreerdp/primitives/test/test_andor.c | 12 +++++++-- libfreerdp/primitives/test/test_colors.c | 22 +++++++++++++--- libfreerdp/primitives/test/test_copy.c | 2 +- libfreerdp/primitives/test/test_set.c | 28 +++++++++++++++------ libfreerdp/primitives/test/test_shift.c | 18 ++++++++++++- libfreerdp/primitives/test/test_sign.c | 17 ++++++++++--- 11 files changed, 118 insertions(+), 26 deletions(-) diff --git a/libfreerdp/primitives/test/CMakeLists.txt b/libfreerdp/primitives/test/CMakeLists.txt index 738329152..6d6898f17 100644 --- a/libfreerdp/primitives/test/CMakeLists.txt +++ b/libfreerdp/primitives/test/CMakeLists.txt @@ -31,11 +31,28 @@ set(PRIMITIVE_TEST_CFILES test_set.c test_shift.c test_sign.c + ../prim_add.c + ../prim_andor.c + ../prim_alphaComp.c + ../prim_colors.c + ../prim_copy.c + ../prim_set.c + ../prim_shift.c + ../prim_sign.c + ../prim_add_opt.c + ../prim_alphaComp_opt.c + ../prim_andor_opt.c + ../prim_colors_opt.c + ../prim_set_opt.c + ../prim_shift_opt.c + ../prim_sign_opt.c + ../primitives.c ) set(PRIMITIVE_TEST_HEADERS measure.h prim_test.h + ../prim_internal.h ) set(PRIMITIVE_TEST_SRCS @@ -121,7 +138,7 @@ endif() set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS}) -target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo) +target_link_libraries(prim_test rt winpr-sysinfo) if(NOT TESTING_OUTPUT_DIRECTORY) set(TESTING_OUTPUT_DIRECTORY .) endif() diff --git a/libfreerdp/primitives/test/prim_test.c b/libfreerdp/primitives/test/prim_test.c index 7e88efbc9..5104eebc7 100644 --- a/libfreerdp/primitives/test/prim_test.c +++ b/libfreerdp/primitives/test/prim_test.c @@ -44,8 +44,8 @@ typedef struct } flagpair_t; static const flagpair_t flags[] = -#ifdef _M_IX86_AMD64 { +#ifdef _M_IX86_AMD64 { PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" }, { PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" }, { PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" }, diff --git a/libfreerdp/primitives/test/prim_test.h b/libfreerdp/primitives/test/prim_test.h index fa61025f0..f32f46889 100644 --- a/libfreerdp/primitives/test/prim_test.h +++ b/libfreerdp/primitives/test/prim_test.h @@ -101,7 +101,7 @@ extern int test_or_32u_speed(void); /* Since so much of this code is repeated, define a macro to build * functions to do speed tests. */ -#ifdef armel +#ifdef _M_ARM #define SIMD_TYPE "Neon" #else #define SIMD_TYPE "SSE" @@ -122,7 +122,7 @@ extern int test_or_32u_speed(void); } \ } while (0) -#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON)) +#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(_M_ARM) && defined(WITH_NEON)) #define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \ do { \ for (s=0; s Date: Thu, 28 Feb 2013 11:13:43 +0100 Subject: [PATCH 06/13] IPP: disable on arm and apple --- CMakeLists.txt | 9 +++++++-- cmake/ConfigOptions.cmake | 6 ++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f9d6697c..ecdba4d37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -303,6 +303,7 @@ if(ANDROID) set(GSTREAMER_FEATURE_TYPE "DISABLED") endif() + find_feature(X11 ${X11_FEATURE_TYPE} ${X11_FEATURE_PURPOSE} ${X11_FEATURE_DESCRIPTION}) find_feature(DirectFB ${DIRECTFB_FEATURE_TYPE} ${DIRECTFB_FEATURE_PURPOSE} ${DIRECTFB_FEATURE_DESCRIPTION}) @@ -318,8 +319,12 @@ find_feature(PCSC ${PCSC_FEATURE_TYPE} ${PCSC_FEATURE_PURPOSE} ${PCSC_FEATURE_DE find_feature(FFmpeg ${FFMPEG_FEATURE_TYPE} ${FFMPEG_FEATURE_PURPOSE} ${FFMPEG_FEATURE_DESCRIPTION}) find_feature(Gstreamer ${GSTREAMER_FEATURE_TYPE} ${GSTREAMER_FEATURE_PURPOSE} ${GSTREAMER_FEATURE_DESCRIPTION}) -# Intel Performance Primitives -find_feature(IPP ${IPP_FEATURE_TYPE} ${IPP_FEATURE_PURPOSE} ${IPP_FEATURE_DESCRIPTION}) +if(TARGET_ARCH MATCHES "x86|x64") + if (NOT APPLE) + # Intel Performance Primitives + find_feature(IPP ${IPP_FEATURE_TYPE} ${IPP_FEATURE_PURPOSE} ${IPP_FEATURE_DESCRIPTION}) + endif() +endif() # Installation Paths if(WIN32) diff --git a/cmake/ConfigOptions.cmake b/cmake/ConfigOptions.cmake index c922d2f78..0408e79f2 100644 --- a/cmake/ConfigOptions.cmake +++ b/cmake/ConfigOptions.cmake @@ -9,7 +9,6 @@ endif() option(WITH_MANPAGES "Generate manpages." ON) option(WITH_PROFILER "Compile profiler." OFF) -option(WITH_IPP "Use Intel Performance Primitives." OFF) if((TARGET_ARCH MATCHES "x86|x64") AND (NOT DEFINED WITH_SSE2)) option(WITH_SSE2 "Enable SSE2 optimization." ON) @@ -29,8 +28,11 @@ if(TARGET_ARCH MATCHES "ARM") set(ARM_FP_ABI ${ARM_FP_API} CACHE STRING "Floating point ABI to use on arm") endif() mark_as_advanced(ARM_FP_ABI) +else() + if(NOT APPLE) + option(WITH_IPP "Use Intel Performance Primitives." OFF) + endif() endif() - option(WITH_JPEG "Use JPEG decoding." OFF) if(APPLE) From 08772f640273a2c8b205d029e918a96c8deb8ab3 Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Thu, 28 Feb 2013 12:18:21 +0100 Subject: [PATCH 07/13] primitives: removed build warnings on arm --- libfreerdp/primitives/test/test_alphaComp.c | 3 +-- libfreerdp/primitives/test/test_set.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/libfreerdp/primitives/test/test_alphaComp.c b/libfreerdp/primitives/test/test_alphaComp.c index 7d721cd39..66d765ec6 100644 --- a/libfreerdp/primitives/test/test_alphaComp.c +++ b/libfreerdp/primitives/test/test_alphaComp.c @@ -203,7 +203,7 @@ int test_alphaComp_func(void) /* ------------------------------------------------------------------------- */ -STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4, +STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes __attribute__((unused)) = size*4, TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, size, size), #ifdef WITH_SSE2 @@ -212,7 +212,6 @@ STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4, #else FALSE, PRIM_NOP, 0, FALSE, #endif - TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, size, size)); diff --git a/libfreerdp/primitives/test/test_set.c b/libfreerdp/primitives/test/test_set.c index 33a41e216..3edf746d7 100644 --- a/libfreerdp/primitives/test/test_set.c +++ b/libfreerdp/primitives/test/test_set.c @@ -287,7 +287,7 @@ STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst, #ifdef WITH_SSE2 TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, #else - FALSE, NULL, 0, FALSE, + FALSE, PRIM_NOP, 0, FALSE, #endif TRUE, ippsSet_32s(constant, dst, size)); From 0ce13c38376580943d5a37758e926597832d44e2 Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Thu, 28 Feb 2013 13:45:32 +0100 Subject: [PATCH 08/13] sysinfo: neon is supported on all current ios devices --- winpr/libwinpr/sysinfo/sysinfo.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/winpr/libwinpr/sysinfo/sysinfo.c b/winpr/libwinpr/sysinfo/sysinfo.c index 0dddf6aca..1a78aa6b9 100644 --- a/winpr/libwinpr/sysinfo/sysinfo.c +++ b/winpr/libwinpr/sysinfo/sysinfo.c @@ -508,7 +508,15 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) default: break; } -#endif +#elif defined(__APPLE__) // __linux__ + switch (ProcessorFeature) + { + case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: + case PF_ARM_NEON: + ret = TRUE; + break; + } +#endif // __linux__ #elif defined(_M_IX86_AMD64) #ifdef __GNUC__ unsigned a, b, c, d; From fe91121706ab7849da6f06696afd19657872139d Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Thu, 28 Feb 2013 14:36:55 +0100 Subject: [PATCH 09/13] primitves/test: use Ex function for extended flags --- libfreerdp/primitives/test/prim_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libfreerdp/primitives/test/prim_test.c b/libfreerdp/primitives/test/prim_test.c index 5104eebc7..dfd579ba1 100644 --- a/libfreerdp/primitives/test/prim_test.c +++ b/libfreerdp/primitives/test/prim_test.c @@ -100,7 +100,7 @@ void primitives_flags_str(char* str, size_t len) } for (i = 0; i < sizeof(flags_extended) / sizeof(flagpair_t); ++i) { - if (IsProcessorFeaturePresent(flags_extended[i].flag)) + if (IsProcessorFeaturePresentEx(flags_extended[i].flag)) { int slen = strlen(flags_extended[i].str) + 1; From b8a545d9c95ccf79b2548b49f6f827bd66cf21ee Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Fri, 1 Mar 2013 08:45:44 +0100 Subject: [PATCH 10/13] winpr/sysinfo: restructuring and "aliases" added Added meaningful aliases for the not so clear defines. --- winpr/include/winpr/sysinfo.h | 159 ++++++++++++++++--------------- winpr/libwinpr/sysinfo/sysinfo.c | 2 +- 2 files changed, 83 insertions(+), 78 deletions(-) diff --git a/winpr/include/winpr/sysinfo.h b/winpr/include/winpr/sysinfo.h index 001232ab0..6d681cdaa 100644 --- a/winpr/include/winpr/sysinfo.h +++ b/winpr/include/winpr/sysinfo.h @@ -66,82 +66,6 @@ #define PROCESSOR_ARM_7TDMI 70001 #define PROCESSOR_OPTIL 0x494F -#define PF_FLOATING_POINT_PRECISION_ERRATA 0 -#define PF_FLOATING_POINT_EMULATED 1 -#define PF_COMPARE_EXCHANGE_DOUBLE 2 -#define PF_MMX_INSTRUCTIONS_AVAILABLE 3 -#define PF_PPC_MOVEMEM_64BIT_OK 4 -#define PF_XMMI_INSTRUCTIONS_AVAILABLE 6 //sse -#define PF_3DNOW_INSTRUCTIONS_AVAILABLE 7 -#define PF_RDTSC_INSTRUCTION_AVAILABLE 8 -#define PF_PAE_ENABLED 9 -#define PF_XMMI64_INSTRUCTIONS_AVAILABLE 10 //sse2 -#define PF_SSE_DAZ_MODE_AVAILABLE 11 -#define PF_NX_ENABLED 12 -#define PF_SSE3_INSTRUCTIONS_AVAILABLE 13 -#define PF_COMPARE_EXCHANGE128 14 -#define PF_COMPARE64_EXCHANGE128 15 -#define PF_CHANNELS_ENABLED 16 -#define PF_XSAVE_ENABLED 17 -#define PF_ARM_VFP_32_REGISTERS_AVAILABLE 18 -#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19 -#define PF_SECOND_LEVEL_ADDRESS_TRANSLATION 20 -#define PF_VIRT_FIRMWARE_ENABLED 21 -#define PF_RDWRFSGSBASE_AVAILABLE 22 -#define PF_FASTFAIL_AVAILABLE 23 -#define PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE 24 -#define PF_ARM_64BIT_LOADSTORE_ATOMIC 25 -#define PF_ARM_EXTERNAL_CACHE_AVAILABLE 26 -#define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE 27 - -#define PF_ARM_V4 0x80000001 -#define PF_ARM_V5 0x80000002 -#define PF_ARM_V6 0x80000003 -#define PF_ARM_V7 0x80000004 -#define PF_ARM_THUMB 0x80000005 -#define PF_ARM_JAZELLE 0x80000006 -#define PF_ARM_DSP 0x80000007 -#define PF_ARM_MOVE_CP 0x80000008 -#define PF_ARM_VFP10 0x80000009 -#define PF_ARM_MPU 0x8000000A -#define PF_ARM_WRITE_BUFFER 0x8000000B -#define PF_ARM_MBX 0x8000000C -#define PF_ARM_L2CACHE 0x8000000D -#define PF_ARM_PHYSICALLY_TAGGED_CACHE 0x8000000E -#define PF_ARM_VFP_SINGLE_PRECISION 0x8000000F -#define PF_ARM_VFP_DOUBLE_PRECISION 0x80000010 -#define PF_ARM_ITCM 0x80000011 -#define PF_ARM_DTCM 0x80000012 -#define PF_ARM_UNIFIED_CACHE 0x80000013 -#define PF_ARM_WRITE_BACK_CACHE 0x80000014 -#define PF_ARM_CACHE_CAN_BE_LOCKED_DOWN 0x80000015 -#define PF_ARM_L2CACHE_MEMORY_MAPPED 0x80000016 -#define PF_ARM_L2CACHE_COPROC 0x80000017 -#define PF_ARM_THUMB2 0x80000018 -#define PF_ARM_T2EE 0x80000019 -#define PF_ARM_VFP3 0x8000001A -#define PF_ARM_NEON 0x8000001B -#define PF_ARM_UNALIGNED_ACCESS 0x8000001C - -#define PF_ARM_INTEL_XSCALE 0x80010001 -#define PF_ARM_INTEL_PMU 0x80010002 -#define PF_ARM_INTEL_WMMX 0x80010003 - -// extended flags -#define PF_EX_3DNOW_PREFETCH 1 -#define PF_EX_SSSE3 2 -#define PF_EX_SSE41 3 -#define PF_EX_SSE42 4 -#define PF_EX_AVX 5 -#define PF_EX_FMA 6 -#define PF_EX_AVX_AES 7 -#define PF_EX_AVX2 8 -#define PF_EX_ARM_VFP1 9 -#define PF_EX_ARM_VFP3D16 10 -#define PF_EX_ARM_VFP4 11 -#define PF_EX_ARM_IDIVA 12 -#define PF_EX_ARM_IDIVT 13 - typedef struct _SYSTEM_INFO { union @@ -291,9 +215,90 @@ WINPR_API VOID GetSystemTimeAsFileTime(LPFILETIME lpSystemTimeAsFileTime); WINPR_API DWORD GetTickCount(void); WINPR_API BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature); + +#define PF_FLOATING_POINT_PRECISION_ERRATA 0 +#define PF_FLOATING_POINT_EMULATED 1 +#define PF_COMPARE_EXCHANGE_DOUBLE 2 +#define PF_MMX_INSTRUCTIONS_AVAILABLE 3 +#define PF_PPC_MOVEMEM_64BIT_OK 4 +#define PF_XMMI_INSTRUCTIONS_AVAILABLE 6 //sse +#define PF_3DNOW_INSTRUCTIONS_AVAILABLE 7 +#define PF_RDTSC_INSTRUCTION_AVAILABLE 8 +#define PF_PAE_ENABLED 9 +#define PF_XMMI64_INSTRUCTIONS_AVAILABLE 10 //sse2 +#define PF_SSE_DAZ_MODE_AVAILABLE 11 +#define PF_NX_ENABLED 12 +#define PF_SSE3_INSTRUCTIONS_AVAILABLE 13 +#define PF_COMPARE_EXCHANGE128 14 +#define PF_COMPARE64_EXCHANGE128 15 +#define PF_CHANNELS_ENABLED 16 +#define PF_XSAVE_ENABLED 17 +#define PF_ARM_VFP_32_REGISTERS_AVAILABLE 18 +#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19 +#define PF_SECOND_LEVEL_ADDRESS_TRANSLATION 20 +#define PF_VIRT_FIRMWARE_ENABLED 21 +#define PF_RDWRFSGSBASE_AVAILABLE 22 +#define PF_FASTFAIL_AVAILABLE 23 +#define PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE 24 +#define PF_ARM_64BIT_LOADSTORE_ATOMIC 25 +#define PF_ARM_EXTERNAL_CACHE_AVAILABLE 26 +#define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE 27 + +#define PF_ARM_V4 0x80000001 +#define PF_ARM_V5 0x80000002 +#define PF_ARM_V6 0x80000003 +#define PF_ARM_V7 0x80000004 +#define PF_ARM_THUMB 0x80000005 +#define PF_ARM_JAZELLE 0x80000006 +#define PF_ARM_DSP 0x80000007 +#define PF_ARM_MOVE_CP 0x80000008 +#define PF_ARM_VFP10 0x80000009 +#define PF_ARM_MPU 0x8000000A +#define PF_ARM_WRITE_BUFFER 0x8000000B +#define PF_ARM_MBX 0x8000000C +#define PF_ARM_L2CACHE 0x8000000D +#define PF_ARM_PHYSICALLY_TAGGED_CACHE 0x8000000E +#define PF_ARM_VFP_SINGLE_PRECISION 0x8000000F +#define PF_ARM_VFP_DOUBLE_PRECISION 0x80000010 +#define PF_ARM_ITCM 0x80000011 +#define PF_ARM_DTCM 0x80000012 +#define PF_ARM_UNIFIED_CACHE 0x80000013 +#define PF_ARM_WRITE_BACK_CACHE 0x80000014 +#define PF_ARM_CACHE_CAN_BE_LOCKED_DOWN 0x80000015 +#define PF_ARM_L2CACHE_MEMORY_MAPPED 0x80000016 +#define PF_ARM_L2CACHE_COPROC 0x80000017 +#define PF_ARM_THUMB2 0x80000018 +#define PF_ARM_T2EE 0x80000019 +#define PF_ARM_VFP3 0x8000001A +#define PF_ARM_NEON 0x8000001B +#define PF_ARM_UNALIGNED_ACCESS 0x8000001C + +#define PF_ARM_INTEL_XSCALE 0x80010001 +#define PF_ARM_INTEL_PMU 0x80010002 +#define PF_ARM_INTEL_WMMX 0x80010003 + #endif WINPR_API BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature); -#endif /* WINPR_SYSINFO_H */ +// extended flags +#define PF_EX_3DNOW_PREFETCH 1 +#define PF_EX_SSSE3 2 +#define PF_EX_SSE41 3 +#define PF_EX_SSE42 4 +#define PF_EX_AVX 5 +#define PF_EX_FMA 6 +#define PF_EX_AVX_AES 7 +#define PF_EX_AVX2 8 +#define PF_EX_ARM_VFP1 9 +#define PF_EX_ARM_VFP3D16 10 +#define PF_EX_ARM_VFP4 11 +#define PF_EX_ARM_IDIVA 12 +#define PF_EX_ARM_IDIVT 13 +// some "aliases" for the standard defines +// to be more clear +#define PF_SSE_INSTRUCTIONS_AVAILABLE PF_XMMI_INSTRUCTIONS_AVAILABLE +#define PF_SSE2_INSTRUCTIONS_AVAILABLE PF_XMMI64_INSTRUCTIONS_AVAILABLE + +#endif /* WINPR_SYSINFO_H */ diff --git a/winpr/libwinpr/sysinfo/sysinfo.c b/winpr/libwinpr/sysinfo/sysinfo.c index 1a78aa6b9..6e561ddff 100644 --- a/winpr/libwinpr/sysinfo/sysinfo.c +++ b/winpr/libwinpr/sysinfo/sysinfo.c @@ -553,7 +553,7 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) #endif return ret; } -#endif _WIN32 +#endif //_WIN32 BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature) { From eb194014d3d611d9d7f3b708ac71b5f568182e5f Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Fri, 1 Mar 2013 08:52:34 +0100 Subject: [PATCH 11/13] primitives: use alias define for SSE2 --- libfreerdp/primitives/prim_add_opt.c | 2 +- libfreerdp/primitives/prim_alphaComp_opt.c | 2 +- libfreerdp/primitives/prim_andor_opt.c | 2 +- libfreerdp/primitives/prim_colors_opt.c | 2 +- libfreerdp/primitives/prim_set_opt.c | 2 +- libfreerdp/primitives/prim_shift_opt.c | 2 +- libfreerdp/primitives/prim_sign_opt.c | 2 +- libfreerdp/primitives/test/prim_test.c | 4 ++-- libfreerdp/primitives/test/test_alphaComp.c | 6 +++--- libfreerdp/primitives/test/test_colors.c | 8 ++++---- libfreerdp/primitives/test/test_set.c | 10 +++++----- libfreerdp/primitives/test/test_shift.c | 8 ++++---- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/libfreerdp/primitives/prim_add_opt.c b/libfreerdp/primitives/prim_add_opt.c index 88a4fbc93..0741cdc0c 100644 --- a/libfreerdp/primitives/prim_add_opt.c +++ b/libfreerdp/primitives/prim_add_opt.c @@ -51,7 +51,7 @@ void primitives_init_add_opt( #ifdef WITH_IPP prims->add_16s = (__add_16s_t) ippsAdd_16s; #elif defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { prims->add_16s = sse3_add_16s; diff --git a/libfreerdp/primitives/prim_alphaComp_opt.c b/libfreerdp/primitives/prim_alphaComp_opt.c index 52e33fbc9..57901bb08 100644 --- a/libfreerdp/primitives/prim_alphaComp_opt.c +++ b/libfreerdp/primitives/prim_alphaComp_opt.c @@ -216,7 +216,7 @@ void primitives_init_alphaComp_opt(primitives_t* prims) #ifdef WITH_IPP prims->alphaComp_argb = ipp_alphaComp_argb; #elif defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { prims->alphaComp_argb = sse2_alphaComp_argb; diff --git a/libfreerdp/primitives/prim_andor_opt.c b/libfreerdp/primitives/prim_andor_opt.c index 8d74f30fa..a98d85f0a 100644 --- a/libfreerdp/primitives/prim_andor_opt.c +++ b/libfreerdp/primitives/prim_andor_opt.c @@ -52,7 +52,7 @@ void primitives_init_andor_opt(primitives_t *prims) prims->andC_32u = (__andC_32u_t) ippsAndC_32u; prims->orC_32u = (__orC_32u_t) ippsOrC_32u; #elif defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->andC_32u = sse3_andC_32u; diff --git a/libfreerdp/primitives/prim_colors_opt.c b/libfreerdp/primitives/prim_colors_opt.c index 3dcd8895b..10d7c03be 100644 --- a/libfreerdp/primitives/prim_colors_opt.c +++ b/libfreerdp/primitives/prim_colors_opt.c @@ -546,7 +546,7 @@ pstatus_t neon_yCbCrToRGB_16s16s_P3P3( void primitives_init_colors_opt(primitives_t* prims) { #if defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3; diff --git a/libfreerdp/primitives/prim_set_opt.c b/libfreerdp/primitives/prim_set_opt.c index 08b0f7e5f..41c6f6f18 100644 --- a/libfreerdp/primitives/prim_set_opt.c +++ b/libfreerdp/primitives/prim_set_opt.c @@ -208,7 +208,7 @@ void primitives_init_set_opt(primitives_t *prims) prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u; prims->zero = (__zero_t) ippsZero_8u; #elif defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { prims->set_8u = sse2_set_8u; prims->set_32s = sse2_set_32s; diff --git a/libfreerdp/primitives/prim_shift_opt.c b/libfreerdp/primitives/prim_shift_opt.c index 9cdb33db7..3f4d8acb6 100644 --- a/libfreerdp/primitives/prim_shift_opt.c +++ b/libfreerdp/primitives/prim_shift_opt.c @@ -67,7 +67,7 @@ void primitives_init_shift_opt(primitives_t *prims) prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u; prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u; #elif defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->lShiftC_16s = sse2_lShiftC_16s; diff --git a/libfreerdp/primitives/prim_sign_opt.c b/libfreerdp/primitives/prim_sign_opt.c index 643a75b3b..0d4464751 100644 --- a/libfreerdp/primitives/prim_sign_opt.c +++ b/libfreerdp/primitives/prim_sign_opt.c @@ -140,7 +140,7 @@ void primitives_init_sign_opt(primitives_t *prims) /* Pick tuned versions if possible. */ /* I didn't spot an IPP version of this. */ #if defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->sign_16s = ssse3_sign_16s; diff --git a/libfreerdp/primitives/test/prim_test.c b/libfreerdp/primitives/test/prim_test.c index dfd579ba1..52d520970 100644 --- a/libfreerdp/primitives/test/prim_test.c +++ b/libfreerdp/primitives/test/prim_test.c @@ -48,8 +48,8 @@ static const flagpair_t flags[] = #ifdef _M_IX86_AMD64 { PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" }, { PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" }, - { PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" }, - { PF_XMMI64_INSTRUCTIONS_AVAILABLE, "SSE2" }, + { PF_SSE_INSTRUCTIONS_AVAILABLE, "SSE" }, + { PF_SSE2_INSTRUCTIONS_AVAILABLE, "SSE2" }, { PF_SSE3_INSTRUCTIONS_AVAILABLE, "SSE3" }, #elif defined(_M_ARM) { PF_ARM_VFP3, "VFP3" }, diff --git a/libfreerdp/primitives/test/test_alphaComp.c b/libfreerdp/primitives/test/test_alphaComp.c index 66d765ec6..c3b5e9ca1 100644 --- a/libfreerdp/primitives/test/test_alphaComp.c +++ b/libfreerdp/primitives/test/test_alphaComp.c @@ -133,7 +133,7 @@ int test_alphaComp_func(void) (const BYTE *) src2, 4*SRC2_WIDTH, (BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT); #ifdef WITH_SSE2 - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH, @@ -166,7 +166,7 @@ int test_alphaComp_func(void) error = 1; } #ifdef WITH_SSE2 - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y); if (colordist(c0, c2) > TOLERANCE) @@ -208,7 +208,7 @@ STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes __attribute__((unused)) = size, size), #ifdef WITH_SSE2 TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes, - size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + size, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif diff --git a/libfreerdp/primitives/test/test_colors.c b/libfreerdp/primitives/test/test_colors.c index 57e568b9c..628cb5d5e 100644 --- a/libfreerdp/primitives/test/test_colors.c +++ b/libfreerdp/primitives/test/test_colors.c @@ -67,7 +67,7 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void) general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, (BYTE *) out1, 64*4, &roi); #ifdef WITH_SSE2 - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2, @@ -96,7 +96,7 @@ STD_SPEED_TEST( #ifdef WITH_SSE2 TRUE, sse2_RGBToRGB_16s8u_P3AC4R( (const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64), - PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif @@ -175,7 +175,7 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void) general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi); #ifdef WITH_SSE2 - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi); @@ -201,7 +201,7 @@ STD_SPEED_TEST( TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), #ifdef WITH_SSE2 TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), - PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #elif defined(WITH_NEON) TRUE, neon_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64), PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, FALSE, diff --git a/libfreerdp/primitives/test/test_set.c b/libfreerdp/primitives/test/test_set.c index 3edf746d7..32d22c71d 100644 --- a/libfreerdp/primitives/test/test_set.c +++ b/libfreerdp/primitives/test/test_set.c @@ -47,7 +47,7 @@ int test_set8u_func(void) #ifdef WITH_SSE2 /* Test SSE under various alignments */ - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) @@ -127,7 +127,7 @@ int test_set32s_func(void) #ifdef WITH_SSE2 /* Test SSE under various alignments */ - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { @@ -190,7 +190,7 @@ int test_set32u_func(void) #ifdef WITH_SSE2 /* Test SSE under various alignments */ - if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) + if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { strcat(testStr, " SSE2"); for (off=0; off<16; ++off) { @@ -253,7 +253,7 @@ static inline void memset32u_naive( STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst, TRUE, memset32u_naive(constant, dst, size), #ifdef WITH_SSE2 - TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + TRUE, sse2_set_32u(constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif @@ -285,7 +285,7 @@ static inline void memset32s_naive( STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst, TRUE, memset32s_naive(constant, dst, size), #ifdef WITH_SSE2 - TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + TRUE, sse2_set_32s(constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif diff --git a/libfreerdp/primitives/test/test_shift.c b/libfreerdp/primitives/test/test_shift.c index 2150373d5..588187a4b 100644 --- a/libfreerdp/primitives/test/test_shift.c +++ b/libfreerdp/primitives/test/test_shift.c @@ -110,7 +110,7 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u, STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst, TRUE, general_lShiftC_16s(src1, constant, dst, size), #ifdef WITH_SSE2 - TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif @@ -118,7 +118,7 @@ STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst, STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst, TRUE, general_lShiftC_16u(src1, constant, dst, size), #ifdef WITH_SSE2 - TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif @@ -126,7 +126,7 @@ STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst, STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst, TRUE, general_rShiftC_16s(src1, constant, dst, size), #ifdef WITH_SSE2 - TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif @@ -134,7 +134,7 @@ STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst, STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst, TRUE, general_rShiftC_16u(src1, constant, dst, size), #ifdef WITH_SSE2 - TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE, + TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE, #else FALSE, PRIM_NOP, 0, FALSE, #endif From 50a4f6d97b0fa8da29b944f4782f5e3ab86365e7 Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Fri, 1 Mar 2013 08:53:57 +0100 Subject: [PATCH 12/13] primitives: fixed flag detection for sign functions --- libfreerdp/primitives/prim_sign_opt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libfreerdp/primitives/prim_sign_opt.c b/libfreerdp/primitives/prim_sign_opt.c index 0d4464751..635208f26 100644 --- a/libfreerdp/primitives/prim_sign_opt.c +++ b/libfreerdp/primitives/prim_sign_opt.c @@ -140,7 +140,7 @@ void primitives_init_sign_opt(primitives_t *prims) /* Pick tuned versions if possible. */ /* I didn't spot an IPP version of this. */ #if defined(WITH_SSE2) - if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) + if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { prims->sign_16s = ssse3_sign_16s; From 0d3febe1fcc1a136be2c69757becba4be0c543fb Mon Sep 17 00:00:00 2001 From: Bernhard Miklautz Date: Fri, 1 Mar 2013 10:14:33 +0100 Subject: [PATCH 13/13] iOS: set CMAKE_SYSTEM_PROCESSOR in toolchain file This is required that the right architecture is chosen during the cmake run. Otherwise it is not possible to compile WITH_NEON. --- cmake/iOSToolchain.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/iOSToolchain.cmake b/cmake/iOSToolchain.cmake index 6635e9803..41a4776ca 100644 --- a/cmake/iOSToolchain.cmake +++ b/cmake/iOSToolchain.cmake @@ -144,8 +144,10 @@ set (CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS su # NOTE: Currently both ARCHS_STANDARD_32_BIT and ARCHS_UNIVERSAL_IPHONE_OS set armv7 only, so set both manually if (${IOS_PLATFORM} STREQUAL "OS") set (IOS_ARCH armv7 armv7s) + set (CMAKE_SYSTEM_PROCESSOR armv7) else (${IOS_PLATFORM} STREQUAL "OS") set (IOS_ARCH i386) + set (CMAKE_SYSTEM_PROCESSOR i386) endif (${IOS_PLATFORM} STREQUAL "OS") set (CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")