Merge pull request #1023 from bmiklautz/cpuflagdetection

CPU feature detection
This commit is contained in:
Marc-André Moreau 2013-03-02 15:52:25 -08:00
commit 8f3911d4b8
51 changed files with 767 additions and 625 deletions

View File

@ -303,6 +303,7 @@ if(ANDROID)
set(GSTREAMER_FEATURE_TYPE "DISABLED")
endif()
find_feature(X11 ${X11_FEATURE_TYPE} ${X11_FEATURE_PURPOSE} ${X11_FEATURE_DESCRIPTION})
find_feature(DirectFB ${DIRECTFB_FEATURE_TYPE} ${DIRECTFB_FEATURE_PURPOSE} ${DIRECTFB_FEATURE_DESCRIPTION})
@ -318,8 +319,12 @@ find_feature(PCSC ${PCSC_FEATURE_TYPE} ${PCSC_FEATURE_PURPOSE} ${PCSC_FEATURE_DE
find_feature(FFmpeg ${FFMPEG_FEATURE_TYPE} ${FFMPEG_FEATURE_PURPOSE} ${FFMPEG_FEATURE_DESCRIPTION})
find_feature(Gstreamer ${GSTREAMER_FEATURE_TYPE} ${GSTREAMER_FEATURE_PURPOSE} ${GSTREAMER_FEATURE_DESCRIPTION})
if(TARGET_ARCH MATCHES "x86|x64")
if (NOT APPLE)
# Intel Performance Primitives
find_feature(IPP ${IPP_FEATURE_TYPE} ${IPP_FEATURE_PURPOSE} ${IPP_FEATURE_DESCRIPTION})
endif()
endif()
# Installation Paths
if(WIN32)

View File

@ -762,46 +762,6 @@ BOOL xf_pre_connect(freerdp* instance)
return TRUE;
}
void cpuid(unsigned info, unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
{
#ifdef __GNUC__
#if defined(__i386__) || defined(__x86_64__)
__asm volatile
(
/* The EBX (or RBX register on x86_64) is used for the PIC base address
and must not be corrupted by our inline assembly. */
#if defined(__i386__)
"mov %%ebx, %%esi;"
"cpuid;"
"xchg %%ebx, %%esi;"
#else
"mov %%rbx, %%rsi;"
"cpuid;"
"xchg %%rbx, %%rsi;"
#endif
: "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
: "0" (info)
);
#endif
#endif
}
UINT32 xf_detect_cpu()
{
unsigned int eax, ebx, ecx, edx = 0;
UINT32 cpu_opt = 0;
cpuid(1, &eax, &ebx, &ecx, &edx);
if (edx & (1<<26))
{
DEBUG_MSG("SSE2 detected");
cpu_opt |= CPU_SSE2;
}
return cpu_opt;
}
/**
* Callback given to freerdp_connect() to perform post-connection operations.
* It will be called only if the connection was initialized properly, and will continue the initialization based on the
@ -809,9 +769,6 @@ UINT32 xf_detect_cpu()
*/
BOOL xf_post_connect(freerdp* instance)
{
#ifdef WITH_SSE2
UINT32 cpu;
#endif
xfInfo* xfi;
XGCValues gcv;
rdpCache* cache;
@ -866,15 +823,6 @@ BOOL xf_post_connect(freerdp* instance)
}
}
#ifdef WITH_SSE2
/* detect only if needed */
cpu = xf_detect_cpu();
if (rfx_context)
rfx_context_set_cpu_opt(rfx_context, cpu);
if (nsc_context)
nsc_context_set_cpu_opt(nsc_context, cpu);
#endif
xfi->width = instance->settings->DesktopWidth;
xfi->height = instance->settings->DesktopHeight;

View File

@ -9,7 +9,6 @@ endif()
option(WITH_MANPAGES "Generate manpages." ON)
option(WITH_PROFILER "Compile profiler." OFF)
option(WITH_IPP "Use Intel Performance Primitives." OFF)
if((TARGET_ARCH MATCHES "x86|x64") AND (NOT DEFINED WITH_SSE2))
option(WITH_SSE2 "Enable SSE2 optimization." ON)
@ -29,8 +28,11 @@ if(TARGET_ARCH MATCHES "ARM")
set(ARM_FP_ABI ${ARM_FP_API} CACHE STRING "Floating point ABI to use on arm")
endif()
mark_as_advanced(ARM_FP_ABI)
else()
if(NOT APPLE)
option(WITH_IPP "Use Intel Performance Primitives." OFF)
endif()
endif()
option(WITH_JPEG "Use JPEG decoding." OFF)
if(APPLE)

View File

@ -144,8 +144,10 @@ set (CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS su
# NOTE: Currently both ARCHS_STANDARD_32_BIT and ARCHS_UNIVERSAL_IPHONE_OS set armv7 only, so set both manually
if (${IOS_PLATFORM} STREQUAL "OS")
set (IOS_ARCH armv7 armv7s)
set (CMAKE_SYSTEM_PROCESSOR armv7)
else (${IOS_PLATFORM} STREQUAL "OS")
set (IOS_ARCH i386)
set (CMAKE_SYSTEM_PROCESSOR i386)
endif (${IOS_PLATFORM} STREQUAL "OS")
set (CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")

View File

@ -113,7 +113,6 @@ typedef struct _RFX_CONTEXT RFX_CONTEXT;
FREERDP_API RFX_CONTEXT* rfx_context_new(void);
FREERDP_API void rfx_context_free(RFX_CONTEXT* context);
FREERDP_API void rfx_context_set_cpu_opt(RFX_CONTEXT* context, UINT32 cpu_opt);
FREERDP_API void rfx_context_set_pixel_format(RFX_CONTEXT* context, RDP_PIXEL_FORMAT pixel_format);
FREERDP_API void rfx_context_reset(RFX_CONTEXT* context);

View File

@ -190,9 +190,6 @@ typedef struct
__yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3;
__RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3;
__RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R;
/* internal use for CPU flags and such. */
void *hints;
} primitives_t;
#ifdef __cplusplus
@ -202,12 +199,6 @@ extern "C" {
/* Prototypes for the externally-visible entrypoints. */
FREERDP_API void primitives_init(void);
FREERDP_API primitives_t *primitives_get(void);
FREERDP_API UINT32 primitives_get_flags(
const primitives_t *prims);
FREERDP_API void primitives_flags_str(
const primitives_t *prims,
char *str,
size_t len);
FREERDP_API void primitives_deinit(void);
#ifdef __cplusplus

View File

@ -69,13 +69,6 @@ if(WITH_SSE2)
endif()
if(WITH_NEON)
if(ANDROID)
set(ANDROID_CPU_FEATURES_PATH "${ANDROID_NDK}/sources/android/cpufeatures")
include_directories(${ANDROID_CPU_FEATURES_PATH})
set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS}
${ANDROID_CPU_FEATURES_PATH}/cpu-features.c
${ANDROID_CPU_FEATURES_PATH}/cpu-features.h)
endif()
set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} ${${MODULE_PREFIX}_NEON_SRCS})
set_source_files_properties(${${MODULE_PREFIX}_NEON_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon -mfloat-abi=${ARM_FP_ABI} -Wno-unused-variable")
endif()

View File

@ -238,13 +238,6 @@ RFX_CONTEXT* rfx_context_new(void)
return context;
}
void rfx_context_set_cpu_opt(RFX_CONTEXT* context, UINT32 cpu_opt)
{
/* enable SIMD CPU acceleration if detected */
if (cpu_opt & CPU_SSE2)
RFX_INIT_SIMD(context);
}
void rfx_context_free(RFX_CONTEXT* context)
{
free(context->quants);

View File

@ -27,14 +27,11 @@
#include <stdlib.h>
#include <string.h>
#include <arm_neon.h>
#include <winpr/sysinfo.h>
#include "rfx_types.h"
#include "rfx_neon.h"
#if ANDROID
#include "cpu-features.h"
#endif
/* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@ -252,43 +249,9 @@ void rfx_dwt_2d_decode_NEON(INT16 * buffer, INT16 * dwt_buffer)
rfx_dwt_2d_decode_block_NEON(buffer, dwt_buffer, 32);
}
int isNeonSupported()
{
#if ANDROID
if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM)
{
DEBUG_RFX("NEON optimization disabled - No ARM CPU found");
return 0;
}
UINT64 features = android_getCpuFeatures();
if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7))
{
if (features & ANDROID_CPU_ARM_FEATURE_NEON)
{
DEBUG_RFX("NEON optimization enabled!");
return FALSE;
}
DEBUG_RFX("NEON optimization disabled - CPU not NEON capable");
}
else
{
DEBUG_RFX("NEON optimization disabled - No ARMv7 CPU found");
}
return FALSE;
#elif defined(__APPLE)
/* assume NEON support on iOS devices */
return TRUE;
#else
return FALSE;
#endif
}
void rfx_init_neon(RFX_CONTEXT * context)
{
if (isNeonSupported())
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
DEBUG_RFX("Using NEON optimizations");

View File

@ -25,6 +25,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <winpr/sysinfo.h>
#include <xmmintrin.h>
#include <emmintrin.h>
@ -490,6 +491,10 @@ static void rfx_dwt_2d_encode_sse2(INT16* buffer, INT16* dwt_buffer)
void rfx_init_sse2(RFX_CONTEXT* context)
{
if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
return;
DEBUG_RFX("Using SSE2 optimizations");
IF_PROFILER(context->priv->prof_rfx_quantization_decode->name = "rfx_quantization_decode_sse2");

View File

@ -63,15 +63,6 @@ elseif(WITH_NEON)
# TODO: Add MSVC equivalent
endif()
# required for android cpu detection
if(ANDROID)
set(ANDROID_CPU_FEATURES_PATH "${ANDROID_NDK}/sources/android/cpufeatures")
include_directories(${ANDROID_CPU_FEATURES_PATH})
set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS}
${ANDROID_CPU_FEATURES_PATH}/cpu-features.c
${ANDROID_CPU_FEATURES_PATH}/cpu-features.h)
endif()
set_property(SOURCE ${${MODULE_PREFIX}_OPT_SRCS} PROPERTY COMPILE_FLAGS ${OPTIMIZATION})
set(${MODULE_PREFIX}_SRCS ${${MODULE_PREFIX}_SRCS} ${${MODULE_PREFIX}_OPT_SRCS})

View File

@ -62,10 +62,7 @@ New Optimizations
-----------------
As the need arises, new optimizations can be added to the library,
including NEON, AVX, and perhaps OpenCL or other SIMD implementations.
The initialization routine is free to do any quick run-time test to
determine which features are available before hooking the operation's
function pointer, or it can simply look at the processor features list
from the hints passed to the initialization routine.
The CPU feature detection is done in winpr/sysinfo.
Adding Entrypoints
@ -85,15 +82,6 @@ be added.
The template functions can frequently be used to extend the
operations without writing a lot of new code.
Flags
-----
The entrypoint primitives_get_flags() returns a bitfield of processor flags
(as defined in primitives.h) and primitives_flag_str() returns a string
related to those processor flags, for debugging and information. The
bitfield can be used elsewhere in the code as needed.
Cache Management
----------------
I haven't found a lot of speed improvement by attempting prefetch, and

View File

@ -46,12 +46,11 @@ pstatus_t general_add_16s(
/* ------------------------------------------------------------------------- */
void primitives_init_add(
const primitives_hints_t *hints,
primitives_t *prims)
{
prims->add_16s = general_add_16s;
primitives_init_add_opt(hints, prims);
primitives_init_add_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -24,7 +24,7 @@
pstatus_t general_add_16s(const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, INT32 len);
void primitives_init_add_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_add_opt(primitives_t *prims);
#endif /* !__PRIM_ADD_H_INCLUDED__ */

View File

@ -20,6 +20,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -45,18 +46,15 @@ SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s,
/* ------------------------------------------------------------------------- */
void primitives_init_add_opt(
const primitives_hints_t *hints,
primitives_t *prims)
{
#ifdef WITH_IPP
prims->add_16s = (__add_16s_t) ippsAdd_16s;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
prims->add_16s = sse3_add_16s;
}
#endif
}

View File

@ -102,11 +102,11 @@ pstatus_t general_alphaComp_argb(
}
/* ------------------------------------------------------------------------- */
void primitives_init_alphaComp(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_alphaComp(primitives_t* prims)
{
prims->alphaComp_argb = general_alphaComp_argb;
primitives_init_alphaComp_opt(hints, prims);
primitives_init_alphaComp_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -24,7 +24,7 @@
pstatus_t general_alphaComp_argb(const BYTE *pSrc1, INT32 src1Step, const BYTE *pSrc2, INT32 src2Step, BYTE *pDst, INT32 dstStep, INT32 width, INT32 height);
void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims);
void primitives_init_alphaComp_opt(primitives_t* prims);
#endif /* !__PRIM_ALPHACOMP_H_INCLUDED__ */

View File

@ -26,6 +26,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -210,13 +211,13 @@ pstatus_t ipp_alphaComp_argb(
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_alphaComp_opt(primitives_t* prims)
{
#ifdef WITH_IPP
prims->alphaComp_argb = ipp_alphaComp_argb;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
prims->alphaComp_argb = sse2_alphaComp_argb;
}

View File

@ -61,14 +61,13 @@ pstatus_t general_orC_32u(
/* ------------------------------------------------------------------------- */
void primitives_init_andor(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->andC_32u = general_andC_32u;
prims->orC_32u = general_orC_32u;
primitives_init_andor_opt(hints, prims);
primitives_init_andor_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -25,7 +25,7 @@
pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_andor_opt(primitives_t *prims);
#endif /* !__PRIM_ANDOR_H_INCLUDED__ */

View File

@ -19,6 +19,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -45,14 +46,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u,
/* ------------------------------------------------------------------------- */
void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_andor_opt(primitives_t *prims)
{
#if defined(WITH_IPP)
prims->andC_32u = (__andC_32u_t) ippsAndC_32u;
prims->orC_32u = (__orC_32u_t) ippsOrC_32u;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->andC_32u = sse3_andC_32u;
prims->orC_32u = sse3_orC_32u;

View File

@ -215,13 +215,13 @@ pstatus_t general_RGBToRGB_16s8u_P3AC4R(
}
/* ------------------------------------------------------------------------- */
void primitives_init_colors(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_colors(primitives_t* prims)
{
prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3;
prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3;
primitives_init_colors_opt(hints, prims);
primitives_init_colors_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -26,7 +26,7 @@ pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, IN
pstatus_t general_RGBToYCbCr_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, INT16 *pDst[3], INT32 dstStep, const prim_size_t *roi);
pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi);
void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims);
void primitives_init_colors_opt(primitives_t* prims);
#endif /* !__PRIM_COLORS_H_INCLUDED__ */

View File

@ -23,6 +23,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -542,17 +543,17 @@ pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
*/
/* ------------------------------------------------------------------------- */
void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_colors_opt(primitives_t* prims)
{
#if defined(WITH_SSE2)
if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
}
#elif defined(WITH_NEON)
if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE)
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
}

View File

@ -148,7 +148,6 @@ static pstatus_t ippiCopy_8u_AC4r(
/* ------------------------------------------------------------------------- */
void primitives_init_copy(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */

View File

@ -34,61 +34,43 @@
? _mm_lddqu_si128((__m128i *) (_ptr_)) \
: _mm_load_si128((__m128i *) (_ptr_)))
/* This structure can (eventually) be used to provide hints to the
* initialization routines, e.g. whether SSE2 or NEON or IPP instructions
* or calls are available.
*/
typedef struct
{
UINT32 x86_flags;
UINT32 arm_flags;
} primitives_hints_t;
/* Function prototypes for all the init/deinit routines. */
extern void primitives_init_copy(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_copy(
primitives_t *prims);
extern void primitives_init_set(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_set(
primitives_t *prims);
extern void primitives_init_add(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_add(
primitives_t *prims);
extern void primitives_init_andor(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_andor(
primitives_t *prims);
extern void primitives_init_shift(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_shift(
primitives_t *prims);
extern void primitives_init_sign(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_sign(
primitives_t *prims);
extern void primitives_init_alphaComp(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_alphaComp(
primitives_t *prims);
extern void primitives_init_colors(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_colors(
primitives_t *prims);

View File

@ -111,7 +111,6 @@ pstatus_t general_set_32u(
/* ------------------------------------------------------------------------- */
void primitives_init_set(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
@ -120,7 +119,7 @@ void primitives_init_set(
prims->set_32u = general_set_32u;
prims->zero = general_zero;
primitives_init_set_opt(hints, prims);
primitives_init_set_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -28,7 +28,7 @@ pstatus_t general_set_32s(INT32 val, INT32 *pDst, INT32 len);
pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, INT32 len);
void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_set_opt(primitives_t *prims);
#endif /* !__PRIM_SET_H_INCLUDED__ */

View File

@ -21,6 +21,7 @@
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
# include <emmintrin.h>
@ -198,7 +199,7 @@ pstatus_t ipp_wrapper_set_32u(
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_set_opt(primitives_t *prims)
{
/* Pick tuned versions if possible. */
#ifdef WITH_IPP
@ -207,7 +208,7 @@ void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prim
prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u;
prims->zero = (__zero_t) ippsZero_8u;
#elif defined(WITH_SSE2)
if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
prims->set_8u = sse2_set_8u;
prims->set_32s = sse2_set_32s;

View File

@ -104,7 +104,6 @@ pstatus_t general_shiftC_16u(
/* ------------------------------------------------------------------------- */
void primitives_init_shift(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
@ -117,7 +116,7 @@ void primitives_init_shift(
prims->shiftC_16s = general_shiftC_16s;
prims->shiftC_16u = general_shiftC_16u;
primitives_init_shift_opt(hints, prims);
primitives_init_shift_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -29,7 +29,7 @@ pstatus_t general_rShiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32
pstatus_t general_shiftC_16s(const INT16 *pSrc, INT32 val, INT16 *pDst, INT32 len);
pstatus_t general_shiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 len);
void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_shift_opt(primitives_t *prims);
#endif /* !__PRIM_SHIFT_H_INCLUDED__ */

View File

@ -19,6 +19,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -58,7 +59,7 @@ SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u,
*/
/* ------------------------------------------------------------------------- */
void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_shift_opt(primitives_t *prims)
{
#if defined(WITH_IPP)
prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s;
@ -66,8 +67,8 @@ void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *pr
prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u;
prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->lShiftC_16s = sse2_lShiftC_16s;
prims->rShiftC_16s = sse2_rShiftC_16s;

View File

@ -42,13 +42,12 @@ pstatus_t general_sign_16s(
/* ------------------------------------------------------------------------- */
void primitives_init_sign(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->sign_16s = general_sign_16s;
primitives_init_sign_opt(hints, prims);
primitives_init_sign_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -24,7 +24,7 @@
pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, INT32 len);
void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_sign_opt(primitives_t *prims);
#endif /* !__PRIM_SIGN_H_INCLUDED__ */

View File

@ -19,6 +19,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -134,13 +135,13 @@ pstatus_t ssse3_sign_16s(
#endif /* WITH_SSE2 */
/* ------------------------------------------------------------------------- */
void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_sign_opt(primitives_t *prims)
{
/* Pick tuned versions if possible. */
/* I didn't spot an IPP version of this. */
#if defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->sign_16s = ssse3_sign_16s;
}

View File

@ -22,173 +22,16 @@
#include <string.h>
#include <stdlib.h>
#include <winpr/platform.h>
#include <freerdp/primitives.h>
#include "prim_internal.h"
#ifdef __ANDROID__
#include "cpu-features.h"
#endif
/* Singleton pointer used throughout the program when requested. */
static primitives_t* pPrimitives = NULL;
#define D_BIT_MMX (1<<23)
#define D_BIT_SSE (1<<25)
#define D_BIT_SSE2 (1<<26)
#define D_BIT_3DN (1<<30)
#define C_BIT_SSE3 (1<<0)
#define C_BIT_3DNP (1<<8)
#define C_BIT_SSSE3 (1<<9)
#define C_BIT_SSE41 (1<<19)
#define C_BIT_SSE42 (1<<20)
#define C_BIT_XGETBV (1<<27)
#define C_BIT_AVX (1<<28)
#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX)
#define E_BIT_XMM (1<<1)
#define E_BIT_YMM (1<<2)
#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM)
#define C_BIT_FMA (1<<11)
#define C_BIT_AVX_AES (1<<24)
/* If x86 */
#if defined(_M_IX86_AMD64)
/* If GCC */
#ifdef __GNUC__
#ifdef __AVX__
#define xgetbv(_func_, _lo_, _hi_) \
__asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_))
#endif
static void cpuid(
unsigned info,
unsigned *eax,
unsigned *ebx,
unsigned *ecx,
unsigned *edx)
{
*eax = *ebx = *ecx = *edx = 0;
__asm volatile
(
/* The EBX (or RBX register on x86_64) is used for the PIC base address
* and must not be corrupted by our inline assembly.
*/
#ifdef _M_IX86
"mov %%ebx, %%esi;"
"cpuid;"
"xchg %%ebx, %%esi;"
#else
"mov %%rbx, %%rsi;"
"cpuid;"
"xchg %%rbx, %%rsi;"
#endif
: "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
: "0" (info)
);
}
static void set_hints(primitives_hints_t* hints)
{
unsigned a, b, c, d;
cpuid(1, &a, &b, &c, &d);
if (d & D_BIT_MMX)
hints->x86_flags |= PRIM_X86_MMX_AVAILABLE;
if (d & D_BIT_SSE)
hints->x86_flags |= PRIM_X86_SSE_AVAILABLE;
if (d & D_BIT_SSE2)
hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE;
if (d & D_BIT_3DN)
hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE;
if (c & C_BIT_3DNP)
hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE;
if (c & C_BIT_SSE3)
hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE;
if (c & C_BIT_SSSE3)
hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE;
if (c & C_BIT_SSE41)
hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE;
if (c & C_BIT_SSE42)
hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE;
#ifdef __AVX__
if ((c & C_BITS_AVX) == C_BITS_AVX)
{
int e, f;
xgetbv(0, e, f);
if ((e & E_BITS_AVX) == E_BITS_AVX)
{
hints->x86_flags |= PRIM_X86_AVX_AVAILABLE;
if (c & C_BIT_FMA)
hints->x86_flags |= PRIM_X86_FMA_AVAILABLE;
if (c & C_BIT_AVX_AES)
hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE;
}
}
/* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */
#endif
}
#else
static void set_hints(primitives_hints_t* hints)
{
/* x86 non-GCC: TODO */
}
#endif /* __GNUC__ */
/* ------------------------------------------------------------------------- */
#elif defined(_M_ARM)
static UINT32 getNeonSupport(void)
{
#ifdef __ANDROID__
if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0;
UINT64 features = android_getCpuFeatures();
if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7))
{
if (features & ANDROID_CPU_ARM_FEATURE_NEON)
{
return PRIM_ARM_NEON_AVAILABLE;
}
}
#elif defined(__APPLE)
/* assume NEON support on iOS devices */
return PRIM_ARM_NEON_AVAILABLE;
#endif
return 0;
}
static void set_hints(primitives_hints_t* hints)
{
/* ARM: TODO */
hints->arm_flags |= getNeonSupport();
}
#else
static void set_hints(
primitives_hints_t *hints)
{
}
#endif /* x86 else ARM else */
/* ------------------------------------------------------------------------- */
void primitives_init(void)
{
primitives_hints_t* hints;
if (pPrimitives == NULL)
{
pPrimitives = calloc(1, sizeof(primitives_t));
@ -197,19 +40,15 @@ void primitives_init(void)
return;
}
hints = calloc(1, sizeof(primitives_hints_t));
set_hints(hints);
pPrimitives->hints = (void *) hints;
/* Now call each section's initialization routine. */
primitives_init_add(hints, pPrimitives);
primitives_init_andor(hints, pPrimitives);
primitives_init_alphaComp(hints, pPrimitives);
primitives_init_copy(hints, pPrimitives);
primitives_init_set(hints, pPrimitives);
primitives_init_shift(hints, pPrimitives);
primitives_init_sign(hints, pPrimitives);
primitives_init_colors(hints, pPrimitives);
primitives_init_add(pPrimitives);
primitives_init_andor(pPrimitives);
primitives_init_alphaComp(pPrimitives);
primitives_init_copy(pPrimitives);
primitives_init_set(pPrimitives);
primitives_init_shift(pPrimitives);
primitives_init_sign(pPrimitives);
primitives_init_colors(pPrimitives);
}
/* ------------------------------------------------------------------------- */
@ -221,102 +60,6 @@ primitives_t* primitives_get(void)
return pPrimitives;
}
/* ------------------------------------------------------------------------- */
UINT32 primitives_get_flags(const primitives_t* prims)
{
primitives_hints_t* hints = (primitives_hints_t*) (prims->hints);
#if defined(_M_IX86_AMD64)
return hints->x86_flags;
#elif defined(_M_ARM)
return hints->arm_flags;
#else
return 0;
#endif
}
/* ------------------------------------------------------------------------- */
typedef struct
{
UINT32 flag;
const char *str;
} flagpair_t;
static const flagpair_t x86_flags[] =
{
{ PRIM_X86_MMX_AVAILABLE, "MMX" },
{ PRIM_X86_3DNOW_AVAILABLE, "3DNow" },
{ PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" },
{ PRIM_X86_SSE_AVAILABLE, "SSE" },
{ PRIM_X86_SSE2_AVAILABLE, "SSE2" },
{ PRIM_X86_SSE3_AVAILABLE, "SSE3" },
{ PRIM_X86_SSSE3_AVAILABLE, "SSSE3" },
{ PRIM_X86_SSE41_AVAILABLE, "SSE4.1" },
{ PRIM_X86_SSE42_AVAILABLE, "SSE4.2" },
{ PRIM_X86_AVX_AVAILABLE, "AVX" },
{ PRIM_X86_FMA_AVAILABLE, "FMA" },
{ PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" },
{ PRIM_X86_AVX2_AVAILABLE, "AVX2" },
};
static const flagpair_t arm_flags[] =
{
{ PRIM_ARM_VFP1_AVAILABLE, "VFP1" },
{ PRIM_ARM_VFP2_AVAILABLE, "VFP2" },
{ PRIM_ARM_VFP3_AVAILABLE, "VFP3" },
{ PRIM_ARM_VFP4_AVAILABLE, "VFP4" },
{ PRIM_ARM_FPA_AVAILABLE, "FPA" },
{ PRIM_ARM_FPE_AVAILABLE, "FPE" },
{ PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" },
{ PRIM_ARM_NEON_AVAILABLE, "NEON" },
};
void primitives_flags_str(const primitives_t* prims, char* str, size_t len)
{
int i;
primitives_hints_t* hints;
*str = '\0';
--len; /* for the '/0' */
hints = (primitives_hints_t*) (prims->hints);
for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i)
{
if (hints->x86_flags & x86_flags[i].flag)
{
int slen = strlen(x86_flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, x86_flags[i].str);
len -= slen;
}
}
for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i)
{
if (hints->arm_flags & arm_flags[i].flag)
{
int slen = strlen(arm_flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, arm_flags[i].str);
len -= slen;
}
}
}
/* ------------------------------------------------------------------------- */
void primitives_deinit(void)
{
@ -333,9 +76,6 @@ void primitives_deinit(void)
primitives_deinit_sign(pPrimitives);
primitives_deinit_colors(pPrimitives);
if (pPrimitives->hints != NULL)
free((void*) (pPrimitives->hints));
free((void*) pPrimitives);
pPrimitives = NULL;
}

View File

@ -138,7 +138,7 @@ endif()
set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS})
target_link_libraries(prim_test rt)
target_link_libraries(prim_test rt winpr-sysinfo)
if(NOT TESTING_OUTPUT_DIRECTORY)
set(TESTING_OUTPUT_DIRECTORY .)
endif()

View File

@ -21,6 +21,8 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <winpr/platform.h>
#include <winpr/sysinfo.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@ -32,6 +34,88 @@
int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
int Quiet = 0;
/* ------------------------------------------------------------------------- */
typedef struct
{
UINT32 flag;
const char *str;
} flagpair_t;
static const flagpair_t flags[] =
{
#ifdef _M_IX86_AMD64
{ PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" },
{ PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" },
{ PF_SSE_INSTRUCTIONS_AVAILABLE, "SSE" },
{ PF_SSE2_INSTRUCTIONS_AVAILABLE, "SSE2" },
{ PF_SSE3_INSTRUCTIONS_AVAILABLE, "SSE3" },
#elif defined(_M_ARM)
{ PF_ARM_VFP3, "VFP3" },
{ PF_ARM_INTEL_WMMX, "IWMMXT" },
{ PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, "NEON" },
#endif
};
static const flagpair_t flags_extended[] =
{
#ifdef _M_IX86_AMD64
{ PF_EX_3DNOW_PREFETCH, "3DNow-PF" },
{ PF_EX_SSSE3, "SSSE3" },
{ PF_EX_SSE41, "SSE4.1" },
{ PF_EX_SSE42, "SSE4.2" },
{ PF_EX_AVX, "AVX" },
{ PF_EX_FMA, "FMA" },
{ PF_EX_AVX_AES, "AVX-AES" },
{ PF_EX_AVX2, "AVX2" },
#elif defined(_M_ARM)
{ PF_EX_ARM_VFP1, "VFP1"},
{ PF_EX_ARM_VFP4, "VFP4" },
#endif
};
void primitives_flags_str(char* str, size_t len)
{
int i;
*str = '\0';
--len; /* for the '/0' */
for (i = 0; i < sizeof(flags) / sizeof(flagpair_t); ++i)
{
if (IsProcessorFeaturePresent(flags[i].flag))
{
int slen = strlen(flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, flags[i].str);
len -= slen;
}
}
for (i = 0; i < sizeof(flags_extended) / sizeof(flagpair_t); ++i)
{
if (IsProcessorFeaturePresentEx(flags_extended[i].flag))
{
int slen = strlen(flags_extended[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, flags_extended[i].str);
len -= slen;
}
}
}
/* ------------------------------------------------------------------------- */
static void get_random_data_lrand(
void *buffer,
@ -198,7 +282,7 @@ static const test_t testTypeList[] =
int main(int argc, char** argv)
{
int i;
char hints[256];
char hints[1024];
UINT32 testSet = 0;
UINT32 testTypes = 0;
int results = SUCCESS;
@ -253,7 +337,7 @@ int main(int argc, char** argv)
primitives_init();
primitives_flags_str(primitives_get(), hints, sizeof(hints));
primitives_flags_str(hints, sizeof(hints));
printf("Hints: %s\n", hints);
/* COPY */

View File

@ -29,6 +29,7 @@
#include <stdio.h>
#include <freerdp/primitives.h>
#include <winpr/platform.h>
#ifdef WITH_IPP
#include <ipps.h>
@ -100,7 +101,7 @@ extern int test_or_32u_speed(void);
/* Since so much of this code is repeated, define a macro to build
* functions to do speed tests.
*/
#ifdef armel
#ifdef _M_ARM
#define SIMD_TYPE "Neon"
#else
#define SIMD_TYPE "SSE"
@ -121,8 +122,8 @@ extern int test_or_32u_speed(void);
} \
} while (0)
#if defined(i386) && defined(WITH_SSE2)
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \
#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(_M_ARM) && defined(WITH_NEON))
#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
@ -132,34 +133,15 @@ extern int test_or_32u_speed(void);
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
_funcSSE_); \
MEASURE_TIMED(label, iter, test_time, resultOpt[s], \
_funcOpt_); \
} \
} while (0)
#else
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_)
#define DO_OPT_MEASUREMENTS(_funcSSE_, _prework_)
#endif
#if defined(armel) && defined(INCLUDE_NEON_MEASUREMENTS)
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
int iter; \
char label[256]; \
int size = size_array[s]; \
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
_funcNeon_); \
} \
} while (0)
#else
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_)
#endif
#if defined(i386) && defined(WITH_IPP)
#if defined(_M_IX86_AMD64) && defined(WITH_IPP)
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
@ -178,12 +160,12 @@ extern int test_or_32u_speed(void);
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_)
#endif
#define PRIM_NOP do {} while (0)
/* ------------------------------------------------------------------------- */
#define STD_SPEED_TEST( \
_name_, _srctype_, _dsttype_, _prework_, \
_doNormal_, _funcNormal_, \
_doSSE_, _funcSSE_, _flagsSSE_, \
_doNeon_, _funcNeon_, _flagsNeon_, \
_doOpt_, _funcOpt_, _flagOpt_, _flagExt_, \
_doIPP_, _funcIPP_) \
static void _name_( \
const char *oplabel, const char *type, \
@ -193,24 +175,28 @@ static void _name_( \
int iterations, float test_time) \
{ \
int s; \
float *resultNormal, *resultSSENeon, *resultIPP; \
UINT32 pflags = primitives_get_flags(primitives_get()); \
float *resultNormal, *resultOpt, *resultIPP; \
resultNormal = (float *) calloc(num_sizes, sizeof(float)); \
resultSSENeon = (float *) calloc(num_sizes, sizeof(float)); \
resultOpt = (float *) calloc(num_sizes, sizeof(float)); \
resultIPP = (float *) calloc(num_sizes, sizeof(float)); \
printf("******************** %s %s ******************\n", \
oplabel, type); \
if (_doNormal_) { DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_); } \
if (_doSSE_) { \
if ((pflags & (_flagsSSE_)) == (_flagsSSE_)) \
if (_doOpt_) \
{ \
DO_SSE_MEASUREMENTS(_funcSSE_, _prework_); \
if (_flagExt_) \
{ \
if (IsProcessorFeaturePresentEx(_flagOpt_)) \
{ \
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
} \
} \
if (_doNeon_) { \
if ((pflags & (_flagsNeon_)) == (_flagsNeon_)) \
else \
{ \
DO_NEON_MEASUREMENTS(_funcNeon_, _prework_); \
if (IsProcessorFeaturePresent(_flagOpt_)) \
{ \
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
} \
} \
} \
if (_doIPP_) { DO_IPP_MEASUREMENTS(_funcIPP_, _prework_); } \
@ -223,13 +209,13 @@ static void _name_( \
strcpy(sN, "N/A"); strcpy(sSN, "N/A"); strcpy(sSNp, "N/A"); \
strcpy(sIPP, "N/A"); strcpy(sIPPp, "N/A"); \
if (resultNormal[s] > 0.0) _floatprint(resultNormal[s], sN); \
if (resultSSENeon[s] > 0.0) \
if (resultOpt[s] > 0.0) \
{ \
_floatprint(resultSSENeon[s], sSN); \
_floatprint(resultOpt[s], sSN); \
if (resultNormal[s] > 0.0) \
{ \
sprintf(sSNp, "%d%%", \
(int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \
(int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \
} \
} \
if (resultIPP[s] > 0.0) \
@ -244,7 +230,7 @@ static void _name_( \
printf("%8d: %15s %15s %5s %15s %5s\n", \
size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \
} \
free(resultNormal); free(resultSSENeon); free(resultIPP); \
free(resultNormal); free(resultOpt); free(resultIPP); \
}
#endif // !__PRIMTEST_H_INCLUDED__

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
@ -33,9 +34,10 @@ int test_add16s_func(void)
INT16 ALIGN(src1[FUNC_TEST_SIZE+3]), ALIGN(src2[FUNC_TEST_SIZE+3]),
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]);
int failed = 0;
#if defined(WITH_SSE2) || defined(WITH_IPP)
int i;
#endif
char testStr[256];
UINT32 pflags = primitives_get_flags(primitives_get());
testStr[0] = '\0';
get_random_data(src1, sizeof(src1));
@ -43,8 +45,8 @@ int test_add16s_func(void)
memset(d1, 0, sizeof(d1));
memset(d2, 0, sizeof(d2));
general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE3_AVAILABLE)
#ifdef WITH_SSE2
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
@ -70,7 +72,7 @@ int test_add16s_func(void)
}
}
}
#endif /* i386 */
#endif
#ifdef WITH_IPP
strcat(testStr, " IPP");
ippsAdd_16s(src1+1, src2+1, d2+1, FUNC_TEST_SIZE);
@ -91,8 +93,11 @@ int test_add16s_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_add_16s(src1, src2, dst, size),
TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsAdd_16s(src1, src2, dst, size));
int test_add16s_speed(void)

View File

@ -15,6 +15,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
@ -110,7 +111,6 @@ int test_alphaComp_func(void)
UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]);
UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]);
int error = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
UINT32 *ptr;
int i, x, y;
@ -132,8 +132,8 @@ int test_alphaComp_func(void)
general_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH,
(const BYTE *) src2, 4*SRC2_WIDTH,
(BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH,
@ -165,8 +165,8 @@ int test_alphaComp_func(void)
x, y, s1, s2, c0, c1);
error = 1;
}
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y);
if (colordist(c0, c2) > TOLERANCE)
@ -203,12 +203,15 @@ int test_alphaComp_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4,
STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes __attribute__((unused)) = size*4,
TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size),
#ifdef WITH_SSE2
TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
size, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size));

View File

@ -15,6 +15,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
@ -39,7 +40,6 @@ int test_and_32u_func(void)
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
int failed = 0;
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
testStr[0] = '\0';
@ -55,8 +55,8 @@ int test_and_32u_func(void)
++failed;
}
}
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE3_AVAILABLE)
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
@ -92,8 +92,11 @@ int test_and_32u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_andC_32u(src1, constant, dst, size),
TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsAndC_32u(src1, constant, dst, size))
int test_and_32u_speed(void)
@ -113,7 +116,6 @@ int test_or_32u_func(void)
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
int failed = 0;
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
testStr[0] = '\0';
@ -129,8 +131,8 @@ int test_or_32u_func(void)
++failed;
}
}
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE3_AVAILABLE)
#ifdef WITH_SSE2
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
@ -166,8 +168,11 @@ int test_or_32u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_orC_32u(src1, constant, dst, size),
TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsOrC_32u(src1, constant, dst, size))
int test_or_32u_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int RGB_TRIAL_ITERATIONS = 1000;
@ -30,15 +31,19 @@ extern pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
extern pstatus_t sse2_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
extern pstatus_t neon_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3],
int srcStep, INT16 *pDst[3], int dstStep, const prim_size_t *roi);
/* ------------------------------------------------------------------------- */
int test_RGBToRGB_16s8u_P3AC4R_func(void)
{
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]);
UINT32 ALIGN(out1[4096]);
#ifdef WITH_SSE2
UINT32 ALIGN(out2[4096]);
#endif
int i;
int failed = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
INT16 *ptrs[3];
prim_size_t roi = { 64, 64 };
@ -61,8 +66,8 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
(BYTE *) out1, 64*4, &roi);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
@ -88,10 +93,13 @@ STD_SPEED_TEST(
rgb_to_argb_speed, INT16*, UINT32, dst=dst,
TRUE, general_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
#ifdef WITH_SSE2
TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
FALSE, dst=dst);
int test_RGBToRGB_16s8u_P3AC4R_speed(void)
@ -131,7 +139,6 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
int i;
int failed = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
const INT16 *in[3];
INT16 *out1[3];
@ -167,8 +174,8 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
out2[2] = b2;
general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
#ifdef WITH_SSE2
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi);
@ -192,9 +199,15 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
STD_SPEED_TEST(
ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
#ifdef WITH_SSE2
TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#elif defined(WITH_NEON)
TRUE, neon_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
FALSE, dst=dst);
int test_yCbCrToRGB_16s16s_P3P3_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
@ -70,8 +71,7 @@ int test_copy8u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memcpy(dst, src1, size),
FALSE, NULL, 0,
FALSE, NULL, 0,
FALSE, PRIM_NOP, 0, FALSE,
TRUE, ippsCopy_8u(src1, dst, size));
int test_copy8u_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int MEMSET8_PRETEST_ITERATIONS = 100000000;
@ -36,16 +37,17 @@ static const int set_sizes[] = { 1, 4, 16, 32, 64, 256, 1024, 4096 };
/* ------------------------------------------------------------------------- */
int test_set8u_func(void)
{
#if defined(WITH_SSE2) || defined(WITH_IPP)
BYTE ALIGN(dest[48]);
int failed = 0;
int off;
#endif
int failed = 0;
char testStr[256];
UINT32 pflags = primitives_get_flags(primitives_get());
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
/* Test SSE under various alignments */
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off)
@ -101,8 +103,7 @@ int test_set8u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memset(dst, constant, size),
FALSE, NULL, 0,
FALSE, NULL, 0,
FALSE, PRIM_NOP, 0, FALSE,
TRUE, ippsSet_8u(constant, dst, size));
int test_set8u_speed(void)
@ -116,17 +117,17 @@ int test_set8u_speed(void)
/* ------------------------------------------------------------------------- */
int test_set32s_func(void)
{
primitives_t* prims = primitives_get();
#if defined(WITH_SSE2) || defined(WITH_IPP)
INT32 ALIGN(dest[512]);
int failed = 0;
int off;
#endif
int failed = 0;
char testStr[256];
UINT32 pflags = primitives_get_flags(prims);
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
/* Test SSE under various alignments */
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off) {
@ -179,17 +180,17 @@ int test_set32s_func(void)
/* ------------------------------------------------------------------------- */
int test_set32u_func(void)
{
primitives_t* prims = primitives_get();
#if defined(WITH_SSE2) || defined(WITH_IPP)
UINT32 ALIGN(dest[512]);
int failed = 0;
int off;
#endif
int failed = 0;
char testStr[256];
UINT32 pflags = primitives_get_flags(prims);
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
/* Test SSE under various alignments */
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off) {
@ -251,8 +252,11 @@ static inline void memset32u_naive(
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, memset32u_naive(constant, dst, size),
TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse2_set_32u(constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ipp_wrapper_set_32u(constant, dst, size));
int test_set32u_speed(void)
@ -280,8 +284,11 @@ static inline void memset32s_naive(
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
TRUE, memset32s_naive(constant, dst, size),
TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse2_set_32s(constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsSet_32s(constant, dst, size));
int test_set32s_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
@ -47,7 +48,7 @@ extern pstatus_t sse2_rShiftC_16u(
extern pstatus_t sse2_shiftC_16u(
const UINT16 *pSrc, int val, UINT16 *pDst, int len);
#ifdef _M_IX86_AMD64
#ifdef WITH_SSE2
#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
int _name_(void) \
{ \
@ -55,12 +56,11 @@ int _name_(void) \
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
int failed = 0; \
int i; \
UINT32 pflags = primitives_get_flags(primitives_get()); \
char testStr[256]; \
testStr[0] = '\0'; \
get_random_data(src, sizeof(src)); \
_f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
if (pflags & PRIM_X86_SSE3_AVAILABLE) \
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \
{ \
strcat(testStr, " SSE3"); \
/* Aligned */ \
@ -109,23 +109,35 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
/* ========================================================================= */
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
TRUE, general_lShiftC_16s(src1, constant, dst, size),
TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsLShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_lShiftC_16u(src1, constant, dst, size),
TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsLShiftC_16u(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
TRUE, general_rShiftC_16s(src1, constant, dst, size),
TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsRShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_rShiftC_16u(src1, constant, dst, size),
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
TRUE, ippsRShiftC_16u(src1, constant, dst, size));
/* ------------------------------------------------------------------------- */

View File

@ -16,29 +16,34 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int SIGN_PRETEST_ITERATIONS = 100000;
static const float TEST_TIME = 1.0;
extern pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, int len);
#ifdef WITH_SSE2
extern pstatus_t ssse3_sign_16s(const INT16 *pSrc, INT16 *pDst, int len);
#endif
/* ------------------------------------------------------------------------- */
int test_sign16s_func(void)
{
INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]);
int failed = 0;
INT16 ALIGN(src[65535]), ALIGN(d1[65535]);
#ifdef WITH_SSE2
INT16 ALIGN(d2[65535]);
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
#endif
int failed = 0;
char testStr[256];
/* Test when we can reach 16-byte alignment */
testStr[0] = '\0';
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+1, 65535);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
#ifdef WITH_SSE2
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
strcat(testStr, " SSSE3");
ssse3_sign_16s(src+1, d2+1, 65535);
@ -57,8 +62,8 @@ int test_sign16s_func(void)
/* Test when we cannot reach 16-byte alignment */
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+2, 65535);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
#ifdef WITH_SSE2
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
ssse3_sign_16s(src+1, d2+2, 65535);
for (i=2; i<65535; ++i)
@ -79,8 +84,11 @@ int test_sign16s_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_sign_16s(src1, dst, size),
TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE,
FALSE, dst=dst, 0,
#ifdef WITH_SSE2
TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
#else
FALSE, PRIM_NOP, 0, FALSE,
#endif
FALSE, dst=dst);
int test_sign16s_speed(void)

View File

@ -66,35 +66,6 @@
#define PROCESSOR_ARM_7TDMI 70001
#define PROCESSOR_OPTIL 0x494F
#define PF_FLOATING_POINT_PRECISION_ERRATA 0
#define PF_FLOATING_POINT_EMULATED 1
#define PF_COMPARE_EXCHANGE_DOUBLE 2
#define PF_MMX_INSTRUCTIONS_AVAILABLE 3
#define PF_PPC_MOVEMEM_64BIT_OK 4
#define PF_ALPHA_BYTE_INSTRUCTIONS 5
#define PF_XMMI_INSTRUCTIONS_AVAILABLE 6
#define PF_3DNOW_INSTRUCTIONS_AVAILABLE 7
#define PF_RDTSC_INSTRUCTION_AVAILABLE 8
#define PF_PAE_ENABLED 9
#define PF_XMMI64_INSTRUCTIONS_AVAILABLE 10
#define PF_SSE_DAZ_MODE_AVAILABLE 11
#define PF_NX_ENABLED 12
#define PF_SSE3_INSTRUCTIONS_AVAILABLE 13
#define PF_COMPARE_EXCHANGE128 14
#define PF_COMPARE64_EXCHANGE128 15
#define PF_CHANNELS_ENABLED 16
#define PF_XSAVE_ENABLED 17
#define PF_ARM_VFP_32_REGISTERS_AVAILABLE 18
#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#define PF_SECOND_LEVEL_ADDRESS_TRANSLATION 20
#define PF_VIRT_FIRMWARE_ENABLED 21
#define PF_RDWRFSGSBASE_AVAILABLE 22
#define PF_FASTFAIL_AVAILABLE 23
#define PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE 24
#define PF_ARM_64BIT_LOADSTORE_ATOMIC 25
#define PF_ARM_EXTERNAL_CACHE_AVAILABLE 26
#define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE 27
typedef struct _SYSTEM_INFO
{
union
@ -243,7 +214,91 @@ WINPR_API VOID GetSystemTimeAsFileTime(LPFILETIME lpSystemTimeAsFileTime);
WINPR_API DWORD GetTickCount(void);
WINPR_API BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature);
#define PF_FLOATING_POINT_PRECISION_ERRATA 0
#define PF_FLOATING_POINT_EMULATED 1
#define PF_COMPARE_EXCHANGE_DOUBLE 2
#define PF_MMX_INSTRUCTIONS_AVAILABLE 3
#define PF_PPC_MOVEMEM_64BIT_OK 4
#define PF_XMMI_INSTRUCTIONS_AVAILABLE 6 //sse
#define PF_3DNOW_INSTRUCTIONS_AVAILABLE 7
#define PF_RDTSC_INSTRUCTION_AVAILABLE 8
#define PF_PAE_ENABLED 9
#define PF_XMMI64_INSTRUCTIONS_AVAILABLE 10 //sse2
#define PF_SSE_DAZ_MODE_AVAILABLE 11
#define PF_NX_ENABLED 12
#define PF_SSE3_INSTRUCTIONS_AVAILABLE 13
#define PF_COMPARE_EXCHANGE128 14
#define PF_COMPARE64_EXCHANGE128 15
#define PF_CHANNELS_ENABLED 16
#define PF_XSAVE_ENABLED 17
#define PF_ARM_VFP_32_REGISTERS_AVAILABLE 18
#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#define PF_SECOND_LEVEL_ADDRESS_TRANSLATION 20
#define PF_VIRT_FIRMWARE_ENABLED 21
#define PF_RDWRFSGSBASE_AVAILABLE 22
#define PF_FASTFAIL_AVAILABLE 23
#define PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE 24
#define PF_ARM_64BIT_LOADSTORE_ATOMIC 25
#define PF_ARM_EXTERNAL_CACHE_AVAILABLE 26
#define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE 27
#define PF_ARM_V4 0x80000001
#define PF_ARM_V5 0x80000002
#define PF_ARM_V6 0x80000003
#define PF_ARM_V7 0x80000004
#define PF_ARM_THUMB 0x80000005
#define PF_ARM_JAZELLE 0x80000006
#define PF_ARM_DSP 0x80000007
#define PF_ARM_MOVE_CP 0x80000008
#define PF_ARM_VFP10 0x80000009
#define PF_ARM_MPU 0x8000000A
#define PF_ARM_WRITE_BUFFER 0x8000000B
#define PF_ARM_MBX 0x8000000C
#define PF_ARM_L2CACHE 0x8000000D
#define PF_ARM_PHYSICALLY_TAGGED_CACHE 0x8000000E
#define PF_ARM_VFP_SINGLE_PRECISION 0x8000000F
#define PF_ARM_VFP_DOUBLE_PRECISION 0x80000010
#define PF_ARM_ITCM 0x80000011
#define PF_ARM_DTCM 0x80000012
#define PF_ARM_UNIFIED_CACHE 0x80000013
#define PF_ARM_WRITE_BACK_CACHE 0x80000014
#define PF_ARM_CACHE_CAN_BE_LOCKED_DOWN 0x80000015
#define PF_ARM_L2CACHE_MEMORY_MAPPED 0x80000016
#define PF_ARM_L2CACHE_COPROC 0x80000017
#define PF_ARM_THUMB2 0x80000018
#define PF_ARM_T2EE 0x80000019
#define PF_ARM_VFP3 0x8000001A
#define PF_ARM_NEON 0x8000001B
#define PF_ARM_UNALIGNED_ACCESS 0x8000001C
#define PF_ARM_INTEL_XSCALE 0x80010001
#define PF_ARM_INTEL_PMU 0x80010002
#define PF_ARM_INTEL_WMMX 0x80010003
#endif
#endif /* WINPR_SYSINFO_H */
WINPR_API BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature);
// extended flags
#define PF_EX_3DNOW_PREFETCH 1
#define PF_EX_SSSE3 2
#define PF_EX_SSE41 3
#define PF_EX_SSE42 4
#define PF_EX_AVX 5
#define PF_EX_FMA 6
#define PF_EX_AVX_AES 7
#define PF_EX_AVX2 8
#define PF_EX_ARM_VFP1 9
#define PF_EX_ARM_VFP3D16 10
#define PF_EX_ARM_VFP4 11
#define PF_EX_ARM_IDIVA 12
#define PF_EX_ARM_IDIVT 13
// some "aliases" for the standard defines
// to be more clear
#define PF_SSE_INSTRUCTIONS_AVAILABLE PF_XMMI_INSTRUCTIONS_AVAILABLE
#define PF_SSE2_INSTRUCTIONS_AVAILABLE PF_XMMI64_INSTRUCTIONS_AVAILABLE
#endif /* WINPR_SYSINFO_H */

View File

@ -3,6 +3,7 @@
* System Information
*
* Copyright 2012 Marc-Andre Moreau <marcandre.moreau@gmail.com>
* Copyright 2013 Bernhard Miklautz <bmiklautz@thinstuff.at>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -22,6 +23,13 @@
#endif
#include <winpr/sysinfo.h>
#include <winpr/platform.h>
#if defined(__linux__) && defined(__GNUC__)
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif
/**
* api-ms-win-core-sysinfo-l1-1-1.dll:
@ -109,7 +117,7 @@ DWORD GetNumberOfProcessors()
{
DWORD numCPUs = 1;
/* TODO: Android and iOS */
/* TODO: iOS */
#if defined(__linux__) || defined(__sun) || defined(_AIX)
numCPUs = (DWORD) sysconf(_SC_NPROCESSORS_ONLN);
@ -331,4 +339,313 @@ DWORD GetTickCount(void)
return ticks;
}
/* If x86 and gcc*/
#ifdef _M_IX86_AMD64
#ifdef __GNUC__
#ifdef __AVX__
#define xgetbv(_func_, _lo_, _hi_) \
__asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_))
#endif
#define D_BIT_MMX (1<<23)
#define D_BIT_SSE (1<<25)
#define D_BIT_SSE2 (1<<26)
#define D_BIT_3DN (1<<30)
#define C_BIT_SSE3 (1<<0)
#define C_BIT_3DNP (1<<8)
#define C_BIT_SSSE3 (1<<9)
#define C_BIT_SSE41 (1<<19)
#define C_BIT_SSE42 (1<<20)
#define C_BIT_XGETBV (1<<27)
#define C_BIT_AVX (1<<28)
#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX)
#define E_BIT_XMM (1<<1)
#define E_BIT_YMM (1<<2)
#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM)
#define C_BIT_FMA (1<<11)
#define C_BIT_AVX_AES (1<<24)
static void cpuid(
unsigned info,
unsigned *eax,
unsigned *ebx,
unsigned *ecx,
unsigned *edx)
{
*eax = *ebx = *ecx = *edx = 0;
__asm volatile
(
/* The EBX (or RBX register on x86_64) is used for the PIC base address
* and must not be corrupted by our inline assembly.
*/
#ifdef _M_IX86
"mov %%ebx, %%esi;"
"cpuid;"
"xchg %%ebx, %%esi;"
#else
"mov %%rbx, %%rsi;"
"cpuid;"
"xchg %%rbx, %%rsi;"
#endif
: "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
: "0" (info)
);
}
#endif // __GNUC__
#elif defined(_M_ARM)
#if defined(__linux__)
// HWCAP flags from linux kernel - uapi/asm/hwcap.h
#define HWCAP_SWP (1 << 0)
#define HWCAP_HALF (1 << 1)
#define HWCAP_THUMB (1 << 2)
#define HWCAP_26BIT (1 << 3) /* Play it safe */
#define HWCAP_FAST_MULT (1 << 4)
#define HWCAP_FPA (1 << 5)
#define HWCAP_VFP (1 << 6)
#define HWCAP_EDSP (1 << 7)
#define HWCAP_JAVA (1 << 8)
#define HWCAP_IWMMXT (1 << 9)
#define HWCAP_CRUNCH (1 << 10)
#define HWCAP_THUMBEE (1 << 11)
#define HWCAP_NEON (1 << 12)
#define HWCAP_VFPv3 (1 << 13)
#define HWCAP_VFPv3D16 (1 << 14) /* also set for VFPv4-D16 */
#define HWCAP_TLS (1 << 15)
#define HWCAP_VFPv4 (1 << 16)
#define HWCAP_IDIVA (1 << 17)
#define HWCAP_IDIVT (1 << 18)
#define HWCAP_VFPD32 (1 << 19) /* set if VFP has 32 regs (not 16) */
#define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT)
// From linux kernel uapi/linux/auxvec.h
#define AT_HWCAP 16
static unsigned GetARMCPUCaps(void){
unsigned caps = 0;
int fd = open ("/proc/self/auxv", O_RDONLY);
if (fd == -1)
return 0;
static struct
{
unsigned a_type; /* Entry type */
unsigned a_val; /* Integer value */
} auxvec;
while (1){
int num;
num = read(fd, (char *)&auxvec, sizeof(auxvec));
if (num < 1 || (auxvec.a_type == 0 && auxvec.a_val == 0))
break;
if (auxvec.a_type == AT_HWCAP)
{
caps = auxvec.a_val;
}
}
close(fd);
return caps;
}
#endif // defined(__linux__)
#endif // _M_IX86_AMD64
BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
{
BOOL ret = FALSE;
#ifdef _M_ARM
#ifdef __linux__
unsigned caps;
caps = GetARMCPUCaps();
switch (ProcessorFeature)
{
case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
case PF_ARM_NEON:
if (caps & HWCAP_NEON)
ret = TRUE;
break;
case PF_ARM_THUMB:
if (caps & HWCAP_THUMB)
ret = TRUE;
case PF_ARM_VFP_32_REGISTERS_AVAILABLE:
if (caps & HWCAP_VFPD32)
ret = TRUE;
case PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE:
if ((caps & HWCAP_IDIVA) || (caps & HWCAP_IDIVT))
ret = TRUE;
case PF_ARM_VFP3:
if (caps & HWCAP_VFPv3)
ret = TRUE;
break;
case PF_ARM_JAZELLE:
if (caps & HWCAP_JAVA)
ret = TRUE;
break;
case PF_ARM_DSP:
if (caps & HWCAP_EDSP)
ret = TRUE;
break;
case PF_ARM_MPU:
if (caps & HWCAP_EDSP)
ret = TRUE;
break;
case PF_ARM_THUMB2:
if ((caps & HWCAP_IDIVT) || (caps & HWCAP_VFPv4))
ret = TRUE;
break;
case PF_ARM_T2EE:
if (caps & HWCAP_THUMBEE)
ret = TRUE;
break;
case PF_ARM_INTEL_WMMX:
if (caps & HWCAP_IWMMXT)
ret = TRUE;
break;
default:
break;
}
#elif defined(__APPLE__) // __linux__
switch (ProcessorFeature)
{
case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
case PF_ARM_NEON:
ret = TRUE;
break;
}
#endif // __linux__
#elif defined(_M_IX86_AMD64)
#ifdef __GNUC__
unsigned a, b, c, d;
cpuid(1, &a, &b, &c, &d);
switch (ProcessorFeature)
{
case PF_MMX_INSTRUCTIONS_AVAILABLE:
if (d & D_BIT_MMX)
ret = TRUE;
break;
case PF_XMMI_INSTRUCTIONS_AVAILABLE:
if (d & D_BIT_SSE)
ret = TRUE;
break;
case PF_XMMI64_INSTRUCTIONS_AVAILABLE:
if (d & D_BIT_SSE2)
ret = TRUE;
break;
case PF_3DNOW_INSTRUCTIONS_AVAILABLE:
if (d & D_BIT_3DN)
ret = TRUE;
break;
case PF_SSE3_INSTRUCTIONS_AVAILABLE:
if (c & C_BIT_SSE3)
ret = TRUE;
break;
default:
break;
}
#endif // __GNUC__
#endif
return ret;
}
#endif //_WIN32
BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature)
{
BOOL ret = FALSE;
#ifdef _M_ARM
#ifdef __linux__
unsigned caps;
caps = GetARMCPUCaps();
switch (ProcessorFeature)
{
case PF_EX_ARM_VFP1:
if (caps & HWCAP_VFP)
ret = TRUE;
break;
case PF_EX_ARM_VFP3D16:
if (caps & HWCAP_VFPv3D16)
ret = TRUE;
break;
case PF_EX_ARM_VFP4:
if (caps & HWCAP_VFPv4)
ret = TRUE;
break;
case PF_EX_ARM_IDIVA:
if (caps & HWCAP_IDIVA)
ret = TRUE;
break;
case PF_EX_ARM_IDIVT:
if (caps & HWCAP_IDIVT)
ret = TRUE;
break;
}
#endif // __linux__
#elif defined(_M_IX86_AMD64)
unsigned a, b, c, d;
cpuid(1, &a, &b, &c, &d);
switch (ProcessorFeature)
{
case PF_EX_3DNOW_PREFETCH:
if (c & C_BIT_3DNP)
ret = TRUE;
break;
case PF_EX_SSSE3:
if (c & C_BIT_SSSE3)
ret = TRUE;
break;
case PF_EX_SSE41:
if (c & C_BIT_SSE41)
ret = TRUE;
break;
case PF_EX_SSE42:
if (c & C_BIT_SSE42)
ret = TRUE;
break;
#ifdef __AVX__
case PF_EX_AVX:
case PF_EX_FMA:
case PF_EX_AVX_AES:
{
if ((c & C_BITS_AVX) != C_BITS_AVX)
ret = FALSE;
int e, f;
xgetbv(0, e, f);
if ((e & E_BITS_AVX) == E_BITS_AVX)
{
switch (ProcessorFeature)
{
case: PF_EX_AVX:
ret = TRUE;
break;
case: PF_EX_FMA:
if (c & C_BIT_FMA)
ret = TRUE;
break;
case: PF_EX_AVX_AES:
if (c & C_BIT_AVX_AES)
ret = TRUE;
break;
{
ret = TRUE;
break;
}
}
}
break;
#endif //__AVX__
default:
break;
}
#endif
return ret;
}

View File

@ -5,7 +5,9 @@ set(MODULE_PREFIX "TEST_SYSINFO")
set(${MODULE_PREFIX}_DRIVER ${MODULE_NAME}.c)
set(${MODULE_PREFIX}_TESTS
TestGetNativeSystemInfo.c)
TestGetNativeSystemInfo.c
TestCPUFeatures.c
)
create_test_sourcelist(${MODULE_PREFIX}_SRCS
${${MODULE_PREFIX}_DRIVER}

View File

@ -0,0 +1,45 @@
#include <winpr/crt.h>
#include <winpr/sysinfo.h>
#include <winpr/platform.h>
int TestCPUFeatures(int argc, char* argv[])
{
printf("Base CPU Flags:\n");
#ifdef _M_IX86_AMD64
printf("\tPF_MMX_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE) ? "yes" : "no");
printf("\tPF_XMMI_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE) ? "yes" : "no");
printf("\tPF_XMMI64_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE) ? "yes" : "no");
printf("\tPF_3DNOW_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE) ? "yes" : "no");
printf("\tPF_SSE3_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE) ? "yes" : "no");
printf("\n");
printf("Extended CPU Flags (not found in windows API):\n");
printf("\tPF_EX_3DNOW_PREFETCH: %s\n", IsProcessorFeaturePresentEx(PF_EX_3DNOW_PREFETCH) ? "yes" : "no");
printf("\tPF_EX_SSSE3: %s\n", IsProcessorFeaturePresentEx(PF_EX_SSSE3) ? "yes" : "no");
printf("\tPF_EX_SSE41: %s\n", IsProcessorFeaturePresentEx(PF_EX_SSE41) ? "yes" : "no");
printf("\tPF_EX_SSE42: %s\n", IsProcessorFeaturePresentEx(PF_EX_SSE42) ? "yes" : "no");
printf("\tPF_EX_AVX: %s\n", IsProcessorFeaturePresentEx(PF_EX_AVX) ? "yes" : "no");
printf("\tPF_EX_FMA: %s\n", IsProcessorFeaturePresentEx(PF_EX_FMA) ? "yes" : "no");
printf("\tPF_EX_AVX_AES: %s\n", IsProcessorFeaturePresentEx(PF_EX_AVX_AES) ? "yes" : "no");
#elif defined(_M_ARM)
printf("\tPF_ARM_NEON_INSTRUCTIONS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? "yes" : "no");
printf("\tPF_ARM_THUMB: %s\n", IsProcessorFeaturePresent(PF_ARM_THUMB) ? "yes" : "no");
printf("\tPF_ARM_VFP_32_REGISTERS_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) ? "yes" : "no");
printf("\tPF_ARM_DIVIDE_INSTRUCTION_AVAILABLE: %s\n", IsProcessorFeaturePresent(PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE) ? "yes" : "no");
printf("\tPF_ARM_VFP3: %s\n", IsProcessorFeaturePresent(PF_ARM_VFP3) ? "yes" : "no");
printf("\tPF_ARM_THUMB: %s\n", IsProcessorFeaturePresent(PF_ARM_THUMB) ? "yes" : "no");
printf("\tPF_ARM_JAZELLE: %s\n", IsProcessorFeaturePresent(PF_ARM_JAZELLE) ? "yes" : "no");
printf("\tPF_ARM_DSP: %s\n", IsProcessorFeaturePresent(PF_ARM_DSP) ? "yes" : "no");
printf("\tPF_ARM_THUMB2: %s\n", IsProcessorFeaturePresent(PF_ARM_THUMB2) ? "yes" : "no");
printf("\tPF_ARM_T2EE: %s\n", IsProcessorFeaturePresent(PF_ARM_T2EE) ? "yes" : "no");
printf("\tPF_ARM_INTEL_WMMX: %s\n", IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX) ? "yes" : "no");
printf("Extended CPU Flags (not found in windows API):\n");
printf("\tPF_EX_ARM_VFP1: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_VFP1) ? "yes" : "no");
printf("\tPF_EX_ARM_VFP3D16: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_VFP3D16) ? "yes" : "no");
printf("\tPF_EX_ARM_VFP4: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_VFP4) ? "yes" : "no");
printf("\tPF_EX_ARM_IDIVA: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_IDIVA) ? "yes" : "no");
printf("\tPF_EX_ARM_IDIVT: %s\n", IsProcessorFeaturePresentEx(PF_EX_ARM_IDIVT) ? "yes" : "no");
#endif
printf("\n");
return 0;
}