primitives: make use of winprs processor feature detection

- Removed the cpu flag detection
- Fixed and updated the tests
This commit is contained in:
Bernhard Miklautz 2013-02-27 15:58:06 +01:00
parent 23a7ef6c47
commit 84a3dbc115
37 changed files with 200 additions and 469 deletions

View File

@ -190,9 +190,6 @@ typedef struct
__yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3;
__RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3;
__RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R;
/* internal use for CPU flags and such. */
void *hints;
} primitives_t;
#ifdef __cplusplus
@ -202,12 +199,6 @@ extern "C" {
/* Prototypes for the externally-visible entrypoints. */
FREERDP_API void primitives_init(void);
FREERDP_API primitives_t *primitives_get(void);
FREERDP_API UINT32 primitives_get_flags(
const primitives_t *prims);
FREERDP_API void primitives_flags_str(
const primitives_t *prims,
char *str,
size_t len);
FREERDP_API void primitives_deinit(void);
#ifdef __cplusplus

View File

@ -62,10 +62,7 @@ New Optimizations
-----------------
As the need arises, new optimizations can be added to the library,
including NEON, AVX, and perhaps OpenCL or other SIMD implementations.
The initialization routine is free to do any quick run-time test to
determine which features are available before hooking the operation's
function pointer, or it can simply look at the processor features list
from the hints passed to the initialization routine.
The CPU feature detection is done in winpr/sysinfo.
Adding Entrypoints
@ -85,15 +82,6 @@ be added.
The template functions can frequently be used to extend the
operations without writing a lot of new code.
Flags
-----
The entrypoint primitives_get_flags() returns a bitfield of processor flags
(as defined in primitives.h) and primitives_flag_str() returns a string
related to those processor flags, for debugging and information. The
bitfield can be used elsewhere in the code as needed.
Cache Management
----------------
I haven't found a lot of speed improvement by attempting prefetch, and

View File

@ -46,12 +46,11 @@ pstatus_t general_add_16s(
/* ------------------------------------------------------------------------- */
void primitives_init_add(
const primitives_hints_t *hints,
primitives_t *prims)
{
prims->add_16s = general_add_16s;
primitives_init_add_opt(hints, prims);
primitives_init_add_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -24,7 +24,7 @@
pstatus_t general_add_16s(const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, INT32 len);
void primitives_init_add_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_add_opt(primitives_t *prims);
#endif /* !__PRIM_ADD_H_INCLUDED__ */

View File

@ -20,6 +20,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -45,18 +46,15 @@ SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s,
/* ------------------------------------------------------------------------- */
void primitives_init_add_opt(
const primitives_hints_t *hints,
primitives_t *prims)
{
#ifdef WITH_IPP
prims->add_16s = (__add_16s_t) ippsAdd_16s;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
prims->add_16s = sse3_add_16s;
}
#endif
}

View File

@ -102,11 +102,11 @@ pstatus_t general_alphaComp_argb(
}
/* ------------------------------------------------------------------------- */
void primitives_init_alphaComp(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_alphaComp(primitives_t* prims)
{
prims->alphaComp_argb = general_alphaComp_argb;
primitives_init_alphaComp_opt(hints, prims);
primitives_init_alphaComp_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -24,7 +24,7 @@
pstatus_t general_alphaComp_argb(const BYTE *pSrc1, INT32 src1Step, const BYTE *pSrc2, INT32 src2Step, BYTE *pDst, INT32 dstStep, INT32 width, INT32 height);
void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims);
void primitives_init_alphaComp_opt(primitives_t* prims);
#endif /* !__PRIM_ALPHACOMP_H_INCLUDED__ */

View File

@ -26,6 +26,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -210,13 +211,13 @@ pstatus_t ipp_alphaComp_argb(
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_alphaComp_opt(primitives_t* prims)
{
#ifdef WITH_IPP
prims->alphaComp_argb = ipp_alphaComp_argb;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
prims->alphaComp_argb = sse2_alphaComp_argb;
}

View File

@ -61,14 +61,13 @@ pstatus_t general_orC_32u(
/* ------------------------------------------------------------------------- */
void primitives_init_andor(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->andC_32u = general_andC_32u;
prims->orC_32u = general_orC_32u;
primitives_init_andor_opt(hints, prims);
primitives_init_andor_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -25,7 +25,7 @@
pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_andor_opt(primitives_t *prims);
#endif /* !__PRIM_ANDOR_H_INCLUDED__ */

View File

@ -19,6 +19,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -45,14 +46,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u,
/* ------------------------------------------------------------------------- */
void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_andor_opt(primitives_t *prims)
{
#if defined(WITH_IPP)
prims->andC_32u = (__andC_32u_t) ippsAndC_32u;
prims->orC_32u = (__orC_32u_t) ippsOrC_32u;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->andC_32u = sse3_andC_32u;
prims->orC_32u = sse3_orC_32u;

View File

@ -215,13 +215,13 @@ pstatus_t general_RGBToRGB_16s8u_P3AC4R(
}
/* ------------------------------------------------------------------------- */
void primitives_init_colors(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_colors(primitives_t* prims)
{
prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3;
prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3;
primitives_init_colors_opt(hints, prims);
primitives_init_colors_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -26,7 +26,7 @@ pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, IN
pstatus_t general_RGBToYCbCr_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, INT16 *pDst[3], INT32 dstStep, const prim_size_t *roi);
pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi);
void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims);
void primitives_init_colors_opt(primitives_t* prims);
#endif /* !__PRIM_COLORS_H_INCLUDED__ */

View File

@ -23,6 +23,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -542,17 +543,17 @@ pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
*/
/* ------------------------------------------------------------------------- */
void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims)
void primitives_init_colors_opt(primitives_t* prims)
{
#if defined(WITH_SSE2)
if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
}
#elif defined(WITH_NEON)
if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE)
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
}

View File

@ -148,7 +148,6 @@ static pstatus_t ippiCopy_8u_AC4r(
/* ------------------------------------------------------------------------- */
void primitives_init_copy(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */

View File

@ -34,61 +34,43 @@
? _mm_lddqu_si128((__m128i *) (_ptr_)) \
: _mm_load_si128((__m128i *) (_ptr_)))
/* This structure can (eventually) be used to provide hints to the
* initialization routines, e.g. whether SSE2 or NEON or IPP instructions
* or calls are available.
*/
typedef struct
{
UINT32 x86_flags;
UINT32 arm_flags;
} primitives_hints_t;
/* Function prototypes for all the init/deinit routines. */
extern void primitives_init_copy(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_copy(
primitives_t *prims);
extern void primitives_init_set(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_set(
primitives_t *prims);
extern void primitives_init_add(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_add(
primitives_t *prims);
extern void primitives_init_andor(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_andor(
primitives_t *prims);
extern void primitives_init_shift(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_shift(
primitives_t *prims);
extern void primitives_init_sign(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_sign(
primitives_t *prims);
extern void primitives_init_alphaComp(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_alphaComp(
primitives_t *prims);
extern void primitives_init_colors(
const primitives_hints_t *hints,
primitives_t *prims);
extern void primitives_deinit_colors(
primitives_t *prims);

View File

@ -111,7 +111,6 @@ pstatus_t general_set_32u(
/* ------------------------------------------------------------------------- */
void primitives_init_set(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
@ -120,7 +119,7 @@ void primitives_init_set(
prims->set_32u = general_set_32u;
prims->zero = general_zero;
primitives_init_set_opt(hints, prims);
primitives_init_set_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -28,7 +28,7 @@ pstatus_t general_set_32s(INT32 val, INT32 *pDst, INT32 len);
pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, INT32 len);
void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_set_opt(primitives_t *prims);
#endif /* !__PRIM_SET_H_INCLUDED__ */

View File

@ -21,6 +21,7 @@
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
# include <emmintrin.h>
@ -198,7 +199,7 @@ pstatus_t ipp_wrapper_set_32u(
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_set_opt(primitives_t *prims)
{
/* Pick tuned versions if possible. */
#ifdef WITH_IPP
@ -207,7 +208,7 @@ void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prim
prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u;
prims->zero = (__zero_t) ippsZero_8u;
#elif defined(WITH_SSE2)
if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
prims->set_8u = sse2_set_8u;
prims->set_32s = sse2_set_32s;

View File

@ -104,7 +104,6 @@ pstatus_t general_shiftC_16u(
/* ------------------------------------------------------------------------- */
void primitives_init_shift(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
@ -117,7 +116,7 @@ void primitives_init_shift(
prims->shiftC_16s = general_shiftC_16s;
prims->shiftC_16u = general_shiftC_16u;
primitives_init_shift_opt(hints, prims);
primitives_init_shift_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -29,7 +29,7 @@ pstatus_t general_rShiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32
pstatus_t general_shiftC_16s(const INT16 *pSrc, INT32 val, INT16 *pDst, INT32 len);
pstatus_t general_shiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 len);
void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_shift_opt(primitives_t *prims);
#endif /* !__PRIM_SHIFT_H_INCLUDED__ */

View File

@ -19,6 +19,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -58,7 +59,7 @@ SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u,
*/
/* ------------------------------------------------------------------------- */
void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_shift_opt(primitives_t *prims)
{
#if defined(WITH_IPP)
prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s;
@ -66,8 +67,8 @@ void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *pr
prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u;
prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u;
#elif defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->lShiftC_16s = sse2_lShiftC_16s;
prims->rShiftC_16s = sse2_rShiftC_16s;

View File

@ -42,13 +42,12 @@ pstatus_t general_sign_16s(
/* ------------------------------------------------------------------------- */
void primitives_init_sign(
const primitives_hints_t *hints,
primitives_t *prims)
{
/* Start with the default. */
prims->sign_16s = general_sign_16s;
primitives_init_sign_opt(hints, prims);
primitives_init_sign_opt(prims);
}
/* ------------------------------------------------------------------------- */

View File

@ -24,7 +24,7 @@
pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, INT32 len);
void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims);
void primitives_init_sign_opt(primitives_t *prims);
#endif /* !__PRIM_SIGN_H_INCLUDED__ */

View File

@ -19,6 +19,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <winpr/sysinfo.h>
#ifdef WITH_SSE2
#include <emmintrin.h>
@ -134,13 +135,13 @@ pstatus_t ssse3_sign_16s(
#endif /* WITH_SSE2 */
/* ------------------------------------------------------------------------- */
void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims)
void primitives_init_sign_opt(primitives_t *prims)
{
/* Pick tuned versions if possible. */
/* I didn't spot an IPP version of this. */
#if defined(WITH_SSE2)
if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE)
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
prims->sign_16s = ssse3_sign_16s;
}

View File

@ -22,173 +22,16 @@
#include <string.h>
#include <stdlib.h>
#include <winpr/platform.h>
#include <freerdp/primitives.h>
#include "prim_internal.h"
#ifdef __ANDROID__
#include "cpu-features.h"
#endif
/* Singleton pointer used throughout the program when requested. */
static primitives_t* pPrimitives = NULL;
#define D_BIT_MMX (1<<23)
#define D_BIT_SSE (1<<25)
#define D_BIT_SSE2 (1<<26)
#define D_BIT_3DN (1<<30)
#define C_BIT_SSE3 (1<<0)
#define C_BIT_3DNP (1<<8)
#define C_BIT_SSSE3 (1<<9)
#define C_BIT_SSE41 (1<<19)
#define C_BIT_SSE42 (1<<20)
#define C_BIT_XGETBV (1<<27)
#define C_BIT_AVX (1<<28)
#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX)
#define E_BIT_XMM (1<<1)
#define E_BIT_YMM (1<<2)
#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM)
#define C_BIT_FMA (1<<11)
#define C_BIT_AVX_AES (1<<24)
/* If x86 */
#if defined(_M_IX86_AMD64)
/* If GCC */
#ifdef __GNUC__
#ifdef __AVX__
#define xgetbv(_func_, _lo_, _hi_) \
__asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_))
#endif
static void cpuid(
unsigned info,
unsigned *eax,
unsigned *ebx,
unsigned *ecx,
unsigned *edx)
{
*eax = *ebx = *ecx = *edx = 0;
__asm volatile
(
/* The EBX (or RBX register on x86_64) is used for the PIC base address
* and must not be corrupted by our inline assembly.
*/
#ifdef _M_IX86
"mov %%ebx, %%esi;"
"cpuid;"
"xchg %%ebx, %%esi;"
#else
"mov %%rbx, %%rsi;"
"cpuid;"
"xchg %%rbx, %%rsi;"
#endif
: "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
: "0" (info)
);
}
static void set_hints(primitives_hints_t* hints)
{
unsigned a, b, c, d;
cpuid(1, &a, &b, &c, &d);
if (d & D_BIT_MMX)
hints->x86_flags |= PRIM_X86_MMX_AVAILABLE;
if (d & D_BIT_SSE)
hints->x86_flags |= PRIM_X86_SSE_AVAILABLE;
if (d & D_BIT_SSE2)
hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE;
if (d & D_BIT_3DN)
hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE;
if (c & C_BIT_3DNP)
hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE;
if (c & C_BIT_SSE3)
hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE;
if (c & C_BIT_SSSE3)
hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE;
if (c & C_BIT_SSE41)
hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE;
if (c & C_BIT_SSE42)
hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE;
#ifdef __AVX__
if ((c & C_BITS_AVX) == C_BITS_AVX)
{
int e, f;
xgetbv(0, e, f);
if ((e & E_BITS_AVX) == E_BITS_AVX)
{
hints->x86_flags |= PRIM_X86_AVX_AVAILABLE;
if (c & C_BIT_FMA)
hints->x86_flags |= PRIM_X86_FMA_AVAILABLE;
if (c & C_BIT_AVX_AES)
hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE;
}
}
/* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */
#endif
}
#else
static void set_hints(primitives_hints_t* hints)
{
/* x86 non-GCC: TODO */
}
#endif /* __GNUC__ */
/* ------------------------------------------------------------------------- */
#elif defined(_M_ARM)
static UINT32 getNeonSupport(void)
{
#ifdef __ANDROID__
if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0;
UINT64 features = android_getCpuFeatures();
if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7))
{
if (features & ANDROID_CPU_ARM_FEATURE_NEON)
{
return PRIM_ARM_NEON_AVAILABLE;
}
}
#elif defined(__APPLE)
/* assume NEON support on iOS devices */
return PRIM_ARM_NEON_AVAILABLE;
#endif
return 0;
}
static void set_hints(primitives_hints_t* hints)
{
/* ARM: TODO */
hints->arm_flags |= getNeonSupport();
}
#else
static void set_hints(
primitives_hints_t *hints)
{
}
#endif /* x86 else ARM else */
/* ------------------------------------------------------------------------- */
void primitives_init(void)
{
primitives_hints_t* hints;
if (pPrimitives == NULL)
{
pPrimitives = calloc(1, sizeof(primitives_t));
@ -197,19 +40,15 @@ void primitives_init(void)
return;
}
hints = calloc(1, sizeof(primitives_hints_t));
set_hints(hints);
pPrimitives->hints = (void *) hints;
/* Now call each section's initialization routine. */
primitives_init_add(hints, pPrimitives);
primitives_init_andor(hints, pPrimitives);
primitives_init_alphaComp(hints, pPrimitives);
primitives_init_copy(hints, pPrimitives);
primitives_init_set(hints, pPrimitives);
primitives_init_shift(hints, pPrimitives);
primitives_init_sign(hints, pPrimitives);
primitives_init_colors(hints, pPrimitives);
primitives_init_add(pPrimitives);
primitives_init_andor(pPrimitives);
primitives_init_alphaComp(pPrimitives);
primitives_init_copy(pPrimitives);
primitives_init_set(pPrimitives);
primitives_init_shift(pPrimitives);
primitives_init_sign(pPrimitives);
primitives_init_colors(pPrimitives);
}
/* ------------------------------------------------------------------------- */
@ -221,102 +60,6 @@ primitives_t* primitives_get(void)
return pPrimitives;
}
/* ------------------------------------------------------------------------- */
UINT32 primitives_get_flags(const primitives_t* prims)
{
primitives_hints_t* hints = (primitives_hints_t*) (prims->hints);
#if defined(_M_IX86_AMD64)
return hints->x86_flags;
#elif defined(_M_ARM)
return hints->arm_flags;
#else
return 0;
#endif
}
/* ------------------------------------------------------------------------- */
typedef struct
{
UINT32 flag;
const char *str;
} flagpair_t;
static const flagpair_t x86_flags[] =
{
{ PRIM_X86_MMX_AVAILABLE, "MMX" },
{ PRIM_X86_3DNOW_AVAILABLE, "3DNow" },
{ PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" },
{ PRIM_X86_SSE_AVAILABLE, "SSE" },
{ PRIM_X86_SSE2_AVAILABLE, "SSE2" },
{ PRIM_X86_SSE3_AVAILABLE, "SSE3" },
{ PRIM_X86_SSSE3_AVAILABLE, "SSSE3" },
{ PRIM_X86_SSE41_AVAILABLE, "SSE4.1" },
{ PRIM_X86_SSE42_AVAILABLE, "SSE4.2" },
{ PRIM_X86_AVX_AVAILABLE, "AVX" },
{ PRIM_X86_FMA_AVAILABLE, "FMA" },
{ PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" },
{ PRIM_X86_AVX2_AVAILABLE, "AVX2" },
};
static const flagpair_t arm_flags[] =
{
{ PRIM_ARM_VFP1_AVAILABLE, "VFP1" },
{ PRIM_ARM_VFP2_AVAILABLE, "VFP2" },
{ PRIM_ARM_VFP3_AVAILABLE, "VFP3" },
{ PRIM_ARM_VFP4_AVAILABLE, "VFP4" },
{ PRIM_ARM_FPA_AVAILABLE, "FPA" },
{ PRIM_ARM_FPE_AVAILABLE, "FPE" },
{ PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" },
{ PRIM_ARM_NEON_AVAILABLE, "NEON" },
};
void primitives_flags_str(const primitives_t* prims, char* str, size_t len)
{
int i;
primitives_hints_t* hints;
*str = '\0';
--len; /* for the '/0' */
hints = (primitives_hints_t*) (prims->hints);
for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i)
{
if (hints->x86_flags & x86_flags[i].flag)
{
int slen = strlen(x86_flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, x86_flags[i].str);
len -= slen;
}
}
for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i)
{
if (hints->arm_flags & arm_flags[i].flag)
{
int slen = strlen(arm_flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, arm_flags[i].str);
len -= slen;
}
}
}
/* ------------------------------------------------------------------------- */
void primitives_deinit(void)
{
@ -333,9 +76,6 @@ void primitives_deinit(void)
primitives_deinit_sign(pPrimitives);
primitives_deinit_colors(pPrimitives);
if (pPrimitives->hints != NULL)
free((void*) (pPrimitives->hints));
free((void*) pPrimitives);
pPrimitives = NULL;
}

View File

@ -31,28 +31,11 @@ set(PRIMITIVE_TEST_CFILES
test_set.c
test_shift.c
test_sign.c
../prim_add.c
../prim_andor.c
../prim_alphaComp.c
../prim_colors.c
../prim_copy.c
../prim_set.c
../prim_shift.c
../prim_sign.c
../prim_add_opt.c
../prim_alphaComp_opt.c
../prim_andor_opt.c
../prim_colors_opt.c
../prim_set_opt.c
../prim_shift_opt.c
../prim_sign_opt.c
../primitives.c
)
set(PRIMITIVE_TEST_HEADERS
measure.h
prim_test.h
../prim_internal.h
)
set(PRIMITIVE_TEST_SRCS
@ -138,7 +121,7 @@ endif()
set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS})
target_link_libraries(prim_test rt)
target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo)
if(NOT TESTING_OUTPUT_DIRECTORY)
set(TESTING_OUTPUT_DIRECTORY .)
endif()

View File

@ -21,6 +21,8 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <winpr/platform.h>
#include <winpr/sysinfo.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@ -32,6 +34,88 @@
int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
int Quiet = 0;
/* ------------------------------------------------------------------------- */
typedef struct
{
UINT32 flag;
const char *str;
} flagpair_t;
static const flagpair_t flags[] =
#ifdef _M_IX86_AMD64
{
{ PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" },
{ PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" },
{ PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" },
{ PF_XMMI64_INSTRUCTIONS_AVAILABLE, "SSE2" },
{ PF_SSE3_INSTRUCTIONS_AVAILABLE, "SSE3" },
#elif defined(_M_ARM)
{ PF_ARM_VFP3, "VFP3" },
{ PF_ARM_INTEL_WMMX, "IWMMXT" },
{ PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, "NEON" },
#endif
};
static const flagpair_t flags_extended[] =
{
#ifdef _M_IX86_AMD64
{ PF_EX_3DNOW_PREFETCH, "3DNow-PF" },
{ PF_EX_SSSE3, "SSSE3" },
{ PF_EX_SSE41, "SSE4.1" },
{ PF_EX_SSE42, "SSE4.2" },
{ PF_EX_AVX, "AVX" },
{ PF_EX_FMA, "FMA" },
{ PF_EX_AVX_AES, "AVX-AES" },
{ PF_EX_AVX2, "AVX2" },
#elif defined(_M_ARM)
{ PF_EX_ARM_VFP1, "VFP1"},
{ PF_EX_ARM_VFP4, "VFP4" },
#endif
};
void primitives_flags_str(char* str, size_t len)
{
int i;
*str = '\0';
--len; /* for the '/0' */
for (i = 0; i < sizeof(flags) / sizeof(flagpair_t); ++i)
{
if (IsProcessorFeaturePresent(flags[i].flag))
{
int slen = strlen(flags[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, flags[i].str);
len -= slen;
}
}
for (i = 0; i < sizeof(flags_extended) / sizeof(flagpair_t); ++i)
{
if (IsProcessorFeaturePresent(flags_extended[i].flag))
{
int slen = strlen(flags_extended[i].str) + 1;
if (len < slen)
break;
if (*str != '\0')
strcat(str, " ");
strcat(str, flags_extended[i].str);
len -= slen;
}
}
}
/* ------------------------------------------------------------------------- */
static void get_random_data_lrand(
void *buffer,
@ -198,7 +282,7 @@ static const test_t testTypeList[] =
int main(int argc, char** argv)
{
int i;
char hints[256];
char hints[1024];
UINT32 testSet = 0;
UINT32 testTypes = 0;
int results = SUCCESS;
@ -253,7 +337,7 @@ int main(int argc, char** argv)
primitives_init();
primitives_flags_str(primitives_get(), hints, sizeof(hints));
primitives_flags_str(hints, sizeof(hints));
printf("Hints: %s\n", hints);
/* COPY */

View File

@ -29,6 +29,7 @@
#include <stdio.h>
#include <freerdp/primitives.h>
#include <winpr/platform.h>
#ifdef WITH_IPP
#include <ipps.h>
@ -121,8 +122,8 @@ extern int test_or_32u_speed(void);
} \
} while (0)
#if defined(i386) && defined(WITH_SSE2)
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \
#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON))
#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
@ -132,34 +133,15 @@ extern int test_or_32u_speed(void);
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
_funcSSE_); \
MEASURE_TIMED(label, iter, test_time, resultOpt[s], \
_funcOpt_); \
} \
} while (0)
#else
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_)
#define DO_OPT_MEASUREMENTS(_funcSSE_, _prework_)
#endif
#if defined(armel) && defined(INCLUDE_NEON_MEASUREMENTS)
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
{ \
int iter; \
char label[256]; \
int size = size_array[s]; \
_prework_; \
iter = iterations/size; \
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
_funcNeon_); \
} \
} while (0)
#else
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_)
#endif
#if defined(i386) && defined(WITH_IPP)
#if defined(_M_IX86_AMD64) && defined(WITH_IPP)
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_) \
do { \
for (s=0; s<num_sizes; ++s) \
@ -182,8 +164,7 @@ extern int test_or_32u_speed(void);
#define STD_SPEED_TEST( \
_name_, _srctype_, _dsttype_, _prework_, \
_doNormal_, _funcNormal_, \
_doSSE_, _funcSSE_, _flagsSSE_, \
_doNeon_, _funcNeon_, _flagsNeon_, \
_doOpt_, _funcOpt_, _flagOpt_, _flagExt_, \
_doIPP_, _funcIPP_) \
static void _name_( \
const char *oplabel, const char *type, \
@ -193,24 +174,28 @@ static void _name_( \
int iterations, float test_time) \
{ \
int s; \
float *resultNormal, *resultSSENeon, *resultIPP; \
UINT32 pflags = primitives_get_flags(primitives_get()); \
float *resultNormal, *resultOpt, *resultIPP; \
resultNormal = (float *) calloc(num_sizes, sizeof(float)); \
resultSSENeon = (float *) calloc(num_sizes, sizeof(float)); \
resultOpt = (float *) calloc(num_sizes, sizeof(float)); \
resultIPP = (float *) calloc(num_sizes, sizeof(float)); \
printf("******************** %s %s ******************\n", \
oplabel, type); \
if (_doNormal_) { DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_); } \
if (_doSSE_) { \
if ((pflags & (_flagsSSE_)) == (_flagsSSE_)) \
if (_doOpt_) \
{ \
if (_flagExt_) \
{ \
DO_SSE_MEASUREMENTS(_funcSSE_, _prework_); \
if (IsProcessorFeaturePresentEx(_flagOpt_)) \
{ \
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
} \
} \
} \
if (_doNeon_) { \
if ((pflags & (_flagsNeon_)) == (_flagsNeon_)) \
else \
{ \
DO_NEON_MEASUREMENTS(_funcNeon_, _prework_); \
if (IsProcessorFeaturePresent(_flagOpt_)) \
{ \
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
} \
} \
} \
if (_doIPP_) { DO_IPP_MEASUREMENTS(_funcIPP_, _prework_); } \
@ -223,13 +208,13 @@ static void _name_( \
strcpy(sN, "N/A"); strcpy(sSN, "N/A"); strcpy(sSNp, "N/A"); \
strcpy(sIPP, "N/A"); strcpy(sIPPp, "N/A"); \
if (resultNormal[s] > 0.0) _floatprint(resultNormal[s], sN); \
if (resultSSENeon[s] > 0.0) \
if (resultOpt[s] > 0.0) \
{ \
_floatprint(resultSSENeon[s], sSN); \
_floatprint(resultOpt[s], sSN); \
if (resultNormal[s] > 0.0) \
{ \
sprintf(sSNp, "%d%%", \
(int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \
(int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \
} \
} \
if (resultIPP[s] > 0.0) \
@ -244,7 +229,7 @@ static void _name_( \
printf("%8d: %15s %15s %5s %15s %5s\n", \
size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \
} \
free(resultNormal); free(resultSSENeon); free(resultIPP); \
free(resultNormal); free(resultOpt); free(resultIPP); \
}
#endif // !__PRIMTEST_H_INCLUDED__

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
@ -35,7 +36,6 @@ int test_add16s_func(void)
int failed = 0;
int i;
char testStr[256];
UINT32 pflags = primitives_get_flags(primitives_get());
testStr[0] = '\0';
get_random_data(src1, sizeof(src1));
@ -44,7 +44,7 @@ int test_add16s_func(void)
memset(d2, 0, sizeof(d2));
general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE3_AVAILABLE)
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
@ -91,8 +91,7 @@ int test_add16s_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_add_16s(src1, src2, dst, size),
TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsAdd_16s(src1, src2, dst, size));
int test_add16s_speed(void)

View File

@ -15,6 +15,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
@ -110,7 +111,6 @@ int test_alphaComp_func(void)
UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]);
UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]);
int error = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
UINT32 *ptr;
int i, x, y;
@ -133,7 +133,7 @@ int test_alphaComp_func(void)
(const BYTE *) src2, 4*SRC2_WIDTH,
(BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH,
@ -166,7 +166,7 @@ int test_alphaComp_func(void)
error = 1;
}
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y);
if (colordist(c0, c2) > TOLERANCE)
@ -207,8 +207,7 @@ STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4,
TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size),
TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
size, size));

View File

@ -15,6 +15,7 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
@ -39,7 +40,6 @@ int test_and_32u_func(void)
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
int failed = 0;
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
testStr[0] = '\0';
@ -56,7 +56,7 @@ int test_and_32u_func(void)
}
}
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE3_AVAILABLE)
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
@ -92,8 +92,7 @@ int test_and_32u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_andC_32u(src1, constant, dst, size),
TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsAndC_32u(src1, constant, dst, size))
int test_and_32u_speed(void)
@ -113,7 +112,6 @@ int test_or_32u_func(void)
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
int failed = 0;
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
testStr[0] = '\0';
@ -130,7 +128,7 @@ int test_or_32u_func(void)
}
}
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE3_AVAILABLE)
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE3");
/* Aligned */
@ -166,8 +164,7 @@ int test_or_32u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, general_orC_32u(src1, constant, dst, size),
TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsOrC_32u(src1, constant, dst, size))
int test_or_32u_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int RGB_TRIAL_ITERATIONS = 1000;
@ -38,7 +39,6 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]);
int i;
int failed = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
INT16 *ptrs[3];
prim_size_t roi = { 64, 64 };
@ -62,7 +62,7 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
(BYTE *) out1, 64*4, &roi);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
@ -90,8 +90,7 @@ STD_SPEED_TEST(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
FALSE, dst=dst);
int test_RGBToRGB_16s8u_P3AC4R_speed(void)
@ -131,7 +130,6 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
int i;
int failed = 0;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
const INT16 *in[3];
INT16 *out1[3];
@ -168,7 +166,7 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi);
@ -193,8 +191,7 @@ STD_SPEED_TEST(
ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
FALSE, dst=dst);
int test_yCbCrToRGB_16s16s_P3P3_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
@ -70,8 +71,7 @@ int test_copy8u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memcpy(dst, src1, size),
FALSE, NULL, 0,
FALSE, NULL, 0,
FALSE, NULL, 0, FALSE,
TRUE, ippsCopy_8u(src1, dst, size));
int test_copy8u_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int MEMSET8_PRETEST_ITERATIONS = 100000000;
@ -40,12 +41,11 @@ int test_set8u_func(void)
int failed = 0;
int off;
char testStr[256];
UINT32 pflags = primitives_get_flags(primitives_get());
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
/* Test SSE under various alignments */
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off)
@ -101,8 +101,7 @@ int test_set8u_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
TRUE, memset(dst, constant, size),
FALSE, NULL, 0,
FALSE, NULL, 0,
FALSE, NULL, 0, FALSE,
TRUE, ippsSet_8u(constant, dst, size));
int test_set8u_speed(void)
@ -116,17 +115,15 @@ int test_set8u_speed(void)
/* ------------------------------------------------------------------------- */
int test_set32s_func(void)
{
primitives_t* prims = primitives_get();
INT32 ALIGN(dest[512]);
int failed = 0;
int off;
char testStr[256];
UINT32 pflags = primitives_get_flags(prims);
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
/* Test SSE under various alignments */
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off) {
@ -179,17 +176,15 @@ int test_set32s_func(void)
/* ------------------------------------------------------------------------- */
int test_set32u_func(void)
{
primitives_t* prims = primitives_get();
UINT32 ALIGN(dest[512]);
int failed = 0;
int off;
char testStr[256];
UINT32 pflags = primitives_get_flags(prims);
testStr[0] = '\0';
#ifdef _M_IX86_AMD64
/* Test SSE under various alignments */
if (pflags & PRIM_X86_SSE2_AVAILABLE)
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
{
strcat(testStr, " SSE2");
for (off=0; off<16; ++off) {
@ -251,8 +246,7 @@ static inline void memset32u_naive(
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
TRUE, memset32u_naive(constant, dst, size),
TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ipp_wrapper_set_32u(constant, dst, size));
int test_set32u_speed(void)
@ -280,8 +274,7 @@ static inline void memset32s_naive(
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
TRUE, memset32s_naive(constant, dst, size),
TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsSet_32s(constant, dst, size));
int test_set32s_speed(void)

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
#define FUNC_TEST_SIZE 65536
@ -55,12 +56,11 @@ int _name_(void) \
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
int failed = 0; \
int i; \
UINT32 pflags = primitives_get_flags(primitives_get()); \
char testStr[256]; \
testStr[0] = '\0'; \
get_random_data(src, sizeof(src)); \
_f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
if (pflags & PRIM_X86_SSE3_AVAILABLE) \
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \
{ \
strcat(testStr, " SSE3"); \
/* Aligned */ \
@ -109,23 +109,19 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
/* ========================================================================= */
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
TRUE, general_lShiftC_16s(src1, constant, dst, size),
TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsLShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_lShiftC_16u(src1, constant, dst, size),
TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsLShiftC_16u(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
TRUE, general_rShiftC_16s(src1, constant, dst, size),
TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsRShiftC_16s(src1, constant, dst, size));
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
TRUE, general_rShiftC_16u(src1, constant, dst, size),
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
TRUE, ippsRShiftC_16u(src1, constant, dst, size));
/* ------------------------------------------------------------------------- */

View File

@ -16,6 +16,7 @@
#include "config.h"
#endif
#include <winpr/sysinfo.h>
#include "prim_test.h"
static const int SIGN_PRETEST_ITERATIONS = 100000;
@ -30,7 +31,6 @@ int test_sign16s_func(void)
INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]);
int failed = 0;
int i;
UINT32 pflags = primitives_get_flags(primitives_get());
char testStr[256];
/* Test when we can reach 16-byte alignment */
@ -38,7 +38,7 @@ int test_sign16s_func(void)
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+1, 65535);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
strcat(testStr, " SSSE3");
ssse3_sign_16s(src+1, d2+1, 65535);
@ -58,7 +58,7 @@ int test_sign16s_func(void)
get_random_data(src, sizeof(src));
general_sign_16s(src+1, d1+2, 65535);
#ifdef _M_IX86_AMD64
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
{
ssse3_sign_16s(src+1, d2+2, 65535);
for (i=2; i<65535; ++i)
@ -79,8 +79,7 @@ int test_sign16s_func(void)
/* ------------------------------------------------------------------------- */
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
TRUE, general_sign_16s(src1, dst, size),
TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE,
FALSE, dst=dst, 0,
TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
FALSE, dst=dst);
int test_sign16s_speed(void)