primitives: make use of winprs processor feature detection
- Removed the cpu flag detection - Fixed and updated the tests
This commit is contained in:
parent
23a7ef6c47
commit
84a3dbc115
@ -190,9 +190,6 @@ typedef struct
|
||||
__yCbCrToRGB_16s16s_P3P3_t yCbCrToRGB_16s16s_P3P3;
|
||||
__RGBToYCbCr_16s16s_P3P3_t RGBToYCbCr_16s16s_P3P3;
|
||||
__RGBToRGB_16s8u_P3AC4R_t RGBToRGB_16s8u_P3AC4R;
|
||||
|
||||
/* internal use for CPU flags and such. */
|
||||
void *hints;
|
||||
} primitives_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -202,12 +199,6 @@ extern "C" {
|
||||
/* Prototypes for the externally-visible entrypoints. */
|
||||
FREERDP_API void primitives_init(void);
|
||||
FREERDP_API primitives_t *primitives_get(void);
|
||||
FREERDP_API UINT32 primitives_get_flags(
|
||||
const primitives_t *prims);
|
||||
FREERDP_API void primitives_flags_str(
|
||||
const primitives_t *prims,
|
||||
char *str,
|
||||
size_t len);
|
||||
FREERDP_API void primitives_deinit(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -62,10 +62,7 @@ New Optimizations
|
||||
-----------------
|
||||
As the need arises, new optimizations can be added to the library,
|
||||
including NEON, AVX, and perhaps OpenCL or other SIMD implementations.
|
||||
The initialization routine is free to do any quick run-time test to
|
||||
determine which features are available before hooking the operation's
|
||||
function pointer, or it can simply look at the processor features list
|
||||
from the hints passed to the initialization routine.
|
||||
The CPU feature detection is done in winpr/sysinfo.
|
||||
|
||||
|
||||
Adding Entrypoints
|
||||
@ -85,15 +82,6 @@ be added.
|
||||
The template functions can frequently be used to extend the
|
||||
operations without writing a lot of new code.
|
||||
|
||||
|
||||
Flags
|
||||
-----
|
||||
The entrypoint primitives_get_flags() returns a bitfield of processor flags
|
||||
(as defined in primitives.h) and primitives_flags_str() returns a string
|
||||
related to those processor flags, for debugging and information. The
|
||||
bitfield can be used elsewhere in the code as needed.
|
||||
|
||||
|
||||
Cache Management
|
||||
----------------
|
||||
I haven't found a lot of speed improvement by attempting prefetch, and
|
||||
|
@ -46,12 +46,11 @@ pstatus_t general_add_16s(
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_add(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims)
|
||||
{
|
||||
prims->add_16s = general_add_16s;
|
||||
|
||||
primitives_init_add_opt(hints, prims);
|
||||
primitives_init_add_opt(prims);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
pstatus_t general_add_16s(const INT16 *pSrc1, const INT16 *pSrc2, INT16 *pDst, INT32 len);
|
||||
|
||||
void primitives_init_add_opt(const primitives_hints_t *hints, primitives_t *prims);
|
||||
void primitives_init_add_opt(primitives_t *prims);
|
||||
|
||||
#endif /* !__PRIM_ADD_H_INCLUDED__ */
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
#include <emmintrin.h>
|
||||
@ -45,18 +46,15 @@ SSE3_SSD_ROUTINE(sse3_add_16s, INT16, general_add_16s,
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_add_opt(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims)
|
||||
{
|
||||
#ifdef WITH_IPP
|
||||
prims->add_16s = (__add_16s_t) ippsAdd_16s;
|
||||
#elif defined(WITH_SSE2)
|
||||
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
|
||||
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
|
||||
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
|
||||
{
|
||||
prims->add_16s = sse3_add_16s;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -102,11 +102,11 @@ pstatus_t general_alphaComp_argb(
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_alphaComp(const primitives_hints_t* hints, primitives_t* prims)
|
||||
void primitives_init_alphaComp(primitives_t* prims)
|
||||
{
|
||||
prims->alphaComp_argb = general_alphaComp_argb;
|
||||
|
||||
primitives_init_alphaComp_opt(hints, prims);
|
||||
primitives_init_alphaComp_opt(prims);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
pstatus_t general_alphaComp_argb(const BYTE *pSrc1, INT32 src1Step, const BYTE *pSrc2, INT32 src2Step, BYTE *pDst, INT32 dstStep, INT32 width, INT32 height);
|
||||
|
||||
void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims);
|
||||
void primitives_init_alphaComp_opt(primitives_t* prims);
|
||||
|
||||
#endif /* !__PRIM_ALPHACOMP_H_INCLUDED__ */
|
||||
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
#include <emmintrin.h>
|
||||
@ -210,13 +211,13 @@ pstatus_t ipp_alphaComp_argb(
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_alphaComp_opt(const primitives_hints_t* hints, primitives_t* prims)
|
||||
void primitives_init_alphaComp_opt(primitives_t* prims)
|
||||
{
|
||||
#ifdef WITH_IPP
|
||||
prims->alphaComp_argb = ipp_alphaComp_argb;
|
||||
#elif defined(WITH_SSE2)
|
||||
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
|
||||
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE)) /* for LDDQU */
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
|
||||
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
|
||||
{
|
||||
prims->alphaComp_argb = sse2_alphaComp_argb;
|
||||
}
|
||||
|
@ -61,14 +61,13 @@ pstatus_t general_orC_32u(
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_andor(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims)
|
||||
{
|
||||
/* Start with the default. */
|
||||
prims->andC_32u = general_andC_32u;
|
||||
prims->orC_32u = general_orC_32u;
|
||||
|
||||
primitives_init_andor_opt(hints, prims);
|
||||
primitives_init_andor_opt(prims);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -25,7 +25,7 @@
|
||||
pstatus_t general_andC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
|
||||
pstatus_t general_orC_32u(const UINT32 *pSrc, UINT32 val, UINT32 *pDst, INT32 len);
|
||||
|
||||
void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims);
|
||||
void primitives_init_andor_opt(primitives_t *prims);
|
||||
|
||||
#endif /* !__PRIM_ANDOR_H_INCLUDED__ */
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
#include <emmintrin.h>
|
||||
@ -45,14 +46,14 @@ SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, general_orC_32u,
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_andor_opt(const primitives_hints_t *hints, primitives_t *prims)
|
||||
void primitives_init_andor_opt(primitives_t *prims)
|
||||
{
|
||||
#if defined(WITH_IPP)
|
||||
prims->andC_32u = (__andC_32u_t) ippsAndC_32u;
|
||||
prims->orC_32u = (__orC_32u_t) ippsOrC_32u;
|
||||
#elif defined(WITH_SSE2)
|
||||
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
|
||||
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
|
||||
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
prims->andC_32u = sse3_andC_32u;
|
||||
prims->orC_32u = sse3_orC_32u;
|
||||
|
@ -215,13 +215,13 @@ pstatus_t general_RGBToRGB_16s8u_P3AC4R(
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_colors(const primitives_hints_t* hints, primitives_t* prims)
|
||||
void primitives_init_colors(primitives_t* prims)
|
||||
{
|
||||
prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3;
|
||||
prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3;
|
||||
|
||||
primitives_init_colors_opt(hints, prims);
|
||||
primitives_init_colors_opt(prims);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -26,7 +26,7 @@ pstatus_t general_yCbCrToRGB_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, IN
|
||||
pstatus_t general_RGBToYCbCr_16s16s_P3P3(const INT16 *pSrc[3], INT32 srcStep, INT16 *pDst[3], INT32 dstStep, const prim_size_t *roi);
|
||||
pstatus_t general_RGBToRGB_16s8u_P3AC4R(const INT16 *pSrc[3], int srcStep, BYTE *pDst, int dstStep, const prim_size_t *roi);
|
||||
|
||||
void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims);
|
||||
void primitives_init_colors_opt(primitives_t* prims);
|
||||
|
||||
#endif /* !__PRIM_COLORS_H_INCLUDED__ */
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
#include <emmintrin.h>
|
||||
@ -542,17 +543,17 @@ pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_colors_opt(const primitives_hints_t* hints, primitives_t* prims)
|
||||
void primitives_init_colors_opt(primitives_t* prims)
|
||||
{
|
||||
#if defined(WITH_SSE2)
|
||||
if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
|
||||
prims->RGBToYCbCr_16s16s_P3P3 = sse2_RGBToYCbCr_16s16s_P3P3;
|
||||
}
|
||||
#elif defined(WITH_NEON)
|
||||
if (hints->arm_flags & PRIM_ARM_NEON_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
|
||||
}
|
||||
|
@ -148,7 +148,6 @@ static pstatus_t ippiCopy_8u_AC4r(
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_copy(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims)
|
||||
{
|
||||
/* Start with the default. */
|
||||
|
@ -34,61 +34,43 @@
|
||||
? _mm_lddqu_si128((__m128i *) (_ptr_)) \
|
||||
: _mm_load_si128((__m128i *) (_ptr_)))
|
||||
|
||||
/* This structure can (eventually) be used to provide hints to the
|
||||
* initialization routines, e.g. whether SSE2 or NEON or IPP instructions
|
||||
* or calls are available.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
UINT32 x86_flags;
|
||||
UINT32 arm_flags;
|
||||
} primitives_hints_t;
|
||||
|
||||
/* Function prototypes for all the init/deinit routines. */
|
||||
extern void primitives_init_copy(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_copy(
|
||||
primitives_t *prims);
|
||||
|
||||
extern void primitives_init_set(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_set(
|
||||
primitives_t *prims);
|
||||
|
||||
extern void primitives_init_add(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_add(
|
||||
primitives_t *prims);
|
||||
|
||||
extern void primitives_init_andor(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_andor(
|
||||
primitives_t *prims);
|
||||
|
||||
extern void primitives_init_shift(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_shift(
|
||||
primitives_t *prims);
|
||||
|
||||
extern void primitives_init_sign(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_sign(
|
||||
primitives_t *prims);
|
||||
|
||||
extern void primitives_init_alphaComp(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_alphaComp(
|
||||
primitives_t *prims);
|
||||
|
||||
extern void primitives_init_colors(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims);
|
||||
extern void primitives_deinit_colors(
|
||||
primitives_t *prims);
|
||||
|
@ -111,7 +111,6 @@ pstatus_t general_set_32u(
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_set(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims)
|
||||
{
|
||||
/* Start with the default. */
|
||||
@ -120,7 +119,7 @@ void primitives_init_set(
|
||||
prims->set_32u = general_set_32u;
|
||||
prims->zero = general_zero;
|
||||
|
||||
primitives_init_set_opt(hints, prims);
|
||||
primitives_init_set_opt(prims);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -28,7 +28,7 @@ pstatus_t general_set_32s(INT32 val, INT32 *pDst, INT32 len);
|
||||
pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, INT32 len);
|
||||
|
||||
|
||||
void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims);
|
||||
void primitives_init_set_opt(primitives_t *prims);
|
||||
|
||||
#endif /* !__PRIM_SET_H_INCLUDED__ */
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <string.h>
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
# include <emmintrin.h>
|
||||
@ -198,7 +199,7 @@ pstatus_t ipp_wrapper_set_32u(
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prims)
|
||||
void primitives_init_set_opt(primitives_t *prims)
|
||||
{
|
||||
/* Pick tuned versions if possible. */
|
||||
#ifdef WITH_IPP
|
||||
@ -207,7 +208,7 @@ void primitives_init_set_opt(const primitives_hints_t *hints, primitives_t *prim
|
||||
prims->set_32u = (__set_32u_t) ipp_wrapper_set_32u;
|
||||
prims->zero = (__zero_t) ippsZero_8u;
|
||||
#elif defined(WITH_SSE2)
|
||||
if (hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
prims->set_8u = sse2_set_8u;
|
||||
prims->set_32s = sse2_set_32s;
|
||||
|
@ -104,7 +104,6 @@ pstatus_t general_shiftC_16u(
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_shift(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims)
|
||||
{
|
||||
/* Start with the default. */
|
||||
@ -117,7 +116,7 @@ void primitives_init_shift(
|
||||
prims->shiftC_16s = general_shiftC_16s;
|
||||
prims->shiftC_16u = general_shiftC_16u;
|
||||
|
||||
primitives_init_shift_opt(hints, prims);
|
||||
primitives_init_shift_opt(prims);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -29,7 +29,7 @@ pstatus_t general_rShiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32
|
||||
pstatus_t general_shiftC_16s(const INT16 *pSrc, INT32 val, INT16 *pDst, INT32 len);
|
||||
pstatus_t general_shiftC_16u(const UINT16 *pSrc, INT32 val, UINT16 *pDst, INT32 len);
|
||||
|
||||
void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims);
|
||||
void primitives_init_shift_opt(primitives_t *prims);
|
||||
|
||||
#endif /* !__PRIM_SHIFT_H_INCLUDED__ */
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
#include <emmintrin.h>
|
||||
@ -58,7 +59,7 @@ SSE3_SCD_ROUTINE(sse2_rShiftC_16u, UINT16, general_rShiftC_16u,
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *prims)
|
||||
void primitives_init_shift_opt(primitives_t *prims)
|
||||
{
|
||||
#if defined(WITH_IPP)
|
||||
prims->lShiftC_16s = (__lShiftC_16s_t) ippsLShiftC_16s;
|
||||
@ -66,8 +67,8 @@ void primitives_init_shift_opt(const primitives_hints_t *hints, primitives_t *pr
|
||||
prims->lShiftC_16u = (__lShiftC_16u_t) ippsLShiftC_16u;
|
||||
prims->rShiftC_16u = (__rShiftC_16u_t) ippsRShiftC_16u;
|
||||
#elif defined(WITH_SSE2)
|
||||
if ((hints->x86_flags & PRIM_X86_SSE2_AVAILABLE)
|
||||
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)
|
||||
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
prims->lShiftC_16s = sse2_lShiftC_16s;
|
||||
prims->rShiftC_16s = sse2_rShiftC_16s;
|
||||
|
@ -42,13 +42,12 @@ pstatus_t general_sign_16s(
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_sign(
|
||||
const primitives_hints_t *hints,
|
||||
primitives_t *prims)
|
||||
{
|
||||
/* Start with the default. */
|
||||
prims->sign_16s = general_sign_16s;
|
||||
|
||||
primitives_init_sign_opt(hints, prims);
|
||||
primitives_init_sign_opt(prims);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
pstatus_t general_sign_16s(const INT16 *pSrc, INT16 *pDst, INT32 len);
|
||||
|
||||
void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims);
|
||||
void primitives_init_sign_opt(primitives_t *prims);
|
||||
|
||||
#endif /* !__PRIM_SIGN_H_INCLUDED__ */
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
#include <emmintrin.h>
|
||||
@ -134,13 +135,13 @@ pstatus_t ssse3_sign_16s(
|
||||
#endif /* WITH_SSE2 */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_sign_opt(const primitives_hints_t *hints, primitives_t *prims)
|
||||
void primitives_init_sign_opt(primitives_t *prims)
|
||||
{
|
||||
/* Pick tuned versions if possible. */
|
||||
/* I didn't spot an IPP version of this. */
|
||||
#if defined(WITH_SSE2)
|
||||
if ((hints->x86_flags & PRIM_X86_SSSE3_AVAILABLE)
|
||||
&& (hints->x86_flags & PRIM_X86_SSE3_AVAILABLE))
|
||||
/* ssse3_sign_16s needs SSSE3 (the previous hints-based test required it too).
 * SSSE3 is an extended PF_EX_* feature, so it is queried via the Ex lookup. */
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3)
&& IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
prims->sign_16s = ssse3_sign_16s;
|
||||
}
|
||||
|
@ -22,173 +22,16 @@
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <winpr/platform.h>
|
||||
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
#include "prim_internal.h"
|
||||
|
||||
#ifdef __ANDROID__
|
||||
#include "cpu-features.h"
|
||||
#endif
|
||||
|
||||
/* Singleton pointer used throughout the program when requested. */
|
||||
static primitives_t* pPrimitives = NULL;
|
||||
|
||||
#define D_BIT_MMX (1<<23)
|
||||
#define D_BIT_SSE (1<<25)
|
||||
#define D_BIT_SSE2 (1<<26)
|
||||
#define D_BIT_3DN (1<<30)
|
||||
#define C_BIT_SSE3 (1<<0)
|
||||
#define C_BIT_3DNP (1<<8)
|
||||
#define C_BIT_SSSE3 (1<<9)
|
||||
#define C_BIT_SSE41 (1<<19)
|
||||
#define C_BIT_SSE42 (1<<20)
|
||||
#define C_BIT_XGETBV (1<<27)
|
||||
#define C_BIT_AVX (1<<28)
|
||||
#define C_BITS_AVX (C_BIT_XGETBV|C_BIT_AVX)
|
||||
#define E_BIT_XMM (1<<1)
|
||||
#define E_BIT_YMM (1<<2)
|
||||
#define E_BITS_AVX (E_BIT_XMM|E_BIT_YMM)
|
||||
#define C_BIT_FMA (1<<11)
|
||||
#define C_BIT_AVX_AES (1<<24)
|
||||
|
||||
/* If x86 */
|
||||
#if defined(_M_IX86_AMD64)
|
||||
|
||||
/* If GCC */
|
||||
#ifdef __GNUC__
|
||||
|
||||
#ifdef __AVX__
|
||||
#define xgetbv(_func_, _lo_, _hi_) \
|
||||
__asm__ __volatile__ ("xgetbv" : "=a" (_lo_), "=d" (_hi_) : "c" (_func_))
|
||||
#endif
|
||||
|
||||
static void cpuid(
|
||||
unsigned info,
|
||||
unsigned *eax,
|
||||
unsigned *ebx,
|
||||
unsigned *ecx,
|
||||
unsigned *edx)
|
||||
{
|
||||
*eax = *ebx = *ecx = *edx = 0;
|
||||
|
||||
__asm volatile
|
||||
(
|
||||
/* The EBX (or RBX register on x86_64) is used for the PIC base address
|
||||
* and must not be corrupted by our inline assembly.
|
||||
*/
|
||||
#ifdef _M_IX86
|
||||
"mov %%ebx, %%esi;"
|
||||
"cpuid;"
|
||||
"xchg %%ebx, %%esi;"
|
||||
#else
|
||||
"mov %%rbx, %%rsi;"
|
||||
"cpuid;"
|
||||
"xchg %%rbx, %%rsi;"
|
||||
#endif
|
||||
: "=a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx)
|
||||
: "0" (info)
|
||||
);
|
||||
}
|
||||
|
||||
static void set_hints(primitives_hints_t* hints)
|
||||
{
|
||||
unsigned a, b, c, d;
|
||||
|
||||
cpuid(1, &a, &b, &c, &d);
|
||||
|
||||
if (d & D_BIT_MMX)
|
||||
hints->x86_flags |= PRIM_X86_MMX_AVAILABLE;
|
||||
if (d & D_BIT_SSE)
|
||||
hints->x86_flags |= PRIM_X86_SSE_AVAILABLE;
|
||||
if (d & D_BIT_SSE2)
|
||||
hints->x86_flags |= PRIM_X86_SSE2_AVAILABLE;
|
||||
if (d & D_BIT_3DN)
|
||||
hints->x86_flags |= PRIM_X86_3DNOW_AVAILABLE;
|
||||
if (c & C_BIT_3DNP)
|
||||
hints->x86_flags |= PRIM_X86_3DNOW_PREFETCH_AVAILABLE;
|
||||
if (c & C_BIT_SSE3)
|
||||
hints->x86_flags |= PRIM_X86_SSE3_AVAILABLE;
|
||||
if (c & C_BIT_SSSE3)
|
||||
hints->x86_flags |= PRIM_X86_SSSE3_AVAILABLE;
|
||||
if (c & C_BIT_SSE41)
|
||||
hints->x86_flags |= PRIM_X86_SSE41_AVAILABLE;
|
||||
if (c & C_BIT_SSE42)
|
||||
hints->x86_flags |= PRIM_X86_SSE42_AVAILABLE;
|
||||
|
||||
#ifdef __AVX__
|
||||
if ((c & C_BITS_AVX) == C_BITS_AVX)
|
||||
{
|
||||
int e, f;
|
||||
xgetbv(0, e, f);
|
||||
|
||||
if ((e & E_BITS_AVX) == E_BITS_AVX)
|
||||
{
|
||||
hints->x86_flags |= PRIM_X86_AVX_AVAILABLE;
|
||||
|
||||
if (c & C_BIT_FMA)
|
||||
hints->x86_flags |= PRIM_X86_FMA_AVAILABLE;
|
||||
if (c & C_BIT_AVX_AES)
|
||||
hints->x86_flags |= PRIM_X86_AVX_AES_AVAILABLE;
|
||||
}
|
||||
}
|
||||
/* TODO: AVX2: set eax=7, ecx=0, cpuid, check ebx-bit5 */
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void set_hints(primitives_hints_t* hints)
|
||||
{
|
||||
/* x86 non-GCC: TODO */
|
||||
}
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#elif defined(_M_ARM)
|
||||
|
||||
static UINT32 getNeonSupport(void)
|
||||
{
|
||||
#ifdef __ANDROID__
|
||||
if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) return 0;
|
||||
|
||||
UINT64 features = android_getCpuFeatures();
|
||||
|
||||
if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7))
|
||||
{
|
||||
if (features & ANDROID_CPU_ARM_FEATURE_NEON)
|
||||
{
|
||||
return PRIM_ARM_NEON_AVAILABLE;
|
||||
}
|
||||
}
|
||||
#elif defined(__APPLE)
|
||||
/* assume NEON support on iOS devices */
|
||||
return PRIM_ARM_NEON_AVAILABLE;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void set_hints(primitives_hints_t* hints)
|
||||
{
|
||||
/* ARM: TODO */
|
||||
hints->arm_flags |= getNeonSupport();
|
||||
}
|
||||
|
||||
#else
|
||||
static void set_hints(
|
||||
primitives_hints_t *hints)
|
||||
{
|
||||
}
|
||||
#endif /* x86 else ARM else */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init(void)
|
||||
{
|
||||
primitives_hints_t* hints;
|
||||
|
||||
if (pPrimitives == NULL)
|
||||
{
|
||||
pPrimitives = calloc(1, sizeof(primitives_t));
|
||||
@ -197,19 +40,15 @@ void primitives_init(void)
|
||||
return;
|
||||
}
|
||||
|
||||
hints = calloc(1, sizeof(primitives_hints_t));
|
||||
set_hints(hints);
|
||||
pPrimitives->hints = (void *) hints;
|
||||
|
||||
/* Now call each section's initialization routine. */
|
||||
primitives_init_add(hints, pPrimitives);
|
||||
primitives_init_andor(hints, pPrimitives);
|
||||
primitives_init_alphaComp(hints, pPrimitives);
|
||||
primitives_init_copy(hints, pPrimitives);
|
||||
primitives_init_set(hints, pPrimitives);
|
||||
primitives_init_shift(hints, pPrimitives);
|
||||
primitives_init_sign(hints, pPrimitives);
|
||||
primitives_init_colors(hints, pPrimitives);
|
||||
primitives_init_add(pPrimitives);
|
||||
primitives_init_andor(pPrimitives);
|
||||
primitives_init_alphaComp(pPrimitives);
|
||||
primitives_init_copy(pPrimitives);
|
||||
primitives_init_set(pPrimitives);
|
||||
primitives_init_shift(pPrimitives);
|
||||
primitives_init_sign(pPrimitives);
|
||||
primitives_init_colors(pPrimitives);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
@ -221,102 +60,6 @@ primitives_t* primitives_get(void)
|
||||
return pPrimitives;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
UINT32 primitives_get_flags(const primitives_t* prims)
|
||||
{
|
||||
primitives_hints_t* hints = (primitives_hints_t*) (prims->hints);
|
||||
|
||||
#if defined(_M_IX86_AMD64)
|
||||
return hints->x86_flags;
|
||||
#elif defined(_M_ARM)
|
||||
return hints->arm_flags;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UINT32 flag;
|
||||
const char *str;
|
||||
} flagpair_t;
|
||||
|
||||
static const flagpair_t x86_flags[] =
|
||||
{
|
||||
{ PRIM_X86_MMX_AVAILABLE, "MMX" },
|
||||
{ PRIM_X86_3DNOW_AVAILABLE, "3DNow" },
|
||||
{ PRIM_X86_3DNOW_PREFETCH_AVAILABLE, "3DNow-PF" },
|
||||
{ PRIM_X86_SSE_AVAILABLE, "SSE" },
|
||||
{ PRIM_X86_SSE2_AVAILABLE, "SSE2" },
|
||||
{ PRIM_X86_SSE3_AVAILABLE, "SSE3" },
|
||||
{ PRIM_X86_SSSE3_AVAILABLE, "SSSE3" },
|
||||
{ PRIM_X86_SSE41_AVAILABLE, "SSE4.1" },
|
||||
{ PRIM_X86_SSE42_AVAILABLE, "SSE4.2" },
|
||||
{ PRIM_X86_AVX_AVAILABLE, "AVX" },
|
||||
{ PRIM_X86_FMA_AVAILABLE, "FMA" },
|
||||
{ PRIM_X86_AVX_AES_AVAILABLE, "AVX-AES" },
|
||||
{ PRIM_X86_AVX2_AVAILABLE, "AVX2" },
|
||||
};
|
||||
|
||||
static const flagpair_t arm_flags[] =
|
||||
{
|
||||
{ PRIM_ARM_VFP1_AVAILABLE, "VFP1" },
|
||||
{ PRIM_ARM_VFP2_AVAILABLE, "VFP2" },
|
||||
{ PRIM_ARM_VFP3_AVAILABLE, "VFP3" },
|
||||
{ PRIM_ARM_VFP4_AVAILABLE, "VFP4" },
|
||||
{ PRIM_ARM_FPA_AVAILABLE, "FPA" },
|
||||
{ PRIM_ARM_FPE_AVAILABLE, "FPE" },
|
||||
{ PRIM_ARM_IWMMXT_AVAILABLE, "IWMMXT" },
|
||||
{ PRIM_ARM_NEON_AVAILABLE, "NEON" },
|
||||
};
|
||||
|
||||
void primitives_flags_str(const primitives_t* prims, char* str, size_t len)
|
||||
{
|
||||
int i;
|
||||
primitives_hints_t* hints;
|
||||
|
||||
*str = '\0';
|
||||
--len; /* for the '/0' */
|
||||
|
||||
hints = (primitives_hints_t*) (prims->hints);
|
||||
|
||||
for (i = 0; i < sizeof(x86_flags) / sizeof(flagpair_t); ++i)
|
||||
{
|
||||
if (hints->x86_flags & x86_flags[i].flag)
|
||||
{
|
||||
int slen = strlen(x86_flags[i].str) + 1;
|
||||
|
||||
if (len < slen)
|
||||
break;
|
||||
|
||||
if (*str != '\0')
|
||||
strcat(str, " ");
|
||||
|
||||
strcat(str, x86_flags[i].str);
|
||||
len -= slen;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < sizeof(arm_flags) / sizeof(flagpair_t); ++i)
|
||||
{
|
||||
if (hints->arm_flags & arm_flags[i].flag)
|
||||
{
|
||||
int slen = strlen(arm_flags[i].str) + 1;
|
||||
|
||||
if (len < slen)
|
||||
break;
|
||||
|
||||
if (*str != '\0')
|
||||
strcat(str, " ");
|
||||
|
||||
strcat(str, arm_flags[i].str);
|
||||
len -= slen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_deinit(void)
|
||||
{
|
||||
@ -333,9 +76,6 @@ void primitives_deinit(void)
|
||||
primitives_deinit_sign(pPrimitives);
|
||||
primitives_deinit_colors(pPrimitives);
|
||||
|
||||
if (pPrimitives->hints != NULL)
|
||||
free((void*) (pPrimitives->hints));
|
||||
|
||||
free((void*) pPrimitives);
|
||||
pPrimitives = NULL;
|
||||
}
|
||||
|
@ -31,28 +31,11 @@ set(PRIMITIVE_TEST_CFILES
|
||||
test_set.c
|
||||
test_shift.c
|
||||
test_sign.c
|
||||
../prim_add.c
|
||||
../prim_andor.c
|
||||
../prim_alphaComp.c
|
||||
../prim_colors.c
|
||||
../prim_copy.c
|
||||
../prim_set.c
|
||||
../prim_shift.c
|
||||
../prim_sign.c
|
||||
../prim_add_opt.c
|
||||
../prim_alphaComp_opt.c
|
||||
../prim_andor_opt.c
|
||||
../prim_colors_opt.c
|
||||
../prim_set_opt.c
|
||||
../prim_shift_opt.c
|
||||
../prim_sign_opt.c
|
||||
../primitives.c
|
||||
)
|
||||
|
||||
set(PRIMITIVE_TEST_HEADERS
|
||||
measure.h
|
||||
prim_test.h
|
||||
../prim_internal.h
|
||||
)
|
||||
|
||||
set(PRIMITIVE_TEST_SRCS
|
||||
@ -138,7 +121,7 @@ endif()
|
||||
|
||||
set_property(SOURCE ${PRIMITIVE_TEST_CFILES} PROPERTY COMPILE_FLAGS ${OPTFLAGS})
|
||||
|
||||
target_link_libraries(prim_test rt)
|
||||
target_link_libraries(prim_test freerdp-primitives rt winpr-sysinfo)
|
||||
if(NOT TESTING_OUTPUT_DIRECTORY)
|
||||
set(TESTING_OUTPUT_DIRECTORY .)
|
||||
endif()
|
||||
|
@ -21,6 +21,8 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <winpr/platform.h>
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
@ -32,6 +34,88 @@
|
||||
int test_sizes[] = { 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 };
|
||||
int Quiet = 0;
|
||||
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
typedef struct
|
||||
{
|
||||
UINT32 flag;
|
||||
const char *str;
|
||||
} flagpair_t;
|
||||
|
||||
/* Basic processor features (tested with IsProcessorFeaturePresent) paired
 * with the label printed in the "Hints:" line of the test program.
 * The opening brace is kept outside the architecture conditionals so the
 * initializer stays well-formed on every target. */
static const flagpair_t flags[] =
{
#ifdef _M_IX86_AMD64
	{ PF_MMX_INSTRUCTIONS_AVAILABLE, "MMX" },
	{ PF_3DNOW_INSTRUCTIONS_AVAILABLE, "3DNow" },
	{ PF_XMMI_INSTRUCTIONS_AVAILABLE, "SSE" },
	{ PF_XMMI64_INSTRUCTIONS_AVAILABLE, "SSE2" },
	{ PF_SSE3_INSTRUCTIONS_AVAILABLE, "SSE3" },
#elif defined(_M_ARM)
	{ PF_ARM_VFP3, "VFP3" },
	{ PF_ARM_INTEL_WMMX, "IWMMXT" },
	{ PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, "NEON" },
#endif
};
|
||||
|
||||
/* Extended processor features (PF_EX_* identifiers) paired with the label
 * printed for each one.
 * NOTE(review): presumably these identifiers must be queried through winpr's
 * extended feature lookup rather than the plain one -- confirm against
 * winpr/sysinfo.h. */
static const flagpair_t flags_extended[] =
|
||||
{
|
||||
#ifdef _M_IX86_AMD64
|
||||
{ PF_EX_3DNOW_PREFETCH, "3DNow-PF" },
|
||||
{ PF_EX_SSSE3, "SSSE3" },
|
||||
{ PF_EX_SSE41, "SSE4.1" },
|
||||
{ PF_EX_SSE42, "SSE4.2" },
|
||||
{ PF_EX_AVX, "AVX" },
|
||||
{ PF_EX_FMA, "FMA" },
|
||||
{ PF_EX_AVX_AES, "AVX-AES" },
|
||||
{ PF_EX_AVX2, "AVX2" },
|
||||
#elif defined(_M_ARM)
|
||||
{ PF_EX_ARM_VFP1, "VFP1"},
|
||||
{ PF_EX_ARM_VFP4, "VFP4" },
|
||||
#endif
|
||||
};
|
||||
|
||||
void primitives_flags_str(char* str, size_t len)
|
||||
{
|
||||
int i;
|
||||
|
||||
*str = '\0';
|
||||
--len; /* for the '/0' */
|
||||
|
||||
for (i = 0; i < sizeof(flags) / sizeof(flagpair_t); ++i)
|
||||
{
|
||||
if (IsProcessorFeaturePresent(flags[i].flag))
|
||||
{
|
||||
int slen = strlen(flags[i].str) + 1;
|
||||
|
||||
if (len < slen)
|
||||
break;
|
||||
|
||||
if (*str != '\0')
|
||||
strcat(str, " ");
|
||||
|
||||
strcat(str, flags[i].str);
|
||||
len -= slen;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < sizeof(flags_extended) / sizeof(flagpair_t); ++i)
|
||||
{
|
||||
if (IsProcessorFeaturePresent(flags_extended[i].flag))
|
||||
{
|
||||
int slen = strlen(flags_extended[i].str) + 1;
|
||||
|
||||
if (len < slen)
|
||||
break;
|
||||
|
||||
if (*str != '\0')
|
||||
strcat(str, " ");
|
||||
|
||||
strcat(str, flags_extended[i].str);
|
||||
len -= slen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static void get_random_data_lrand(
|
||||
void *buffer,
|
||||
@ -198,7 +282,7 @@ static const test_t testTypeList[] =
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
int i;
|
||||
char hints[256];
|
||||
char hints[1024];
|
||||
UINT32 testSet = 0;
|
||||
UINT32 testTypes = 0;
|
||||
int results = SUCCESS;
|
||||
@ -253,7 +337,7 @@ int main(int argc, char** argv)
|
||||
|
||||
primitives_init();
|
||||
|
||||
primitives_flags_str(primitives_get(), hints, sizeof(hints));
|
||||
primitives_flags_str(hints, sizeof(hints));
|
||||
printf("Hints: %s\n", hints);
|
||||
|
||||
/* COPY */
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include <freerdp/primitives.h>
|
||||
#include <winpr/platform.h>
|
||||
|
||||
#ifdef WITH_IPP
|
||||
#include <ipps.h>
|
||||
@ -121,8 +122,8 @@ extern int test_or_32u_speed(void);
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#if defined(i386) && defined(WITH_SSE2)
|
||||
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_) \
|
||||
#if (defined(_M_IX86_AMD64) && defined(WITH_SSE2)) || (defined(arm) && defined(WITH_NEON))
|
||||
#define DO_OPT_MEASUREMENTS(_funcOpt_, _prework_) \
|
||||
do { \
|
||||
for (s=0; s<num_sizes; ++s) \
|
||||
{ \
|
||||
@ -132,34 +133,15 @@ extern int test_or_32u_speed(void);
|
||||
_prework_; \
|
||||
iter = iterations/size; \
|
||||
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
|
||||
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
|
||||
_funcSSE_); \
|
||||
MEASURE_TIMED(label, iter, test_time, resultOpt[s], \
|
||||
_funcOpt_); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define DO_SSE_MEASUREMENTS(_funcSSE_, _prework_)
|
||||
#define DO_OPT_MEASUREMENTS(_funcSSE_, _prework_)
|
||||
#endif
|
||||
|
||||
#if defined(armel) && defined(INCLUDE_NEON_MEASUREMENTS)
|
||||
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_) \
|
||||
do { \
|
||||
for (s=0; s<num_sizes; ++s) \
|
||||
{ \
|
||||
int iter; \
|
||||
char label[256]; \
|
||||
int size = size_array[s]; \
|
||||
_prework_; \
|
||||
iter = iterations/size; \
|
||||
sprintf(label, "%s-%s-%-4d", SIMD_TYPE, oplabel, size); \
|
||||
MEASURE_TIMED(label, iter, test_time, resultSSENeon[s], \
|
||||
_funcNeon_); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define DO_NEON_MEASUREMENTS(_funcNeon_, _prework_)
|
||||
#endif
|
||||
|
||||
#if defined(i386) && defined(WITH_IPP)
|
||||
#if defined(_M_IX86_AMD64) && defined(WITH_IPP)
|
||||
#define DO_IPP_MEASUREMENTS(_funcIPP_, _prework_) \
|
||||
do { \
|
||||
for (s=0; s<num_sizes; ++s) \
|
||||
@ -182,8 +164,7 @@ extern int test_or_32u_speed(void);
|
||||
#define STD_SPEED_TEST( \
|
||||
_name_, _srctype_, _dsttype_, _prework_, \
|
||||
_doNormal_, _funcNormal_, \
|
||||
_doSSE_, _funcSSE_, _flagsSSE_, \
|
||||
_doNeon_, _funcNeon_, _flagsNeon_, \
|
||||
_doOpt_, _funcOpt_, _flagOpt_, _flagExt_, \
|
||||
_doIPP_, _funcIPP_) \
|
||||
static void _name_( \
|
||||
const char *oplabel, const char *type, \
|
||||
@ -193,24 +174,28 @@ static void _name_( \
|
||||
int iterations, float test_time) \
|
||||
{ \
|
||||
int s; \
|
||||
float *resultNormal, *resultSSENeon, *resultIPP; \
|
||||
UINT32 pflags = primitives_get_flags(primitives_get()); \
|
||||
float *resultNormal, *resultOpt, *resultIPP; \
|
||||
resultNormal = (float *) calloc(num_sizes, sizeof(float)); \
|
||||
resultSSENeon = (float *) calloc(num_sizes, sizeof(float)); \
|
||||
resultOpt = (float *) calloc(num_sizes, sizeof(float)); \
|
||||
resultIPP = (float *) calloc(num_sizes, sizeof(float)); \
|
||||
printf("******************** %s %s ******************\n", \
|
||||
oplabel, type); \
|
||||
if (_doNormal_) { DO_NORMAL_MEASUREMENTS(_funcNormal_, _prework_); } \
|
||||
if (_doSSE_) { \
|
||||
if ((pflags & (_flagsSSE_)) == (_flagsSSE_)) \
|
||||
if (_doOpt_) \
|
||||
{ \
|
||||
if (_flagExt_) \
|
||||
{ \
|
||||
DO_SSE_MEASUREMENTS(_funcSSE_, _prework_); \
|
||||
if (IsProcessorFeaturePresentEx(_flagOpt_)) \
|
||||
{ \
|
||||
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (_doNeon_) { \
|
||||
if ((pflags & (_flagsNeon_)) == (_flagsNeon_)) \
|
||||
else \
|
||||
{ \
|
||||
DO_NEON_MEASUREMENTS(_funcNeon_, _prework_); \
|
||||
if (IsProcessorFeaturePresent(_flagOpt_)) \
|
||||
{ \
|
||||
DO_OPT_MEASUREMENTS(_funcOpt_, _prework_); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (_doIPP_) { DO_IPP_MEASUREMENTS(_funcIPP_, _prework_); } \
|
||||
@ -223,13 +208,13 @@ static void _name_( \
|
||||
strcpy(sN, "N/A"); strcpy(sSN, "N/A"); strcpy(sSNp, "N/A"); \
|
||||
strcpy(sIPP, "N/A"); strcpy(sIPPp, "N/A"); \
|
||||
if (resultNormal[s] > 0.0) _floatprint(resultNormal[s], sN); \
|
||||
if (resultSSENeon[s] > 0.0) \
|
||||
if (resultOpt[s] > 0.0) \
|
||||
{ \
|
||||
_floatprint(resultSSENeon[s], sSN); \
|
||||
_floatprint(resultOpt[s], sSN); \
|
||||
if (resultNormal[s] > 0.0) \
|
||||
{ \
|
||||
sprintf(sSNp, "%d%%", \
|
||||
(int) (resultSSENeon[s] / resultNormal[s] * 100.0 + 0.5)); \
|
||||
(int) (resultOpt[s] / resultNormal[s] * 100.0 + 0.5)); \
|
||||
} \
|
||||
} \
|
||||
if (resultIPP[s] > 0.0) \
|
||||
@ -244,7 +229,7 @@ static void _name_( \
|
||||
printf("%8d: %15s %15s %5s %15s %5s\n", \
|
||||
size_array[s], sN, sSN, sSNp, sIPP, sIPPp); \
|
||||
} \
|
||||
free(resultNormal); free(resultSSENeon); free(resultIPP); \
|
||||
free(resultNormal); free(resultOpt); free(resultIPP); \
|
||||
}
|
||||
|
||||
#endif // !__PRIMTEST_H_INCLUDED__
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
#define FUNC_TEST_SIZE 65536
|
||||
@ -35,7 +36,6 @@ int test_add16s_func(void)
|
||||
int failed = 0;
|
||||
int i;
|
||||
char testStr[256];
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
|
||||
testStr[0] = '\0';
|
||||
get_random_data(src1, sizeof(src1));
|
||||
@ -44,7 +44,7 @@ int test_add16s_func(void)
|
||||
memset(d2, 0, sizeof(d2));
|
||||
general_add_16s(src1+1, src2+1, d1+1, FUNC_TEST_SIZE);
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSE3_AVAILABLE)
|
||||
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE3");
|
||||
/* Aligned */
|
||||
@ -91,8 +91,7 @@ int test_add16s_func(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(add16s_speed_test, INT16, INT16, dst=dst,
|
||||
TRUE, general_add_16s(src1, src2, dst, size),
|
||||
TRUE, sse3_add_16s(src1, src2, dst, size), PRIM_X86_SSE3_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse3_add_16s(src1, src2, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsAdd_16s(src1, src2, dst, size));
|
||||
|
||||
int test_add16s_speed(void)
|
||||
|
@ -15,6 +15,7 @@
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include "prim_test.h"
|
||||
|
||||
@ -110,7 +111,6 @@ int test_alphaComp_func(void)
|
||||
UINT32 ALIGN(dst2u[DST_WIDTH*DST_HEIGHT+1]);
|
||||
UINT32 ALIGN(dst3[DST_WIDTH*DST_HEIGHT]);
|
||||
int error = 0;
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
char testStr[256];
|
||||
UINT32 *ptr;
|
||||
int i, x, y;
|
||||
@ -133,7 +133,7 @@ int test_alphaComp_func(void)
|
||||
(const BYTE *) src2, 4*SRC2_WIDTH,
|
||||
(BYTE *) dst1, 4*DST_WIDTH, TEST_WIDTH, TEST_HEIGHT);
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
sse2_alphaComp_argb((const BYTE *) src1, 4*SRC1_WIDTH,
|
||||
@ -166,7 +166,7 @@ int test_alphaComp_func(void)
|
||||
error = 1;
|
||||
}
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
UINT32 c2 = *PIXEL(dst2a, 4*DST_WIDTH, x, y);
|
||||
if (colordist(c0, c2) > TOLERANCE)
|
||||
@ -207,8 +207,7 @@ STD_SPEED_TEST(alphaComp_speed, BYTE, BYTE, int bytes = size*4,
|
||||
TRUE, general_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
|
||||
size, size),
|
||||
TRUE, sse2_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
|
||||
size, size), PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
size, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ipp_alphaComp_argb(src1, bytes, src2, bytes, dst, bytes,
|
||||
size, size));
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include "prim_test.h"
|
||||
|
||||
@ -39,7 +40,6 @@ int test_and_32u_func(void)
|
||||
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
|
||||
int failed = 0;
|
||||
int i;
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
char testStr[256];
|
||||
|
||||
testStr[0] = '\0';
|
||||
@ -56,7 +56,7 @@ int test_and_32u_func(void)
|
||||
}
|
||||
}
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSE3_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE3");
|
||||
/* Aligned */
|
||||
@ -92,8 +92,7 @@ int test_and_32u_func(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(andC_32u_speed_test, UINT32, UINT32, dst=dst,
|
||||
TRUE, general_andC_32u(src1, constant, dst, size),
|
||||
TRUE, sse3_andC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse3_andC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsAndC_32u(src1, constant, dst, size))
|
||||
|
||||
int test_and_32u_speed(void)
|
||||
@ -113,7 +112,6 @@ int test_or_32u_func(void)
|
||||
UINT32 ALIGN(src[FUNC_TEST_SIZE+3]), ALIGN(dst[FUNC_TEST_SIZE+3]);
|
||||
int failed = 0;
|
||||
int i;
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
char testStr[256];
|
||||
|
||||
testStr[0] = '\0';
|
||||
@ -130,7 +128,7 @@ int test_or_32u_func(void)
|
||||
}
|
||||
}
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSE3_AVAILABLE)
|
||||
if(IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE3");
|
||||
/* Aligned */
|
||||
@ -166,8 +164,7 @@ int test_or_32u_func(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(orC_32u_speed_test, UINT32, UINT32, dst=dst,
|
||||
TRUE, general_orC_32u(src1, constant, dst, size),
|
||||
TRUE, sse3_orC_32u(src1, constant, dst, size), PRIM_X86_SSE3_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse3_orC_32u(src1, constant, dst, size), PF_SSE3_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsOrC_32u(src1, constant, dst, size))
|
||||
|
||||
int test_or_32u_speed(void)
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
static const int RGB_TRIAL_ITERATIONS = 1000;
|
||||
@ -38,7 +39,6 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
|
||||
UINT32 ALIGN(out1[4096]), ALIGN(out2[4096]);
|
||||
int i;
|
||||
int failed = 0;
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
char testStr[256];
|
||||
INT16 *ptrs[3];
|
||||
prim_size_t roi = { 64, 64 };
|
||||
@ -62,7 +62,7 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
|
||||
general_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
|
||||
(BYTE *) out1, 64*4, &roi);
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
sse2_RGBToRGB_16s8u_P3AC4R((const INT16 **) ptrs, 64*2,
|
||||
@ -90,8 +90,7 @@ STD_SPEED_TEST(
|
||||
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
|
||||
TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
|
||||
(const INT16 **) src1, 64*2, (BYTE *) dst, 64*4, &roi64x64),
|
||||
PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
FALSE, dst=dst);
|
||||
|
||||
int test_RGBToRGB_16s8u_P3AC4R_speed(void)
|
||||
@ -131,7 +130,6 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
|
||||
INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
|
||||
int i;
|
||||
int failed = 0;
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
char testStr[256];
|
||||
const INT16 *in[3];
|
||||
INT16 *out1[3];
|
||||
@ -168,7 +166,7 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
|
||||
|
||||
general_yCbCrToRGB_16s16s_P3P3(in, 64*2, out1, 64*2, &roi);
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
sse2_yCbCrToRGB_16s16s_P3P3(in, 64*2, out2, 64*2, &roi);
|
||||
@ -193,8 +191,7 @@ STD_SPEED_TEST(
|
||||
ycbcr_to_rgb_speed, INT16*, INT16*, dst=dst,
|
||||
TRUE, general_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
|
||||
TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64*2, dst, 64*2, &roi64x64),
|
||||
PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
FALSE, dst=dst);
|
||||
|
||||
int test_yCbCrToRGB_16s16s_P3P3_speed(void)
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
|
||||
@ -70,8 +71,7 @@ int test_copy8u_func(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst=dst,
|
||||
TRUE, memcpy(dst, src1, size),
|
||||
FALSE, NULL, 0,
|
||||
FALSE, NULL, 0,
|
||||
FALSE, NULL, 0, FALSE,
|
||||
TRUE, ippsCopy_8u(src1, dst, size));
|
||||
|
||||
int test_copy8u_speed(void)
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
static const int MEMSET8_PRETEST_ITERATIONS = 100000000;
|
||||
@ -40,12 +41,11 @@ int test_set8u_func(void)
|
||||
int failed = 0;
|
||||
int off;
|
||||
char testStr[256];
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
testStr[0] = '\0';
|
||||
|
||||
#ifdef _M_IX86_AMD64
|
||||
/* Test SSE under various alignments */
|
||||
if (pflags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
for (off=0; off<16; ++off)
|
||||
@ -101,8 +101,7 @@ int test_set8u_func(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
|
||||
TRUE, memset(dst, constant, size),
|
||||
FALSE, NULL, 0,
|
||||
FALSE, NULL, 0,
|
||||
FALSE, NULL, 0, FALSE,
|
||||
TRUE, ippsSet_8u(constant, dst, size));
|
||||
|
||||
int test_set8u_speed(void)
|
||||
@ -116,17 +115,15 @@ int test_set8u_speed(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_set32s_func(void)
|
||||
{
|
||||
primitives_t* prims = primitives_get();
|
||||
INT32 ALIGN(dest[512]);
|
||||
int failed = 0;
|
||||
int off;
|
||||
char testStr[256];
|
||||
UINT32 pflags = primitives_get_flags(prims);
|
||||
testStr[0] = '\0';
|
||||
|
||||
#ifdef _M_IX86_AMD64
|
||||
/* Test SSE under various alignments */
|
||||
if (pflags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
for (off=0; off<16; ++off) {
|
||||
@ -179,17 +176,15 @@ int test_set32s_func(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_set32u_func(void)
|
||||
{
|
||||
primitives_t* prims = primitives_get();
|
||||
UINT32 ALIGN(dest[512]);
|
||||
int failed = 0;
|
||||
int off;
|
||||
char testStr[256];
|
||||
UINT32 pflags = primitives_get_flags(prims);
|
||||
testStr[0] = '\0';
|
||||
|
||||
#ifdef _M_IX86_AMD64
|
||||
/* Test SSE under various alignments */
|
||||
if (pflags & PRIM_X86_SSE2_AVAILABLE)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
for (off=0; off<16; ++off) {
|
||||
@ -251,8 +246,7 @@ static inline void memset32u_naive(
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
|
||||
TRUE, memset32u_naive(constant, dst, size),
|
||||
TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse2_set_32u(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ipp_wrapper_set_32u(constant, dst, size));
|
||||
|
||||
int test_set32u_speed(void)
|
||||
@ -280,8 +274,7 @@ static inline void memset32s_naive(
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
|
||||
TRUE, memset32s_naive(constant, dst, size),
|
||||
TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse2_set_32s(constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsSet_32s(constant, dst, size));
|
||||
|
||||
int test_set32s_speed(void)
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
#define FUNC_TEST_SIZE 65536
|
||||
@ -55,12 +56,11 @@ int _name_(void) \
|
||||
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
|
||||
int failed = 0; \
|
||||
int i; \
|
||||
UINT32 pflags = primitives_get_flags(primitives_get()); \
|
||||
char testStr[256]; \
|
||||
testStr[0] = '\0'; \
|
||||
get_random_data(src, sizeof(src)); \
|
||||
_f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
|
||||
if (pflags & PRIM_X86_SSE3_AVAILABLE) \
|
||||
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \
|
||||
{ \
|
||||
strcat(testStr, " SSE3"); \
|
||||
/* Aligned */ \
|
||||
@ -109,23 +109,19 @@ SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
|
||||
/* ========================================================================= */
|
||||
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst=dst,
|
||||
TRUE, general_lShiftC_16s(src1, constant, dst, size),
|
||||
TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse2_lShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsLShiftC_16s(src1, constant, dst, size));
|
||||
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst=dst,
|
||||
TRUE, general_lShiftC_16u(src1, constant, dst, size),
|
||||
TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse2_lShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsLShiftC_16u(src1, constant, dst, size));
|
||||
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst=dst,
|
||||
TRUE, general_rShiftC_16s(src1, constant, dst, size),
|
||||
TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse2_rShiftC_16s(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsRShiftC_16s(src1, constant, dst, size));
|
||||
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst=dst,
|
||||
TRUE, general_rShiftC_16u(src1, constant, dst, size),
|
||||
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, sse2_rShiftC_16u(src1, constant, dst, size), PF_XMMI64_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
TRUE, ippsRShiftC_16u(src1, constant, dst, size));
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
static const int SIGN_PRETEST_ITERATIONS = 100000;
|
||||
@ -30,7 +31,6 @@ int test_sign16s_func(void)
|
||||
INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]);
|
||||
int failed = 0;
|
||||
int i;
|
||||
UINT32 pflags = primitives_get_flags(primitives_get());
|
||||
char testStr[256];
|
||||
|
||||
/* Test when we can reach 16-byte alignment */
|
||||
@ -38,7 +38,7 @@ int test_sign16s_func(void)
|
||||
get_random_data(src, sizeof(src));
|
||||
general_sign_16s(src+1, d1+1, 65535);
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
|
||||
{
|
||||
strcat(testStr, " SSSE3");
|
||||
ssse3_sign_16s(src+1, d2+1, 65535);
|
||||
@ -58,7 +58,7 @@ int test_sign16s_func(void)
|
||||
get_random_data(src, sizeof(src));
|
||||
general_sign_16s(src+1, d1+2, 65535);
|
||||
#ifdef _M_IX86_AMD64
|
||||
if (pflags & PRIM_X86_SSSE3_AVAILABLE)
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
|
||||
{
|
||||
ssse3_sign_16s(src+1, d2+2, 65535);
|
||||
for (i=2; i<65535; ++i)
|
||||
@ -79,8 +79,7 @@ int test_sign16s_func(void)
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst=dst,
|
||||
TRUE, general_sign_16s(src1, dst, size),
|
||||
TRUE, ssse3_sign_16s(src1, dst, size), PRIM_X86_SSSE3_AVAILABLE,
|
||||
FALSE, dst=dst, 0,
|
||||
TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
|
||||
FALSE, dst=dst);
|
||||
|
||||
int test_sign16s_speed(void)
|
||||
|
Loading…
Reference in New Issue
Block a user