/* test_set.c
 * vi:ts=4 sw=4
 *
 * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

/*
 * Functional and speed tests for the "set" (fill a buffer with one value)
 * routines declared below, in 8-bit unsigned, 32-bit signed and 32-bit
 * unsigned flavours.
 *
 * NOTE(review): this copy of the file is damaged.  In each of the three
 * test_set*_func() functions the text breaks off at "for (i=0; i" and
 * resumes at "0) ? FAILURE : SUCCESS; }".  It appears that everything from
 * a '<' up to a later '>' was stripped, which removed the inner loop
 * condition, the verification of the written values, the closing braces,
 * the #endif matching "#ifdef _M_IX86_AMD64" and the start of the return
 * statement.  The damaged spans are kept verbatim below; restore them from
 * a pristine copy before trying to build this file.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "prim_test.h"

/*
 * Passed as the last two arguments of the *_speed_test() calls below.
 * How they are interpreted is decided by STD_SPEED_TEST, which is not
 * defined in this file (presumably in prim_test.h -- confirm).
 * NOTE(review): TEST_TIME is presumably in seconds -- confirm.
 */
static const int MEMSET8_PRETEST_ITERATIONS = 100000000;
static const int MEMSET32_PRETEST_ITERATIONS = 40000000;
static const float TEST_TIME = 1.0;

/*
 * Routines under test; all are defined in other translation units.
 * Each takes the fill value, the destination pointer and a length
 * (presumably a count of elements, not bytes -- confirm).
 * The general_* variants are declared here but are not referenced in the
 * text of this file that survived.
 */
extern pstatus_t general_set_8u(BYTE val, BYTE *pDst, int len);
extern pstatus_t sse2_set_8u(BYTE val, BYTE *pDst, int len);
extern pstatus_t general_set_32s(INT32 val, INT32 *pDst, int len);
extern pstatus_t sse2_set_32s(INT32 val, INT32 *pDst, int len);
extern pstatus_t general_set_32u(UINT32 val, UINT32 *pDst, int len);
extern pstatus_t sse2_set_32u(UINT32 val, UINT32 *pDst, int len);
extern pstatus_t ipp_wrapper_set_32u(UINT32 val, UINT32 *pDst, int len);

/* Sizes handed to every *_speed_test() call in this file. */
static const int set_sizes[] = { 1, 4, 16, 32, 64, 256, 1024, 4096 };
/* Number of entries in set_sizes (its element type is int). */
#define NUM_SET_SIZES (sizeof(set_sizes)/sizeof(int))

/* ------------------------------------------------------------------------- */
/*
 * Functional test for sse2_set_8u().
 *
 * When the SSE2 flag is reported by primitives_get_flags(), every
 * combination of start offset 0..15 and length 1..(47 - off) is tried on a
 * zeroed 48-byte buffer, so off + len never exceeds the buffer.
 * The visible tail shows the function returning FAILURE or SUCCESS; the
 * check that decides between them is in the lost text.
 */
int test_set8u_func(void)
{
	BYTE ALIGN(dest[48]);
	int failed = 0;
	int off;
	char testStr[256];	/* collects the names of the variants exercised */
	UINT32 pflags = primitives_get_flags(primitives_get());
	testStr[0] = '\0';

#ifdef _M_IX86_AMD64
	/* Test SSE under various alignments */
	if (pflags & PRIM_X86_SSE2_AVAILABLE)
	{
		strcat(testStr, " SSE2");
		for (off=0; off<16; ++off)
		{
			int len;
			for (len=1; len<48-off; ++len)
			{
				int i;
				/* Start from a known-zero buffer for every case. */
				memset(dest, 0, sizeof(dest));
				sse2_set_8u(0xa5, dest+off, len);
				/* NOTE(review): text is missing between "i" and "0)" on
				 * the next line -- see the note at the top of the file.
				 * The line is kept exactly as found. */
				for (i=0; i 0) ? FAILURE : SUCCESS;
}

/* ------------------------------------------------------------------------- */
/*
 * Instantiates set8u_speed_test() through the STD_SPEED_TEST macro, which
 * is defined outside this file.
 * NOTE(review): the argument meanings are not visible here.  They look like
 * (name, source type, destination type, prep statement) followed by
 * enable-flag/expression groups, with memset() and ippsSet_8u() enabled
 * (TRUE) and the two middle groups disabled (FALSE) -- confirm against the
 * macro definition.
 */
STD_SPEED_TEST(set8u_speed_test, BYTE, BYTE, dst=dst,
	TRUE, memset(dst, constant, size),
	FALSE, NULL, 0,
	FALSE, NULL, 0,
	TRUE, ippsSet_8u(constant, dst, size));

/*
 * Runs set8u_speed_test() with fill value 0xA5 over all set_sizes, using a
 * local buffer of MAX_TEST_SIZE bytes declared through ALIGN().
 * Always returns SUCCESS: the call is a plain expression statement, so
 * nothing it might return is inspected.
 */
int test_set8u_speed(void)
{
	BYTE ALIGN(dst[MAX_TEST_SIZE]);
	set8u_speed_test("set8u", "aligned", NULL, NULL, 0xA5, dst,
		set_sizes, NUM_SET_SIZES, MEMSET8_PRETEST_ITERATIONS, TEST_TIME);
	return SUCCESS;
}

/* ------------------------------------------------------------------------- */
/*
 * Functional test for sse2_set_32s().
 *
 * Same shape as test_set8u_func(), on a 512-element INT32 buffer with
 * lengths 1..(511 - off).  "off" is added to an INT32 pointer, so it
 * counts elements rather than bytes.
 * NOTE(review): 0xdeadbeef does not fit in a signed 32-bit value, so the
 * conversion to the INT32 parameter is implementation-defined in C.
 */
int test_set32s_func(void)
{
	primitives_t* prims = primitives_get();
	INT32 ALIGN(dest[512]);
	int failed = 0;
	int off;
	char testStr[256];	/* collects the names of the variants exercised */
	UINT32 pflags = primitives_get_flags(prims);
	testStr[0] = '\0';

#ifdef _M_IX86_AMD64
	/* Test SSE under various alignments */
	if (pflags & PRIM_X86_SSE2_AVAILABLE)
	{
		strcat(testStr, " SSE2");
		for (off=0; off<16; ++off)
		{
			int len;
			for (len=1; len<512-off; ++len)
			{
				int i;
				/* Start from a known-zero buffer for every case. */
				memset(dest, 0, sizeof(dest));
				sse2_set_32s(0xdeadbeef, dest+off, len);
				/* NOTE(review): text is missing between "i" and "0)" on
				 * the next line -- see the note at the top of the file.
				 * The line is kept exactly as found. */
				for (i=0; i 0) ? FAILURE : SUCCESS;
}

/* ------------------------------------------------------------------------- */
/*
 * Functional test for sse2_set_32u().
 *
 * Identical in structure to test_set32s_func(), using UINT32 elements and
 * the fill value 0xdeadbeefU.
 */
int test_set32u_func(void)
{
	primitives_t* prims = primitives_get();
	UINT32 ALIGN(dest[512]);
	int failed = 0;
	int off;
	char testStr[256];	/* collects the names of the variants exercised */
	UINT32 pflags = primitives_get_flags(prims);
	testStr[0] = '\0';

#ifdef _M_IX86_AMD64
	/* Test SSE under various alignments */
	if (pflags & PRIM_X86_SSE2_AVAILABLE)
	{
		strcat(testStr, " SSE2");
		for (off=0; off<16; ++off)
		{
			int len;
			for (len=1; len<512-off; ++len)
			{
				int i;
				/* Start from a known-zero buffer for every case. */
				memset(dest, 0, sizeof(dest));
				sse2_set_32u(0xdeadbeefU, dest+off, len);
				/* NOTE(review): text is missing between "i" and "0)" on
				 * the next line -- see the note at the top of the file.
				 * The line is kept exactly as found. */
				for (i=0; i 0) ? FAILURE : SUCCESS;
}

/* ------------------------------------------------------------------------- */
/*
 * Plain C fill loop: stores val into count consecutive UINT32 elements
 * starting at dst.  Used as the first expression handed to STD_SPEED_TEST
 * for the 32u case below.
 */
static inline void memset32u_naive(
	UINT32 val,
	UINT32 *dst,
	size_t count)
{
	while (count--) *dst++ = val;
}

/* ------------------------------------------------------------------------- */
/*
 * Instantiates set32u_speed_test() through STD_SPEED_TEST (defined outside
 * this file).  The expressions supplied are memset32u_naive(),
 * sse2_set_32u() (accompanied by PRIM_X86_SSE2_AVAILABLE) and
 * ipp_wrapper_set_32u(); the group between the last two is marked FALSE.
 * NOTE(review): the exact role of each argument is not visible here --
 * confirm against the macro definition.
 */
STD_SPEED_TEST(set32u_speed_test, UINT32, UINT32, dst=dst,
	TRUE, memset32u_naive(constant, dst, size),
	TRUE, sse2_set_32u(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
	FALSE, dst=dst, 0,
	TRUE, ipp_wrapper_set_32u(constant, dst, size));

/*
 * Runs set32u_speed_test() with fill value 0xdeadbeef over all set_sizes.
 * The buffer has MAX_TEST_SIZE+1 elements; the extra element presumably
 * exists for the disabled dst+1 run below -- confirm.
 * Always returns SUCCESS.
 */
int test_set32u_speed(void)
{
	UINT32 ALIGN(dst[MAX_TEST_SIZE+1]);
	set32u_speed_test("set32u", "aligned", NULL, NULL, 0xdeadbeef, dst,
		set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
	/* NOTE(review): the disabled call passes 9 arguments where the active
	 * call above passes 10 (the fill constant is absent), so it would need
	 * fixing before being re-enabled. */
#if 0
	/* Not really necessary; should be almost as fast. */
	set32u_speed_test("set32u", "unaligned", NULL, NULL, dst+1,
		set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#endif
	return SUCCESS;
}

/* ------------------------------------------------------------------------- */
/*
 * Plain C fill loop: stores val into count consecutive INT32 elements
 * starting at dst.  Used as the first expression handed to STD_SPEED_TEST
 * for the 32s case below.
 */
static inline void memset32s_naive(
	INT32 val,
	INT32 *dst,
	size_t count)
{
	while (count--) *dst++ = val;
}

/* ------------------------------------------------------------------------- */
/*
 * Instantiates set32s_speed_test() through STD_SPEED_TEST (defined outside
 * this file).  The expressions supplied are memset32s_naive(),
 * sse2_set_32s() (accompanied by PRIM_X86_SSE2_AVAILABLE) and
 * ippsSet_32s(); the group between the last two is marked FALSE.
 * NOTE(review): the exact role of each argument is not visible here --
 * confirm against the macro definition.
 */
STD_SPEED_TEST(set32s_speed_test, INT32, INT32, dst=dst,
	TRUE, memset32s_naive(constant, dst, size),
	TRUE, sse2_set_32s(constant, dst, size), PRIM_X86_SSE2_AVAILABLE,
	FALSE, dst=dst, 0,
	TRUE, ippsSet_32s(constant, dst, size));

/*
 * Runs set32s_speed_test() with fill value 0xdeadbeef over all set_sizes.
 * Always returns SUCCESS.
 * NOTE(review): 0xdeadbeef exceeds the positive range of a signed 32-bit
 * value; if the macro-generated function takes the constant as INT32
 * (not visible here), the conversion is implementation-defined.
 */
int test_set32s_speed(void)
{
	INT32 ALIGN(dst[MAX_TEST_SIZE+1]);
	set32s_speed_test("set32s", "aligned", NULL, NULL, 0xdeadbeef, dst,
		set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
	/* NOTE(review): the disabled call passes 9 arguments where the active
	 * call above passes 10 (the fill constant is absent), so it would need
	 * fixing before being re-enabled. */
#if 0
	/* Not really necessary; should be almost as fast. */
	set32s_speed_test("set32s", "unaligned", NULL, NULL, dst+1,
		set_sizes, NUM_SET_SIZES, MEMSET32_PRETEST_ITERATIONS, TEST_TIME);
#endif
	return SUCCESS;
}