/* primitives.c * This code queries processor features and calls the init/deinit routines. * vi:ts=4 sw=4 * * Copyright 2011 Martin Fleisz * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. * Copyright 2019 David Fort * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing * permissions and limitations under the License. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include #include "prim_internal.h" #define TAG FREERDP_TAG("primitives") /* hints to know which kind of primitives to use */ static primitive_hints primitivesHints = PRIMITIVES_AUTODETECT; static BOOL primitives_init_optimized(primitives_t* prims); void primitives_set_hints(primitive_hints hints) { primitivesHints = hints; } primitive_hints primitives_get_hints(void) { return primitivesHints; } /* Singleton pointer used throughout the program when requested. */ static primitives_t pPrimitivesGeneric = { 0 }; static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT; #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) static primitives_t pPrimitivesCpu = { 0 }; static INIT_ONCE cpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT; #endif #if defined(WITH_OPENCL) static primitives_t pPrimitivesGpu = { 0 }; static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT; #endif static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT; static primitives_t pPrimitives = { 0 }; /* ------------------------------------------------------------------------- */ static BOOL primitives_init_generic(primitives_t* prims) { primitives_init_add(prims); primitives_init_andor(prims); primitives_init_alphaComp(prims); primitives_init_copy(prims); primitives_init_set(prims); primitives_init_shift(prims); primitives_init_sign(prims); primitives_init_colors(prims); primitives_init_YCoCg(prims); primitives_init_YUV(prims); prims->uninit = NULL; return TRUE; } static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PVOID* context) { WINPR_UNUSED(once); WINPR_UNUSED(param); WINPR_UNUSED(context); return primitives_init_generic(&pPrimitivesGeneric); } static BOOL primitives_init_optimized(primitives_t* prims) { primitives_init_generic(prims); #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) primitives_init_add_opt(prims); primitives_init_andor_opt(prims); primitives_init_alphaComp_opt(prims); primitives_init_copy_opt(prims); primitives_init_set_opt(prims); primitives_init_shift_opt(prims); primitives_init_sign_opt(prims); primitives_init_colors_opt(prims); primitives_init_YCoCg_opt(prims); primitives_init_YUV_opt(prims); prims->flags |= PRIM_FLAGS_HAVE_EXTCPU; #endif return TRUE; } typedef struct { BYTE* channels[3]; UINT32 steps[3]; prim_size_t roi; BYTE* outputBuffer; UINT32 outputStride; UINT32 testedFormat; } primitives_YUV_benchmark; static void primitives_YUV_benchmark_free(primitives_YUV_benchmark* bench) { int i; if (!bench) return; free(bench->outputBuffer); for (i = 0; i < 3; i++) free(bench->channels[i]); memset(bench, 0, sizeof(primitives_YUV_benchmark)); } static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_benchmark* ret) { int i; prim_size_t* roi; if (!ret) return NULL; memset(ret, 0, sizeof(primitives_YUV_benchmark)); roi = &ret->roi; roi->width = 1024; roi->height = 768; ret->outputStride = roi->width * 4; ret->testedFormat = PIXEL_FORMAT_BGRA32; ret->outputBuffer = calloc(ret->outputStride, roi->height); if (!ret->outputBuffer) goto fail; for (i = 0; i < 3; i++) { BYTE* buf = ret->channels[i] = calloc(roi->width, roi->height); if (!buf) goto fail; winpr_RAND(buf, roi->width * roi->height * 1ULL); ret->steps[i] = roi->width; } return ret; fail: primitives_YUV_benchmark_free(ret); return ret; } static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims, UINT64 runTime, UINT32* computations) { ULONGLONG dueDate; const BYTE* channels[3] = { 0 }; size_t i; pstatus_t status; *computations = 0; for (i = 0; i < 3; i++) channels[i] = bench->channels[i]; /* do a first dry run to initialize cache and such */ status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer, bench->outputStride, bench->testedFormat, &bench->roi); if (status != PRIMITIVES_SUCCESS) return FALSE; /* let's run the benchmark */ dueDate = GetTickCount64() + runTime; while (GetTickCount64() < dueDate) { pstatus_t status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer, bench->outputStride, bench->testedFormat, &bench->roi); if (status != PRIMITIVES_SUCCESS) return FALSE; *computations = *computations + 1; } return TRUE; } static BOOL primitives_autodetect_best(primitives_t* prims) { size_t x; BOOL ret = FALSE; UINT64 benchDuration = 150; /* 150 ms */ struct prim_benchmark { const char* name; primitives_t* prims; UINT32 flags; UINT32 count; }; struct prim_benchmark testcases[] = { { "generic", NULL, PRIMITIVES_PURE_SOFT, 0 }, #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) { "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 }, #endif #if defined(WITH_OPENCL) { "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 }, #endif }; const struct prim_benchmark* best = NULL; primitives_YUV_benchmark bench; primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench); if (!yuvBench) return FALSE; WLog_DBG(TAG, "primitives benchmark result:"); for (x = 0; x < ARRAYSIZE(testcases); x++) { struct prim_benchmark* cur = &testcases[x]; cur->prims = primitives_get_by_type(cur->flags); if (!cur->prims) { WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name); continue; } if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count)) { WLog_WARN(TAG, "error running %s YUV bench", cur->name); continue; } WLog_DBG(TAG, " * %s= %" PRIu32, cur->name, cur->count); if (!best || (best->count < cur->count)) best = cur; } if (!best) { WLog_ERR(TAG, "No primitives to test, aborting."); goto out; } /* finally compute the results */ *prims = *best->prims; WLog_INFO(TAG, "primitives autodetect, using %s", best->name); ret = TRUE; out: if (!ret) *prims = pPrimitivesGeneric; primitives_YUV_benchmark_free(yuvBench); return ret; } #if defined(WITH_OPENCL) static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID* context) { WINPR_UNUSED(once); WINPR_UNUSED(param); WINPR_UNUSED(context); if (!primitives_init_opencl(&pPrimitivesGpu)) return FALSE; return TRUE; } #endif #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID* context) { WINPR_UNUSED(once); WINPR_UNUSED(param); WINPR_UNUSED(context); if (!primitives_init_optimized(&pPrimitivesCpu)) return FALSE; return TRUE; } #endif static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID* context) { WINPR_UNUSED(once); WINPR_UNUSED(param); WINPR_UNUSED(context); return primitives_init(&pPrimitives, primitivesHints); } BOOL primitives_init(primitives_t* p, primitive_hints hints) { switch (hints) { case PRIMITIVES_AUTODETECT: return primitives_autodetect_best(p); case PRIMITIVES_PURE_SOFT: *p = pPrimitivesGeneric; return TRUE; case PRIMITIVES_ONLY_CPU: #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) *p = pPrimitivesCpu; return TRUE; #endif case PRIMITIVES_ONLY_GPU: #if defined(WITH_OPENCL) *p = pPrimitivesGpu; return TRUE; #endif default: WLog_ERR(TAG, "unknown hint %d", hints); return FALSE; } } void primitives_uninit(void) { #if defined(WITH_OPENCL) if (pPrimitivesGpu.uninit) pPrimitivesGpu.uninit(); #endif #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) if (pPrimitivesCpu.uninit) pPrimitivesCpu.uninit(); #endif if (pPrimitivesGeneric.uninit) pPrimitivesGeneric.uninit(); } /* ------------------------------------------------------------------------- */ static void setup(void) { InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL); #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL); #endif #if defined(WITH_OPENCL) InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL); #endif InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL); } primitives_t* primitives_get(void) { setup(); return &pPrimitives; } primitives_t* primitives_get_generic(void) { InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL); return &pPrimitivesGeneric; } primitives_t* primitives_get_by_type(DWORD type) { InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL); switch (type) { case PRIMITIVES_ONLY_GPU: #if defined(WITH_OPENCL) if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL)) return NULL; return &pPrimitivesGpu; #endif case PRIMITIVES_ONLY_CPU: #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL)) return NULL; return &pPrimitivesCpu; #endif case PRIMITIVES_PURE_SOFT: default: return &pPrimitivesGeneric; } } DWORD primitives_flags(primitives_t* p) { return p->flags; }