/*
 * Patch overview. Everything in this comment is free text placed ahead of the
 * first "diff --git" header; the patch content below it is unchanged.
 *
 * include/freerdp/primitives.h
 *   - primitives_uninit() is redeclared with an explicit (void) parameter
 *     list.
 *
 * libfreerdp/primitives/prim_YUV_opencl.c
 *   - opencl_YUV420ToRGB is renamed to opencl_YUVToRGB; its loop index changes
 *     from int to cl_uint and the (void*) casts on the clSetKernelArg value
 *     arguments are dropped.
 *   - primitives_get_opencl_context, primitives_uninit_opencl and
 *     primitives_init_opencl_context become static.
 *   - primitives_init_opencl is moved below opencl_YUV420ToRGB_8u_P3AC4R and
 *     replaces primitives_init_YUV_opencl. It now rejects a NULL prims, copies
 *     the PRIMITIVES_ONLY_CPU primitives into *prims, initializes the OpenCL
 *     context, and only then installs opencl_YUV420ToRGB_8u_P3AC4R, sets
 *     PRIM_FLAGS_HAVE_EXTGPU and registers primitives_uninit_opencl.
 *   - The default branch of opencl_YUV420ToRGB_8u_P3AC4R no longer retries
 *     with PRIMITIVES_PURE_SOFT when the PRIMITIVES_ONLY_CPU lookup is NULL.
 *
 * libfreerdp/primitives/prim_internal.h
 *   - The declarations of primitives_uninit_opencl and
 *     primitives_init_YUV_opencl are removed.
 *
 * libfreerdp/primitives/primitives.c
 *   - primitives_YUV_benchmark_run: the loop index becomes size_t and status
 *     is declared alongside the other locals instead of at first use.
 *   - primitives_autodetect_best: the per-backend #if blocks are replaced by a
 *     testcases[] table walked in one loop. Each entry is looked up with
 *     primitives_get_by_type, benchmarked, and logged; the entry with the
 *     highest count (the earliest one on a tie) is copied to *prims.
 *     NOTE(review): a failed lookup or a failed benchmark for any single
 *     entry jumps to out while ret is still FALSE, so one unavailable backend
 *     fails the whole autodetection -- confirm this is intended.
 *   - primitives_init_gpu_cb no longer calls primitives_init_optimized before
 *     primitives_init_opencl.
 *   - primitives_init_cpu_cb spells out the FALSE/TRUE returns instead of
 *     returning the result of primitives_init_optimized directly.
 *   - primitives_init: the #else branches are removed, so a case whose feature
 *     macro is undefined has no statements of its own and control continues
 *     into the following case label.
 *     NOTE(review): with neither macro defined, PRIMITIVES_ONLY_CPU appears to
 *     reach the "unknown hint" log in default -- verify against the full file.
 *   - primitives_uninit is defined with an explicit (void) parameter list.
 *   - primitives_get_by_type: the InitOnceExecuteOnce checks are inverted into
 *     early "return NULL" guards and the unconditional return NULL after each
 *     #endif is removed, so a case whose feature macro is undefined continues
 *     into the following case and can end at return &pPrimitivesGeneric.
 */
diff --git a/include/freerdp/primitives.h b/include/freerdp/primitives.h index 140c90bb2..73e911a60 100644 --- a/include/freerdp/primitives.h +++ b/include/freerdp/primitives.h @@ -209,7 +209,7 @@ extern "C" FREERDP_API primitives_t* primitives_get_generic(void); FREERDP_API DWORD primitives_flags(primitives_t* p); FREERDP_API BOOL primitives_init(primitives_t* p, primitive_hints hints); - FREERDP_API void primitives_uninit(); + FREERDP_API void primitives_uninit(void); #ifdef __cplusplus } diff --git a/libfreerdp/primitives/prim_YUV_opencl.c b/libfreerdp/primitives/prim_YUV_opencl.c index f35f9f980..3d87f5576 100644 --- a/libfreerdp/primitives/prim_YUV_opencl.c +++ b/libfreerdp/primitives/prim_YUV_opencl.c @@ -48,12 +48,12 @@ typedef struct static primitives_opencl_context* primitives_get_opencl_context(void); -static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3], - const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep, - const prim_size_t* roi) +static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* pSrc[3], + const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep, + const prim_size_t* roi) { cl_int ret; - int i; + cl_uint i; cl_mem objs[3] = { NULL, NULL, NULL }; cl_mem destObj; cl_kernel kernel; @@ -98,14 +98,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3], /* push source + stride arguments*/ for (i = 0; i < 3; i++) { - ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), (void*)&objs[i]); + ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), &objs[i]); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]); goto error_set_args; } - ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), (void*)&srcStep[i]); + ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), &srcStep[i]); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]); @@ -113,14 +113,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const 
BYTE* pSrc[3], } } - ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&destObj); + ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), &destObj); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg destObj"); goto error_set_args; } - ret = clSetKernelArg(kernel, 7, sizeof(cl_int), (void*)&dstStep); + ret = clSetKernelArg(kernel, 7, sizeof(cl_int), &dstStep); if (ret != CL_SUCCESS) { WLog_ERR(TAG, "unable to set arg dstStep"); @@ -168,12 +168,12 @@ error_objs: static primitives_opencl_context openclContext; -primitives_opencl_context* primitives_get_opencl_context(void) +static primitives_opencl_context* primitives_get_opencl_context(void) { return &openclContext; } -pstatus_t primitives_uninit_opencl(void) +static pstatus_t primitives_uninit_opencl(void) { if (!openclContext.support) return PRIMITIVES_SUCCESS; @@ -190,7 +190,7 @@ static const char* openclProgram = #include "primitives.cl" ; -BOOL primitives_init_opencl_context(primitives_opencl_context* cl) +static BOOL primitives_init_opencl_context(primitives_opencl_context* cl) { cl_platform_id* platform_ids = NULL; cl_uint ndevices, nplatforms, i; @@ -322,17 +322,6 @@ out_program_create: return FALSE; } -BOOL primitives_init_opencl(primitives_t* prims) -{ - if (!primitives_init_opencl_context(&openclContext)) - return FALSE; - - primitives_init_YUV_opencl(prims); - prims->flags |= PRIM_FLAGS_HAVE_EXTGPU; - prims->uninit = primitives_uninit_opencl; - return TRUE; -} - static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* roi) @@ -352,18 +341,27 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 default: { primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU); - if (!p) - p = primitives_get_by_type(PRIMITIVES_PURE_SOFT); if (!p) return -1; return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); } } - return opencl_YUV420ToRGB(kernel_name, 
pSrc, srcStep, pDst, dstStep, roi); + return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi); } -void primitives_init_YUV_opencl(primitives_t* prims) +BOOL primitives_init_opencl(primitives_t* prims) { + primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU); + if (!prims || !p) + return FALSE; + *prims = *p; + + if (!primitives_init_opencl_context(&openclContext)) + return FALSE; + prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R; + prims->flags |= PRIM_FLAGS_HAVE_EXTGPU; + prims->uninit = primitives_uninit_opencl; + return TRUE; } diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h index a13b5b1e1..f0ea9ce2f 100644 --- a/libfreerdp/primitives/prim_internal.h +++ b/libfreerdp/primitives/prim_internal.h @@ -209,9 +209,6 @@ FREERDP_LOCAL void primitives_init_YUV_opt(primitives_t* prims); #if defined(WITH_OPENCL) FREERDP_LOCAL BOOL primitives_init_opencl(primitives_t* prims); -FREERDP_LOCAL pstatus_t primitives_uninit_opencl(void); - -FREERDP_LOCAL void primitives_init_YUV_opencl(primitives_t* prims); #endif FREERDP_LOCAL primitives_t* primitives_get_by_type(DWORD type); diff --git a/libfreerdp/primitives/primitives.c b/libfreerdp/primitives/primitives.c index 96ffd38ce..f186aa0c1 100644 --- a/libfreerdp/primitives/primitives.c +++ b/libfreerdp/primitives/primitives.c @@ -173,7 +173,8 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primit { ULONGLONG dueDate; const BYTE* channels[3]; - int i; + size_t i; + pstatus_t status; *computations = 0; @@ -181,9 +182,8 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primit channels[i] = bench->channels[i]; /* do a first dry run to initialize cache and such */ - pstatus_t status = - prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer, - bench->outputStride, bench->testedFormat, &bench->roi); + status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer, + 
bench->outputStride, bench->testedFormat, &bench->roi); if (status != PRIMITIVES_SUCCESS) return FALSE; @@ -203,78 +203,64 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primit static BOOL primitives_autodetect_best(primitives_t* prims) { + size_t x; BOOL ret = FALSE; UINT64 benchDuration = 150; /* 150 ms */ - UINT32 genericCount = 0; - UINT32 bestCount; - primitives_t* genericPrims = primitives_get_generic(); + struct prim_benchmark + { + const char* name; + primitives_t* prims; + UINT32 flags; + UINT32 count; + }; + + struct prim_benchmark testcases[] = + { + { "generic", NULL, PRIMITIVES_PURE_SOFT, 0 }, #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) - primitives_t* optimizedPrims = primitives_get_by_type(PRIMITIVES_ONLY_CPU); - UINT32 optimizedCount = 0; + { "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 }, #endif #if defined(WITH_OPENCL) - primitives_t* openclPrims = primitives_get_by_type(PRIMITIVES_ONLY_GPU); - UINT32 openclCount = 0; + { "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 }, #endif - const char* primName = "generic"; + }; + const struct prim_benchmark* best = NULL; + primitives_YUV_benchmark bench; primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench); if (!yuvBench) return FALSE; - if (!primitives_YUV_benchmark_run(yuvBench, genericPrims, benchDuration, &genericCount)) - { - WLog_ERR(TAG, "error running generic YUV bench"); - goto out; - } - -#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) - if (!primitives_YUV_benchmark_run(yuvBench, optimizedPrims, benchDuration, &optimizedCount)) - { - WLog_ERR(TAG, "error running optimized YUV bench"); - goto out; - } -#endif - -#if defined(WITH_OPENCL) - if (!primitives_YUV_benchmark_run(yuvBench, openclPrims, benchDuration, &openclCount)) - { - WLog_ERR(TAG, "error running opencl YUV bench"); - goto out; - } -#endif - - /* finally compute the results */ - bestCount = genericCount; - *prims = *genericPrims; - -#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) - if (bestCount < 
optimizedCount) - { - bestCount = optimizedCount; - *prims = *optimizedPrims; - primName = "optimized"; - } -#endif - -#if defined(WITH_OPENCL) - if (bestCount < openclCount) - { - bestCount = openclCount; - *prims = *openclPrims; - primName = "openCL"; - } -#endif - WLog_DBG(TAG, "primitives benchmark result:"); - WLog_DBG(TAG, " * generic=%" PRIu32, genericCount); -#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) - WLog_DBG(TAG, " * optimized=%" PRIu32, optimizedCount); -#endif -#if defined(WITH_OPENCL) - WLog_DBG(TAG, " * openCL=%" PRIu32, openclCount); -#endif - WLog_INFO(TAG, "primitives autodetect, using %s", primName); + for (x = 0; x < ARRAYSIZE(testcases); x++) + { + struct prim_benchmark* cur = &testcases[x]; + cur->prims = primitives_get_by_type(cur->flags); + if (!cur->prims) + { + WLog_ERR(TAG, "Failed to initialize %s primitives", cur->name); + goto out; + } + if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count)) + { + WLog_ERR(TAG, "error running %s YUV bench", cur->name); + goto out; + } + + WLog_DBG(TAG, " * %s\t= %" PRIu32, cur->name, cur->count); + if (!best || (best->count < cur->count)) + best = cur; + } + + if (!best) + { + WLog_ERR(TAG, "No primitives to test, aborting."); + goto out; + } + /* finally compute the results */ + *prims = *best->prims; + + WLog_INFO(TAG, "primitives autodetect, using %s", best->name); ret = TRUE; out: primitives_YUV_benchmark_free(yuvBench); @@ -288,9 +274,6 @@ static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID* WINPR_UNUSED(param); WINPR_UNUSED(context); - if (!primitives_init_optimized(&pPrimitivesGpu)) - return FALSE; - if (!primitives_init_opencl(&pPrimitivesGpu)) return FALSE; @@ -305,8 +288,10 @@ static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID* WINPR_UNUSED(param); WINPR_UNUSED(context); + if (!primitives_init_optimized(&pPrimitivesCpu)) + return FALSE; - return primitives_init_optimized(&pPrimitivesCpu); + return TRUE; } 
#endif @@ -331,16 +316,12 @@ BOOL primitives_init(primitives_t* p, primitive_hints hints) case PRIMITIVES_ONLY_CPU: #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) *p = pPrimitivesCpu; -#else - *p = pPrimitivesGeneric; -#endif return TRUE; +#endif case PRIMITIVES_ONLY_GPU: #if defined(WITH_OPENCL) *p = pPrimitivesGpu; return TRUE; -#else - return FALSE; #endif default: WLog_ERR(TAG, "unknown hint %d", hints); @@ -348,7 +329,7 @@ BOOL primitives_init(primitives_t* p, primitive_hints hints) } } -void primitives_uninit() +void primitives_uninit(void) { #if defined(WITH_OPENCL) if (pPrimitivesGpu.uninit) @@ -395,18 +376,16 @@ primitives_t* primitives_get_by_type(DWORD type) { case PRIMITIVES_ONLY_GPU: #if defined(WITH_OPENCL) - if (InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL)) - return &pPrimitivesGpu; + if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL)) + return NULL; + return &pPrimitivesGpu; #endif - return NULL; - case PRIMITIVES_ONLY_CPU: #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) - if (InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL)) - return &pPrimitivesCpu; + if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL)) + return NULL; + return &pPrimitivesCpu; #endif - return NULL; - case PRIMITIVES_PURE_SOFT: default: return &pPrimitivesGeneric;