Fixed initialization of opencl primitives and warnings.

This commit is contained in:
Armin Novak 2019-11-13 16:35:03 +01:00 committed by akallabeth
parent 6123920a2e
commit b3a3a6b9c2
4 changed files with 86 additions and 112 deletions

View File

@ -209,7 +209,7 @@ extern "C"
FREERDP_API primitives_t* primitives_get_generic(void); FREERDP_API primitives_t* primitives_get_generic(void);
FREERDP_API DWORD primitives_flags(primitives_t* p); FREERDP_API DWORD primitives_flags(primitives_t* p);
FREERDP_API BOOL primitives_init(primitives_t* p, primitive_hints hints); FREERDP_API BOOL primitives_init(primitives_t* p, primitive_hints hints);
FREERDP_API void primitives_uninit(); FREERDP_API void primitives_uninit(void);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -48,12 +48,12 @@ typedef struct
static primitives_opencl_context* primitives_get_opencl_context(void); static primitives_opencl_context* primitives_get_opencl_context(void);
static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3], static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* pSrc[3],
const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep, const UINT32 srcStep[3], BYTE* pDst, UINT32 dstStep,
const prim_size_t* roi) const prim_size_t* roi)
{ {
cl_int ret; cl_int ret;
int i; cl_uint i;
cl_mem objs[3] = { NULL, NULL, NULL }; cl_mem objs[3] = { NULL, NULL, NULL };
cl_mem destObj; cl_mem destObj;
cl_kernel kernel; cl_kernel kernel;
@ -98,14 +98,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
/* push source + stride arguments*/ /* push source + stride arguments*/
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), (void*)&objs[i]); ret = clSetKernelArg(kernel, i * 2, sizeof(cl_mem), &objs[i]);
if (ret != CL_SUCCESS) if (ret != CL_SUCCESS)
{ {
WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]); WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
goto error_set_args; goto error_set_args;
} }
ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), (void*)&srcStep[i]); ret = clSetKernelArg(kernel, i * 2 + 1, sizeof(cl_int), &srcStep[i]);
if (ret != CL_SUCCESS) if (ret != CL_SUCCESS)
{ {
WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]); WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
@ -113,14 +113,14 @@ static pstatus_t opencl_YUV420ToRGB(const char* kernelName, const BYTE* pSrc[3],
} }
} }
ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&destObj); ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), &destObj);
if (ret != CL_SUCCESS) if (ret != CL_SUCCESS)
{ {
WLog_ERR(TAG, "unable to set arg destObj"); WLog_ERR(TAG, "unable to set arg destObj");
goto error_set_args; goto error_set_args;
} }
ret = clSetKernelArg(kernel, 7, sizeof(cl_int), (void*)&dstStep); ret = clSetKernelArg(kernel, 7, sizeof(cl_int), &dstStep);
if (ret != CL_SUCCESS) if (ret != CL_SUCCESS)
{ {
WLog_ERR(TAG, "unable to set arg dstStep"); WLog_ERR(TAG, "unable to set arg dstStep");
@ -168,12 +168,12 @@ error_objs:
static primitives_opencl_context openclContext; static primitives_opencl_context openclContext;
primitives_opencl_context* primitives_get_opencl_context(void) static primitives_opencl_context* primitives_get_opencl_context(void)
{ {
return &openclContext; return &openclContext;
} }
pstatus_t primitives_uninit_opencl(void) static pstatus_t primitives_uninit_opencl(void)
{ {
if (!openclContext.support) if (!openclContext.support)
return PRIMITIVES_SUCCESS; return PRIMITIVES_SUCCESS;
@ -190,7 +190,7 @@ static const char* openclProgram =
#include "primitives.cl" #include "primitives.cl"
; ;
BOOL primitives_init_opencl_context(primitives_opencl_context* cl) static BOOL primitives_init_opencl_context(primitives_opencl_context* cl)
{ {
cl_platform_id* platform_ids = NULL; cl_platform_id* platform_ids = NULL;
cl_uint ndevices, nplatforms, i; cl_uint ndevices, nplatforms, i;
@ -322,17 +322,6 @@ out_program_create:
return FALSE; return FALSE;
} }
BOOL primitives_init_opencl(primitives_t* prims)
{
if (!primitives_init_opencl_context(&openclContext))
return FALSE;
primitives_init_YUV_opencl(prims);
prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
prims->uninit = primitives_uninit_opencl;
return TRUE;
}
static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 srcStep[3], static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32 srcStep[3],
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi) const prim_size_t* roi)
@ -352,18 +341,27 @@ static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* pSrc[3], const UINT32
default: default:
{ {
primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU); primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
if (!p)
p = primitives_get_by_type(PRIMITIVES_PURE_SOFT);
if (!p) if (!p)
return -1; return -1;
return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
} }
} }
return opencl_YUV420ToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi); return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
} }
void primitives_init_YUV_opencl(primitives_t* prims) BOOL primitives_init_opencl(primitives_t* prims)
{ {
primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
if (!prims || !p)
return FALSE;
*prims = *p;
if (!primitives_init_opencl_context(&openclContext))
return FALSE;
prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R; prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
prims->uninit = primitives_uninit_opencl;
return TRUE;
} }

View File

@ -209,9 +209,6 @@ FREERDP_LOCAL void primitives_init_YUV_opt(primitives_t* prims);
#if defined(WITH_OPENCL) #if defined(WITH_OPENCL)
FREERDP_LOCAL BOOL primitives_init_opencl(primitives_t* prims); FREERDP_LOCAL BOOL primitives_init_opencl(primitives_t* prims);
FREERDP_LOCAL pstatus_t primitives_uninit_opencl(void);
FREERDP_LOCAL void primitives_init_YUV_opencl(primitives_t* prims);
#endif #endif
FREERDP_LOCAL primitives_t* primitives_get_by_type(DWORD type); FREERDP_LOCAL primitives_t* primitives_get_by_type(DWORD type);

View File

@ -173,7 +173,8 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primit
{ {
ULONGLONG dueDate; ULONGLONG dueDate;
const BYTE* channels[3]; const BYTE* channels[3];
int i; size_t i;
pstatus_t status;
*computations = 0; *computations = 0;
@ -181,9 +182,8 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primit
channels[i] = bench->channels[i]; channels[i] = bench->channels[i];
/* do a first dry run to initialize cache and such */ /* do a first dry run to initialize cache and such */
pstatus_t status = status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer, bench->outputStride, bench->testedFormat, &bench->roi);
bench->outputStride, bench->testedFormat, &bench->roi);
if (status != PRIMITIVES_SUCCESS) if (status != PRIMITIVES_SUCCESS)
return FALSE; return FALSE;
@ -203,78 +203,64 @@ static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primit
static BOOL primitives_autodetect_best(primitives_t* prims) static BOOL primitives_autodetect_best(primitives_t* prims)
{ {
size_t x;
BOOL ret = FALSE; BOOL ret = FALSE;
UINT64 benchDuration = 150; /* 150 ms */ UINT64 benchDuration = 150; /* 150 ms */
UINT32 genericCount = 0; struct prim_benchmark
UINT32 bestCount; {
primitives_t* genericPrims = primitives_get_generic(); const char* name;
primitives_t* prims;
UINT32 flags;
UINT32 count;
};
struct prim_benchmark testcases[] =
{
{ "generic", NULL, PRIMITIVES_PURE_SOFT, 0 },
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
primitives_t* optimizedPrims = primitives_get_by_type(PRIMITIVES_ONLY_CPU); { "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 },
UINT32 optimizedCount = 0;
#endif #endif
#if defined(WITH_OPENCL) #if defined(WITH_OPENCL)
primitives_t* openclPrims = primitives_get_by_type(PRIMITIVES_ONLY_GPU); { "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 },
UINT32 openclCount = 0;
#endif #endif
const char* primName = "generic"; };
const struct prim_benchmark* best = NULL;
primitives_YUV_benchmark bench; primitives_YUV_benchmark bench;
primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench); primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench);
if (!yuvBench) if (!yuvBench)
return FALSE; return FALSE;
if (!primitives_YUV_benchmark_run(yuvBench, genericPrims, benchDuration, &genericCount))
{
WLog_ERR(TAG, "error running generic YUV bench");
goto out;
}
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
if (!primitives_YUV_benchmark_run(yuvBench, optimizedPrims, benchDuration, &optimizedCount))
{
WLog_ERR(TAG, "error running optimized YUV bench");
goto out;
}
#endif
#if defined(WITH_OPENCL)
if (!primitives_YUV_benchmark_run(yuvBench, openclPrims, benchDuration, &openclCount))
{
WLog_ERR(TAG, "error running opencl YUV bench");
goto out;
}
#endif
/* finally compute the results */
bestCount = genericCount;
*prims = *genericPrims;
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
if (bestCount < optimizedCount)
{
bestCount = optimizedCount;
*prims = *optimizedPrims;
primName = "optimized";
}
#endif
#if defined(WITH_OPENCL)
if (bestCount < openclCount)
{
bestCount = openclCount;
*prims = *openclPrims;
primName = "openCL";
}
#endif
WLog_DBG(TAG, "primitives benchmark result:"); WLog_DBG(TAG, "primitives benchmark result:");
WLog_DBG(TAG, " * generic=%" PRIu32, genericCount); for (x = 0; x < ARRAYSIZE(testcases); x++)
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) {
WLog_DBG(TAG, " * optimized=%" PRIu32, optimizedCount); struct prim_benchmark* cur = &testcases[x];
#endif cur->prims = primitives_get_by_type(cur->flags);
#if defined(WITH_OPENCL) if (!cur->prims)
WLog_DBG(TAG, " * openCL=%" PRIu32, openclCount); {
#endif WLog_ERR(TAG, "Failed to initialize %s primitives", cur->name);
WLog_INFO(TAG, "primitives autodetect, using %s", primName); goto out;
}
if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count))
{
WLog_ERR(TAG, "error running %s YUV bench", cur->name);
goto out;
}
WLog_DBG(TAG, " * %s\t= %" PRIu32, cur->name, cur->count);
if (!best || (best->count < cur->count))
best = cur;
}
if (!best)
{
WLog_ERR(TAG, "No primitives to test, aborting.");
goto out;
}
/* finally compute the results */
*prims = *best->prims;
WLog_INFO(TAG, "primitives autodetect, using %s", best->name);
ret = TRUE; ret = TRUE;
out: out:
primitives_YUV_benchmark_free(yuvBench); primitives_YUV_benchmark_free(yuvBench);
@ -288,9 +274,6 @@ static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID*
WINPR_UNUSED(param); WINPR_UNUSED(param);
WINPR_UNUSED(context); WINPR_UNUSED(context);
if (!primitives_init_optimized(&pPrimitivesGpu))
return FALSE;
if (!primitives_init_opencl(&pPrimitivesGpu)) if (!primitives_init_opencl(&pPrimitivesGpu))
return FALSE; return FALSE;
@ -305,8 +288,10 @@ static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID*
WINPR_UNUSED(param); WINPR_UNUSED(param);
WINPR_UNUSED(context); WINPR_UNUSED(context);
if (!primitives_init_optimized(&pPrimitivesCpu))
return FALSE;
return primitives_init_optimized(&pPrimitivesCpu); return TRUE;
} }
#endif #endif
@ -331,16 +316,12 @@ BOOL primitives_init(primitives_t* p, primitive_hints hints)
case PRIMITIVES_ONLY_CPU: case PRIMITIVES_ONLY_CPU:
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
*p = pPrimitivesCpu; *p = pPrimitivesCpu;
#else
*p = pPrimitivesGeneric;
#endif
return TRUE; return TRUE;
#endif
case PRIMITIVES_ONLY_GPU: case PRIMITIVES_ONLY_GPU:
#if defined(WITH_OPENCL) #if defined(WITH_OPENCL)
*p = pPrimitivesGpu; *p = pPrimitivesGpu;
return TRUE; return TRUE;
#else
return FALSE;
#endif #endif
default: default:
WLog_ERR(TAG, "unknown hint %d", hints); WLog_ERR(TAG, "unknown hint %d", hints);
@ -348,7 +329,7 @@ BOOL primitives_init(primitives_t* p, primitive_hints hints)
} }
} }
void primitives_uninit() void primitives_uninit(void)
{ {
#if defined(WITH_OPENCL) #if defined(WITH_OPENCL)
if (pPrimitivesGpu.uninit) if (pPrimitivesGpu.uninit)
@ -395,18 +376,16 @@ primitives_t* primitives_get_by_type(DWORD type)
{ {
case PRIMITIVES_ONLY_GPU: case PRIMITIVES_ONLY_GPU:
#if defined(WITH_OPENCL) #if defined(WITH_OPENCL)
if (InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL)) if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL))
return &pPrimitivesGpu; return NULL;
return &pPrimitivesGpu;
#endif #endif
return NULL;
case PRIMITIVES_ONLY_CPU: case PRIMITIVES_ONLY_CPU:
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
if (InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL)) if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL))
return &pPrimitivesCpu; return NULL;
return &pPrimitivesCpu;
#endif #endif
return NULL;
case PRIMITIVES_PURE_SOFT: case PRIMITIVES_PURE_SOFT:
default: default:
return &pPrimitivesGeneric; return &pPrimitivesGeneric;