66352741f2
When using primitives auto-detection, it was possible that no primitive implementation was chosen at all. Auto-detection now skips broken implementations and falls back to the generic primitives if no other implementation works.
401 lines
9.9 KiB
C
401 lines
9.9 KiB
C
/* primitives.c
|
|
* This code queries processor features and calls the init/deinit routines.
|
|
* vi:ts=4 sw=4
|
|
*
|
|
* Copyright 2011 Martin Fleisz <martin.fleisz@thincast.com>
|
|
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
|
|
* Copyright 2019 David Fort <contact@hardening-consulting.com>
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
* not use this file except in compliance with the License. You may obtain
|
|
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
* or implied. See the License for the specific language governing
|
|
* permissions and limitations under the License.
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <winpr/synch.h>
|
|
#include <winpr/sysinfo.h>
|
|
#include <winpr/crypto.h>
|
|
#include <freerdp/primitives.h>
|
|
|
|
#include "prim_internal.h"
|
|
|
|
#define TAG FREERDP_TAG("primitives")
|
|
|
|
/* hints to know which kind of primitives to use */
|
|
static primitive_hints primitivesHints = PRIMITIVES_AUTODETECT;
|
|
static BOOL primitives_init_optimized(primitives_t* prims);
|
|
|
|
/**
 * Store the hint that is later passed to primitives_init() when the shared
 * primitives instance is set up.
 *
 * @param hints the new hint value
 */
void primitives_set_hints(primitive_hints hints)
{
	primitivesHints = hints;
}
|
|
|
|
primitive_hints primitives_get_hints(void)
|
|
{
|
|
return primitivesHints;
|
|
}
|
|
|
|
/* Singleton pointer used throughout the program when requested. */
|
|
static primitives_t pPrimitivesGeneric = { 0 };
|
|
static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
|
|
|
|
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
|
static primitives_t pPrimitivesCpu = { 0 };
|
|
static INIT_ONCE cpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
|
|
|
|
#endif
|
|
#if defined(WITH_OPENCL)
|
|
static primitives_t pPrimitivesGpu = { 0 };
|
|
static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
|
|
|
|
#endif
|
|
|
|
static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
|
|
|
|
static primitives_t pPrimitives = { 0 };
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
/**
 * Run every per-category initializer on @p prims and clear its uninit hook.
 *
 * The calls are kept in their original order.
 *
 * @param prims the primitives table to fill
 * @return always TRUE
 */
static BOOL primitives_init_generic(primitives_t* prims)
{
	/* arithmetic / logic */
	primitives_init_add(prims);
	primitives_init_andor(prims);
	primitives_init_alphaComp(prims);

	/* memory helpers */
	primitives_init_copy(prims);
	primitives_init_set(prims);

	/* shift / sign */
	primitives_init_shift(prims);
	primitives_init_sign(prims);

	/* color conversion */
	primitives_init_colors(prims);
	primitives_init_YCoCg(prims);
	primitives_init_YUV(prims);

	/* no teardown hook is installed by this initializer */
	prims->uninit = NULL;
	return TRUE;
}
|
|
|
|
/**
 * InitOnceExecuteOnce() callback that fills the generic singleton.
 *
 * None of the callback arguments are used.
 *
 * @return the result of primitives_init_generic()
 */
static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PVOID* context)
{
	BOOL initialized;

	WINPR_UNUSED(once);
	WINPR_UNUSED(param);
	WINPR_UNUSED(context);

	initialized = primitives_init_generic(&pPrimitivesGeneric);
	return initialized;
}
|
|
|
|
/**
 * Initialize @p prims via primitives_init_generic() and, when built with
 * HAVE_CPU_OPTIMIZED_PRIMITIVES, additionally run the *_opt initializers and
 * set PRIM_FLAGS_HAVE_EXTCPU in the table's flags.
 *
 * The *_opt initializers presumably replace individual entries with
 * CPU-optimized variants; their bodies are not part of this file.
 *
 * @param prims the primitives table to fill
 * @return always TRUE
 */
static BOOL primitives_init_optimized(primitives_t* prims)
{
	/* start from the generic table (its result is always TRUE and not checked) */
	primitives_init_generic(prims);

#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
	/* arithmetic / logic */
	primitives_init_add_opt(prims);
	primitives_init_andor_opt(prims);
	primitives_init_alphaComp_opt(prims);

	/* memory helpers */
	primitives_init_copy_opt(prims);
	primitives_init_set_opt(prims);

	/* shift / sign */
	primitives_init_shift_opt(prims);
	primitives_init_sign_opt(prims);

	/* color conversion */
	primitives_init_colors_opt(prims);
	primitives_init_YCoCg_opt(prims);
	primitives_init_YUV_opt(prims);

	prims->flags |= PRIM_FLAGS_HAVE_EXTCPU;
#endif

	return TRUE;
}
|
|
|
|
typedef struct
|
|
{
|
|
BYTE* channels[3];
|
|
UINT32 steps[3];
|
|
prim_size_t roi;
|
|
BYTE* outputBuffer;
|
|
UINT32 outputStride;
|
|
UINT32 testedFormat;
|
|
} primitives_YUV_benchmark;
|
|
|
|
/**
 * Release all buffers owned by a benchmark context and zero the structure.
 *
 * The context itself is not freed. Passing NULL is a no-op.
 *
 * @param bench the context to clear, may be NULL
 */
static void primitives_YUV_benchmark_free(primitives_YUV_benchmark* bench)
{
	size_t plane;

	if (bench == NULL)
		return;

	free(bench->outputBuffer);

	for (plane = 0; plane < 3; plane++)
		free(bench->channels[plane]);

	/* leave no dangling pointers behind */
	memset(bench, 0, sizeof(*bench));
}
|
|
|
|
/**
 * Prepare a benchmark context: a 1024x768 region, a BGRA32 output buffer and
 * three input planes filled with random bytes.
 *
 * @param ret caller-provided storage for the context, may be NULL
 * @return @p ret on success. NULL if @p ret is NULL or an allocation failed;
 *         in the latter case all partially allocated buffers have been
 *         released and @p ret has been zeroed.
 */
static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_benchmark* ret)
{
	size_t i;
	size_t planeSize;
	prim_size_t* roi;

	if (!ret)
		return NULL;

	memset(ret, 0, sizeof(*ret));
	roi = &ret->roi;
	roi->width = 1024;
	roi->height = 768;
	ret->outputStride = roi->width * 4; /* 4 bytes per output pixel */
	ret->testedFormat = PIXEL_FORMAT_BGRA32;

	ret->outputBuffer = malloc((size_t)ret->outputStride * roi->height);
	if (!ret->outputBuffer)
		goto fail;

	/* every input plane is allocated at full region size */
	planeSize = (size_t)roi->width * roi->height;

	for (i = 0; i < 3; i++)
	{
		BYTE* buf = ret->channels[i] = malloc(planeSize);
		if (!buf)
			goto fail;

		winpr_RAND(buf, planeSize);
		ret->steps[i] = roi->width;
	}

	return ret;

fail:
	primitives_YUV_benchmark_free(ret);
	/* Report the failure: returning the cleared context would make the
	 * caller's NULL check pass and benchmark with no buffers at all. */
	return NULL;
}
|
|
|
|
static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims,
|
|
UINT64 runTime, UINT32* computations)
|
|
{
|
|
ULONGLONG dueDate;
|
|
const BYTE* channels[3];
|
|
size_t i;
|
|
pstatus_t status;
|
|
|
|
*computations = 0;
|
|
|
|
for (i = 0; i < 3; i++)
|
|
channels[i] = bench->channels[i];
|
|
|
|
/* do a first dry run to initialize cache and such */
|
|
status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
|
|
bench->outputStride, bench->testedFormat, &bench->roi);
|
|
if (status != PRIMITIVES_SUCCESS)
|
|
return FALSE;
|
|
|
|
/* let's run the benchmark */
|
|
dueDate = GetTickCount64() + runTime;
|
|
while (GetTickCount64() < dueDate)
|
|
{
|
|
pstatus_t status =
|
|
prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
|
|
bench->outputStride, bench->testedFormat, &bench->roi);
|
|
if (status != PRIMITIVES_SUCCESS)
|
|
return FALSE;
|
|
*computations = *computations + 1;
|
|
}
|
|
return TRUE;
|
|
}
|
|
|
|
/**
 * Benchmark every available primitives implementation and copy the fastest
 * one into @p prims.
 *
 * Implementations that fail to initialize or fail the benchmark are skipped.
 * On every failure path (benchmark setup failed, or no implementation
 * completed the benchmark) @p prims is filled with the generic primitives.
 *
 * @param prims receives the selected primitives table
 * @return TRUE if a benchmarked implementation was selected, FALSE if the
 *         generic fallback had to be used
 */
static BOOL primitives_autodetect_best(primitives_t* prims)
{
	size_t x;
	BOOL ret = FALSE;
	UINT64 benchDuration = 150; /* 150 ms */
	struct prim_benchmark
	{
		const char* name;    /* name used in log output */
		primitives_t* prims; /* resolved via primitives_get_by_type() */
		UINT32 flags;        /* type passed to primitives_get_by_type() */
		UINT32 count;        /* conversions completed during the benchmark */
	};

	struct prim_benchmark testcases[] =
	{
		{ "generic", NULL, PRIMITIVES_PURE_SOFT, 0 },
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
		{ "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 },
#endif
#if defined(WITH_OPENCL)
		{ "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 },
#endif
	};
	const struct prim_benchmark* best = NULL;

	primitives_YUV_benchmark bench;
	primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench);

	/* A context without an output buffer is unusable as well; take the
	 * fallback path instead of leaving *prims untouched. */
	if (!yuvBench || !yuvBench->outputBuffer)
	{
		WLog_ERR(TAG, "Failed to set up YUV benchmark, aborting.");
		goto out;
	}

	WLog_DBG(TAG, "primitives benchmark result:");
	for (x = 0; x < ARRAYSIZE(testcases); x++)
	{
		struct prim_benchmark* cur = &testcases[x];
		cur->prims = primitives_get_by_type(cur->flags);
		if (!cur->prims)
		{
			WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name);
			continue;
		}
		if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count))
		{
			WLog_WARN(TAG, "error running %s YUV bench", cur->name);
			continue;
		}

		WLog_DBG(TAG, " * %s= %" PRIu32, cur->name, cur->count);
		/* strict comparison: on a tie the earlier test case wins */
		if (!best || (best->count < cur->count))
			best = cur;
	}

	if (!best)
	{
		WLog_ERR(TAG, "No primitives to test, aborting.");
		goto out;
	}
	/* finally compute the results */
	*prims = *best->prims;

	WLog_INFO(TAG, "primitives autodetect, using %s", best->name);
	ret = TRUE;
out:
	if (!ret)
	{
		/* primitives_get_by_type() runs the generic initializer before
		 * returning the generic table, so the copy below is never taken
		 * from an unprepared singleton. */
		const primitives_t* generic = primitives_get_by_type(PRIMITIVES_PURE_SOFT);
		*prims = *generic;
	}
	primitives_YUV_benchmark_free(yuvBench);
	return ret;
}
|
|
|
|
#if defined(WITH_OPENCL)
/**
 * InitOnceExecuteOnce() callback that fills the GPU singleton via
 * primitives_init_opencl().
 *
 * None of the callback arguments are used.
 *
 * @return TRUE if primitives_init_opencl() succeeded, FALSE otherwise
 */
static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
{
	WINPR_UNUSED(once);
	WINPR_UNUSED(param);
	WINPR_UNUSED(context);

	return primitives_init_opencl(&pPrimitivesGpu) ? TRUE : FALSE;
}
#endif
|
|
|
|
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
/**
 * InitOnceExecuteOnce() callback that fills the CPU singleton via
 * primitives_init_optimized().
 *
 * None of the callback arguments are used.
 *
 * @return TRUE if primitives_init_optimized() succeeded, FALSE otherwise
 */
static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
{
	WINPR_UNUSED(once);
	WINPR_UNUSED(param);
	WINPR_UNUSED(context);

	return primitives_init_optimized(&pPrimitivesCpu) ? TRUE : FALSE;
}
#endif
|
|
|
|
/**
 * InitOnceExecuteOnce() callback that initializes the shared primitives
 * instance according to the currently stored hint.
 *
 * None of the callback arguments are used.
 *
 * @return the result of primitives_init()
 */
static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID* context)
{
	BOOL initialized;

	WINPR_UNUSED(once);
	WINPR_UNUSED(param);
	WINPR_UNUSED(context);

	initialized = primitives_init(&pPrimitives, primitivesHints);
	return initialized;
}
|
|
|
|
/**
 * Fill @p p with the primitives implementation selected by @p hints.
 *
 * The requested singleton is initialized on demand (the same way
 * primitives_get_by_type() does it) before it is copied, so a table whose
 * initialization never ran or failed is not handed out as a success.
 *
 * @param p     destination table, must not be NULL
 * @param hints PRIMITIVES_AUTODETECT, PRIMITIVES_PURE_SOFT,
 *              PRIMITIVES_ONLY_CPU or PRIMITIVES_ONLY_GPU
 * @return TRUE if @p p was filled with the requested implementation. FALSE if
 *         @p p is NULL, the implementation failed to initialize, or the hint
 *         is unknown or not available in this build. For
 *         PRIMITIVES_AUTODETECT the result of primitives_autodetect_best().
 */
BOOL primitives_init(primitives_t* p, primitive_hints hints)
{
	if (!p)
		return FALSE;

	switch (hints)
	{
		case PRIMITIVES_AUTODETECT:
			return primitives_autodetect_best(p);

		case PRIMITIVES_PURE_SOFT:
			if (!InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb,
			                         NULL, NULL))
				return FALSE;
			*p = pPrimitivesGeneric;
			return TRUE;

		case PRIMITIVES_ONLY_CPU:
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
			if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL,
			                         NULL))
				return FALSE;
			*p = pPrimitivesCpu;
			return TRUE;
#endif
			/* NOTE(review): without CPU-optimized support this case falls
			 * through to the GPU case, exactly as before - confirm whether
			 * that is intended. */
			/* fallthrough */

		case PRIMITIVES_ONLY_GPU:
#if defined(WITH_OPENCL)
			if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL,
			                         NULL))
				return FALSE;
			*p = pPrimitivesGpu;
			return TRUE;
#endif
			/* not compiled in: reported like an unknown hint */
			/* fallthrough */

		default:
			WLog_ERR(TAG, "unknown hint %d", hints);
			return FALSE;
	}
}
|
|
|
|
void primitives_uninit(void)
|
|
{
|
|
#if defined(WITH_OPENCL)
|
|
if (pPrimitivesGpu.uninit)
|
|
pPrimitivesGpu.uninit();
|
|
#endif
|
|
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
|
if (pPrimitivesCpu.uninit)
|
|
pPrimitivesCpu.uninit();
|
|
#endif
|
|
if (pPrimitivesGeneric.uninit)
|
|
pPrimitivesGeneric.uninit();
|
|
}
|
|
|
|
/* ------------------------------------------------------------------------- */
|
|
static void setup(void)
|
|
{
|
|
InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
|
|
#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
|
|
InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
|
|
#endif
|
|
#if defined(WITH_OPENCL)
|
|
InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL);
|
|
#endif
|
|
InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL);
|
|
}
|
|
|
|
primitives_t* primitives_get(void)
|
|
{
|
|
setup();
|
|
return &pPrimitives;
|
|
}
|
|
|
|
primitives_t* primitives_get_generic(void)
|
|
{
|
|
InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
|
|
return &pPrimitivesGeneric;
|
|
}
|
|
|
|
/**
 * Get the singleton for a specific implementation type, initializing it on
 * first use.
 *
 * A type whose implementation is not compiled in degrades to the next one in
 * the chain GPU -> CPU -> generic. Any other value yields the generic table.
 *
 * @param type PRIMITIVES_ONLY_GPU, PRIMITIVES_ONLY_CPU or PRIMITIVES_PURE_SOFT
 * @return the matching singleton, or NULL if a compiled-in GPU/CPU
 *         implementation failed to initialize
 */
primitives_t* primitives_get_by_type(DWORD type)
{
	/* the generic table is always prepared; the result is not evaluated */
	InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);

#if defined(WITH_OPENCL)
	if (type == PRIMITIVES_ONLY_GPU)
	{
		const BOOL ready =
		    InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL);
		return ready ? &pPrimitivesGpu : NULL;
	}
#endif

#if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
	/* a GPU request only reaches this point when OpenCL is not compiled in */
	if ((type == PRIMITIVES_ONLY_GPU) || (type == PRIMITIVES_ONLY_CPU))
	{
		const BOOL ready =
		    InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
		return ready ? &pPrimitivesCpu : NULL;
	}
#endif

	/* PRIMITIVES_PURE_SOFT and everything else */
	return &pPrimitivesGeneric;
}
|
|
|
|
/**
 * Read the flags field of a primitives table.
 *
 * @param p the table to query; dereferenced without a NULL check
 * @return the value of p->flags
 */
DWORD primitives_flags(primitives_t* p)
{
	const DWORD flags = p->flags;

	return flags;
}
|