Fixed #7753: Step calculation in YUV decoder (#7758)

* Fixed #7753: Step calculation in YUV decoder

* Fixed rectangle_is_empty

* Added rectangle intersection check in YUV decoder

* Skip intersecting rectangles in YUV decoder

* Refactored YUV work object handling

* Allocate / free in yuv_context_new and yuv_context_free
* WINPR_ASSERT all function arguments

* Pass yuv_context_reset result from h264_context_reset

* Reset PTP_WORK buffer to NULL after use
This commit is contained in:
akallabeth 2022-03-29 13:55:52 +02:00 committed by GitHub
parent e68897a694
commit 1378c8e671
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 249 additions and 105 deletions

View File

@ -219,7 +219,8 @@ static PresentationContext* PresentationContext_new(VideoClientContext* video, B
WLog_ERR(TAG, "unable to create a h264 context");
goto fail;
}
h264_context_reset(ret->h264, width, height);
if (!h264_context_reset(ret->h264, width, height))
goto fail;
ret->currentSample = Stream_New(NULL, 4096);
if (!ret->currentSample)

View File

@ -52,7 +52,7 @@ extern "C"
BYTE* pYUVChromaData[3], const RECTANGLE_16* regionRects,
UINT32 numRegionRects);
FREERDP_API void yuv_context_reset(YUV_CONTEXT* context, UINT32 width, UINT32 height);
FREERDP_API BOOL yuv_context_reset(YUV_CONTEXT* context, UINT32 width, UINT32 height);
FREERDP_API YUV_CONTEXT* yuv_context_new(BOOL encoder, UINT32 ThreadingFlags);
FREERDP_API void yuv_context_free(YUV_CONTEXT* context);

View File

@ -643,9 +643,7 @@ BOOL h264_context_reset(H264_CONTEXT* h264, UINT32 width, UINT32 height)
h264->width = width;
h264->height = height;
yuv_context_reset(h264->yuv, width, height);
return TRUE;
return yuv_context_reset(h264->yuv, width, height);
}
H264_CONTEXT* h264_context_new(BOOL Compressor)

View File

@ -140,10 +140,10 @@ static RECTANGLE_16* region16_extents_noconst(REGION16* region)
BOOL rectangle_is_empty(const RECTANGLE_16* rect)
{
/* A rectangle with width = 0 or height = 0 should be regarded
/* A rectangle with width <= 0 or height <= 0 should be regarded
* as empty.
*/
return ((rect->left == rect->right) || (rect->top == rect->bottom)) ? TRUE : FALSE;
return ((rect->left >= rect->right) || (rect->top >= rect->bottom)) ? TRUE : FALSE;
}
BOOL region16_is_empty(const REGION16* region)

View File

@ -1,4 +1,5 @@
#include <winpr/sysinfo.h>
#include <winpr/assert.h>
#include <winpr/pool.h>
#include <freerdp/primitives.h>
@ -7,16 +8,7 @@
#define TAG FREERDP_TAG("codec")
struct S_YUV_CONTEXT
{
UINT32 width, height;
BOOL useThreads;
UINT32 nthreads;
UINT32 heightStep;
PTP_POOL threadPool;
TP_CALLBACK_ENVIRON ThreadPoolEnv;
};
#define TILE_SIZE 64
typedef struct
{
@ -55,6 +47,24 @@ typedef struct
UINT32 iStride[3];
} YUV_ENCODE_WORK_PARAM;
/*
 * State shared by the YUV encode/decode helpers in this file.
 *
 * The work_* arrays are (re)allocated by yuv_context_reset when useThreads is
 * set and are handed to the pool_* functions instead of per-call allocations.
 *
 * NOTE(review): the visible tail of yuv_context_free releases work_objects,
 * work_enc_params and work_dec_params but not work_combined_params - confirm
 * that array is freed somewhere, otherwise decoder contexts leak it.
 */
struct S_YUV_CONTEXT
{
/* Frame dimensions as last passed to yuv_context_reset. */
UINT32 width, height;
/* When FALSE the pool_* functions process all rectangles without work objects. */
BOOL useThreads;
/* Copied from the 'encoder' argument of yuv_context_new; pool_decode and
 * yuv444_context_decode refuse to run when it is set, pool_encode refuses
 * when it is not. */
BOOL encoder;
/* Initialised to 1 in yuv_context_new; used as divisor for heightStep. */
UINT32 nthreads;
/* height / nthreads, computed in yuv_context_reset; pool_encode uses it as
 * the per-work-item row increment and as a divisor. */
UINT32 heightStep;
/* Closed with CloseThreadpool in yuv_context_free when non-NULL. */
PTP_POOL threadPool;
/* Released with DestroyThreadpoolEnvironment in yuv_context_free. */
TP_CALLBACK_ENVIRON ThreadPoolEnv;

/* Number of entries in work_objects and the work_*_params arrays; set to 0
 * while yuv_context_reset reallocates and stays 0 if an allocation fails. */
UINT32 work_object_count;
/* Work handles, one per submitted item; free_objects resets used slots to NULL. */
PTP_WORK* work_objects;
/* Allocated by yuv_context_reset only when 'encoder' is set. */
YUV_ENCODE_WORK_PARAM* work_enc_params;
/* Allocated by yuv_context_reset only when 'encoder' is not set (pool_decode). */
YUV_PROCESS_WORK_PARAM* work_dec_params;
/* Allocated by yuv_context_reset only when 'encoder' is not set (pool_decode_rect). */
YUV_COMBINE_WORK_PARAM* work_combined_params;
};
static INLINE BOOL avc420_yuv_to_rgb(const BYTE* pYUVData[3], const UINT32 iStride[3],
const RECTANGLE_16* rect, UINT32 nDstStep, BYTE* pDstData,
DWORD DstFormat)
@ -62,6 +72,12 @@ static INLINE BOOL avc420_yuv_to_rgb(const BYTE* pYUVData[3], const UINT32 iStri
primitives_t* prims = primitives_get();
prim_size_t roi;
const BYTE* pYUVPoint[3];
WINPR_ASSERT(pYUVData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(rect);
WINPR_ASSERT(pDstData);
const INT32 width = rect->right - rect->left;
const INT32 height = rect->bottom - rect->top;
BYTE* pDstPoint = pDstData + rect->top * nDstStep + rect->left * GetBytesPerPixel(DstFormat);
@ -87,6 +103,12 @@ static INLINE BOOL avc444_yuv_to_rgb(const BYTE* pYUVData[3], const UINT32 iStri
primitives_t* prims = primitives_get();
prim_size_t roi;
const BYTE* pYUVPoint[3];
WINPR_ASSERT(pYUVData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(rect);
WINPR_ASSERT(pDstData);
const INT32 width = rect->right - rect->left;
const INT32 height = rect->bottom - rect->top;
BYTE* pDstPoint = pDstData + rect->top * nDstStep + rect->left * GetBytesPerPixel(DstFormat);
@ -111,6 +133,7 @@ static void CALLBACK yuv420_process_work_callback(PTP_CALLBACK_INSTANCE instance
YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context;
WINPR_UNUSED(instance);
WINPR_UNUSED(work);
WINPR_ASSERT(param);
if (!avc420_yuv_to_rgb(param->pYUVData, param->iStride, &param->rect, param->nDstStep,
param->dest, param->DstFormat))
@ -123,17 +146,55 @@ static void CALLBACK yuv444_process_work_callback(PTP_CALLBACK_INSTANCE instance
YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context;
WINPR_UNUSED(instance);
WINPR_UNUSED(work);
WINPR_ASSERT(param);
if (!avc444_yuv_to_rgb(param->pYUVData, param->iStride, &param->rect, param->nDstStep,
param->dest, param->DstFormat))
WLog_WARN(TAG, "avc444_yuv_to_rgb failed");
}
void yuv_context_reset(YUV_CONTEXT* context, UINT32 width, UINT32 height)
/**
 * Prepare a YUV context for frames of a new size.
 *
 * Stores the dimensions, derives the per-thread height step and, when the
 * context uses threads, replaces the work object and work parameter arrays
 * with zero-initialised ones sized for the new dimensions.
 *
 * @param context the context to reset, must not be NULL
 * @param width   new frame width
 * @param height  new frame height
 *
 * @return TRUE on success, FALSE if one of the arrays could not be allocated.
 *         After a failure work_object_count is 0, so the pool_* functions
 *         reject any threaded work until a later reset succeeds.
 */
BOOL yuv_context_reset(YUV_CONTEXT* context, UINT32 width, UINT32 height)
{
	WINPR_ASSERT(context);

	context->width = width;
	context->height = height;

	/* heightStep is used as a divisor by the threaded encoder, so it must never
	 * be 0. That happened whenever height was smaller than nthreads. */
	context->heightStep = (height / context->nthreads);
	if (context->heightStep == 0)
		context->heightStep = 1;

	if (context->useThreads)
	{
		/* NOTE(review): evaluated strictly left to right, i.e. the number of
		 * horizontal tiles is multiplied by (height + TILE_SIZE - 1) before the
		 * second division. Kept unchanged so the capacity stays what callers
		 * currently get. The factor 4 is presumably headroom for rectangles
		 * that are not aligned to the tile grid - TODO confirm. */
		const UINT32 count =
		    (width + TILE_SIZE - 1) / TILE_SIZE * (height + TILE_SIZE - 1) / TILE_SIZE * 4;

		/* Mark the arrays as unusable until every allocation below succeeded. */
		context->work_object_count = 0;

		if (context->encoder)
		{
			free(context->work_enc_params);
			context->work_enc_params = calloc(count, sizeof(YUV_ENCODE_WORK_PARAM));
			if (!context->work_enc_params)
				return FALSE;
		}
		else
		{
			free(context->work_dec_params);
			context->work_dec_params = calloc(count, sizeof(YUV_PROCESS_WORK_PARAM));
			if (!context->work_dec_params)
				return FALSE;

			free(context->work_combined_params);
			context->work_combined_params = calloc(count, sizeof(YUV_COMBINE_WORK_PARAM));
			if (!context->work_combined_params)
				return FALSE;
		}

		free(context->work_objects);
		context->work_objects = calloc(count, sizeof(PTP_WORK));
		if (!context->work_objects)
			return FALSE;

		context->work_object_count = count;
	}

	return TRUE;
}
YUV_CONTEXT* yuv_context_new(BOOL encoder, UINT32 ThreadingFlags)
@ -146,6 +207,7 @@ YUV_CONTEXT* yuv_context_new(BOOL encoder, UINT32 ThreadingFlags)
/** do it here to avoid a race condition between threads */
primitives_get();
ret->encoder = encoder;
ret->nthreads = 1;
if (!(ThreadingFlags & THREADING_FLAGS_DISABLE_THREADS))
{
@ -181,6 +243,9 @@ void yuv_context_free(YUV_CONTEXT* context)
if (context->threadPool)
CloseThreadpool(context->threadPool);
DestroyThreadpoolEnvironment(&context->ThreadPoolEnv);
free(context->work_objects);
free(context->work_enc_params);
free(context->work_dec_params);
}
free(context);
}
@ -193,6 +258,12 @@ static INLINE YUV_PROCESS_WORK_PARAM pool_decode_param(const RECTANGLE_16* rect,
{
YUV_PROCESS_WORK_PARAM current = { 0 };
WINPR_ASSERT(rect);
WINPR_ASSERT(context);
WINPR_ASSERT(pYUVData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(dest);
current.context = context;
current.DstFormat = DstFormat;
current.pYUVData[0] = pYUVData[0];
@ -207,32 +278,6 @@ static INLINE YUV_PROCESS_WORK_PARAM pool_decode_param(const RECTANGLE_16* rect,
return current;
}
static BOOL allocate_objects(PTP_WORK** work, void** params, size_t size, UINT32 count)
{
if (count == 0)
return FALSE;
count *= 2;
{
PTP_WORK* tmp;
PTP_WORK* cur = *work;
tmp = realloc(cur, sizeof(PTP_WORK*) * count);
if (!tmp)
return FALSE;
*work = tmp;
memset(tmp, 0, sizeof(PTP_WORK*) * count);
}
{
void* cur = *params;
void* tmp = realloc(cur, size * count);
if (!tmp)
return FALSE;
memset(tmp, 0, size * count);
*params = tmp;
}
return TRUE;
}
static BOOL submit_object(PTP_WORK* work_object, PTP_WORK_CALLBACK cb, const void* param,
YUV_CONTEXT* context)
{
@ -247,36 +292,68 @@ static BOOL submit_object(PTP_WORK* work_object, PTP_WORK_CALLBACK cb, const voi
return TRUE;
}
static void free_objects(PTP_WORK* work_objects, void* params, UINT32 waitCount)
static void free_objects(PTP_WORK* work_objects, UINT32 waitCount)
{
if (work_objects)
{
UINT32 i;
for (i = 0; i < waitCount; i++)
WINPR_ASSERT(work_objects || (waitCount == 0));
for (i = 0; i < waitCount; i++)
{
if (!work_objects[i])
continue;
WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE);
CloseThreadpoolWork(work_objects[i]);
PTP_WORK cur = work_objects[i];
work_objects[i] = NULL;
if (!cur)
continue;
WaitForThreadpoolWorkCallbacks(cur, FALSE);
CloseThreadpoolWork(cur);
}
}
static BOOL intersects(UINT32 pos, const RECTANGLE_16* regionRects, UINT32 numRegionRects)
{
UINT32 x;
WINPR_ASSERT(regionRects || (numRegionRects == 0));
for (x = pos + 1; x < numRegionRects; x++)
{
const RECTANGLE_16* what = &regionRects[pos];
const RECTANGLE_16* rect = &regionRects[x];
if (rectangles_intersects(what, rect))
{
WLog_WARN(TAG, "YUV decoder: intersecting rectangles, aborting");
return TRUE;
}
}
free(work_objects);
free(params);
return FALSE;
}
static BOOL pool_decode(YUV_CONTEXT* context, PTP_WORK_CALLBACK cb, const BYTE* pYUVData[3],
const UINT32 iStride[3], UINT32 yuvHeight, UINT32 DstFormat, BYTE* dest,
UINT32 nDstStep, const RECTANGLE_16* regionRects, UINT32 numRegionRects)
{
UINT32 steps;
BOOL rc = FALSE;
UINT32 x, y;
PTP_WORK* work_objects = NULL;
YUV_PROCESS_WORK_PARAM* params = NULL;
UINT32 waitCount = 0, nobjects;
UINT32 waitCount = 0;
primitives_t* prims = primitives_get();
WINPR_ASSERT(context);
WINPR_ASSERT(cb);
WINPR_ASSERT(pYUVData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(dest);
WINPR_ASSERT(regionRects || (numRegionRects == 0));
if (context->encoder)
{
WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting");
return FALSE;
}
if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
{
for (y = 0; y < numRegionRects; y++)
@ -290,48 +367,57 @@ static BOOL pool_decode(YUV_CONTEXT* context, PTP_WORK_CALLBACK cb, const BYTE*
}
/* case where we use threads */
steps = MAX((context->nthreads + numRegionRects / 2 + 1) / numRegionRects, 1);
nobjects = numRegionRects * steps;
if (!allocate_objects(&work_objects, (void**)&params, sizeof(YUV_PROCESS_WORK_PARAM), nobjects))
goto fail;
for (x = 0; x < numRegionRects; x++)
{
const RECTANGLE_16* rect = &regionRects[x];
const UINT32 height = rect->bottom - rect->top;
RECTANGLE_16 r = *rect;
const UINT32 heightStep = MAX((height + steps / 2 + 1) / steps, 1);
for (y = 0; y < steps; y++)
if (intersects(x, regionRects, numRegionRects))
continue;
while (r.left < r.right)
{
YUV_PROCESS_WORK_PARAM* cur = &params[waitCount];
RECTANGLE_16 r = *rect;
r.top += y * heightStep;
RECTANGLE_16 y = r;
y.right = MIN(r.right, r.left + TILE_SIZE);
/* If we have an odd bounding rectangle we might end up with < steps
* workers. Check we do not exceed the bounding rectangle. */
r.bottom = r.top + heightStep;
if (r.bottom > rect->bottom)
r.bottom = rect->bottom;
if (r.top >= rect->bottom)
continue;
if (r.bottom > yuvHeight)
r.bottom = yuvHeight;
*cur = pool_decode_param(&r, context, pYUVData, iStride, DstFormat, dest, nDstStep);
if (!submit_object(&work_objects[waitCount], cb, cur, context))
goto fail;
waitCount++;
while (y.top < y.bottom)
{
RECTANGLE_16 z = y;
YUV_PROCESS_WORK_PARAM* cur;
if (context->work_object_count <= waitCount)
{
WLog_ERR(TAG, "YUV decoder: invalid number of tiles, only support %" PRIu32,
context->work_object_count);
goto fail;
}
cur = &context->work_dec_params[waitCount];
z.bottom = MIN(z.bottom, z.top + TILE_SIZE);
if (rectangle_is_empty(&z))
continue;
*cur = pool_decode_param(&z, context, pYUVData, iStride, DstFormat, dest, nDstStep);
if (!submit_object(&context->work_objects[waitCount], cb, cur, context))
goto fail;
waitCount++;
y.top += TILE_SIZE;
}
r.left += TILE_SIZE;
}
}
rc = TRUE;
fail:
free_objects(work_objects, params, nobjects);
free_objects(context->work_objects, waitCount);
return rc;
}
static INLINE BOOL check_rect(const YUV_CONTEXT* yuv, const RECTANGLE_16* rect, UINT32 nDstWidth,
UINT32 nDstHeight)
{
WINPR_ASSERT(yuv);
WINPR_ASSERT(rect);
/* Check, if the output rectangle is valid in decoded h264 frame. */
if ((rect->right > yuv->width) || (rect->left > yuv->width))
return FALSE;
@ -354,8 +440,14 @@ static void CALLBACK yuv444_combine_work_callback(PTP_CALLBACK_INSTANCE instance
{
YUV_COMBINE_WORK_PARAM* param = (YUV_COMBINE_WORK_PARAM*)context;
primitives_t* prims = primitives_get();
WINPR_ASSERT(param);
YUV_CONTEXT* yuv = param->context;
WINPR_ASSERT(yuv);
const RECTANGLE_16* rect = &param->rect;
WINPR_ASSERT(rect);
const UINT32 alignedWidth = yuv->width + ((yuv->width % 16 != 0) ? 16 - yuv->width % 16 : 0);
const UINT32 alignedHeight =
yuv->height + ((yuv->height % 16 != 0) ? 16 - yuv->height % 16 : 0);
@ -377,6 +469,14 @@ static INLINE YUV_COMBINE_WORK_PARAM pool_decode_rect_param(
const UINT32 iStride[3], UINT32 yuvHeight, BYTE* pYUVDstData[3], const UINT32 iDstStride[3])
{
YUV_COMBINE_WORK_PARAM current = { 0 };
WINPR_ASSERT(rect);
WINPR_ASSERT(context);
WINPR_ASSERT(pYUVData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(pYUVDstData);
WINPR_ASSERT(iDstStride);
current.context = context;
current.pYUVData[0] = pYUVData[0];
current.pYUVData[1] = pYUVData[1];
@ -402,12 +502,17 @@ static BOOL pool_decode_rect(YUV_CONTEXT* context, BYTE type, const BYTE* pYUVDa
{
BOOL rc = FALSE;
UINT32 y;
PTP_WORK* work_objects = NULL;
YUV_COMBINE_WORK_PARAM* params = NULL;
UINT32 waitCount = 0;
PTP_WORK_CALLBACK cb = yuv444_combine_work_callback;
primitives_t* prims = primitives_get();
WINPR_ASSERT(context);
WINPR_ASSERT(pYUVData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(pYUVDstData);
WINPR_ASSERT(iDstStride);
WINPR_ASSERT(regionRects || (numRegionRects == 0));
if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
{
for (y = 0; y < numRegionRects; y++)
@ -421,23 +526,27 @@ static BOOL pool_decode_rect(YUV_CONTEXT* context, BYTE type, const BYTE* pYUVDa
}
/* case where we use threads */
if (!allocate_objects(&work_objects, (void**)&params, sizeof(YUV_COMBINE_WORK_PARAM),
numRegionRects))
goto fail;
for (waitCount = 0; waitCount < numRegionRects; waitCount++)
{
YUV_COMBINE_WORK_PARAM* current = &params[waitCount];
YUV_COMBINE_WORK_PARAM* current;
if (context->work_object_count <= waitCount)
{
WLog_ERR(TAG, "YUV rect decoder: invalid number of tiles, only support %" PRIu32,
context->work_object_count);
goto fail;
}
current = &context->work_combined_params[waitCount];
*current = pool_decode_rect_param(&regionRects[waitCount], context, type, pYUVData, iStride,
yuvHeight, pYUVDstData, iDstStride);
if (!submit_object(&work_objects[waitCount], cb, current, context))
if (!submit_object(&context->work_objects[waitCount], cb, current, context))
goto fail;
}
rc = TRUE;
fail:
free_objects(work_objects, params, waitCount);
free_objects(context->work_objects, waitCount);
return rc;
}
@ -448,6 +557,19 @@ BOOL yuv444_context_decode(YUV_CONTEXT* context, BYTE type, const BYTE* pYUVData
{
const BYTE* pYUVCDstData[3];
WINPR_ASSERT(context);
WINPR_ASSERT(pYUVData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(pYUVDstData);
WINPR_ASSERT(iDstStride);
WINPR_ASSERT(dest);
WINPR_ASSERT(regionRects || (numRegionRects == 0));
if (context->encoder)
{
WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting");
return FALSE;
}
if (!pool_decode_rect(context, type, pYUVData, iStride, yuvHeight, pYUVDstData, iDstStride,
regionRects, numRegionRects))
return FALSE;
@ -478,6 +600,7 @@ static void CALLBACK yuv420_encode_work_callback(PTP_CALLBACK_INSTANCE instance,
WINPR_UNUSED(instance);
WINPR_UNUSED(work);
WINPR_ASSERT(param);
roi.width = param->rect.right - param->rect.left;
roi.height = param->rect.bottom - param->rect.top;
@ -508,6 +631,7 @@ static void CALLBACK yuv444v1_encode_work_callback(PTP_CALLBACK_INSTANCE instanc
WINPR_UNUSED(instance);
WINPR_UNUSED(work);
WINPR_ASSERT(param);
roi.width = param->rect.right - param->rect.left;
roi.height = param->rect.bottom - param->rect.top;
@ -544,6 +668,7 @@ static void CALLBACK yuv444v2_encode_work_callback(PTP_CALLBACK_INSTANCE instanc
WINPR_UNUSED(instance);
WINPR_UNUSED(work);
WINPR_ASSERT(param);
roi.width = param->rect.right - param->rect.left;
roi.height = param->rect.bottom - param->rect.top;
@ -576,6 +701,12 @@ static INLINE YUV_ENCODE_WORK_PARAM pool_encode_fill(const RECTANGLE_16* rect, Y
{
YUV_ENCODE_WORK_PARAM current = { 0 };
WINPR_ASSERT(rect);
WINPR_ASSERT(context);
WINPR_ASSERT(pSrcData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(pYUVLumaData);
current.context = context;
current.pSrcData = pSrcData;
current.SrcFormat = SrcFormat;
@ -605,11 +736,22 @@ static BOOL pool_encode(YUV_CONTEXT* context, PTP_WORK_CALLBACK cb, const BYTE*
{
BOOL rc = FALSE;
primitives_t* prims = primitives_get();
UINT32 x, y, nobjects;
PTP_WORK* work_objects = NULL;
YUV_ENCODE_WORK_PARAM* params = NULL;
UINT32 x, y;
UINT32 waitCount = 0;
WINPR_ASSERT(context);
WINPR_ASSERT(cb);
WINPR_ASSERT(pSrcData);
WINPR_ASSERT(iStride);
WINPR_ASSERT(regionRects || (numRegionRects == 0));
if (!context->encoder)
{
WLog_ERR(TAG, "YUV context set up for decoding, can not encode with it, aborting");
return FALSE;
}
if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
{
for (x = 0; x < numRegionRects; x++)
@ -623,21 +765,15 @@ static BOOL pool_encode(YUV_CONTEXT* context, PTP_WORK_CALLBACK cb, const BYTE*
}
/* case where we use threads */
nobjects = (context->height + context->heightStep - 1) / context->heightStep;
for (x = 0; x < numRegionRects; x++)
{
const RECTANGLE_16* rect = &regionRects[x];
const UINT32 height = rect->bottom - rect->top;
const UINT32 steps = (height + context->heightStep / 2) / context->heightStep;
if (waitCount + steps >= nobjects)
nobjects *= 2;
waitCount += steps;
}
if (!allocate_objects(&work_objects, (void**)&params, sizeof(YUV_ENCODE_WORK_PARAM), nobjects))
goto fail;
for (x = 0; x < numRegionRects; x++)
{
const RECTANGLE_16* rect = &regionRects[x];
@ -647,11 +783,20 @@ static BOOL pool_encode(YUV_CONTEXT* context, PTP_WORK_CALLBACK cb, const BYTE*
for (y = 0; y < steps; y++)
{
RECTANGLE_16 r = *rect;
YUV_ENCODE_WORK_PARAM* current = &params[waitCount];
YUV_ENCODE_WORK_PARAM* current;
if (context->work_object_count <= waitCount)
{
WLog_ERR(TAG, "YUV encoder: invalid number of tiles, only support %" PRIu32,
context->work_object_count);
goto fail;
}
current = &context->work_enc_params[waitCount];
r.top += y * context->heightStep;
*current = pool_encode_fill(&r, context, pSrcData, nSrcStep, SrcFormat, iStride,
pYUVLumaData, pYUVChromaData);
if (!submit_object(&work_objects[waitCount], cb, current, context))
if (!submit_object(&context->work_objects[waitCount], cb, current, context))
goto fail;
waitCount++;
}
@ -659,7 +804,7 @@ static BOOL pool_encode(YUV_CONTEXT* context, PTP_WORK_CALLBACK cb, const BYTE*
rc = TRUE;
fail:
free_objects(work_objects, params, waitCount);
free_objects(context->work_objects, waitCount);
return rc;
}