Added SSE optimized RGB to AVC444 frame split and test.
This commit is contained in:
parent
ce10e22b86
commit
8f7bc7990f
@ -203,11 +203,6 @@ typedef pstatus_t (*__RGBToAVC444YUV_t)(
|
||||
BYTE* pMainDst[3], const UINT32 dstMainStep[3],
|
||||
BYTE* pAuxDst[3], const UINT32 dstAuxStep[3],
|
||||
const prim_size_t* roi);
|
||||
typedef pstatus_t (*__RGBToAVC444YUVv2_t)(
|
||||
const BYTE* pSrc, UINT32 srcFormat, UINT32 srcStep,
|
||||
BYTE* pMainDst[3], const UINT32 dstMainStep[3],
|
||||
BYTE* pAuxDst[3], const UINT32 dstAuxStep[3],
|
||||
const prim_size_t* roi);
|
||||
typedef pstatus_t (*__andC_32u_t)(
|
||||
const UINT32* pSrc,
|
||||
UINT32 val,
|
||||
@ -259,7 +254,7 @@ typedef struct
|
||||
__YUV444SplitToYUV420_t YUV444SplitToYUV420;
|
||||
__YUV444ToRGB_8u_P3AC4R_t YUV444ToRGB_8u_P3AC4R;
|
||||
__RGBToAVC444YUV_t RGBToAVC444YUV;
|
||||
__RGBToAVC444YUVv2_t RGBToAVC444YUVv2;
|
||||
__RGBToAVC444YUV_t RGBToAVC444YUVv2;
|
||||
} primitives_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -1294,9 +1294,9 @@ static INLINE pstatus_t general_RGBToAVC444YUVv2_ANY(
|
||||
|
||||
for (x = 0; x < roi->width; x++)
|
||||
{
|
||||
const UINT32 color = ReadColor(src, srcFormat);
|
||||
const UINT32 colorA = ReadColor(src, srcFormat);
|
||||
BYTE r, g, b;
|
||||
SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
|
||||
SplitColor(colorA, srcFormat, &r, &g, &b, NULL, NULL);
|
||||
src += 4;
|
||||
|
||||
if (b1)
|
||||
@ -1325,13 +1325,28 @@ static INLINE pstatus_t general_RGBToAVC444YUVv2_ANY(
|
||||
|
||||
for (x = 0; x < roi->width / 2; x++)
|
||||
{
|
||||
const UINT32 color = ReadColor(src, srcFormat);
|
||||
const UINT32 colorA = ReadColor(src, srcFormat);
|
||||
const UINT32 colorB = ReadColor(src + 4, srcFormat);
|
||||
const UINT32 colorC = ReadColor(src + srcStep, srcFormat);
|
||||
const UINT32 colorD = ReadColor(src + 4 + srcStep, srcFormat);
|
||||
BYTE r, g, b;
|
||||
SplitColor(color, srcFormat, &r, &g, &b, NULL, NULL);
|
||||
SplitColor(colorA, srcFormat, &r, &g, &b, NULL, NULL);
|
||||
src += 8;
|
||||
{
|
||||
const BYTE u = RGB2U(r, g, b);
|
||||
const BYTE v = RGB2V(r, g, b);
|
||||
BYTE rr, gg, bb;
|
||||
UINT16 u = RGB2U(r, g, b);
|
||||
UINT16 v = RGB2V(r, g, b);
|
||||
SplitColor(colorB, srcFormat, &rr, &gg, &bb, NULL, NULL);
|
||||
u += RGB2U(rr, gg, bb);
|
||||
v += RGB2V(rr, gg, bb);
|
||||
SplitColor(colorC, srcFormat, &rr, &gg, &bb, NULL, NULL);
|
||||
u += RGB2U(rr, gg, bb);
|
||||
v += RGB2V(rr, gg, bb);
|
||||
SplitColor(colorD, srcFormat, &rr, &gg, &bb, NULL, NULL);
|
||||
u += RGB2U(rr, gg, bb);
|
||||
v += RGB2V(rr, gg, bb);
|
||||
u /= 4;
|
||||
v /= 4;
|
||||
*b2++ = u;
|
||||
*b3++ = v;
|
||||
}
|
||||
@ -1426,14 +1441,33 @@ static INLINE pstatus_t general_RGBToAVC444YUVv2_BGRX(
|
||||
const BYTE b = *src++;
|
||||
const BYTE g = *src++;
|
||||
const BYTE r = *src++;
|
||||
src++;
|
||||
const BYTE a = *src++;
|
||||
const BYTE* srcB = src;
|
||||
const BYTE* srcC = src - 4 + srcStep;
|
||||
const BYTE* srcD = src + srcStep;
|
||||
BYTE rr, gg, bb;
|
||||
UINT16 u = RGB2U(r, g, b);
|
||||
UINT16 v = RGB2V(r, g, b);
|
||||
bb = *srcB++;
|
||||
gg = *srcB++;
|
||||
rr = *srcB++;
|
||||
u += RGB2U(rr, gg, bb);
|
||||
v += RGB2V(rr, gg, bb);
|
||||
bb = *srcC++;
|
||||
gg = *srcC++;
|
||||
rr = *srcC++;
|
||||
u += RGB2U(rr, gg, bb);
|
||||
v += RGB2V(rr, gg, bb);
|
||||
bb = *srcD++;
|
||||
gg = *srcD++;
|
||||
rr = *srcD++;
|
||||
u += RGB2U(rr, gg, bb);
|
||||
v += RGB2V(rr, gg, bb);
|
||||
u /= 4;
|
||||
v /= 4;
|
||||
src += 4;
|
||||
{
|
||||
const BYTE u = RGB2U(r, g, b);
|
||||
const BYTE v = RGB2V(r, g, b);
|
||||
*b2++ = u;
|
||||
*b3++ = v;
|
||||
}
|
||||
*b2++ = u;
|
||||
*b3++ = v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3,6 +3,8 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "prim_test.h"
|
||||
|
||||
#include <winpr/wlog.h>
|
||||
@ -174,7 +176,7 @@ static BOOL TestPrimitiveYUVCombine(primitives_t* prims, prim_size_t roi)
|
||||
UINT32 lumaStride[3];
|
||||
UINT32 chromaStride[3];
|
||||
UINT32 yuvStride[3];
|
||||
size_t padding = 10000;
|
||||
const size_t padding = 10000;
|
||||
RECTANGLE_16 rect;
|
||||
PROFILER_DEFINE(yuvCombine);
|
||||
PROFILER_DEFINE(yuvSplit);
|
||||
@ -577,6 +579,299 @@ fail:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static BOOL allocate_yuv420(BYTE** planes, UINT32 width, UINT32 height, UINT32 padding)
|
||||
{
|
||||
const size_t size = width * height;
|
||||
const size_t uvwidth = (width + 1) / 2;
|
||||
const size_t uvsize = (height + 1) / 2 * uvwidth;
|
||||
|
||||
if (!(planes[0] = set_padding(size, padding)))
|
||||
goto fail;
|
||||
|
||||
if (!(planes[1] = set_padding(uvsize, padding)))
|
||||
goto fail;
|
||||
|
||||
if (!(planes[2] = set_padding(uvsize, padding)))
|
||||
goto fail;
|
||||
|
||||
return TRUE;
|
||||
fail:
|
||||
free_padding(planes[0], padding);
|
||||
free_padding(planes[1], padding);
|
||||
free_padding(planes[2], padding);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void free_yuv420(BYTE** planes, UINT32 padding)
|
||||
{
|
||||
if (!planes)
|
||||
return;
|
||||
|
||||
free_padding(planes[0], padding);
|
||||
free_padding(planes[1], padding);
|
||||
free_padding(planes[2], padding);
|
||||
planes[0] = NULL;
|
||||
planes[1] = NULL;
|
||||
planes[2] = NULL;
|
||||
}
|
||||
static BOOL check_yuv420(BYTE** planes, UINT32 width, UINT32 height, UINT32 padding)
|
||||
{
|
||||
const size_t size = width * height;
|
||||
const size_t uvwidth = (width + 1) / 2;
|
||||
const size_t uvsize = (height + 1) / 2 * uvwidth;
|
||||
const BOOL yOk = check_padding(planes[0], size, padding, "Y");
|
||||
const BOOL uOk = check_padding(planes[1], uvsize, padding, "U");
|
||||
const BOOL vOk = check_padding(planes[2], uvsize, padding, "V");
|
||||
return (yOk && uOk && vOk);
|
||||
}
|
||||
|
||||
static BOOL check_for_mismatches(const BYTE* planeA, const BYTE* planeB, UINT32 size)
|
||||
{
|
||||
BOOL rc = FALSE;
|
||||
UINT32 x;
|
||||
|
||||
for (x = 0; x < size; x++)
|
||||
{
|
||||
const BYTE a = planeA[x];
|
||||
const BYTE b = planeB[x];
|
||||
|
||||
if (fabsf(a - b) > 2.0f)
|
||||
{
|
||||
rc = TRUE;
|
||||
fprintf(stderr, "[%08x] %02x != %02x\n",
|
||||
x, a, b);
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static BOOL compare_yuv420(BYTE** planesA, BYTE** planesB, UINT32 width, UINT32 height,
|
||||
UINT32 padding)
|
||||
{
|
||||
BOOL rc = TRUE;
|
||||
const size_t size = width * height;
|
||||
const size_t uvwidth = (width + 1) / 2;
|
||||
const size_t uvsize = (height + 1) / 2 * uvwidth;
|
||||
|
||||
if (check_for_mismatches(planesA[0], planesB[0], size))
|
||||
{
|
||||
fprintf(stderr, "Mismatch in Y planes!");
|
||||
rc = FALSE;
|
||||
}
|
||||
|
||||
if (check_for_mismatches(planesA[1], planesB[1], uvsize))
|
||||
{
|
||||
fprintf(stderr, "Mismatch in U planes!");
|
||||
rc = FALSE;
|
||||
}
|
||||
|
||||
if (check_for_mismatches(planesA[2], planesB[2], uvsize))
|
||||
{
|
||||
fprintf(stderr, "Mismatch in V planes!");
|
||||
rc = FALSE;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static BOOL TestPrimitiveRgbToLumaChroma(primitives_t* prims, prim_size_t roi, UINT32 version)
|
||||
{
|
||||
BOOL rc = FALSE;
|
||||
UINT32 x, y, cnt;
|
||||
UINT32 awidth, aheight;
|
||||
BYTE* luma[3] = {0};
|
||||
BYTE* chroma[3] = {0};
|
||||
BYTE* lumaGeneric[3] = {0};
|
||||
BYTE* chromaGeneric[3] = {0};
|
||||
UINT32 yuv_step[3];
|
||||
BYTE* rgb = NULL;
|
||||
size_t size;
|
||||
size_t uvsize, uvwidth;
|
||||
const size_t padding = 0x1000;
|
||||
UINT32 stride;
|
||||
__RGBToAVC444YUV_t fkt, gen;
|
||||
const UINT32 formats[] =
|
||||
{
|
||||
PIXEL_FORMAT_XRGB32,
|
||||
PIXEL_FORMAT_XBGR32,
|
||||
PIXEL_FORMAT_ARGB32,
|
||||
PIXEL_FORMAT_ABGR32,
|
||||
PIXEL_FORMAT_RGBA32,
|
||||
PIXEL_FORMAT_RGBX32,
|
||||
PIXEL_FORMAT_BGRA32,
|
||||
PIXEL_FORMAT_BGRX32
|
||||
};
|
||||
PROFILER_DEFINE(rgbToYUV444);
|
||||
PROFILER_DEFINE(rgbToYUV444opt);
|
||||
/* Buffers need to be 16x16 aligned. */
|
||||
awidth = roi.width;
|
||||
|
||||
if (awidth % 16 != 0)
|
||||
awidth += 16 - roi.width % 16;
|
||||
|
||||
aheight = roi.height;
|
||||
|
||||
if (aheight % 16 != 0)
|
||||
aheight += 16 - roi.height % 16;
|
||||
|
||||
stride = awidth * sizeof(UINT32);
|
||||
size = awidth * aheight;
|
||||
uvwidth = (awidth + 1) / 2;
|
||||
uvsize = (aheight + 1) / 2 * uvwidth;
|
||||
|
||||
if (!prims || !generic)
|
||||
return FALSE;
|
||||
|
||||
switch (version)
|
||||
{
|
||||
case 1:
|
||||
fkt = prims->RGBToAVC444YUV;
|
||||
gen = generic->RGBToAVC444YUV;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
fkt = prims->RGBToAVC444YUVv2;
|
||||
gen = generic->RGBToAVC444YUVv2;
|
||||
break;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (!fkt || !gen)
|
||||
return FALSE;
|
||||
|
||||
fprintf(stderr, "Running AVC444 on frame size %"PRIu32"x%"PRIu32"\n",
|
||||
roi.width, roi.height);
|
||||
|
||||
/* Test RGB to YUV444 conversion and vice versa */
|
||||
if (!(rgb = set_padding(size * sizeof(UINT32), padding)))
|
||||
goto fail;
|
||||
|
||||
if (!allocate_yuv420(luma, awidth, aheight, padding))
|
||||
goto fail;
|
||||
|
||||
if (!allocate_yuv420(chroma, awidth, aheight, padding))
|
||||
goto fail;
|
||||
|
||||
if (!allocate_yuv420(lumaGeneric, awidth, aheight, padding))
|
||||
goto fail;
|
||||
|
||||
if (!allocate_yuv420(chromaGeneric, awidth, aheight, padding))
|
||||
goto fail;
|
||||
|
||||
for (y = 0; y < roi.height; y++)
|
||||
{
|
||||
BYTE* line = &rgb[y * stride];
|
||||
|
||||
for (x = 0; x < roi.width; x++)
|
||||
{
|
||||
#if 1
|
||||
line[x * 4 + 0] = rand();
|
||||
line[x * 4 + 1] = rand();
|
||||
line[x * 4 + 2] = rand();
|
||||
line[x * 4 + 3] = rand();
|
||||
#else
|
||||
line[x * 4 + 0] = (y * roi.width + x) * 16 + 5;
|
||||
line[x * 4 + 1] = (y * roi.width + x) * 16 + 7;
|
||||
line[x * 4 + 2] = (y * roi.width + x) * 16 + 11;
|
||||
line[x * 4 + 3] = (y * roi.width + x) * 16 + 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
yuv_step[0] = awidth;
|
||||
yuv_step[1] = uvwidth;
|
||||
yuv_step[2] = uvwidth;
|
||||
|
||||
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
|
||||
{
|
||||
pstatus_t rc;
|
||||
const UINT32 DstFormat = formats[x];
|
||||
printf("Testing destination color format %s\n", FreeRDPGetColorFormatName(DstFormat));
|
||||
PROFILER_CREATE(rgbToYUV444, "RGBToYUV444-generic");
|
||||
PROFILER_CREATE(rgbToYUV444opt, "RGBToYUV444-optimized");
|
||||
|
||||
for (cnt = 0; cnt < 10; cnt++)
|
||||
{
|
||||
PROFILER_ENTER(rgbToYUV444opt);
|
||||
rc = fkt(rgb, DstFormat, stride, luma, yuv_step, chroma, yuv_step, &roi);
|
||||
PROFILER_EXIT(rgbToYUV444opt);
|
||||
|
||||
if (rc != PRIMITIVES_SUCCESS)
|
||||
goto loop_fail;
|
||||
}
|
||||
|
||||
PROFILER_PRINT_HEADER;
|
||||
PROFILER_PRINT(rgbToYUV444opt);
|
||||
PROFILER_PRINT_FOOTER;
|
||||
|
||||
if (!check_padding(rgb, size * sizeof(UINT32), padding, "rgb"))
|
||||
{
|
||||
rc = -1;
|
||||
goto loop_fail;
|
||||
}
|
||||
|
||||
if (!check_yuv420(luma, awidth, aheight, padding) ||
|
||||
!check_yuv420(chroma, awidth, aheight, padding))
|
||||
{
|
||||
rc = -1;
|
||||
goto loop_fail;
|
||||
}
|
||||
|
||||
for (cnt = 0; cnt < 10; cnt++)
|
||||
{
|
||||
PROFILER_ENTER(rgbToYUV444);
|
||||
rc = gen(rgb, DstFormat, stride, lumaGeneric, yuv_step, chromaGeneric, yuv_step, &roi);
|
||||
PROFILER_EXIT(rgbToYUV444);
|
||||
|
||||
if (rc != PRIMITIVES_SUCCESS)
|
||||
goto loop_fail;
|
||||
}
|
||||
|
||||
PROFILER_PRINT_HEADER;
|
||||
PROFILER_PRINT(rgbToYUV444);
|
||||
PROFILER_PRINT_FOOTER;
|
||||
|
||||
if (!check_padding(rgb, size * sizeof(UINT32), padding, "rgb"))
|
||||
{
|
||||
rc = -1;
|
||||
goto loop_fail;
|
||||
}
|
||||
|
||||
if (!check_yuv420(lumaGeneric, awidth, aheight, padding) ||
|
||||
!check_yuv420(chromaGeneric, awidth, aheight, padding))
|
||||
{
|
||||
rc = -1;
|
||||
goto loop_fail;
|
||||
}
|
||||
|
||||
if (!compare_yuv420(luma, lumaGeneric, awidth, aheight, padding) ||
|
||||
!compare_yuv420(chroma, chromaGeneric, awidth, aheight, padding))
|
||||
{
|
||||
rc = -1;
|
||||
goto loop_fail;
|
||||
}
|
||||
|
||||
loop_fail:
|
||||
PROFILER_FREE(rgbToYUV444);
|
||||
PROFILER_FREE(rgbToYUV444opt);
|
||||
|
||||
if (rc != PRIMITIVES_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
rc = TRUE;
|
||||
fail:
|
||||
free_padding(rgb, padding);
|
||||
free_yuv420(luma, padding);
|
||||
free_yuv420(chroma, padding);
|
||||
free_yuv420(lumaGeneric, padding);
|
||||
free_yuv420(chromaGeneric, padding);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int TestPrimitivesYUV(int argc, char* argv[])
|
||||
{
|
||||
BOOL large = (argc > 1);
|
||||
@ -584,7 +879,7 @@ int TestPrimitivesYUV(int argc, char* argv[])
|
||||
int rc = -1;
|
||||
prim_test_setup(FALSE);
|
||||
primitives_t* prims = primitives_get();
|
||||
primitives_t* generic = primitives_get_generic();
|
||||
WLog_Init();
|
||||
|
||||
for (x = 0; x < 10; x++)
|
||||
{
|
||||
@ -592,8 +887,13 @@ int TestPrimitivesYUV(int argc, char* argv[])
|
||||
|
||||
if (argc > 1)
|
||||
{
|
||||
roi.width = 1920;
|
||||
roi.height = 1080;
|
||||
int rc = sscanf(argv[1], "%l"PRIu32"x%"PRIu32, &roi.width, &roi.height);
|
||||
|
||||
if (rc != 2)
|
||||
{
|
||||
roi.width = 1920;
|
||||
roi.height = 1080;
|
||||
}
|
||||
}
|
||||
else
|
||||
get_size(large, &roi.width, &roi.height);
|
||||
@ -607,7 +907,6 @@ int TestPrimitivesYUV(int argc, char* argv[])
|
||||
}
|
||||
|
||||
printf("---------------------- END --------------------------\n");
|
||||
#if 1
|
||||
printf("------------------- OPTIMIZED -----------------------\n");
|
||||
|
||||
if (!TestPrimitiveYUV(prims, roi, TRUE))
|
||||
@ -617,7 +916,6 @@ int TestPrimitivesYUV(int argc, char* argv[])
|
||||
}
|
||||
|
||||
printf("---------------------- END --------------------------\n");
|
||||
#endif
|
||||
printf("-------------------- GENERIC ------------------------\n");
|
||||
|
||||
if (!TestPrimitiveYUV(generic, roi, FALSE))
|
||||
@ -653,11 +951,30 @@ int TestPrimitivesYUV(int argc, char* argv[])
|
||||
goto end;
|
||||
}
|
||||
|
||||
printf("---------------------- END --------------------------\n");
|
||||
printf("------------------- OPTIMIZED -----------------------\n");
|
||||
|
||||
if (!TestPrimitiveRgbToLumaChroma(prims, roi, 1))
|
||||
{
|
||||
printf("TestPrimitiveRgbToLumaChroma failed.\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
printf("---------------------- END --------------------------\n");
|
||||
printf("-------------------- GENERIC ------------------------\n");
|
||||
|
||||
if (!TestPrimitiveRgbToLumaChroma(prims, roi, 2))
|
||||
{
|
||||
printf("TestPrimitiveYUVCombine failed.\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
printf("---------------------- END --------------------------\n");
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
end:
|
||||
WLog_Uninit();
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user