Added NEON and SSSE3 YCbCr conversion functions.

This commit is contained in:
Armin Novak 2017-02-13 16:01:49 +01:00
parent b1e3bab8ef
commit a791ab0b17
4 changed files with 1221 additions and 341 deletions

View File

@ -302,9 +302,9 @@ static INLINE void writeScanlineRGB(BYTE* dst, DWORD formatSize, UINT32 DstForma
for (x = 0; x < width; x++)
{
const BYTE R = *r++;
const BYTE G = *g++;
const BYTE B = *b++;
const BYTE R = CLIP(*r++);
const BYTE G = CLIP(*g++);
const BYTE B = CLIP(*b++);
*dst++ = R;
*dst++ = G;
*dst++ = B;
@ -318,9 +318,9 @@ static INLINE void writeScanlineBGR(BYTE* dst, DWORD formatSize, UINT32 DstForma
for (x = 0; x < width; x++)
{
const BYTE R = *r++;
const BYTE G = *g++;
const BYTE B = *b++;
const BYTE R = CLIP(*r++);
const BYTE G = CLIP(*g++);
const BYTE B = CLIP(*b++);
*dst++ = B;
*dst++ = G;
*dst++ = R;
@ -334,9 +334,9 @@ static INLINE void writeScanlineBGRX(BYTE* dst, DWORD formatSize, UINT32 DstForm
for (x = 0; x < width; x++)
{
const BYTE R = *r++;
const BYTE G = *g++;
const BYTE B = *b++;
const BYTE R = CLIP(*r++);
const BYTE G = CLIP(*g++);
const BYTE B = CLIP(*b++);
*dst++ = B;
*dst++ = G;
*dst++ = R;
@ -351,9 +351,9 @@ static INLINE void writeScanlineRGBX(BYTE* dst, DWORD formatSize, UINT32 DstForm
for (x = 0; x < width; x++)
{
const BYTE R = *r++;
const BYTE G = *g++;
const BYTE B = *b++;
const BYTE R = CLIP(*r++);
const BYTE G = CLIP(*g++);
const BYTE B = CLIP(*b++);
*dst++ = R;
*dst++ = G;
*dst++ = B;
@ -368,9 +368,9 @@ static INLINE void writeScanlineXBGR(BYTE* dst, DWORD formatSize, UINT32 DstForm
for (x = 0; x < width; x++)
{
const BYTE R = *r++;
const BYTE G = *g++;
const BYTE B = *b++;
const BYTE R = CLIP(*r++);
const BYTE G = CLIP(*g++);
const BYTE B = CLIP(*b++);
*dst++ = 0xFF;
*dst++ = B;
*dst++ = G;
@ -385,9 +385,9 @@ static INLINE void writeScanlineXRGB(BYTE* dst, DWORD formatSize, UINT32 DstForm
for (x = 0; x < width; x++)
{
const BYTE R = *r++;
const BYTE G = *g++;
const BYTE B = *b++;
const BYTE R = CLIP(*r++);
const BYTE G = CLIP(*g++);
const BYTE B = CLIP(*b++);
*dst++ = 0xFF;
*dst++ = R;
*dst++ = G;

File diff suppressed because it is too large Load Diff

View File

@ -17,58 +17,102 @@
#endif
#include <winpr/sysinfo.h>
#include <freerdp/utils/profiler.h>
#include "prim_test.h"
static const int RGB_TRIAL_ITERATIONS = 1000;
static const int YCBCR_TRIAL_ITERATIONS = 1000;
static const float TEST_TIME = 4.0;
/* ------------------------------------------------------------------------- */
static BOOL test_RGBToRGB_16s8u_P3AC4R_func(void)
static BOOL test_RGBToRGB_16s8u_P3AC4R_func(prim_size_t roi, DWORD DstFormat)
{
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
UINT32 ALIGN(out1[4096]);
UINT32 ALIGN(out2[4096]);
INT16* r;
INT16* g;
INT16* b;
BYTE* out1;
BYTE* out2;
int i;
BOOL failed = FALSE;
INT16* ptrs[3];
prim_size_t roi = { 64, 64 };
const INT16* ptrs[3];
const UINT32 rgbStride = roi.width * 2;
const UINT32 dstStride = roi.width * 4;
PROFILER_DEFINE(genericProf);
PROFILER_DEFINE(optProf);
PROFILER_CREATE(genericProf, "RGBToRGB_16s8u_P3AC4R-GENERIC");
PROFILER_CREATE(optProf, "RGBToRGB_16s8u_P3AC4R-OPTIMIZED");
r = _aligned_malloc(rgbStride * roi.height, 16);
g = _aligned_malloc(rgbStride * roi.height, 16);
b = _aligned_malloc(rgbStride * roi.height, 16);
out1 = _aligned_malloc(dstStride * roi.height, 16);
out2 = _aligned_malloc(dstStride * roi.height, 16);
winpr_RAND((BYTE*)r, sizeof(r));
winpr_RAND((BYTE*)g, sizeof(g));
winpr_RAND((BYTE*)b, sizeof(b));
if (!r || !g || !b || !out1 || !out2)
goto fail;
/* clear upper bytes */
for (i = 0; i < 4096; ++i)
#if 0
{
r[i] &= 0x00FFU;
g[i] &= 0x00FFU;
b[i] &= 0x00FFU;
}
UINT32 x, y;
for (y = 0; y < roi.height; y++)
{
for (x = 0; x < roi.width; x++)
{
r[y * roi.width + x] = 0x01;
g[y * roi.width + x] = 0x02;
b[y * roi.width + x] = 0x04;
}
}
}
#else
winpr_RAND((BYTE*)r, rgbStride * roi.height);
winpr_RAND((BYTE*)g, rgbStride * roi.height);
winpr_RAND((BYTE*)b, rgbStride * roi.height);
#endif
ptrs[0] = r;
ptrs[1] = g;
ptrs[2] = b;
if (generic->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
(BYTE*) out1, 64 * 4, PIXEL_FORMAT_RGBA32,
&roi) != PRIMITIVES_SUCCESS)
return FALSE;
PROFILER_ENTER(genericProf);
if (optimized->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
(BYTE*) out2, 64 * 4, PIXEL_FORMAT_RGBA32,
&roi) != PRIMITIVES_SUCCESS)
return FALSE;
if (generic->RGBToRGB_16s8u_P3AC4R(ptrs, rgbStride,
out1, dstStride, DstFormat,
&roi) != PRIMITIVES_SUCCESS)
goto fail;
for (i = 0; i < 4096; ++i)
PROFILER_EXIT(genericProf);
PROFILER_ENTER(optProf);
if (optimized->RGBToRGB_16s8u_P3AC4R(ptrs, rgbStride,
out2, dstStride, DstFormat,
&roi) != PRIMITIVES_SUCCESS)
goto fail;
PROFILER_EXIT(optProf);
if (memcmp(out1, out2, dstStride * roi.height) != 0)
{
if (out1[i] != out2[i])
for (i = 0; i < roi.width * roi.height; ++i)
{
printf("RGBToRGB-SSE FAIL: out1[%d]=0x%08"PRIx32" out2[%d]=0x%08"PRIx32"\n",
i, out1[i], i, out2[i]);
failed = TRUE;
const UINT32 o1 = ReadColor(out1 + 4 * i, DstFormat);
const UINT32 o2 = ReadColor(out2 + 4 * i, DstFormat);
if (o1 != o2)
{
printf("RGBToRGB_16s8u_P3AC4R FAIL: out1[%d]=0x%08"PRIx32" out2[%d]=0x%08"PRIx32"\n",
i, out1[i], i, out2[i]);
failed = TRUE;
}
}
}
printf("Results for %lux%lu [%s]", roi.width, roi.height, GetColorFormatName(DstFormat));
PROFILER_PRINT(genericProf);
PROFILER_PRINT(optProf);
fail:
PROFILER_FREE(genericProf);
PROFILER_FREE(optProf);
_aligned_free(r);
_aligned_free(g);
_aligned_free(b);
_aligned_free(out1);
_aligned_free(out2);
return !failed;
}
@ -76,11 +120,10 @@ static BOOL test_RGBToRGB_16s8u_P3AC4R_func(void)
static BOOL test_RGBToRGB_16s8u_P3AC4R_speed(void)
{
const prim_size_t roi64x64 = { 64, 64 };
INT16 ALIGN(r[4096+1]), ALIGN(g[4096+1]), ALIGN(b[4096+1]);
UINT32 ALIGN(dst[4096+1]);
INT16 ALIGN(r[4096 + 1]), ALIGN(g[4096 + 1]), ALIGN(b[4096 + 1]);
UINT32 ALIGN(dst[4096 + 1]);
int i;
INT16* ptrs[3];
winpr_RAND((BYTE*)r, sizeof(r));
winpr_RAND((BYTE*)g, sizeof(g));
winpr_RAND((BYTE*)b, sizeof(b));
@ -93,20 +136,20 @@ static BOOL test_RGBToRGB_16s8u_P3AC4R_speed(void)
b[i] &= 0x00FFU;
}
ptrs[0] = r+1;
ptrs[1] = g+1;
ptrs[2] = b+1;
ptrs[0] = r + 1;
ptrs[1] = g + 1;
ptrs[2] = b + 1;
if (!speed_test("RGBToRGB_16s8u_P3AC4R", "aligned", g_Iterations,
(speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
(speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
(const INT16**) ptrs, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64))
(speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
(speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
(const INT16**) ptrs, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64))
return FALSE;
if (!speed_test("RGBToRGB_16s8u_P3AC4R", "unaligned", g_Iterations,
(speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
(speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
(const INT16**) ptrs, 64 * 2, ((BYTE*) dst)+1, 64 * 4, &roi64x64))
(speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
(speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
(const INT16**) ptrs, 64 * 2, ((BYTE*) dst) + 1, 64 * 4, &roi64x64))
return FALSE;
return TRUE;
@ -124,7 +167,6 @@ static BOOL test_yCbCrToRGB_16s16s_P3P3_func(void)
INT16* out1[3];
INT16* out2[3];
prim_size_t roi = { 64, 64 };
winpr_RAND((BYTE*)y, sizeof(y));
winpr_RAND((BYTE*)cb, sizeof(cb));
winpr_RAND((BYTE*)cr, sizeof(cr));
@ -152,12 +194,13 @@ static BOOL test_yCbCrToRGB_16s16s_P3P3_func(void)
out2[0] = r2;
out2[1] = g2;
out2[2] = b2;
status = generic->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out1, 64 * 2, &roi);
if (status != PRIMITIVES_SUCCESS)
return FALSE;
status = optimized->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out2, 64 * 2, &roi);
if (status != PRIMITIVES_SUCCESS)
return FALSE;
@ -167,7 +210,8 @@ static BOOL test_yCbCrToRGB_16s16s_P3P3_func(void)
|| (ABS(g1[i] - g2[i]) > 1)
|| (ABS(b1[i] - b2[i]) > 1))
{
printf("YCbCrToRGB-SSE FAIL[%d]: %"PRId16",%"PRId16",%"PRId16" vs %"PRId16",%"PRId16",%"PRId16"\n", i,
printf("YCbCrToRGB-SSE FAIL[%d]: %"PRId16",%"PRId16",%"PRId16" vs %"PRId16",%"PRId16",%"PRId16"\n",
i,
r1[i], g1[i], b1[i], r2[i], g2[i], b2[i]);
return FALSE;
}
@ -185,7 +229,6 @@ static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
int i;
const INT16* input[3];
INT16* output[3];
winpr_RAND((BYTE*)y, sizeof(y));
winpr_RAND((BYTE*)cb, sizeof(cb));
winpr_RAND((BYTE*)cr, sizeof(cr));
@ -206,9 +249,9 @@ static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
output[2] = b;
if (!speed_test("yCbCrToRGB_16s16s_P3P3", "aligned", g_Iterations,
(speed_test_fkt)generic->yCbCrToRGB_16s16s_P3P3,
(speed_test_fkt)optimized->yCbCrToRGB_16s16s_P3P3,
input, 64 * 2, output, 64 * 2, &roi))
(speed_test_fkt)generic->yCbCrToRGB_16s16s_P3P3,
(speed_test_fkt)optimized->yCbCrToRGB_16s16s_P3P3,
input, 64 * 2, output, 64 * 2, &roi))
return FALSE;
return TRUE;
@ -216,24 +259,44 @@ static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
int TestPrimitivesColors(int argc, char* argv[])
{
const DWORD formats[] =
{
PIXEL_FORMAT_ARGB32,
PIXEL_FORMAT_XRGB32,
PIXEL_FORMAT_ABGR32,
PIXEL_FORMAT_XBGR32,
PIXEL_FORMAT_RGBA32,
PIXEL_FORMAT_RGBX32,
PIXEL_FORMAT_BGRA32,
PIXEL_FORMAT_BGRX32
};
DWORD x;
prim_size_t roi = { 1920, 1080};
prim_test_setup(FALSE);
if (!test_RGBToRGB_16s8u_P3AC4R_func())
return 1;
if (g_TestPrimitivesPerformance)
for (x = 0; x < sizeof(formats) / sizeof(formats[0]); x++)
{
if (!test_RGBToRGB_16s8u_P3AC4R_speed())
if (!test_RGBToRGB_16s8u_P3AC4R_func(roi, formats[x]))
return 1;
}
if (!test_yCbCrToRGB_16s16s_P3P3_func())
return 1;
#if 0
if (g_TestPrimitivesPerformance)
{
if (!test_yCbCrToRGB_16s16s_P3P3_speed())
if (g_TestPrimitivesPerformance)
{
if (!test_RGBToRGB_16s8u_P3AC4R_speed())
return 1;
}
if (!test_yCbCrToRGB_16s16s_P3P3_func())
return 1;
if (g_TestPrimitivesPerformance)
{
if (!test_yCbCrToRGB_16s16s_P3P3_speed())
return 1;
}
#endif
}
return 0;

View File

@ -4,6 +4,7 @@
#include <winpr/print.h>
#include <freerdp/codec/color.h>
#include <winpr/wlog.h>
#include <freerdp/utils/profiler.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
@ -11,7 +12,7 @@
#define TAG __FILE__
static INT16 TEST_Y_COMPONENT[4096] =
static const INT16 TEST_Y_COMPONENT[4096] =
{
-32, +16, +64, +272, -32, -16, +0, -16,
-32, -24, -16, -8, +0, -24, -48, -72,
@ -527,7 +528,7 @@ static INT16 TEST_Y_COMPONENT[4096] =
+8, -24, -56, -88, -120, -120, -120, -120
};
static INT16 TEST_CB_COMPONENT[4096] =
static const INT16 TEST_CB_COMPONENT[4096] =
{
+1728, +1730, +1732, +1734, +1736, +1738, +1740, +1742,
+1744, +1740, +1736, +1732, +1728, +1796, +1864, +1804,
@ -1043,7 +1044,7 @@ static INT16 TEST_CB_COMPONENT[4096] =
+2160, +2168, +2176, +2184, +2192, +2192, +2192, +2192
};
static INT16 TEST_CR_COMPONENT[4096] =
static const INT16 TEST_CR_COMPONENT[4096] =
{
-2112, -2114, -2116, -2118, -2120, -2122, -2124, -2126,
-2128, -2118, -2108, -2098, -2088, -2150, -2212, -2146,
@ -1563,7 +1564,7 @@ static INT16 TEST_CR_COMPONENT[4096] =
* 64x64 XRGB Image
*/
static UINT32 TEST_XRGB_IMAGE[4096] =
static const UINT32 TEST_XRGB_IMAGE[4096] =
{
0xFF229cdf, 0xFF249de0, 0xFF259fe2, 0xFF2ca5e8, 0xFF229cdf, 0xFF229ce0, 0xFF239de0, 0xFF229ce0,
0xFF229cdf, 0xFF229cdf, 0xFF239ce0, 0xFF249ce0, 0xFF249ce0, 0xFF219ce3, 0xFF1e9ce6, 0xFF209ae2,
@ -2154,211 +2155,136 @@ static int test_bmp_cmp_dump(const BYTE* actual, const BYTE* expected, int size,
return count;
}
static void test_fill_bitmap_channel(BYTE* data, int width, int height,
BYTE value, int nChannel)
static int test_PrimitivesYCbCr(const primitives_t* prims, UINT32 format, prim_size_t roi)
{
int x, y;
BYTE* pChannel;
pChannel = data + nChannel;
for (y = 0; y < height; y++)
{
for (x = 0; x < width; x++)
{
*pChannel = value;
pChannel += 4;
}
}
}
#define TEST_FP_TYPE float
static TEST_FP_TYPE TEST_YCbCrToRGB_01[4] = { 1.403f, 0.344f, 0.714f, 1.770f };
static TEST_FP_TYPE TEST_YCbCrToRGB_02[4] = { 1.402525f, 0.343730f, 0.714401f, 1.769905f };
static TEST_FP_TYPE TEST_YCbCrToRGB_03[4] = { 1.402524948120117L, 0.3437300026416779L, 0.7144010066986084L, 1.769904971122742L };
static INT16 TEST_YCbCr_01[3] = { +3443, -1863, +272 };
static BYTE TEST_RGB_01[3] = { 247, 249, 132 };
static INT16 TEST_YCbCr_02[3] = { +1086, +1584, -2268 };
static BYTE TEST_RGB_02[3] = { 62, 195, 249 };
static INT16 TEST_YCbCr_03[3] = { -576, +2002, -2179 };
static BYTE TEST_RGB_03[3] = { 15, 137, 221 };
int test_YCbCr_fp(TEST_FP_TYPE coeffs[4], INT16 YCbCr[3], BYTE RGB[3])
{
INT16 R, G, B;
TEST_FP_TYPE Y, Cb, Cr;
TEST_FP_TYPE fR, fG, fB;
TEST_FP_TYPE fR1, fR2;
Y = (TEST_FP_TYPE)(YCbCr[0] + 4096);
Cb = (TEST_FP_TYPE)(YCbCr[1]);
Cr = (TEST_FP_TYPE)(YCbCr[2]);
#if 1
fR1 = Cr * coeffs[0];
fR2 = fR1 + Y + 16.0f;
fR = ((Cr * coeffs[0]) + Y + 16.0f);
fG = (Y - (Cb * coeffs[1]) - (Cr * coeffs[2]) + 16.0f);
fB = ((Cb * coeffs[3]) + Y + 16.0f);
printf("fR: %f fG: %f fB: %f fY: %f\n", fR, fG, fB, Y);
R = (INT16) fR;
G = (INT16) fG;
B = (INT16) fB;
printf("mR: %d mG: %d mB: %d\n", (R - 16) % 32, (G - 16) % 32, (B - 16) % 32);
printf("iR: %"PRId16" iG: %"PRId16" iB: %"PRId16"\n", R, G, B);
R >>= 5;
G >>= 5;
B >>= 5;
printf("R5: %"PRId16" G5: %"PRId16" B5: %"PRId16"\n", R, G, B);
#else
R = ((INT16)(((Cr * coeffs[0]) + Y + 16.0f)) >> 5);
G = ((INT16)((Y - (Cb * coeffs[1]) - (Cr * coeffs[2]) + 16.0f)) >> 5);
B = ((INT16)(((Cb * coeffs[3]) + Y + 16.0f)) >> 5);
#endif
if (R < 0)
R = 0;
else if (R > 255)
R = 255;
if (G < 0)
G = 0;
else if (G > 255)
G = 255;
if (B < 0)
B = 0;
else if (B > 255)
B = 255;
printf("--------------------------------\n");
printf("R: A: %3"PRId16" E: %3"PRIu8" %s\n", R, RGB[0], (R == RGB[0]) ? "" : "***");
printf("G: A: %3"PRId16" E: %3"PRIu8" %s\n", G, RGB[1], (G == RGB[1]) ? "" : "***");
printf("B: A: %3"PRId16" E: %3"PRIu8" %s\n", B, RGB[2], (B == RGB[2]) ? "" : "***");
printf("Y: %+5"PRId16" Cb: %+5"PRId16" Cr: %+5"PRId16"\n", YCbCr[0], YCbCr[1], YCbCr[2]);
//printf("[0]: %20.20lf\n", coeffs[0]);
//printf("[1]: %20.20lf\n", coeffs[1]);
//printf("[2]: %20.20lf\n", coeffs[2]);
//printf("[3]: %20.20lf\n", coeffs[3]);
printf("--------------------------------\n\n");
return 0;
}
int test_YCbCr_pixels()
{
if (0)
{
test_YCbCr_fp(TEST_YCbCrToRGB_01, TEST_YCbCr_01, TEST_RGB_01);
test_YCbCr_fp(TEST_YCbCrToRGB_01, TEST_YCbCr_02, TEST_RGB_02);
test_YCbCr_fp(TEST_YCbCrToRGB_01, TEST_YCbCr_03, TEST_RGB_03);
}
if (1)
{
test_YCbCr_fp(TEST_YCbCrToRGB_02, TEST_YCbCr_01, TEST_RGB_01);
test_YCbCr_fp(TEST_YCbCrToRGB_02, TEST_YCbCr_02, TEST_RGB_02);
test_YCbCr_fp(TEST_YCbCrToRGB_02, TEST_YCbCr_03, TEST_RGB_03);
}
if (0)
{
test_YCbCr_fp(TEST_YCbCrToRGB_03, TEST_YCbCr_01, TEST_RGB_01);
test_YCbCr_fp(TEST_YCbCrToRGB_03, TEST_YCbCr_02, TEST_RGB_02);
test_YCbCr_fp(TEST_YCbCrToRGB_03, TEST_YCbCr_03, TEST_RGB_03);
}
return 0;
}
int TestPrimitivesYCbCr(int argc, char* argv[])
{
pstatus_t status = PRIMITIVES_SUCCESS;
int size;
pstatus_t status = -1;
int cnt[3];
float err[3];
BYTE* actual;
BYTE* expected;
int margin = 1;
INT16* pYCbCr[3];
const primitives_t* prims = primitives_get();
static const prim_size_t roi_64x64 = { 64, 64 };
const INT16* pYCbCr[3];
const UINT32 srcStride = roi.width * 2;
const UINT32 dstStride = roi.width * GetBytesPerPixel(format);
const UINT32 srcSize = srcStride * roi.height;
const UINT32 dstSize = dstStride * roi.height;
PROFILER_DEFINE(prof);
//return test_YCbCr_pixels();
expected = (BYTE*) TEST_XRGB_IMAGE;
size = 64 * 64 * 4;
actual = _aligned_malloc(size, 16);
actual = _aligned_malloc(dstSize, 16);
PROFILER_CREATE(prof, "YCbCr");
if (!actual)
return 1;
goto fail;
ZeroMemory(actual, size);
ZeroMemory(actual, dstSize);
pYCbCr[0] = TEST_Y_COMPONENT;
pYCbCr[1] = TEST_CB_COMPONENT;
pYCbCr[2] = TEST_CR_COMPONENT;
if (1)
{
status = prims->yCbCrToRGB_16s8u_P3AC4R((const INT16**) pYCbCr, 64 * 2,
actual, 64 * 4, PIXEL_FORMAT_BGRA32,
&roi_64x64);
PROFILER_ENTER(prof);
status = prims->yCbCrToRGB_16s8u_P3AC4R((const INT16**) pYCbCr, srcStride,
actual, dstStride, format,
&roi);
PROFILER_EXIT(prof);
}
else
{
INT16* pSrcDst[3];
pSrcDst[0] = _aligned_malloc(4096 * 2, 16);
pSrcDst[1] = _aligned_malloc(4096 * 2, 16);
pSrcDst[2] = _aligned_malloc(4096 * 2, 16);
CopyMemory(pSrcDst[0], pYCbCr[0], 4096 * 2);
CopyMemory(pSrcDst[1], pYCbCr[1], 4096 * 2);
CopyMemory(pSrcDst[2], pYCbCr[2], 4096 * 2);
prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * 2,
pSrcDst, 64 * 2, &roi_64x64);
prims->RGBToRGB_16s8u_P3AC4R((const INT16**) pSrcDst, 64 * 2,
actual, 64 * 4, PIXEL_FORMAT_BGRA32, &roi_64x64);
pSrcDst[0] = _aligned_malloc(srcSize, 16);
pSrcDst[1] = _aligned_malloc(srcSize, 16);
pSrcDst[2] = _aligned_malloc(srcSize, 16);
CopyMemory(pSrcDst[0], pYCbCr[0], srcSize);
CopyMemory(pSrcDst[1], pYCbCr[1], srcSize);
CopyMemory(pSrcDst[2], pYCbCr[2], srcSize);
PROFILER_ENTER(prof);
status = prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, srcStride,
pSrcDst, srcStride, &roi);
if (status != PRIMITIVES_SUCCESS)
goto fail2;
status = prims->RGBToRGB_16s8u_P3AC4R((const INT16**) pSrcDst, srcStride,
actual, dstStride, format, &roi);
PROFILER_EXIT(prof);
fail2:
_aligned_free(pSrcDst[0]);
_aligned_free(pSrcDst[1]);
_aligned_free(pSrcDst[2]);
if (status != PRIMITIVES_SUCCESS)
goto fail;
}
if (0)
{
test_fill_bitmap_channel(actual, 64, 64, 0, 2); /* red */
test_fill_bitmap_channel(expected, 64, 64, 0, 2); /* red */
}
if (0)
{
test_fill_bitmap_channel(actual, 64, 64, 0, 1); /* green */
test_fill_bitmap_channel(expected, 64, 64, 0, 1); /* green */
}
if (0)
{
test_fill_bitmap_channel(actual, 64, 64, 0, 0); /* blue */
test_fill_bitmap_channel(expected, 64, 64, 0, 0); /* blue */
}
cnt[2] = test_bmp_cmp_count(actual, expected, size, 2, margin); /* red */
err[2] = ((float) cnt[2]) / ((float) size / 4) * 100.0f;
cnt[1] = test_bmp_cmp_count(actual, expected, size, 1, margin); /* green */
err[1] = ((float) cnt[1]) / ((float) size / 4) * 100.0f;
cnt[0] = test_bmp_cmp_count(actual, expected, size, 0, margin); /* blue */
err[0] = ((float) cnt[0]) / ((float) size / 4) * 100.0f;
cnt[2] = test_bmp_cmp_count(actual, expected, dstSize, 2, margin); /* red */
err[2] = ((float) cnt[2]) / ((float) dstSize / 4) * 100.0f;
cnt[1] = test_bmp_cmp_count(actual, expected, dstSize, 1, margin); /* green */
err[1] = ((float) cnt[1]) / ((float) dstSize / 4) * 100.0f;
cnt[0] = test_bmp_cmp_count(actual, expected, dstSize, 0, margin); /* blue */
err[0] = ((float) cnt[0]) / ((float) dstSize / 4) * 100.0f;
if (cnt[0] || cnt[1] || cnt[2])
{
printf("Red Error Dump:\n");
test_bmp_cmp_dump(actual, expected, size, 2, margin); /* red */
test_bmp_cmp_dump(actual, expected, dstSize, 2, margin); /* red */
printf("Green Error Dump:\n");
test_bmp_cmp_dump(actual, expected, size, 1, margin); /* green */
test_bmp_cmp_dump(actual, expected, dstSize, 1, margin); /* green */
printf("Blue Error Dump:\n");
test_bmp_cmp_dump(actual, expected, size, 0, margin); /* blue */
test_bmp_cmp_dump(actual, expected, dstSize, 0, margin); /* blue */
printf("R: diff: %d (%f%%)\n", cnt[2], err[2]);
printf("G: diff: %d (%f%%)\n", cnt[1], err[1]);
printf("B: diff: %d (%f%%)\n", cnt[0], err[0]);
}
PROFILER_PRINT(prof);
fail:
_aligned_free(actual);
return (status == PRIMITIVES_SUCCESS) ? 0 : 1;
PROFILER_FREE(prof);
return status;
}
/**
 * Test entry point: runs test_PrimitivesYCbCr() for every pixel format in
 * the table below, first with the generic primitives and then with the
 * primitives returned by primitives_get(), using a 64x64 region.
 *
 * @param argc unused
 * @param argv unused
 *
 * @return 0 when every run returned PRIMITIVES_SUCCESS, otherwise the first
 *         non-success value returned by test_PrimitivesYCbCr().
 */
int TestPrimitivesYCbCr(int argc, char* argv[])
{
	static const UINT32 pixelFormats[] =
	{
		PIXEL_FORMAT_XRGB32,
		PIXEL_FORMAT_XBGR32,
		PIXEL_FORMAT_ARGB32,
		PIXEL_FORMAT_ABGR32,
		PIXEL_FORMAT_RGBA32,
		PIXEL_FORMAT_RGBX32,
		PIXEL_FORMAT_BGRA32,
		PIXEL_FORMAT_BGRX32
	};
	const UINT32 formatCount = sizeof(pixelFormats) / sizeof(pixelFormats[0]);
	const primitives_t* optimizedPrims = primitives_get();
	const primitives_t* genericPrims = primitives_get_generic();
	prim_size_t roi = { 64, 64 };
	UINT32 idx = 0;

	while (idx < formatCount)
	{
		const UINT32 format = pixelFormats[idx++];
		int rc;

		/* Generic implementation first ... */
		printf("----------------------- GENERIC %s -------------------\n",
		       GetColorFormatName(format));
		rc = test_PrimitivesYCbCr(genericPrims, format, roi);

		/* A failing run aborts immediately, before its END marker is printed. */
		if (rc != PRIMITIVES_SUCCESS)
			return rc;

		printf("------------------------- END %s ----------------------\n",
		       GetColorFormatName(format));

		/* ... then the implementation selected by primitives_get(). */
		printf("---------------------- OPTIMIZED %s -------------------\n",
		       GetColorFormatName(format));
		rc = test_PrimitivesYCbCr(optimizedPrims, format, roi);

		if (rc != PRIMITIVES_SUCCESS)
			return rc;

		printf("------------------------- END %s ----------------------\n",
		       GetColorFormatName(format));
	}

	return 0;
}