Using faster transformation for RFX decoder.

This commit is contained in:
Armin Novak 2017-01-24 11:37:24 +01:00
parent 0106405fff
commit b04b8303e6
3 changed files with 34 additions and 44 deletions

View File

@@ -86,6 +86,7 @@ static void rfx_decode_component(RFX_CONTEXT* context,
BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
int stride)
{
BOOL rc = TRUE;
BYTE* pBuffer;
INT16* pSrcDst[3];
UINT32* y_quants, *cb_quants, *cr_quants;
@@ -109,14 +110,13 @@ BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen,
pSrcDst[2]); /* CrData */
PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb);
prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
pSrcDst, 64 * sizeof(INT16), &roi_64x64);
if (prims->yCbCrToRGB_16s8u_P3AC4R(pSrcDst, 64 * sizeof(INT16),
rgb_buffer, stride, context->pixel_format, &roi_64x64) != PRIMITIVES_SUCCESS)
rc = FALSE;
PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb);
PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
context->pixel_format, rgb_buffer, stride);
PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);
PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);
BufferPool_Return(context->priv->BufferPool, pBuffer);
return TRUE;
return rc;
}

View File

@@ -34,7 +34,7 @@
/* ------------------------------------------------------------------------- */
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
const INT16* pSrc[3], UINT32 srcStep,
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi)
{
UINT32 x, y;
@@ -51,23 +51,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
for (x = 0; x < roi->width; x++)
{
INT16 R, G, B;
const INT64 divisor = 20;
const INT64 Y = (pY[0] + 4096);
const INT64 Cb = (pCb[0]);
const INT64 Cr = (pCr[0]);
const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor));
const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor));
const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor));
const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor));
const INT64 Ytmp = (Y + 16) << divisor;
R = ((INT16)((CrR + Ytmp) >> divisor) >> 5);
G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5);
B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
const INT32 divisor = 16;
const INT32 Y = ((*pY++) + 4096) << divisor;
const INT32 Cb = (*pCb++);
const INT32 Cr = (*pCr++);
const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
R = ((INT16)((CrR + Y) >> divisor) >> 5);
G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
B = ((INT16)((CbB + Y) >> divisor) >> 5);
pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
CLIP(B), 0xFF);
pY++;
pCb++;
pCr++;
}
pY += srcPad;
@@ -81,7 +77,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
const INT16* pSrc[3], UINT32 srcStep,
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi)
{
UINT32 x, y;
@@ -99,23 +95,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
for (x = 0; x < roi->width; x++)
{
INT16 R, G, B;
const INT64 divisor = 20;
const INT64 Y = (pY[0] + 4096);
const INT64 Cb = (pCb[0]);
const INT64 Cr = (pCr[0]);
const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor));
const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor));
const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor));
const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor));
const INT64 Ytmp = (Y + 16) << divisor;
R = ((INT16)((CrR + Ytmp) >> divisor) >> 5);
G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5);
B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
const INT32 divisor = 16;
const INT32 Y = ((*pY++) + 4096) << divisor;
const INT32 Cb = (*pCb++);
const INT32 Cr = (*pCr++);
const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
R = ((INT16)((CrR + Y) >> divisor) >> 5);
G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
B = ((INT16)((CbB + Y) >> divisor) >> 5);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
CLIP(B), 0xFF);
pY++;
pCb++;
pCr++;
}
pY += srcPad;
@@ -129,17 +121,17 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
const INT16* pSrc[3], UINT32 srcStep,
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi)
{
switch (DstFormat)
{
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32:
return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi);
return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
default:
return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, DstFormat, dstStep, roi);
return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
}
}

View File

@@ -450,7 +450,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
return PRIMITIVES_SUCCESS;
}
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
UINT32 srcStep, /* bytes between rows in source data */
@@ -469,7 +468,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
}
}
#endif /* WITH_SSE2 */
/*---------------------------------------------------------------------------*/