Using faster transformation for RFX decoder.

This commit is contained in:
Armin Novak 2017-01-24 11:37:24 +01:00
parent 0106405fff
commit b04b8303e6
3 changed files with 34 additions and 44 deletions

View File

@ -86,6 +86,7 @@ static void rfx_decode_component(RFX_CONTEXT* context,
BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer, BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
int stride) int stride)
{ {
BOOL rc = TRUE;
BYTE* pBuffer; BYTE* pBuffer;
INT16* pSrcDst[3]; INT16* pSrcDst[3];
UINT32* y_quants, *cb_quants, *cr_quants; UINT32* y_quants, *cb_quants, *cr_quants;
@ -109,14 +110,13 @@ BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen, rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen,
pSrcDst[2]); /* CrData */ pSrcDst[2]); /* CrData */
PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb); PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb);
prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
pSrcDst, 64 * sizeof(INT16), &roi_64x64); if (prims->yCbCrToRGB_16s8u_P3AC4R(pSrcDst, 64 * sizeof(INT16),
rgb_buffer, stride, context->pixel_format, &roi_64x64) != PRIMITIVES_SUCCESS)
rc = FALSE;
PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb); PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb);
PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
context->pixel_format, rgb_buffer, stride);
PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);
PROFILER_EXIT(context->priv->prof_rfx_decode_rgb); PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);
BufferPool_Return(context->priv->BufferPool, pBuffer); BufferPool_Return(context->priv->BufferPool, pBuffer);
return TRUE; return rc;
} }

View File

@ -34,7 +34,7 @@
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX( static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
const INT16* pSrc[3], UINT32 srcStep, const INT16* pSrc[3], UINT32 srcStep,
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep, BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi) const prim_size_t* roi)
{ {
UINT32 x, y; UINT32 x, y;
@ -51,23 +51,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
for (x = 0; x < roi->width; x++) for (x = 0; x < roi->width; x++)
{ {
INT16 R, G, B; INT16 R, G, B;
const INT64 divisor = 20; const INT32 divisor = 16;
const INT64 Y = (pY[0] + 4096); const INT32 Y = ((*pY++) + 4096) << divisor;
const INT64 Cb = (pCb[0]); const INT32 Cb = (*pCb++);
const INT64 Cr = (pCr[0]); const INT32 Cr = (*pCr++);
const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor)); const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor)); const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor)); const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor)); const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
const INT64 Ytmp = (Y + 16) << divisor; R = ((INT16)((CrR + Y) >> divisor) >> 5);
R = ((INT16)((CrR + Ytmp) >> divisor) >> 5); G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5); B = ((INT16)((CbB + Y) >> divisor) >> 5);
B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G), pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
CLIP(B), 0xFF); CLIP(B), 0xFF);
pY++;
pCb++;
pCr++;
} }
pY += srcPad; pY += srcPad;
@ -81,7 +77,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general( static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
const INT16* pSrc[3], UINT32 srcStep, const INT16* pSrc[3], UINT32 srcStep,
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep, BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi) const prim_size_t* roi)
{ {
UINT32 x, y; UINT32 x, y;
@ -99,23 +95,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
for (x = 0; x < roi->width; x++) for (x = 0; x < roi->width; x++)
{ {
INT16 R, G, B; INT16 R, G, B;
const INT64 divisor = 20; const INT32 divisor = 16;
const INT64 Y = (pY[0] + 4096); const INT32 Y = ((*pY++) + 4096) << divisor;
const INT64 Cb = (pCb[0]); const INT32 Cb = (*pCb++);
const INT64 Cr = (pCr[0]); const INT32 Cr = (*pCr++);
const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor)); const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor)); const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor)); const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor)); const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
const INT64 Ytmp = (Y + 16) << divisor; R = ((INT16)((CrR + Y) >> divisor) >> 5);
R = ((INT16)((CrR + Ytmp) >> divisor) >> 5); G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5); B = ((INT16)((CbB + Y) >> divisor) >> 5);
B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G), pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
CLIP(B), 0xFF); CLIP(B), 0xFF);
pY++;
pCb++;
pCr++;
} }
pY += srcPad; pY += srcPad;
@ -129,17 +121,17 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R( static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
const INT16* pSrc[3], UINT32 srcStep, const INT16* pSrc[3], UINT32 srcStep,
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep, BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi) const prim_size_t* roi)
{ {
switch (DstFormat) switch (DstFormat)
{ {
case PIXEL_FORMAT_BGRA32: case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32: case PIXEL_FORMAT_BGRX32:
return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi); return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
default: default:
return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, DstFormat, dstStep, roi); return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
} }
} }

View File

@ -450,7 +450,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
return PRIMITIVES_SUCCESS; return PRIMITIVES_SUCCESS;
} }
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R( static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */ const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
UINT32 srcStep, /* bytes between rows in source data */ UINT32 srcStep, /* bytes between rows in source data */
@ -469,7 +468,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
} }
} }
#endif /* WITH_SSE2 */ #endif /* WITH_SSE2 */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/