Using faster transformation for RFX decoder.
This commit is contained in:
parent
0106405fff
commit
b04b8303e6
@ -86,6 +86,7 @@ static void rfx_decode_component(RFX_CONTEXT* context,
|
|||||||
BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
|
BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
|
||||||
int stride)
|
int stride)
|
||||||
{
|
{
|
||||||
|
BOOL rc = TRUE;
|
||||||
BYTE* pBuffer;
|
BYTE* pBuffer;
|
||||||
INT16* pSrcDst[3];
|
INT16* pSrcDst[3];
|
||||||
UINT32* y_quants, *cb_quants, *cr_quants;
|
UINT32* y_quants, *cb_quants, *cr_quants;
|
||||||
@ -109,14 +110,13 @@ BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer,
|
|||||||
rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen,
|
rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen,
|
||||||
pSrcDst[2]); /* CrData */
|
pSrcDst[2]); /* CrData */
|
||||||
PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb);
|
PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb);
|
||||||
prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
|
|
||||||
pSrcDst, 64 * sizeof(INT16), &roi_64x64);
|
if (prims->yCbCrToRGB_16s8u_P3AC4R(pSrcDst, 64 * sizeof(INT16),
|
||||||
|
rgb_buffer, stride, context->pixel_format, &roi_64x64) != PRIMITIVES_SUCCESS)
|
||||||
|
rc = FALSE;
|
||||||
|
|
||||||
PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb);
|
PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb);
|
||||||
PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
|
|
||||||
rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
|
|
||||||
context->pixel_format, rgb_buffer, stride);
|
|
||||||
PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);
|
|
||||||
PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);
|
PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);
|
||||||
BufferPool_Return(context->priv->BufferPool, pBuffer);
|
BufferPool_Return(context->priv->BufferPool, pBuffer);
|
||||||
return TRUE;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
/* ------------------------------------------------------------------------- */
|
/* ------------------------------------------------------------------------- */
|
||||||
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
|
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
|
||||||
const INT16* pSrc[3], UINT32 srcStep,
|
const INT16* pSrc[3], UINT32 srcStep,
|
||||||
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
|
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||||
const prim_size_t* roi)
|
const prim_size_t* roi)
|
||||||
{
|
{
|
||||||
UINT32 x, y;
|
UINT32 x, y;
|
||||||
@ -51,23 +51,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
|
|||||||
for (x = 0; x < roi->width; x++)
|
for (x = 0; x < roi->width; x++)
|
||||||
{
|
{
|
||||||
INT16 R, G, B;
|
INT16 R, G, B;
|
||||||
const INT64 divisor = 20;
|
const INT32 divisor = 16;
|
||||||
const INT64 Y = (pY[0] + 4096);
|
const INT32 Y = ((*pY++) + 4096) << divisor;
|
||||||
const INT64 Cb = (pCb[0]);
|
const INT32 Cb = (*pCb++);
|
||||||
const INT64 Cr = (pCr[0]);
|
const INT32 Cr = (*pCr++);
|
||||||
const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor));
|
const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
|
||||||
const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor));
|
const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
|
||||||
const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor));
|
const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
|
||||||
const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor));
|
const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
|
||||||
const INT64 Ytmp = (Y + 16) << divisor;
|
R = ((INT16)((CrR + Y) >> divisor) >> 5);
|
||||||
R = ((INT16)((CrR + Ytmp) >> divisor) >> 5);
|
G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
|
||||||
G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5);
|
B = ((INT16)((CbB + Y) >> divisor) >> 5);
|
||||||
B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
|
|
||||||
pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
|
pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
|
||||||
CLIP(B), 0xFF);
|
CLIP(B), 0xFF);
|
||||||
pY++;
|
|
||||||
pCb++;
|
|
||||||
pCr++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pY += srcPad;
|
pY += srcPad;
|
||||||
@ -81,7 +77,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(
|
|||||||
|
|
||||||
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
|
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
|
||||||
const INT16* pSrc[3], UINT32 srcStep,
|
const INT16* pSrc[3], UINT32 srcStep,
|
||||||
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
|
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||||
const prim_size_t* roi)
|
const prim_size_t* roi)
|
||||||
{
|
{
|
||||||
UINT32 x, y;
|
UINT32 x, y;
|
||||||
@ -99,23 +95,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
|
|||||||
for (x = 0; x < roi->width; x++)
|
for (x = 0; x < roi->width; x++)
|
||||||
{
|
{
|
||||||
INT16 R, G, B;
|
INT16 R, G, B;
|
||||||
const INT64 divisor = 20;
|
const INT32 divisor = 16;
|
||||||
const INT64 Y = (pY[0] + 4096);
|
const INT32 Y = ((*pY++) + 4096) << divisor;
|
||||||
const INT64 Cb = (pCb[0]);
|
const INT32 Cb = (*pCb++);
|
||||||
const INT64 Cr = (pCr[0]);
|
const INT32 Cr = (*pCr++);
|
||||||
const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor));
|
const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor));
|
||||||
const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor));
|
const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor));
|
||||||
const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor));
|
const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor));
|
||||||
const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor));
|
const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor));
|
||||||
const INT64 Ytmp = (Y + 16) << divisor;
|
R = ((INT16)((CrR + Y) >> divisor) >> 5);
|
||||||
R = ((INT16)((CrR + Ytmp) >> divisor) >> 5);
|
G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
|
||||||
G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5);
|
B = ((INT16)((CbB + Y) >> divisor) >> 5);
|
||||||
B = ((INT16)((CbB + Ytmp) >> divisor) >> 5);
|
|
||||||
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
|
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G),
|
||||||
CLIP(B), 0xFF);
|
CLIP(B), 0xFF);
|
||||||
pY++;
|
|
||||||
pCb++;
|
|
||||||
pCr++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pY += srcPad;
|
pY += srcPad;
|
||||||
@ -129,17 +121,17 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(
|
|||||||
|
|
||||||
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
|
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
|
||||||
const INT16* pSrc[3], UINT32 srcStep,
|
const INT16* pSrc[3], UINT32 srcStep,
|
||||||
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
|
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||||
const prim_size_t* roi)
|
const prim_size_t* roi)
|
||||||
{
|
{
|
||||||
switch (DstFormat)
|
switch (DstFormat)
|
||||||
{
|
{
|
||||||
case PIXEL_FORMAT_BGRA32:
|
case PIXEL_FORMAT_BGRA32:
|
||||||
case PIXEL_FORMAT_BGRX32:
|
case PIXEL_FORMAT_BGRX32:
|
||||||
return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi);
|
return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, DstFormat, dstStep, roi);
|
return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -450,7 +450,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
|
|||||||
|
|
||||||
return PRIMITIVES_SUCCESS;
|
return PRIMITIVES_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
||||||
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
|
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
|
||||||
UINT32 srcStep, /* bytes between rows in source data */
|
UINT32 srcStep, /* bytes between rows in source data */
|
||||||
@ -469,7 +468,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
|||||||
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* WITH_SSE2 */
|
#endif /* WITH_SSE2 */
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
Loading…
Reference in New Issue
Block a user