From b04b8303e68c908cb3118ed596fdaac1cdd668cc Mon Sep 17 00:00:00 2001 From: Armin Novak Date: Tue, 24 Jan 2017 11:37:24 +0100 Subject: [PATCH] Using faster transformation for RFX decoder. --- libfreerdp/codec/rfx_decode.c | 14 +++--- libfreerdp/primitives/prim_colors.c | 62 +++++++++++-------------- libfreerdp/primitives/prim_colors_opt.c | 2 - 3 files changed, 34 insertions(+), 44 deletions(-) diff --git a/libfreerdp/codec/rfx_decode.c b/libfreerdp/codec/rfx_decode.c index 05a1852d1..087af3e86 100644 --- a/libfreerdp/codec/rfx_decode.c +++ b/libfreerdp/codec/rfx_decode.c @@ -86,6 +86,7 @@ static void rfx_decode_component(RFX_CONTEXT* context, BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer, int stride) { + BOOL rc = TRUE; BYTE* pBuffer; INT16* pSrcDst[3]; UINT32* y_quants, *cb_quants, *cr_quants; @@ -109,14 +110,13 @@ BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer, rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen, pSrcDst[2]); /* CrData */ PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb); - prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16), - pSrcDst, 64 * sizeof(INT16), &roi_64x64); + + if (prims->yCbCrToRGB_16s8u_P3AC4R(pSrcDst, 64 * sizeof(INT16), + rgb_buffer, stride, context->pixel_format, &roi_64x64) != PRIMITIVES_SUCCESS) + rc = FALSE; + PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb); - PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb); - rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2], - context->pixel_format, rgb_buffer, stride); - PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb); PROFILER_EXIT(context->priv->prof_rfx_decode_rgb); BufferPool_Return(context->priv->BufferPool, pBuffer); - return TRUE; + return rc; } diff --git a/libfreerdp/primitives/prim_colors.c b/libfreerdp/primitives/prim_colors.c index ea238fb8f..545e93581 100644 --- a/libfreerdp/primitives/prim_colors.c +++ b/libfreerdp/primitives/prim_colors.c @@ -34,7 +34,7 @@ /* ------------------------------------------------------------------------- */ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX( const INT16* pSrc[3], UINT32 srcStep, - BYTE* pDst, UINT32 DstFormat, UINT32 dstStep, + BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* roi) { UINT32 x, y; @@ -51,23 +51,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX( for (x = 0; x < roi->width; x++) { INT16 R, G, B; - const INT64 divisor = 20; - const INT64 Y = (pY[0] + 4096); - const INT64 Cb = (pCb[0]); - const INT64 Cr = (pCr[0]); - const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor)); - const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor)); - const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor)); - const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor)); - const INT64 Ytmp = (Y + 16) << divisor; - R = ((INT16)((CrR + Ytmp) >> divisor) >> 5); - G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5); - B = ((INT16)((CbB + Ytmp) >> divisor) >> 5); + const INT32 divisor = 16; + const INT32 Y = ((*pY++) + 4096) << divisor; + const INT32 Cb = (*pCb++); + const INT32 Cr = (*pCr++); + const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor)); + const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor)); + const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor)); + const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor)); + R = ((INT16)((CrR + Y) >> divisor) >> 5); + G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5); + B = ((INT16)((CbB + Y) >> divisor) >> 5); pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G), CLIP(B), 0xFF); - pY++; - pCb++; - pCr++; } pY += srcPad; @@ -81,7 +77,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX( static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general( const INT16* pSrc[3], UINT32 srcStep, - BYTE* pDst, UINT32 DstFormat, UINT32 dstStep, + BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* roi) { UINT32 x, y; @@ -99,23 +95,19 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general( for (x = 0; x < roi->width; x++) { INT16 R, G, B; - const INT64 divisor = 20; - const INT64 Y = (pY[0] + 4096); - const INT64 Cb = (pCb[0]); - const INT64 Cr = (pCr[0]); - const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor)); - const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor)); - const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor)); - const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor)); - const INT64 Ytmp = (Y + 16) << divisor; - R = ((INT16)((CrR + Ytmp) >> divisor) >> 5); - G = ((INT16)((Ytmp - CbG - CrG) >> divisor) >> 5); - B = ((INT16)((CbB + Ytmp) >> divisor) >> 5); + const INT32 divisor = 16; + const INT32 Y = ((*pY++) + 4096) << divisor; + const INT32 Cb = (*pCb++); + const INT32 Cr = (*pCr++); + const INT32 CrR = Cr * (INT32)(1.402525f * (1 << divisor)); + const INT32 CrG = Cr * (INT32)(0.714401f * (1 << divisor)); + const INT32 CbG = Cb * (INT32)(0.343730f * (1 << divisor)); + const INT32 CbB = Cb * (INT32)(1.769905f * (1 << divisor)); + R = ((INT16)((CrR + Y) >> divisor) >> 5); + G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5); + B = ((INT16)((CbB + Y) >> divisor) >> 5); pRGB = (*writePixel)(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G), CLIP(B), 0xFF); - pY++; - pCb++; - pCr++; } pY += srcPad; @@ -129,17 +121,17 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general( static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R( const INT16* pSrc[3], UINT32 srcStep, - BYTE* pDst, UINT32 DstFormat, UINT32 dstStep, + BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* roi) { switch (DstFormat) { case PIXEL_FORMAT_BGRA32: case PIXEL_FORMAT_BGRX32: - return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi); + return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi); default: - return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, DstFormat, dstStep, roi); + return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat, roi); } } diff --git a/libfreerdp/primitives/prim_colors_opt.c b/libfreerdp/primitives/prim_colors_opt.c index 4f79eddc4..de9baa50c 100644 --- a/libfreerdp/primitives/prim_colors_opt.c +++ b/libfreerdp/primitives/prim_colors_opt.c @@ -450,7 +450,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX( return PRIMITIVES_SUCCESS; } - static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R( const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */ UINT32 srcStep, /* bytes between rows in source data */ @@ -469,7 +468,6 @@ static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R( return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); } } - #endif /* WITH_SSE2 */ /*---------------------------------------------------------------------------*/