From adcd09cd46aebbc6876a248aca1ba27896efcb66 Mon Sep 17 00:00:00 2001 From: Armin Novak Date: Mon, 23 Jan 2017 15:38:02 +0100 Subject: [PATCH] Enabled ASM primitives. --- libfreerdp/primitives/prim_YCoCg_opt.c | 72 +++++++++++++------------ libfreerdp/primitives/prim_YUV.c | 4 +- libfreerdp/primitives/prim_YUV_opt.c | 20 +++++-- libfreerdp/primitives/prim_colors.c | 1 + libfreerdp/primitives/prim_colors_opt.c | 46 ++++++++++++++-- 5 files changed, 98 insertions(+), 45 deletions(-) diff --git a/libfreerdp/primitives/prim_YCoCg_opt.c b/libfreerdp/primitives/prim_YCoCg_opt.c index b9fde7257..f8bee4de4 100644 --- a/libfreerdp/primitives/prim_YCoCg_opt.c +++ b/libfreerdp/primitives/prim_YCoCg_opt.c @@ -50,7 +50,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert( BYTE* dptr = (BYTE*) pDst; int sRowBump = srcStep - width * sizeof(UINT32); int dRowBump = dstStep - width * sizeof(UINT32); - UINT32 h; + UINT32 h; /* Shift left by "shift" and divide by two is the same as shift * left by "shift-1". */ @@ -70,28 +70,30 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert( { /* Too small, or we'll never hit a 16-byte boundary. Punt. */ return generic->YCoCgToRGB_8u_AC4R( - pSrc, srcStep, pDst, DstFormat, dstStep, - width, height, shift, withAlpha); + pSrc, srcStep, pDst, DstFormat, dstStep, + width, height, shift, withAlpha); } for (h = 0; h < height; h++) { - UINT32 w = width; + UINT32 w = width; BOOL onStride; /* Get to a 16-byte destination boundary. */ if ((ULONG_PTR) dptr & 0x0f) { pstatus_t status; - UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4; + UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4; if (startup > width) startup = width; status = generic->YCoCgToRGB_8u_AC4R( - sptr, srcStep, dptr, DstFormat, dstStep, - startup, 1, shift, withAlpha); + sptr, srcStep, dptr, DstFormat, dstStep, + startup, 1, shift, withAlpha); + if (status != PRIMITIVES_SUCCESS) return status; + sptr += startup * sizeof(UINT32); dptr += startup * sizeof(UINT32); w -= startup; @@ -201,8 +203,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert( { pstatus_t status; status = generic->YCoCgToRGB_8u_AC4R( - sptr, srcStep, dptr, DstFormat, dstStep, - w, 1, shift, withAlpha); + sptr, srcStep, dptr, DstFormat, dstStep, + w, 1, shift, withAlpha); + if (status != PRIMITIVES_SUCCESS) return status; @@ -229,7 +232,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert( BYTE* dptr = (BYTE*) pDst; int sRowBump = srcStep - width * sizeof(UINT32); int dRowBump = dstStep - width * sizeof(UINT32); - UINT32 h; + UINT32 h; /* Shift left by "shift" and divide by two is the same as shift * left by "shift-1". */ @@ -249,8 +252,8 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert( { /* Too small, or we'll never hit a 16-byte boundary. Punt. */ return generic->YCoCgToRGB_8u_AC4R( - pSrc, srcStep, pDst, DstFormat, dstStep, - width, height, shift, withAlpha); + pSrc, srcStep, pDst, DstFormat, dstStep, + width, height, shift, withAlpha); } for (h = 0; h < height; h++) @@ -262,13 +265,14 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert( if ((ULONG_PTR) dptr & 0x0f) { pstatus_t status; - UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4; + UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4; if (startup > width) startup = width; status = generic->YCoCgToRGB_8u_AC4R( - sptr, srcStep, dptr, DstFormat, - dstStep, startup, 1, shift, withAlpha); + sptr, srcStep, dptr, DstFormat, + dstStep, startup, 1, shift, withAlpha); + if (status != PRIMITIVES_SUCCESS) return status; @@ -385,8 +389,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert( { pstatus_t status; status = generic->YCoCgToRGB_8u_AC4R( - sptr, srcStep, dptr, DstFormat, dstStep, - w, 1, shift, withAlpha); + sptr, srcStep, dptr, DstFormat, dstStep, + w, 1, shift, withAlpha); + if (status != PRIMITIVES_SUCCESS) return status; @@ -411,24 +416,23 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R( UINT8 shift, BOOL withAlpha) { - // TODO: Need to implement proper color conversion!!! - return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, - dstStep, width, height, shift, withAlpha); - - switch(DstFormat) + switch (DstFormat) { - case PIXEL_FORMAT_BGRX32: - case PIXEL_FORMAT_BGRA32: - return ssse3_YCoCgRToRGB_8u_AC4R_invert( - pSrc, srcStep, pDst, DstFormat, dstStep, - width, height, shift, withAlpha); - case PIXEL_FORMAT_RGBX32: - case PIXEL_FORMAT_RGBA32: - return ssse3_YCoCgRToRGB_8u_AC4R_no_invert( - pSrc, srcStep, pDst, DstFormat, dstStep, - width, height, shift, withAlpha); - default: - return -1; + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return ssse3_YCoCgRToRGB_8u_AC4R_no_invert( + pSrc, srcStep, pDst, DstFormat, dstStep, + width, height, shift, withAlpha); + + case PIXEL_FORMAT_RGBX32: + case PIXEL_FORMAT_RGBA32: + return ssse3_YCoCgRToRGB_8u_AC4R_invert( + pSrc, srcStep, pDst, DstFormat, dstStep, + width, height, shift, withAlpha); + + default: + return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, + dstStep, width, height, shift, withAlpha); } } #endif /* WITH_SSE2 */ diff --git a/libfreerdp/primitives/prim_YUV.c b/libfreerdp/primitives/prim_YUV.c index 554953304..617c59764 100644 --- a/libfreerdp/primitives/prim_YUV.c +++ b/libfreerdp/primitives/prim_YUV.c @@ -549,8 +549,8 @@ static INLINE BYTE RGB2U(INT32 R, INT32 G, INT32 B) static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B) { - const INT32 v = (128L * (R) - 116L * (G) - 12L * (B)); - const INT32 v8 = (v >> 8L) + 128L; + const INT32 v = (128L / 4 * (R) - 116L / 4 * (G) - 12L / 4 * (B)); + const INT32 v8 = (v >> 6L) + 128L; return CLIP(v8); } diff --git a/libfreerdp/primitives/prim_YUV_opt.c b/libfreerdp/primitives/prim_YUV_opt.c index cc902f14a..e0dda085d 100644 --- a/libfreerdp/primitives/prim_YUV_opt.c +++ b/libfreerdp/primitives/prim_YUV_opt.c @@ -25,7 +25,7 @@ static primitives_t* generic = NULL; #include #include -static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R( +static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R_BGRX( const BYTE** pSrc, const UINT32* srcStep, BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, const prim_size_t* roi) @@ -35,9 +35,6 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R( UINT32 i, nWidth, nHeight, VaddDst, VaddY, VaddU, VaddV; __m128i r0, r1, r2, r3, r4, r5, r6, r7; __m128i* buffer; - // TODO: Need to implement proper color conversion!!!!! - return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, - DstFormat, roi); /* last_line: if the last (U,V doubled) line should be skipped, set to 10B * last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four) */ buffer = _aligned_malloc(4 * 16, 16); @@ -324,6 +321,21 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R( _aligned_free(buffer); return PRIMITIVES_SUCCESS; } +static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE** pSrc, const UINT32* srcStep, + BYTE* pDst, UINT32 dstStep, UINT32 DstFormat, + const prim_size_t* roi) +{ + switch (DstFormat) + { + case PIXEL_FORMAT_BGRX32: + case PIXEL_FORMAT_BGRA32: + return ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + + default: + return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + } +} + #endif void primitives_init_YUV_opt(primitives_t* prims) diff --git a/libfreerdp/primitives/prim_colors.c b/libfreerdp/primitives/prim_colors.c index 2f71895d2..ea238fb8f 100644 --- a/libfreerdp/primitives/prim_colors.c +++ b/libfreerdp/primitives/prim_colors.c @@ -134,6 +134,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R( { switch (DstFormat) { + case PIXEL_FORMAT_BGRA32: case PIXEL_FORMAT_BGRX32: return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi); diff --git a/libfreerdp/primitives/prim_colors_opt.c b/libfreerdp/primitives/prim_colors_opt.c index 68e789780..efb516196 100644 --- a/libfreerdp/primitives/prim_colors_opt.c +++ b/libfreerdp/primitives/prim_colors_opt.c @@ -356,7 +356,7 @@ static pstatus_t sse2_RGBToYCbCr_16s16s_P3P3( #define XMM_ALL_ONES \ _mm_set1_epi32(0xFFFFFFFFU) -pstatus_t sse2_RGBToRGB_16s8u_P3AC4R( +static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX( const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */ UINT32 srcStep, /* bytes between rows in source data */ BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */ @@ -388,9 +388,6 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R( dstStep, DstFormat, roi); } - // TODO: Need to update SSE code to allow color conversion!!! - return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, - dstStep, DstFormat, roi); out = (BYTE*) pDst; srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16); dstbump = (dstStep - (roi->width * sizeof(UINT32))); @@ -453,11 +450,31 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R( return PRIMITIVES_SUCCESS; } + +static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R( + const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */ + UINT32 srcStep, /* bytes between rows in source data */ + BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */ + UINT32 dstStep, /* bytes between rows in dest data */ + UINT32 DstFormat, + const prim_size_t* roi) +{ + switch (DstFormat) + { + case PIXEL_FORMAT_BGRA32: + case PIXEL_FORMAT_BGRX32: + return sse2_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + + default: + return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + } +} + #endif /* WITH_SSE2 */ /*---------------------------------------------------------------------------*/ #ifdef WITH_NEON -static pstatus_t neon_yCbCrToRGB_16s16s_P3P3( +static pstatus_t neon_yCbCrToRGB_16s16s_P3P3_BGRX( const INT16* pSrc[3], int srcStep, INT16* pDst[3], @@ -545,6 +562,25 @@ static pstatus_t neon_yCbCrToRGB_16s16s_P3P3( return PRIMITIVES_SUCCESS; } + +static pstatus_t neon_RGBToRGB_16s8u_P3AC4R( + const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */ + UINT32 srcStep, /* bytes between rows in source data */ + BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */ + UINT32 dstStep, /* bytes between rows in dest data */ + UINT32 DstFormat, + const prim_size_t* roi) +{ + switch (DstFormat) + { + case PIXEL_FORMAT_BGRA32: + case PIXEL_FORMAT_BGRX32: + return neon_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + + default: + return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); + } +} #endif /* WITH_NEON */