Enabled ASM primitives.

This commit is contained in:
Armin Novak 2017-01-23 15:38:02 +01:00
parent d3429109dd
commit adcd09cd46
5 changed files with 98 additions and 45 deletions

View File

@ -50,7 +50,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
BYTE* dptr = (BYTE*) pDst;
int sRowBump = srcStep - width * sizeof(UINT32);
int dRowBump = dstStep - width * sizeof(UINT32);
UINT32 h;
UINT32 h;
/* Shift left by "shift" and divide by two is the same as shift
* left by "shift-1".
*/
@ -70,28 +70,30 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
{
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
return generic->YCoCgToRGB_8u_AC4R(
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
}
for (h = 0; h < height; h++)
{
UINT32 w = width;
UINT32 w = width;
BOOL onStride;
/* Get to a 16-byte destination boundary. */
if ((ULONG_PTR) dptr & 0x0f)
{
pstatus_t status;
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
if (startup > width) startup = width;
status = generic->YCoCgToRGB_8u_AC4R(
sptr, srcStep, dptr, DstFormat, dstStep,
startup, 1, shift, withAlpha);
sptr, srcStep, dptr, DstFormat, dstStep,
startup, 1, shift, withAlpha);
if (status != PRIMITIVES_SUCCESS)
return status;
sptr += startup * sizeof(UINT32);
dptr += startup * sizeof(UINT32);
w -= startup;
@ -201,8 +203,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
{
pstatus_t status;
status = generic->YCoCgToRGB_8u_AC4R(
sptr, srcStep, dptr, DstFormat, dstStep,
w, 1, shift, withAlpha);
sptr, srcStep, dptr, DstFormat, dstStep,
w, 1, shift, withAlpha);
if (status != PRIMITIVES_SUCCESS)
return status;
@ -229,7 +232,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
BYTE* dptr = (BYTE*) pDst;
int sRowBump = srcStep - width * sizeof(UINT32);
int dRowBump = dstStep - width * sizeof(UINT32);
UINT32 h;
UINT32 h;
/* Shift left by "shift" and divide by two is the same as shift
* left by "shift-1".
*/
@ -249,8 +252,8 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
{
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
return generic->YCoCgToRGB_8u_AC4R(
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
}
for (h = 0; h < height; h++)
@ -262,13 +265,14 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
if ((ULONG_PTR) dptr & 0x0f)
{
pstatus_t status;
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
if (startup > width) startup = width;
status = generic->YCoCgToRGB_8u_AC4R(
sptr, srcStep, dptr, DstFormat,
dstStep, startup, 1, shift, withAlpha);
sptr, srcStep, dptr, DstFormat,
dstStep, startup, 1, shift, withAlpha);
if (status != PRIMITIVES_SUCCESS)
return status;
@ -385,8 +389,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
{
pstatus_t status;
status = generic->YCoCgToRGB_8u_AC4R(
sptr, srcStep, dptr, DstFormat, dstStep,
w, 1, shift, withAlpha);
sptr, srcStep, dptr, DstFormat, dstStep,
w, 1, shift, withAlpha);
if (status != PRIMITIVES_SUCCESS)
return status;
@ -411,24 +416,23 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(
UINT8 shift,
BOOL withAlpha)
{
// TODO: Need to implement proper color conversion!!!
return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
dstStep, width, height, shift, withAlpha);
switch(DstFormat)
switch (DstFormat)
{
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return ssse3_YCoCgRToRGB_8u_AC4R_invert(
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
case PIXEL_FORMAT_RGBX32:
case PIXEL_FORMAT_RGBA32:
return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
default:
return -1;
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
case PIXEL_FORMAT_RGBX32:
case PIXEL_FORMAT_RGBA32:
return ssse3_YCoCgRToRGB_8u_AC4R_invert(
pSrc, srcStep, pDst, DstFormat, dstStep,
width, height, shift, withAlpha);
default:
return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
dstStep, width, height, shift, withAlpha);
}
}
#endif /* WITH_SSE2 */

View File

@ -549,8 +549,8 @@ static INLINE BYTE RGB2U(INT32 R, INT32 G, INT32 B)
/* Computes the 8-bit V value for an RGB triple: a weighted sum of R, G and B
 * is right-shifted, offset by 128 and passed through CLIP().
 * NOTE(review): this listing shows two `v`/`v8` pairs back to back (it reads
 * like the before/after lines of a change); only one pair can be compiled. */
static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B)
{
/* Variant with weights 128, -116, -12 and a right shift by 8. */
const INT32 v = (128L * (R) - 116L * (G) - 12L * (B));
const INT32 v8 = (v >> 8L) + 128L;
/* Variant with every weight divided by 4 (32, -29, -3) and a right shift
 * by 6, i.e. weights and divisor are both scaled down by the same factor. */
const INT32 v = (128L / 4 * (R) - 116L / 4 * (G) - 12L / 4 * (B));
const INT32 v8 = (v >> 6L) + 128L;
return CLIP(v8);
}

View File

@ -25,7 +25,7 @@ static primitives_t* generic = NULL;
#include <emmintrin.h>
#include <tmmintrin.h>
static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(
const BYTE** pSrc, const UINT32* srcStep,
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
const prim_size_t* roi)
@ -35,9 +35,6 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
UINT32 i, nWidth, nHeight, VaddDst, VaddY, VaddU, VaddV;
__m128i r0, r1, r2, r3, r4, r5, r6, r7;
__m128i* buffer;
// TODO: Need to implement proper color conversion!!!!!
return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
DstFormat, roi);
/* last_line: if the last (U,V doubled) line should be skipped, set to 10B
* last_column: if it's the last column in a line, set to 10B (for handling lines whose length is not a multiple of four) */
buffer = _aligned_malloc(4 * 16, 16);
@ -324,6 +321,21 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
_aligned_free(buffer);
return PRIMITIVES_SUCCESS;
}
/* Format dispatcher for the YUV420 -> RGB conversion.
 *
 * BGRX32 and BGRA32 destinations are routed to the SSSE3 routine
 * ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(); every other destination format is
 * forwarded unchanged to the generic implementation.
 *
 * Returns whatever the selected implementation returns.
 */
static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE** pSrc, const UINT32* srcStep,
        BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
        const prim_size_t* roi)
{
	const BOOL useSsse3 = (DstFormat == PIXEL_FORMAT_BGRX32) ||
	                      (DstFormat == PIXEL_FORMAT_BGRA32);

	if (!useSsse3)
	{
		/* No optimized path for this layout, use the generic code. */
		return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
		                                      DstFormat, roi);
	}

	return ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep,
	                                        DstFormat, roi);
}
#endif
void primitives_init_YUV_opt(primitives_t* prims)

View File

@ -134,6 +134,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
{
switch (DstFormat)
{
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_BGRX32:
return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi);

View File

@ -356,7 +356,7 @@ static pstatus_t sse2_RGBToYCbCr_16s16s_P3P3(
#define XMM_ALL_ONES \
_mm_set1_epi32(0xFFFFFFFFU)
pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
UINT32 srcStep, /* bytes between rows in source data */
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
@ -388,9 +388,6 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
dstStep, DstFormat, roi);
}
// TODO: Need to update SSE code to allow color conversion!!!
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst,
dstStep, DstFormat, roi);
out = (BYTE*) pDst;
srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
dstbump = (dstStep - (roi->width * sizeof(UINT32)));
@ -453,11 +450,31 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
return PRIMITIVES_SUCCESS;
}
/* Format dispatcher for the planar 16-bit RGB -> interleaved 32-bit conversion.
 *
 * BGRA32 and BGRX32 destinations are routed to the SSE2 routine
 * sse2_RGBToRGB_16s8u_P3AC4R_BGRX(); every other destination format is
 * forwarded unchanged to the generic implementation.
 *
 * Returns whatever the selected implementation returns.
 */
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
    const INT16* const pSrc[3],	/* 16-bit R,G, and B arrays */
    UINT32 srcStep,			/* bytes between rows in source data */
    BYTE* pDst,				/* destination pixel buffer */
    UINT32 dstStep,			/* bytes between rows in dest data */
    UINT32 DstFormat,
    const prim_size_t* roi)
{
	if ((DstFormat == PIXEL_FORMAT_BGRA32) || (DstFormat == PIXEL_FORMAT_BGRX32))
	{
		/* Layouts covered by the SSE2 code path. */
		return sse2_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep,
		                                       DstFormat, roi);
	}

	/* Anything else is handled by the generic code. */
	return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
	                                      DstFormat, roi);
}
#endif /* WITH_SSE2 */
/*---------------------------------------------------------------------------*/
#ifdef WITH_NEON
static pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
static pstatus_t neon_yCbCrToRGB_16s16s_P3P3_BGRX(
const INT16* pSrc[3],
int srcStep,
INT16* pDst[3],
@ -545,6 +562,25 @@ static pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
return PRIMITIVES_SUCCESS;
}
/* Format dispatcher for the planar 16-bit RGB -> interleaved 32-bit conversion
 * (NEON build).
 *
 * BGRA32 and BGRX32 destinations are routed to
 * neon_RGBToRGB_16s8u_P3AC4R_BGRX(); every other destination format is
 * forwarded unchanged to the generic implementation.
 *
 * NOTE(review): neon_RGBToRGB_16s8u_P3AC4R_BGRX is not defined in the visible
 * part of this file - confirm a function with exactly this name exists,
 * otherwise the NEON build will not compile.
 *
 * Returns whatever the selected implementation returns.
 */
static pstatus_t neon_RGBToRGB_16s8u_P3AC4R(
    const INT16* const pSrc[3],	/* 16-bit R,G, and B arrays */
    UINT32 srcStep,			/* bytes between rows in source data */
    BYTE* pDst,				/* destination pixel buffer */
    UINT32 dstStep,			/* bytes between rows in dest data */
    UINT32 DstFormat,
    const prim_size_t* roi)
{
	if ((DstFormat == PIXEL_FORMAT_BGRA32) || (DstFormat == PIXEL_FORMAT_BGRX32))
	{
		/* Layouts covered by the NEON code path. */
		return neon_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep,
		                                       DstFormat, roi);
	}

	/* Anything else is handled by the generic code. */
	return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
	                                      DstFormat, roi);
}
#endif /* WITH_NEON */