Enabled ASM primitives.
This commit is contained in:
parent
d3429109dd
commit
adcd09cd46
@ -50,7 +50,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
||||
BYTE* dptr = (BYTE*) pDst;
|
||||
int sRowBump = srcStep - width * sizeof(UINT32);
|
||||
int dRowBump = dstStep - width * sizeof(UINT32);
|
||||
UINT32 h;
|
||||
UINT32 h;
|
||||
/* Shift left by "shift" and divide by two is the same as shift
|
||||
* left by "shift-1".
|
||||
*/
|
||||
@ -70,28 +70,30 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
||||
{
|
||||
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
|
||||
return generic->YCoCgToRGB_8u_AC4R(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
}
|
||||
|
||||
for (h = 0; h < height; h++)
|
||||
{
|
||||
UINT32 w = width;
|
||||
UINT32 w = width;
|
||||
BOOL onStride;
|
||||
|
||||
/* Get to a 16-byte destination boundary. */
|
||||
if ((ULONG_PTR) dptr & 0x0f)
|
||||
{
|
||||
pstatus_t status;
|
||||
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
|
||||
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
|
||||
|
||||
if (startup > width) startup = width;
|
||||
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
startup, 1, shift, withAlpha);
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
startup, 1, shift, withAlpha);
|
||||
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
sptr += startup * sizeof(UINT32);
|
||||
dptr += startup * sizeof(UINT32);
|
||||
w -= startup;
|
||||
@ -201,8 +203,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
||||
{
|
||||
pstatus_t status;
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
w, 1, shift, withAlpha);
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
w, 1, shift, withAlpha);
|
||||
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
@ -229,7 +232,7 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
BYTE* dptr = (BYTE*) pDst;
|
||||
int sRowBump = srcStep - width * sizeof(UINT32);
|
||||
int dRowBump = dstStep - width * sizeof(UINT32);
|
||||
UINT32 h;
|
||||
UINT32 h;
|
||||
/* Shift left by "shift" and divide by two is the same as shift
|
||||
* left by "shift-1".
|
||||
*/
|
||||
@ -249,8 +252,8 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
{
|
||||
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
|
||||
return generic->YCoCgToRGB_8u_AC4R(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
}
|
||||
|
||||
for (h = 0; h < height; h++)
|
||||
@ -262,13 +265,14 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
if ((ULONG_PTR) dptr & 0x0f)
|
||||
{
|
||||
pstatus_t status;
|
||||
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
|
||||
UINT32 startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
|
||||
|
||||
if (startup > width) startup = width;
|
||||
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat,
|
||||
dstStep, startup, 1, shift, withAlpha);
|
||||
sptr, srcStep, dptr, DstFormat,
|
||||
dstStep, startup, 1, shift, withAlpha);
|
||||
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
@ -385,8 +389,9 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
{
|
||||
pstatus_t status;
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
w, 1, shift, withAlpha);
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
w, 1, shift, withAlpha);
|
||||
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
@ -411,24 +416,23 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(
|
||||
UINT8 shift,
|
||||
BOOL withAlpha)
|
||||
{
|
||||
// TODO: Need to implement proper color conversion!!!
|
||||
return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
|
||||
dstStep, width, height, shift, withAlpha);
|
||||
|
||||
switch(DstFormat)
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
case PIXEL_FORMAT_RGBX32:
|
||||
case PIXEL_FORMAT_RGBA32:
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
default:
|
||||
return -1;
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
|
||||
case PIXEL_FORMAT_RGBX32:
|
||||
case PIXEL_FORMAT_RGBA32:
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
|
||||
default:
|
||||
return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
|
||||
dstStep, width, height, shift, withAlpha);
|
||||
}
|
||||
}
|
||||
#endif /* WITH_SSE2 */
|
||||
|
@ -549,8 +549,8 @@ static INLINE BYTE RGB2U(INT32 R, INT32 G, INT32 B)
|
||||
|
||||
/**
 * Convert an RGB triple to a single 8-bit value using fixed-point integer
 * arithmetic (the "V" component, going by the function name).
 *
 * The weights are the coefficients 128, -116 and -12, each pre-divided by 4
 * (i.e. 32, -29 and -3). Because the weighted sum is therefore scaled by
 * 1/4 compared to using the undivided coefficients, the normalising right
 * shift is 6 instead of 8; with an arithmetic right shift both forms yield
 * the same result.
 *
 * @param R red input sample
 * @param G green input sample
 * @param B blue input sample
 * @return the shifted sum plus 128, passed through CLIP()
 *         (CLIP is defined outside this excerpt; presumably it clamps to
 *         the BYTE range - confirm against its definition).
 */
static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B)
{
	/* Weighted sum; each coefficient is declared exactly once per scope. */
	const INT32 v = (128L / 4 * (R) - 116L / 4 * (G) - 12L / 4 * (B));
	/* Drop the fixed-point fraction bits, then add the 128 offset. */
	const INT32 v8 = (v >> 6L) + 128L;
	return CLIP(v8);
}
|
||||
|
||||
|
@ -25,7 +25,7 @@ static primitives_t* generic = NULL;
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
|
||||
static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
|
||||
static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(
|
||||
const BYTE** pSrc, const UINT32* srcStep,
|
||||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* roi)
|
||||
@ -35,9 +35,6 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
|
||||
UINT32 i, nWidth, nHeight, VaddDst, VaddY, VaddU, VaddV;
|
||||
__m128i r0, r1, r2, r3, r4, r5, r6, r7;
|
||||
__m128i* buffer;
|
||||
// TODO: Need to implement proper color conversion!!!!!
|
||||
return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
|
||||
DstFormat, roi);
|
||||
/* last_line: if the last (U,V doubled) line should be skipped, set to 10B
|
||||
* last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four) */
|
||||
buffer = _aligned_malloc(4 * 16, 16);
|
||||
@ -324,6 +321,21 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
|
||||
_aligned_free(buffer);
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(const BYTE** pSrc, const UINT32* srcStep,
|
||||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* roi)
|
||||
{
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
return ssse3_YUV420ToRGB_8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||
|
||||
default:
|
||||
return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void primitives_init_YUV_opt(primitives_t* prims)
|
||||
|
@ -134,6 +134,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
|
||||
{
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, DstFormat, dstStep, roi);
|
||||
|
||||
|
@ -356,7 +356,7 @@ static pstatus_t sse2_RGBToYCbCr_16s16s_P3P3(
|
||||
#define XMM_ALL_ONES \
|
||||
_mm_set1_epi32(0xFFFFFFFFU)
|
||||
|
||||
pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
||||
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R_BGRX(
|
||||
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
|
||||
UINT32 srcStep, /* bytes between rows in source data */
|
||||
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
|
||||
@ -388,9 +388,6 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
||||
dstStep, DstFormat, roi);
|
||||
}
|
||||
|
||||
// TODO: Need to update SSE code to allow color conversion!!!
|
||||
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst,
|
||||
dstStep, DstFormat, roi);
|
||||
out = (BYTE*) pDst;
|
||||
srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
|
||||
dstbump = (dstStep - (roi->width * sizeof(UINT32)));
|
||||
@ -453,11 +450,31 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
static pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
||||
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
|
||||
UINT32 srcStep, /* bytes between rows in source data */
|
||||
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
|
||||
UINT32 dstStep, /* bytes between rows in dest data */
|
||||
UINT32 DstFormat,
|
||||
const prim_size_t* roi)
|
||||
{
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
return sse2_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||
|
||||
default:
|
||||
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* WITH_SSE2 */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifdef WITH_NEON
|
||||
static pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
|
||||
static pstatus_t neon_yCbCrToRGB_16s16s_P3P3_BGRX(
|
||||
const INT16* pSrc[3],
|
||||
int srcStep,
|
||||
INT16* pDst[3],
|
||||
@ -545,6 +562,25 @@ static pstatus_t neon_yCbCrToRGB_16s16s_P3P3(
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
static pstatus_t neon_RGBToRGB_16s8u_P3AC4R(
|
||||
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
|
||||
UINT32 srcStep, /* bytes between rows in source data */
|
||||
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
|
||||
UINT32 dstStep, /* bytes between rows in dest data */
|
||||
UINT32 DstFormat,
|
||||
const prim_size_t* roi)
|
||||
{
|
||||
switch (DstFormat)
|
||||
{
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
return neon_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||
|
||||
default:
|
||||
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
|
||||
}
|
||||
}
|
||||
#endif /* WITH_NEON */
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user