mirror of https://github.com/FreeRDP/FreeRDP
Updated primitives API and tests.
This commit is contained in:
parent
e860fde4bc
commit
99c418766c
|
@ -81,62 +81,62 @@ typedef pstatus_t (*__copy_8u_AC4r_t)(
|
|||
typedef pstatus_t (*__set_8u_t)(
|
||||
BYTE val,
|
||||
BYTE* pDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__set_32s_t)(
|
||||
INT32 val,
|
||||
INT32* pDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__set_32u_t)(
|
||||
UINT32 val,
|
||||
UINT32* pDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__zero_t)(
|
||||
void* pDst,
|
||||
size_t bytes);
|
||||
typedef pstatus_t (*__alphaComp_argb_t)(
|
||||
const BYTE* pSrc1, INT32 src1Step,
|
||||
const BYTE* pSrc2, INT32 src2Step,
|
||||
BYTE* pDst, INT32 dstStep,
|
||||
INT32 width, INT32 height);
|
||||
const BYTE* pSrc1, UINT32 src1Step,
|
||||
const BYTE* pSrc2, UINT32 src2Step,
|
||||
BYTE* pDst, UINT32 dstStep,
|
||||
UINT32 width, UINT32 height);
|
||||
typedef pstatus_t (*__add_16s_t)(
|
||||
const INT16* pSrc1,
|
||||
const INT16* pSrc2,
|
||||
INT16* pDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__lShiftC_16s_t)(
|
||||
const INT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
INT16* pSrcDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__lShiftC_16u_t)(
|
||||
const UINT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
UINT16* pSrcDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__rShiftC_16s_t)(
|
||||
const INT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
INT16* pSrcDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__rShiftC_16u_t)(
|
||||
const UINT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
UINT16* pSrcDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__shiftC_16s_t)(
|
||||
const INT16* pSrc,
|
||||
INT32 val,
|
||||
INT16* pSrcDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__shiftC_16u_t)(
|
||||
const UINT16* pSrc,
|
||||
INT32 val,
|
||||
UINT16* pSrcDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__sign_16s_t)(
|
||||
const INT16* pSrc,
|
||||
INT16* pDst,
|
||||
INT32 len);
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__yCbCrToRGB_16s8u_P3AC4R_t)(
|
||||
const INT16* pSrc[3], INT32 srcStep,
|
||||
BYTE* pDst, INT32 dstStep, UINT32 DstFormat,
|
||||
|
@ -154,8 +154,8 @@ typedef pstatus_t (*__RGBToYCbCr_16s16s_P3P3_t)(
|
|||
INT16* pDst[3], INT32 dstStep,
|
||||
const prim_size_t* roi);
|
||||
typedef pstatus_t (*__RGBToRGB_16s8u_P3AC4R_t)(
|
||||
const INT16* pSrc[3], INT32 srcStep,
|
||||
BYTE* pDst, INT32 dstStep, UINT32 DstFormat,
|
||||
const INT16* const pSrc[3], UINT32 srcStep,
|
||||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* roi);
|
||||
typedef pstatus_t (*__YCoCgToRGB_8u_AC4R_t)(
|
||||
const BYTE* pSrc, INT32 srcStep,
|
||||
|
@ -177,11 +177,11 @@ typedef pstatus_t (*__YUV444ToRGB_8u_P3AC4R_t)(
|
|||
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
|
||||
const prim_size_t* roi);
|
||||
typedef pstatus_t (*__RGBToYUV420_8u_P3AC4R_t)(
|
||||
const BYTE* pSrc, UINT32 srcStep,
|
||||
const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
|
||||
BYTE* pDst[3], UINT32 dstStep[3],
|
||||
const prim_size_t* roi);
|
||||
typedef pstatus_t (*__RGBToYUV444_8u_P3AC4R_t)(
|
||||
const BYTE* pSrc, UINT32 srcStep,
|
||||
const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
|
||||
BYTE* pDst[3], UINT32 dstStep[3],
|
||||
const prim_size_t* roi);
|
||||
typedef pstatus_t (*__YUV420CombineToYUV444_t)(
|
||||
|
|
|
@ -1567,7 +1567,7 @@ INT32 avc420_compress(H264_CONTEXT* h264, BYTE* pSrcData, DWORD SrcFormat,
|
|||
roi.width = nSrcWidth;
|
||||
roi.height = nSrcHeight;
|
||||
|
||||
prims->RGBToYUV420_8u_P3AC4R(pSrcData, nSrcStep, pYUVData, iStride, &roi);
|
||||
prims->RGBToYUV420_8u_P3AC4R(pSrcData, SrcFormat, nSrcStep, pYUVData, iStride, &roi);
|
||||
|
||||
status = h264->subsystem->Compress(h264, ppDstData, pDstSize, 0);
|
||||
|
||||
|
|
|
@ -66,8 +66,11 @@ static pstatus_t general_YCoCgToRGB_8u_AC4R(
|
|||
R = T + Co;
|
||||
G = Y + Cg;
|
||||
B = T - Co;
|
||||
color = GetColor(DstFormat, MINMAX(R, 0, 255), MINMAX(G, 0, 255), MINMAX(B, 0,
|
||||
255), A);
|
||||
|
||||
color = GetColor(DstFormat,
|
||||
MINMAX(R, 0, 255), MINMAX(G, 0, 255),
|
||||
MINMAX(B, 0, 255), A);
|
||||
WriteColor(dptr, DstFormat, color);
|
||||
dptr += GetBytesPerPixel(DstFormat);
|
||||
}
|
||||
|
||||
|
|
|
@ -40,8 +40,8 @@ static primitives_t* generic = NULL;
|
|||
#ifdef WITH_SSE2
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
||||
const BYTE* pSrc, INT32 srcStep,
|
||||
BYTE* pDst, INT32 dstStep,
|
||||
const BYTE* pSrc, UINT32 srcStep,
|
||||
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
|
||||
UINT32 width, UINT32 height,
|
||||
UINT8 shift,
|
||||
BOOL withAlpha)
|
||||
|
@ -70,8 +70,8 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
|||
{
|
||||
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
|
||||
return generic->YCoCgToRGB_8u_AC4R(
|
||||
pSrc, srcStep, pDst, dstStep,
|
||||
width, height, shift, withAlpha, TRUE);
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
}
|
||||
|
||||
for (h = 0; h < height; h++)
|
||||
|
@ -82,12 +82,16 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
|||
/* Get to a 16-byte destination boundary. */
|
||||
if ((ULONG_PTR) dptr & 0x0f)
|
||||
{
|
||||
pstatus_t status;
|
||||
int startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
|
||||
|
||||
if (startup > width) startup = width;
|
||||
|
||||
generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
|
||||
startup, 1, shift, withAlpha, TRUE);
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
startup, 1, shift, withAlpha);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
sptr += startup * sizeof(UINT32);
|
||||
dptr += startup * sizeof(UINT32);
|
||||
w -= startup;
|
||||
|
@ -195,8 +199,13 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
|||
/* Handle any remainder pixels. */
|
||||
if (w > 0)
|
||||
{
|
||||
generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
|
||||
w, 1, shift, withAlpha, TRUE);
|
||||
pstatus_t status;
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
w, 1, shift, withAlpha);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
sptr += w * sizeof(UINT32);
|
||||
dptr += w * sizeof(UINT32);
|
||||
}
|
||||
|
@ -210,8 +219,8 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
|||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
const BYTE* pSrc, INT32 srcStep,
|
||||
BYTE* pDst, INT32 dstStep,
|
||||
const BYTE* pSrc, UINT32 srcStep,
|
||||
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
|
||||
UINT32 width, UINT32 height,
|
||||
UINT8 shift,
|
||||
BOOL withAlpha)
|
||||
|
@ -240,9 +249,8 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
|||
{
|
||||
/* Too small, or we'll never hit a 16-byte boundary. Punt. */
|
||||
return generic->YCoCgToRGB_8u_AC4R(
|
||||
pSrc, srcStep,
|
||||
pDst, dstStep, width, height, shift,
|
||||
withAlpha, FALSE);
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
}
|
||||
|
||||
for (h = 0; h < height; h++)
|
||||
|
@ -253,12 +261,17 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
|||
/* Get to a 16-byte destination boundary. */
|
||||
if ((ULONG_PTR) dptr & 0x0f)
|
||||
{
|
||||
pstatus_t status;
|
||||
int startup = (16 - ((ULONG_PTR) dptr & 0x0f)) / 4;
|
||||
|
||||
if (startup > width) startup = width;
|
||||
|
||||
generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
|
||||
startup, 1, shift, withAlpha, FALSE);
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat,
|
||||
dstStep, startup, 1, shift, withAlpha);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
sptr += startup * sizeof(UINT32);
|
||||
dptr += startup * sizeof(UINT32);
|
||||
w -= startup;
|
||||
|
@ -370,8 +383,13 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
|||
/* Handle any remainder pixels. */
|
||||
if (w > 0)
|
||||
{
|
||||
generic->YCoCgToRGB_8u_AC4R(sptr, srcStep, dptr, dstStep,
|
||||
w, 1, shift, withAlpha, FALSE);
|
||||
pstatus_t status;
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
sptr, srcStep, dptr, DstFormat, dstStep,
|
||||
w, 1, shift, withAlpha);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
sptr += w * sizeof(UINT32);
|
||||
dptr += w * sizeof(UINT32);
|
||||
}
|
||||
|
@ -388,21 +406,29 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
|||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(
|
||||
const BYTE* pSrc, INT32 srcStep,
|
||||
BYTE* pDst, INT32 dstStep,
|
||||
BYTE* pDst, UINT32 DstFormat, INT32 dstStep,
|
||||
UINT32 width, UINT32 height,
|
||||
UINT8 shift,
|
||||
BOOL withAlpha,
|
||||
BOOL invert)
|
||||
BOOL withAlpha)
|
||||
{
|
||||
if (invert)
|
||||
// TODO: Need to implement proper color conversion!!!
|
||||
return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat,
|
||||
dstStep, width, height, shift, withAlpha);
|
||||
|
||||
switch(DstFormat)
|
||||
{
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_invert(pSrc, srcStep, pDst, dstStep,
|
||||
case PIXEL_FORMAT_BGRX32:
|
||||
case PIXEL_FORMAT_BGRA32:
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_invert(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(pSrc, srcStep, pDst, dstStep,
|
||||
case PIXEL_FORMAT_RGBX32:
|
||||
case PIXEL_FORMAT_RGBA32:
|
||||
return ssse3_YCoCgRToRGB_8u_AC4R_no_invert(
|
||||
pSrc, srcStep, pDst, DstFormat, dstStep,
|
||||
width, height, shift, withAlpha);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif /* WITH_SSE2 */
|
||||
|
|
|
@ -331,7 +331,7 @@ static INLINE BYTE* writePixel(BYTE* dst, UINT32 format, BYTE Y, BYTE U, BYTE V)
|
|||
const BYTE r = YUV2R(Y, U, V);
|
||||
const BYTE g = YUV2G(Y, U, V);
|
||||
const BYTE b = YUV2B(Y, U, V);
|
||||
UINT32 color = GetColor(format, r, g, b, 0);
|
||||
UINT32 color = GetColor(format, r, g, b, 0xFF);
|
||||
WriteColor(dst, format, color);
|
||||
return dst + GetBytesPerPixel(format);
|
||||
}
|
||||
|
@ -500,9 +500,10 @@ static INLINE BYTE RGB2V(INT32 R, INT32 G, INT32 B)
|
|||
}
|
||||
|
||||
static pstatus_t general_RGBToYUV444_8u_P3AC4R(
|
||||
const BYTE* pSrc, const UINT32 srcStep,
|
||||
const BYTE* pSrc, UINT32 SrcFormat, const UINT32 srcStep,
|
||||
BYTE* pDst[3], UINT32 dstStep[3], const prim_size_t* roi)
|
||||
{
|
||||
const UINT32 bpp = GetBytesPerPixel(SrcFormat);
|
||||
UINT32 x, y;
|
||||
UINT32 nWidth, nHeight;
|
||||
nWidth = roi->width;
|
||||
|
@ -517,9 +518,10 @@ static pstatus_t general_RGBToYUV444_8u_P3AC4R(
|
|||
|
||||
for (x = 0; x < nWidth; x++)
|
||||
{
|
||||
const BYTE B = pRGB[4 * x + 0];
|
||||
const BYTE G = pRGB[4 * x + 1];
|
||||
const BYTE R = pRGB[4 * x + 2];
|
||||
BYTE B, G, R;
|
||||
const UINT32 color = ReadColor(&pRGB[x * bpp], SrcFormat);
|
||||
SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
|
||||
|
||||
pY[x] = RGB2Y(R, G, B);
|
||||
pU[x] = RGB2U(R, G, B);
|
||||
pV[x] = RGB2V(R, G, B);
|
||||
|
@ -530,9 +532,10 @@ static pstatus_t general_RGBToYUV444_8u_P3AC4R(
|
|||
}
|
||||
|
||||
static pstatus_t general_RGBToYUV420_8u_P3AC4R(
|
||||
const BYTE* pSrc, UINT32 srcStep,
|
||||
const BYTE* pSrc, UINT32 SrcFormat, UINT32 srcStep,
|
||||
BYTE* pDst[3], UINT32 dstStep[3], const prim_size_t* roi)
|
||||
{
|
||||
const UINT32 bpp = GetBytesPerPixel(SrcFormat);
|
||||
UINT32 x, y;
|
||||
UINT32 halfWidth;
|
||||
UINT32 halfHeight;
|
||||
|
@ -555,39 +558,50 @@ static pstatus_t general_RGBToYUV420_8u_P3AC4R(
|
|||
|
||||
for (x = 0; x < halfWidth; x++)
|
||||
{
|
||||
INT32 R, G, B;
|
||||
UINT32 color;
|
||||
INT32 Ra, Ga, Ba;
|
||||
const UINT32 val2x = (x * 2);
|
||||
const UINT32 val2x1 = val2x + 1;
|
||||
BYTE B, G, R;
|
||||
|
||||
/* 1st pixel */
|
||||
Ba = B = pRGB[val2x * 4 + 0];
|
||||
Ga = G = pRGB[val2x * 4 + 1];
|
||||
Ra = R = pRGB[val2x * 4 + 2];
|
||||
color = ReadColor(&pRGB[val2x * bpp], SrcFormat);
|
||||
SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
|
||||
|
||||
Ba = B;
|
||||
Ga = G;
|
||||
Ra = R;
|
||||
pY[val2x] = RGB2Y(R, G, B);
|
||||
|
||||
if (val2x1 < nWidth)
|
||||
{
|
||||
/* 2nd pixel */
|
||||
Ba += B = pRGB[val2x * 4 + 4];
|
||||
Ga += G = pRGB[val2x * 4 + 5];
|
||||
Ra += R = pRGB[val2x * 4 + 6];
|
||||
color = ReadColor(&pRGB[val2x1 * bpp], SrcFormat);
|
||||
SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
|
||||
Ba += B;
|
||||
Ga += G;
|
||||
Ra += R;
|
||||
pY[val2x1] = RGB2Y(R, G, B);
|
||||
}
|
||||
|
||||
if (val2y1 < nHeight)
|
||||
{
|
||||
/* 3rd pixel */
|
||||
Ba += B = pRGB1[val2x * 4 + 0];
|
||||
Ga += G = pRGB1[val2x * 4 + 1];
|
||||
Ra += R = pRGB1[val2x * 4 + 2];
|
||||
color = ReadColor(&pRGB1[val2x * bpp], SrcFormat);
|
||||
SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
|
||||
Ba += B;
|
||||
Ga += G;
|
||||
Ra += R;
|
||||
pY1[val2x] = RGB2Y(R, G, B);
|
||||
|
||||
if (val2x1 < nWidth)
|
||||
{
|
||||
/* 4th pixel */
|
||||
Ba += B = pRGB1[val2x * 4 + 4];
|
||||
Ga += G = pRGB1[val2x * 4 + 5];
|
||||
Ra += R = pRGB1[val2x * 4 + 6];
|
||||
color = ReadColor(&pRGB1[val2x1 * bpp], SrcFormat);
|
||||
SplitColor(color, SrcFormat, &R, &G, &B, NULL, NULL);
|
||||
Ba += B;
|
||||
Ga += G;
|
||||
Ra += R;
|
||||
pY1[val2x1] = RGB2Y(R, G, B);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,6 +35,11 @@ static pstatus_t ssse3_YUV420ToRGB_8u_P3AC4R(
|
|||
UINT32 i, nWidth, nHeight, VaddDst, VaddY, VaddU, VaddV;
|
||||
__m128i r0, r1, r2, r3, r4, r5, r6, r7;
|
||||
__m128i* buffer;
|
||||
|
||||
// TODO: Need to implement proper color conversion!!!!!
|
||||
return generic->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep,
|
||||
DstFormat, roi);
|
||||
|
||||
/* last_line: if the last (U,V doubled) line should be skipped, set to 10B
|
||||
* last_column: if it's the last column in a line, set to 10B (for handling line-endings not multiple by four) */
|
||||
buffer = _aligned_malloc(4 * 16, 16);
|
||||
|
|
|
@ -30,7 +30,7 @@ static pstatus_t general_add_16s(
|
|||
const INT16* pSrc1,
|
||||
const INT16* pSrc2,
|
||||
INT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
while (len--)
|
||||
{
|
||||
|
|
|
@ -36,23 +36,19 @@
|
|||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_alphaComp_argb(
|
||||
const BYTE* pSrc1, INT32 src1Step,
|
||||
const BYTE* pSrc2, INT32 src2Step,
|
||||
BYTE* pDst, INT32 dstStep,
|
||||
INT32 width, INT32 height)
|
||||
const BYTE* pSrc1, UINT32 src1Step,
|
||||
const BYTE* pSrc2, UINT32 src2Step,
|
||||
BYTE* pDst, UINT32 dstStep,
|
||||
UINT32 width, UINT32 height)
|
||||
{
|
||||
const UINT32* sptr1 = (const UINT32*) pSrc1;
|
||||
const UINT32* sptr2 = (const UINT32*) pSrc2;
|
||||
UINT32* dptr = (UINT32*) pDst;
|
||||
int linebytes = width * sizeof(UINT32);
|
||||
int src1Jump = (src1Step - linebytes) / sizeof(UINT32);
|
||||
int src2Jump = (src2Step - linebytes) / sizeof(UINT32);
|
||||
int dstJump = (dstStep - linebytes) / sizeof(UINT32);
|
||||
int y;
|
||||
UINT32 y;
|
||||
|
||||
for (y = 0; y < height; y++)
|
||||
{
|
||||
int x;
|
||||
const UINT32* sptr1 = (const UINT32*) (pSrc1 + y * src1Step);
|
||||
const UINT32* sptr2 = (const UINT32*) (pSrc2 + y * src2Step);
|
||||
UINT32* dptr = (UINT32*) (pDst + y * dstStep);
|
||||
UINT32 x;
|
||||
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
|
@ -92,10 +88,6 @@ static pstatus_t general_alphaComp_argb(
|
|||
*dptr++ = rb | ag;
|
||||
}
|
||||
}
|
||||
|
||||
sptr1 += src1Jump;
|
||||
sptr2 += src2Jump;
|
||||
dptr += dstJump;
|
||||
}
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
|
|
|
@ -46,10 +46,10 @@ static primitives_t* generic = NULL;
|
|||
#if !defined(WITH_IPP) || defined(ALL_PRIMITIVES_VERSIONS)
|
||||
|
||||
pstatus_t sse2_alphaComp_argb(
|
||||
const BYTE* pSrc1, INT32 src1Step,
|
||||
const BYTE* pSrc2, INT32 src2Step,
|
||||
BYTE* pDst, INT32 dstStep,
|
||||
INT32 width, INT32 height)
|
||||
const BYTE* pSrc1, UINT32 src1Step,
|
||||
const BYTE* pSrc2, UINT32 src2Step,
|
||||
BYTE* pDst, UINT32 dstStep,
|
||||
UINT32 width, UINT32 height)
|
||||
{
|
||||
const UINT32* sptr1 = (const UINT32*) pSrc1;
|
||||
const UINT32* sptr2 = (const UINT32*) pSrc2;
|
||||
|
@ -108,9 +108,13 @@ pstatus_t sse2_alphaComp_argb(
|
|||
|
||||
if (leadIn)
|
||||
{
|
||||
generic->alphaComp_argb((const BYTE*) sptr1,
|
||||
pstatus_t status;
|
||||
status = generic->alphaComp_argb((const BYTE*) sptr1,
|
||||
src1Step, (const BYTE*) sptr2, src2Step,
|
||||
(BYTE*) dptr, dstStep, leadIn, 1);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
sptr1 += leadIn;
|
||||
sptr2 += leadIn;
|
||||
dptr += leadIn;
|
||||
|
@ -181,9 +185,13 @@ pstatus_t sse2_alphaComp_argb(
|
|||
/* Finish off the remainder. */
|
||||
if (pixels)
|
||||
{
|
||||
generic->alphaComp_argb((const BYTE*) sptr1, src1Step,
|
||||
pstatus_t status;
|
||||
status = generic->alphaComp_argb((const BYTE*) sptr1, src1Step,
|
||||
(const BYTE*) sptr2, src2Step,
|
||||
(BYTE*) dptr, dstStep, pixels, 1);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return status;
|
||||
|
||||
sptr1 += pixels;
|
||||
sptr2 += pixels;
|
||||
dptr += pixels;
|
||||
|
|
|
@ -262,7 +262,7 @@ static pstatus_t general_RGBToYCbCr_16s16s_P3P3(
|
|||
|
||||
for (y = 0; y < roi->height; y++)
|
||||
{
|
||||
int x;
|
||||
UINT32 x;
|
||||
|
||||
for (x = 0; x < roi->width; ++x)
|
||||
{
|
||||
|
@ -305,10 +305,10 @@ static pstatus_t general_RGBToYCbCr_16s16s_P3P3(
|
|||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_RGBToRGB_16s8u_P3AC4R(
|
||||
const INT16* pSrc[3], /* 16-bit R,G, and B arrays */
|
||||
INT32 srcStep, /* bytes between rows in source data */
|
||||
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
|
||||
UINT32 srcStep, /* bytes between rows in source data */
|
||||
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
|
||||
INT32 dstStep, /* bytes between rows in dest data */
|
||||
UINT32 dstStep, /* bytes between rows in dest data */
|
||||
UINT32 DstFormat,
|
||||
const prim_size_t* roi) /* region of interest */
|
||||
{
|
||||
|
|
|
@ -357,10 +357,10 @@ static pstatus_t sse2_RGBToYCbCr_16s16s_P3P3(
|
|||
_mm_set1_epi32(0xFFFFFFFFU)
|
||||
|
||||
pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
||||
const INT16* pSrc[3], /* 16-bit R,G, and B arrays */
|
||||
INT32 srcStep, /* bytes between rows in source data */
|
||||
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
|
||||
UINT32 srcStep, /* bytes between rows in source data */
|
||||
BYTE* pDst, /* 32-bit interleaved ARGB (ABGR?) data */
|
||||
INT32 dstStep, /* bytes between rows in dest data */
|
||||
UINT32 dstStep, /* bytes between rows in dest data */
|
||||
UINT32 DstFormat,
|
||||
const prim_size_t* roi) /* region of interest */
|
||||
{
|
||||
|
@ -388,6 +388,10 @@ pstatus_t sse2_RGBToRGB_16s8u_P3AC4R(
|
|||
dstStep, DstFormat, roi);
|
||||
}
|
||||
|
||||
// TODO: Need to update SSE code to allow color conversion!!!
|
||||
return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst,
|
||||
dstStep, DstFormat, roi);
|
||||
|
||||
out = (BYTE*) pDst;
|
||||
srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
|
||||
dstbump = (dstStep - (roi->width * sizeof(UINT32)));
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
static pstatus_t general_set_8u(
|
||||
BYTE val,
|
||||
BYTE* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
memset((void*) pDst, (int) val, (size_t) len);
|
||||
return PRIMITIVES_SUCCESS;
|
||||
|
@ -48,7 +48,7 @@ static pstatus_t general_zero(
|
|||
static pstatus_t general_set_32s(
|
||||
INT32 val,
|
||||
INT32* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
INT32* dptr = (INT32*) pDst;
|
||||
size_t span, remaining;
|
||||
|
@ -85,7 +85,7 @@ static pstatus_t general_set_32s(
|
|||
static pstatus_t general_set_32u(
|
||||
UINT32 val,
|
||||
UINT32* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
UINT32* dptr = (UINT32*) pDst;
|
||||
size_t span, remaining;
|
||||
|
|
|
@ -40,7 +40,7 @@ static primitives_t* generic = NULL;
|
|||
static pstatus_t sse2_set_8u(
|
||||
BYTE val,
|
||||
BYTE* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
BYTE byte, *dptr;
|
||||
__m128i xmm0;
|
||||
|
@ -126,7 +126,7 @@ static pstatus_t sse2_set_8u(
|
|||
static pstatus_t sse2_set_32u(
|
||||
UINT32 val,
|
||||
UINT32* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
const primitives_t* prim = primitives_get_generic();
|
||||
UINT32* dptr = (UINT32*) pDst;
|
||||
|
@ -218,7 +218,7 @@ static pstatus_t sse2_set_32u(
|
|||
static pstatus_t sse2_set_32s(
|
||||
INT32 val,
|
||||
INT32* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
UINT32 uval = *((UINT32*) &val);
|
||||
return sse2_set_32u(uval, (UINT32*) pDst, len);
|
||||
|
|
|
@ -24,9 +24,9 @@
|
|||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_lShiftC_16s(
|
||||
const INT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
INT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
if (val == 0) return PRIMITIVES_SUCCESS;
|
||||
|
||||
|
@ -38,9 +38,9 @@ static pstatus_t general_lShiftC_16s(
|
|||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_rShiftC_16s(
|
||||
const INT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
INT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
if (val == 0) return PRIMITIVES_SUCCESS;
|
||||
|
||||
|
@ -52,9 +52,9 @@ static pstatus_t general_rShiftC_16s(
|
|||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_lShiftC_16u(
|
||||
const UINT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
UINT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
if (val == 0) return PRIMITIVES_SUCCESS;
|
||||
|
||||
|
@ -66,9 +66,9 @@ static pstatus_t general_lShiftC_16u(
|
|||
/* ------------------------------------------------------------------------- */
|
||||
static pstatus_t general_rShiftC_16u(
|
||||
const UINT16* pSrc,
|
||||
INT32 val,
|
||||
UINT32 val,
|
||||
UINT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
if (val == 0) return PRIMITIVES_SUCCESS;
|
||||
|
||||
|
@ -82,7 +82,7 @@ static pstatus_t general_shiftC_16s(
|
|||
const INT16* pSrc,
|
||||
INT32 val,
|
||||
INT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
if (val == 0) return PRIMITIVES_SUCCESS;
|
||||
|
||||
|
@ -95,7 +95,7 @@ static pstatus_t general_shiftC_16u(
|
|||
const UINT16* pSrc,
|
||||
INT32 val,
|
||||
UINT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
if (val == 0) return PRIMITIVES_SUCCESS;
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
static pstatus_t general_sign_16s(
|
||||
const INT16* pSrc,
|
||||
INT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
while (len--)
|
||||
{
|
||||
|
|
|
@ -35,7 +35,7 @@ static primitives_t* generic = NULL;
|
|||
static pstatus_t ssse3_sign_16s(
|
||||
const INT16* pSrc,
|
||||
INT16* pDst,
|
||||
INT32 len)
|
||||
UINT32 len)
|
||||
{
|
||||
const INT16* sptr = (const INT16*) pSrc;
|
||||
INT16* dptr = (INT16*) pDst;
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
* SCD = Source, Constant, Destination
|
||||
*/
|
||||
#define SSE3_SCD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
|
||||
static pstatus_t _name_(const _type_ *pSrc, INT32 val, _type_ *pDst, INT32 len) \
|
||||
static pstatus_t _name_(const _type_ *pSrc, UINT32 val, _type_ *pDst, UINT32 len) \
|
||||
{ \
|
||||
INT32 shifts; \
|
||||
UINT32 offBeatMask; \
|
||||
|
@ -293,7 +293,7 @@
|
|||
* SSD = Source1, Source2, Destination
|
||||
*/
|
||||
#define SSE3_SSD_ROUTINE(_name_, _type_, _fallback_, _op_, _slowWay_) \
|
||||
pstatus_t _name_(const _type_ *pSrc1, const _type_ *pSrc2, _type_ *pDst, INT32 len) \
|
||||
pstatus_t _name_(const _type_ *pSrc1, const _type_ *pSrc2, _type_ *pDst, UINT32 len) \
|
||||
{ \
|
||||
int shifts; \
|
||||
UINT32 offBeatMask; \
|
||||
|
@ -318,7 +318,9 @@
|
|||
/* Get to the 16-byte boundary now. */ \
|
||||
while ((ULONG_PTR) dptr & 0x0f) \
|
||||
{ \
|
||||
_slowWay_; \
|
||||
pstatus_t status; \
|
||||
status = _slowWay_; \
|
||||
if (status != PRIMITIVES_SUCCESS) return status; \
|
||||
if (--len == 0) return PRIMITIVES_SUCCESS; \
|
||||
} \
|
||||
/* Use 4 128-bit SSE registers. */ \
|
||||
|
|
|
@ -81,6 +81,6 @@ primitives_t* primitives_get_generic(void)
|
|||
if (!pPrimitivesGenericInitialized)
|
||||
primitives_init_generic();
|
||||
|
||||
return &pPrimitives;
|
||||
return &pPrimitivesGeneric;
|
||||
}
|
||||
|
||||
|
|
|
@ -59,7 +59,8 @@ static BOOL test_add16s_speed(void)
|
|||
winpr_RAND(src2, sizeof(src2));
|
||||
|
||||
if (!speed_test("add16s", "aligned", g_Iterations,
|
||||
generic->add_16s, optimized->add_16s,
|
||||
(speed_test_fkt)generic->add_16s,
|
||||
(speed_test_fkt)optimized->add_16s,
|
||||
src1, src2, dst, FUNC_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
|
@ -72,8 +73,11 @@ int TestPrimitivesAdd(int argc, char* argv[])
|
|||
if (!test_add16s_func())
|
||||
return -1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
if (!test_add16s_speed())
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -33,8 +33,13 @@ static const int block_size[] = { 4, 64, 256 };
|
|||
#define GRN(_c_) (((_c_) & 0x0000FF00U) >> 8)
|
||||
#define BLU(_c_) ((_c_) & 0x000000FFU)
|
||||
#define TOLERANCE 1
|
||||
#define PIXEL(_addr_, _bytes_, _x_, _y_) \
|
||||
((UINT32 *) (((BYTE *) (_addr_)) + (_x_)*4 + (_y_)*(_bytes_)))
|
||||
/* Return a read-only pointer to the 32-bit pixel at column _x_, row _y_
 * of the image starting at _addr_, where consecutive rows are _bytes_
 * bytes apart. */
static inline const UINT32* PIXEL(const BYTE* _addr_, UINT32 _bytes_, UINT32 _x_, UINT32 _y_)
{
	/* Step down to the start of the requested row ... */
	const BYTE* row = _addr_ + _y_ * _bytes_;
	/* ... then across to the requested pixel within that row. */
	const BYTE* pixel = row + _x_ * sizeof(UINT32);

	return (const UINT32*)pixel;
}
|
||||
|
||||
#define SRC1_WIDTH 6
|
||||
#define SRC1_HEIGHT 6
|
||||
#define SRC2_WIDTH 7
|
||||
|
@ -90,10 +95,10 @@ static UINT32 colordist(
|
|||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static BOOL check(const BYTE* pSrc1, INT32 src1Step,
|
||||
const BYTE* pSrc2, INT32 src2Step,
|
||||
BYTE* pDst, INT32 dstStep,
|
||||
INT32 width, INT32 height)
|
||||
static BOOL check(const BYTE* pSrc1, UINT32 src1Step,
|
||||
const BYTE* pSrc2, UINT32 src2Step,
|
||||
BYTE* pDst, UINT32 dstStep,
|
||||
UINT32 width, UINT32 height)
|
||||
{
|
||||
UINT32 x, y;
|
||||
for (y = 0; y < height; ++y)
|
||||
|
@ -120,14 +125,14 @@ static BOOL check(const BYTE* pSrc1, INT32 src1Step,
|
|||
static BOOL test_alphaComp_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
BYTE ALIGN(src1[SRC1_WIDTH * SRC1_HEIGHT]);
|
||||
BYTE ALIGN(src2[SRC2_WIDTH * SRC2_HEIGHT]);
|
||||
BYTE ALIGN(dst1[DST_WIDTH * DST_HEIGHT]);
|
||||
char testStr[256];
|
||||
BYTE ALIGN(src1[SRC1_WIDTH * SRC1_HEIGHT * 4]);
|
||||
BYTE ALIGN(src2[SRC2_WIDTH * SRC2_HEIGHT * 4]);
|
||||
BYTE ALIGN(dst1[DST_WIDTH * DST_HEIGHT * 4]);
|
||||
UINT32* ptr;
|
||||
UINT32 i;
|
||||
testStr[0] = '\0';
|
||||
|
||||
winpr_RAND((BYTE*)src1, sizeof(src1));
|
||||
|
||||
/* Special-case the first two values */
|
||||
src1[0] &= 0x00FFFFFFU;
|
||||
src1[1] |= 0xFF000000U;
|
||||
|
@ -188,7 +193,8 @@ static int test_alphaComp_speed(void)
|
|||
memset(dst1, 0, sizeof(dst1));
|
||||
|
||||
if (!speed_test("add16s", "aligned", g_Iterations,
|
||||
generic->alphaComp_argb, optimized->alphaComp_argb,
|
||||
(speed_test_fkt)generic->alphaComp_argb,
|
||||
(speed_test_fkt)optimized->alphaComp_argb,
|
||||
src1, 4 * SRC1_WIDTH,
|
||||
src2, 4 * SRC2_WIDTH,
|
||||
dst1, 4 * DST_WIDTH, TEST_WIDTH, TEST_HEIGHT))
|
||||
|
@ -203,8 +209,11 @@ int TestPrimitivesAlphaComp(int argc, char* argv[])
|
|||
if (!test_alphaComp_func())
|
||||
return -1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
if (!test_alphaComp_speed())
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -20,99 +20,87 @@
|
|||
#include "prim_test.h"
|
||||
|
||||
#define FUNC_TEST_SIZE 65536
|
||||
static const int ANDOR_PRETEST_ITERATIONS = 100000;
|
||||
static const int TEST_TIME = 2.0; // seconds
|
||||
|
||||
#define VALUE (0xA5A5A5A5U)
|
||||
|
||||
/* ========================================================================= */
|
||||
/**
 * Run one andC_32u implementation and verify every element it produced.
 *
 * @param name  label printed in the failure message to identify the variant
 * @param fkt   implementation under test
 * @param src   input values (size elements)
 * @param val   constant ANDed with each input element
 * @param dst   output buffer (size elements), written by fkt
 * @param size  number of elements to process and check
 *
 * @return TRUE when fkt reports PRIMITIVES_SUCCESS and every dst[i] equals
 *         src[i] & val; FALSE on the first mismatch or on a failure status.
 */
static BOOL test_and_32u_impl(const char* name, __andC_32u_t fkt,
                              const UINT32* src, const UINT32 val,
                              UINT32* dst, size_t size)
{
	size_t i;
	pstatus_t status = fkt(src, val, dst, size);

	if (status != PRIMITIVES_SUCCESS)
		return FALSE;

	for (i = 0; i < size; ++i)
	{
		const UINT32 expected = src[i] & val;

		if (dst[i] != expected)
		{
			/* i is a size_t: passing it to %d is a format/argument mismatch,
			 * so print it through unsigned long instead. */
			printf("AND %s FAIL[%lu] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
			       name, (unsigned long) i, src[i], val, expected, dst[i]);
			return FALSE;
		}
	}

	return TRUE;
}
|
||||
|
||||
static BOOL test_and_32u_func(void)
|
||||
{
|
||||
UINT32 ALIGN(src[FUNC_TEST_SIZE + 3]), ALIGN(dst[FUNC_TEST_SIZE + 3]);
|
||||
int failed = 0;
|
||||
int i;
|
||||
char testStr[256];
|
||||
testStr[0] = '\0';
|
||||
winpr_RAND(src, sizeof(src));
|
||||
generic->andC_32u(src + 1, VALUE, dst + 1, FUNC_TEST_SIZE);
|
||||
strcat(testStr, " general");
|
||||
|
||||
for (i = 1; i <= FUNC_TEST_SIZE; ++i)
|
||||
{
|
||||
if (dst[i] != (src[i] & VALUE))
|
||||
{
|
||||
printf("AND-general FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
|
||||
i, src[i], VALUE, src[i] & VALUE, dst[i]);
|
||||
++failed;
|
||||
}
|
||||
}
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
if (!test_and_32u_impl("generic->andC_32u aligned", generic->andC_32u,
|
||||
src + 1, VALUE, dst + 1, FUNC_TEST_SIZE))
|
||||
return FALSE;
|
||||
if (!test_and_32u_impl("generic->andC_32u unaligned", generic->andC_32u,
|
||||
src + 1, VALUE, dst + 2, FUNC_TEST_SIZE))
|
||||
return FALSE;
|
||||
if (!test_and_32u_impl("optimized->andC_32u aligned", optimized->andC_32u,
|
||||
src + 1, VALUE, dst + 1, FUNC_TEST_SIZE))
|
||||
return FALSE;
|
||||
if (!test_and_32u_impl("optimized->andC_32u unaligned", optimized->andC_32u,
|
||||
src + 1, VALUE, dst + 2, FUNC_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE3");
|
||||
/* Aligned */
|
||||
memset(dst, 0, sizeof(dst));
|
||||
sse3_andC_32u(src + 1, VALUE, dst + 1, FUNC_TEST_SIZE);
|
||||
|
||||
for (i = 1; i <= FUNC_TEST_SIZE; ++i)
|
||||
{
|
||||
if (dst[i] != (src[i] & VALUE))
|
||||
{
|
||||
printf("AND-SSE-aligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
|
||||
i, src[i], VALUE, src[i] & VALUE, dst[i]);
|
||||
++failed;
|
||||
}
|
||||
}
|
||||
|
||||
/* Unaligned */
|
||||
memset(dst, 0, sizeof(dst));
|
||||
sse3_andC_32u(src + 1, VALUE, dst + 2, FUNC_TEST_SIZE);
|
||||
|
||||
for (i = 1; i <= FUNC_TEST_SIZE; ++i)
|
||||
{
|
||||
if (dst[i + 1] != (src[i] & VALUE))
|
||||
{
|
||||
printf("AND-SSE-unaligned FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
|
||||
i, src[i], VALUE, src[i] & VALUE, dst[i + 1]);
|
||||
++failed;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* i386 */
|
||||
|
||||
if (!failed) printf("All and_32u tests passed (%s).\n", testStr);
|
||||
|
||||
return (failed > 0) ? FAILURE : SUCCESS;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static BOOL test_and_32u_speed(void)
|
||||
{
|
||||
UINT32 ALIGN(src[MAX_TEST_SIZE + 3]), ALIGN(dst[MAX_TEST_SIZE + 3]);
|
||||
winpr_RAND(src, sizeof(src));
|
||||
andC_32u_speed_test("and32u", "aligned", src, NULL, VALUE, dst,
|
||||
test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
|
||||
andC_32u_speed_test("and32u", "unaligned", src + 1, NULL, VALUE, dst,
|
||||
test_sizes, NUM_TEST_SIZES, ANDOR_PRETEST_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
if (!speed_test("andC_32u", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->andC_32u,
|
||||
(speed_test_fkt)optimized->andC_32u,
|
||||
src + 1, VALUE, dst + 1, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
if (!speed_test("andC_32u", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->andC_32u,
|
||||
(speed_test_fkt)optimized->andC_32u,
|
||||
src + 1, VALUE, dst + 2, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
static BOOL check(const UINT32* src, const UINT32* dst, UINT32 size, UINT32 value)
|
||||
{
|
||||
UINT32 i;
|
||||
UINT32 failed = 0;
|
||||
|
||||
for (i = 1; i <= size; ++i)
|
||||
for (i = 0; i < size; ++i)
|
||||
{
|
||||
if (dst[i] != (src[i] | value))
|
||||
{
|
||||
printf("OR-general general FAIL[%d] 0x%08x&0x%08x=0x%08x, got 0x%08x\n",
|
||||
i, src[i], value, src[i] | value, dst[i]);
|
||||
++failed;
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -123,8 +111,7 @@ static BOOL test_or_32u_func(void)
|
|||
{
|
||||
pstatus_t status;
|
||||
UINT32 ALIGN(src[FUNC_TEST_SIZE + 3]), ALIGN(dst[FUNC_TEST_SIZE + 3]);
|
||||
char testStr[256];
|
||||
testStr[0] = '\0';
|
||||
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
status = generic->orC_32u(src + 1, VALUE, dst + 1, FUNC_TEST_SIZE);
|
||||
|
@ -153,7 +140,8 @@ static BOOL test_or_32u_speed(void)
|
|||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
if (!speed_test("add16s", "aligned", g_Iterations,
|
||||
generic->orC_32u, optimized->orC_32u,
|
||||
(speed_test_fkt)generic->orC_32u,
|
||||
(speed_test_fkt)optimized->orC_32u,
|
||||
src + 1, VALUE, dst + 1, FUNC_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
|
@ -167,14 +155,16 @@ int TestPrimitivesAndOr(int argc, char* argv[])
|
|||
if (!test_and_32u_func())
|
||||
return -1;
|
||||
|
||||
if (!test_and_32u_speed())
|
||||
return -1;
|
||||
|
||||
if (!test_or_32u_func())
|
||||
return -1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
if (!test_and_32u_speed())
|
||||
return -1;
|
||||
if (!test_or_32u_speed())
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -24,19 +24,16 @@ static const int YCBCR_TRIAL_ITERATIONS = 1000;
|
|||
static const float TEST_TIME = 4.0;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_RGBToRGB_16s8u_P3AC4R_func(void)
|
||||
static BOOL test_RGBToRGB_16s8u_P3AC4R_func(void)
|
||||
{
|
||||
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
|
||||
UINT32 ALIGN(out1[4096]);
|
||||
#ifdef WITH_SSE2
|
||||
UINT32 ALIGN(out2[4096]);
|
||||
#endif
|
||||
int i;
|
||||
int failed = 0;
|
||||
char testStr[256];
|
||||
BOOL failed = FALSE;
|
||||
INT16* ptrs[3];
|
||||
prim_size_t roi = { 64, 64 };
|
||||
testStr[0] = '\0';
|
||||
|
||||
winpr_RAND((BYTE*)r, sizeof(r));
|
||||
winpr_RAND((BYTE*)g, sizeof(g));
|
||||
winpr_RAND((BYTE*)b, sizeof(b));
|
||||
|
@ -52,15 +49,15 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
|
|||
ptrs[0] = r;
|
||||
ptrs[1] = g;
|
||||
ptrs[2] = b;
|
||||
generic->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
|
||||
(BYTE*) out1, 64 * 4, &roi);
|
||||
#ifdef WITH_SSE2
|
||||
if (generic->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
|
||||
(BYTE*) out1, 64 * 4, PIXEL_FORMAT_RGBA32,
|
||||
&roi) != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
sse2_RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
|
||||
(BYTE*) out2, 64 * 4, &roi);
|
||||
if (optimized->RGBToRGB_16s8u_P3AC4R((const INT16**) ptrs, 64 * 2,
|
||||
(BYTE*) out2, 64 * 4, PIXEL_FORMAT_RGBA32,
|
||||
&roi) != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
for (i = 0; i < 4096; ++i)
|
||||
{
|
||||
|
@ -68,40 +65,22 @@ int test_RGBToRGB_16s8u_P3AC4R_func(void)
|
|||
{
|
||||
printf("RGBToRGB-SSE FAIL: out1[%d]=0x%08x out2[%d]=0x%08x\n",
|
||||
i, out1[i], i, out2[i]);
|
||||
failed = 1;
|
||||
}
|
||||
failed = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* i386 */
|
||||
|
||||
if (!failed) printf("All RGBToRGB_16s8u_P3AC4R tests passed (%s).\n", testStr);
|
||||
|
||||
return (failed > 0) ? FAILURE : SUCCESS;
|
||||
return !failed;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static const prim_size_t roi64x64 = { 64, 64 };
|
||||
STD_SPEED_TEST(
|
||||
rgb_to_argb_speed, INT16*, UINT32, dst = dst,
|
||||
TRUE, generic->RGBToRGB_16s8u_P3AC4R(
|
||||
(const INT16**) src1, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, sse2_RGBToRGB_16s8u_P3AC4R(
|
||||
(const INT16**) src1, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64),
|
||||
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
FALSE, dst = dst);
|
||||
|
||||
int test_RGBToRGB_16s8u_P3AC4R_speed(void)
|
||||
static BOOL test_RGBToRGB_16s8u_P3AC4R_speed(void)
|
||||
{
|
||||
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
|
||||
UINT32 ALIGN(dst[4096]);
|
||||
const prim_size_t roi64x64 = { 64, 64 };
|
||||
INT16 ALIGN(r[4096+1]), ALIGN(g[4096+1]), ALIGN(b[4096+1]);
|
||||
UINT32 ALIGN(dst[4096+1]);
|
||||
int i;
|
||||
INT16* ptrs[3];
|
||||
int size_array[] = { 64 };
|
||||
|
||||
winpr_RAND((BYTE*)r, sizeof(r));
|
||||
winpr_RAND((BYTE*)g, sizeof(g));
|
||||
winpr_RAND((BYTE*)b, sizeof(b));
|
||||
|
@ -114,29 +93,38 @@ int test_RGBToRGB_16s8u_P3AC4R_speed(void)
|
|||
b[i] &= 0x00FFU;
|
||||
}
|
||||
|
||||
ptrs[0] = r;
|
||||
ptrs[1] = g;
|
||||
ptrs[2] = b;
|
||||
rgb_to_argb_speed("RGBToARGB", "aligned",
|
||||
(const INT16**) ptrs, NULL, 0, dst,
|
||||
size_array, 1, RGB_TRIAL_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
ptrs[0] = r+1;
|
||||
ptrs[1] = g+1;
|
||||
ptrs[2] = b+1;
|
||||
|
||||
if (!speed_test("RGBToRGB_16s8u_P3AC4R", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
|
||||
(speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
|
||||
(const INT16**) ptrs, 64 * 2, (BYTE*) dst, 64 * 4, &roi64x64))
|
||||
return FALSE;
|
||||
|
||||
if (!speed_test("RGBToRGB_16s8u_P3AC4R", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->RGBToRGB_16s8u_P3AC4R,
|
||||
(speed_test_fkt)optimized->RGBToRGB_16s8u_P3AC4R,
|
||||
(const INT16**) ptrs, 64 * 2, ((BYTE*) dst)+1, 64 * 4, &roi64x64))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
int test_yCbCrToRGB_16s16s_P3P3_func(void)
|
||||
static BOOL test_yCbCrToRGB_16s16s_P3P3_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
|
||||
INT16 ALIGN(r1[4096]), ALIGN(g1[4096]), ALIGN(b1[4096]);
|
||||
INT16 ALIGN(r2[4096]), ALIGN(g2[4096]), ALIGN(b2[4096]);
|
||||
int i;
|
||||
int failed = 0;
|
||||
char testStr[256];
|
||||
const INT16* in[3];
|
||||
INT16* out1[3];
|
||||
INT16* out2[3];
|
||||
prim_size_t roi = { 64, 64 };
|
||||
testStr[0] = '\0';
|
||||
|
||||
winpr_RAND((BYTE*)y, sizeof(y));
|
||||
winpr_RAND((BYTE*)cb, sizeof(cb));
|
||||
winpr_RAND((BYTE*)cr, sizeof(cr));
|
||||
|
@ -164,13 +152,14 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
|
|||
out2[0] = r2;
|
||||
out2[1] = g2;
|
||||
out2[2] = b2;
|
||||
generic->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out1, 64 * 2, &roi);
|
||||
#ifdef WITH_SSE2
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
|
||||
{
|
||||
strcat(testStr, " SSE2");
|
||||
sse2_yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out2, 64 * 2, &roi);
|
||||
status = generic->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out1, 64 * 2, &roi);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
status = optimized->yCbCrToRGB_16s16s_P3P3(in, 64 * 2, out2, 64 * 2, &roi);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
for (i = 0; i < 4096; ++i)
|
||||
{
|
||||
|
@ -180,41 +169,23 @@ int test_yCbCrToRGB_16s16s_P3P3_func(void)
|
|||
{
|
||||
printf("YCbCrToRGB-SSE FAIL[%d]: %d,%d,%d vs %d,%d,%d\n", i,
|
||||
r1[i], g1[i], b1[i], r2[i], g2[i], b2[i]);
|
||||
failed = 1;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* i386 */
|
||||
|
||||
if (!failed) printf("All yCbCrToRGB_16s16s_P3P3 tests passed (%s).\n", testStr);
|
||||
|
||||
return (failed > 0) ? FAILURE : SUCCESS;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(
|
||||
ycbcr_to_rgb_speed, INT16*, INT16*, dst = dst,
|
||||
TRUE, generic->yCbCrToRGB_16s16s_P3P3(src1, 64 * 2, dst, 64 * 2, &roi64x64),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, sse2_yCbCrToRGB_16s16s_P3P3(src1, 64 * 2, dst, 64 * 2, &roi64x64),
|
||||
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
#elif defined(WITH_NEON)
|
||||
TRUE, neon_yCbCrToRGB_16s16s_P3P3(src1, 64 * 2, dst, 64 * 2, &roi64x64),
|
||||
PF_ARM_NEON_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
FALSE, dst = dst);
|
||||
|
||||
static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
|
||||
{
|
||||
prim_size_t roi = { 64, 64 };
|
||||
INT16 ALIGN(y[4096]), ALIGN(cb[4096]), ALIGN(cr[4096]);
|
||||
INT16 ALIGN(r[4096]), ALIGN(g[4096]), ALIGN(b[4096]);
|
||||
int i;
|
||||
const INT16* input[3];
|
||||
INT16* output[3];
|
||||
int size_array[] = { 64 };
|
||||
|
||||
winpr_RAND((BYTE*)y, sizeof(y));
|
||||
winpr_RAND((BYTE*)cb, sizeof(cb));
|
||||
winpr_RAND((BYTE*)cr, sizeof(cr));
|
||||
|
@ -233,37 +204,35 @@ static int test_yCbCrToRGB_16s16s_P3P3_speed(void)
|
|||
output[0] = r;
|
||||
output[1] = g;
|
||||
output[2] = b;
|
||||
ycbcr_to_rgb_speed("yCbCrToRGB", "aligned", input, NULL, NULL, output,
|
||||
size_array, 1, YCBCR_TRIAL_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
if (!speed_test("yCbCrToRGB_16s16s_P3P3", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->yCbCrToRGB_16s16s_P3P3,
|
||||
(speed_test_fkt)optimized->yCbCrToRGB_16s16s_P3P3,
|
||||
input, 64 * 2, output, 64 * 2, &roi))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int TestPrimitivesColors(int argc, char* argv[])
|
||||
{
|
||||
int status;
|
||||
status = test_RGBToRGB_16s8u_P3AC4R_func();
|
||||
prim_test_setup(FALSE);
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_RGBToRGB_16s8u_P3AC4R_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_RGBToRGB_16s8u_P3AC4R_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_RGBToRGB_16s8u_P3AC4R_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = test_yCbCrToRGB_16s16s_P3P3_func();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_yCbCrToRGB_16s16s_P3P3_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_yCbCrToRGB_16s16s_P3P3_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_yCbCrToRGB_16s16s_P3P3_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,22 +19,17 @@
|
|||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
static const int MEMCPY_PRETEST_ITERATIONS = 1000000;
|
||||
static const int TEST_TIME = 1.0; // seconds
|
||||
#define COPY_TESTSIZE (256*2+16*2+15+15)
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static int test_copy8u_func(void)
|
||||
static BOOL test_copy8u_func(void)
|
||||
{
|
||||
primitives_t* prims = primitives_get();
|
||||
BYTE ALIGN(data[COPY_TESTSIZE + 15]);
|
||||
int i, soff;
|
||||
int failed = 0;
|
||||
char testStr[256];
|
||||
BYTE ALIGN(dest[COPY_TESTSIZE + 15]);
|
||||
testStr[0] = '\0';
|
||||
|
||||
winpr_RAND(data, sizeof(data));
|
||||
strcat(testStr, " ptr");
|
||||
|
||||
for (soff = 0; soff < 16; ++soff)
|
||||
{
|
||||
|
@ -47,7 +42,8 @@ static int test_copy8u_func(void)
|
|||
for (length = 1; length <= COPY_TESTSIZE - doff; ++length)
|
||||
{
|
||||
memset(dest, 0, sizeof(dest));
|
||||
prims->copy_8u(data + soff, dest + doff, length);
|
||||
if (prims->copy_8u(data + soff, dest + doff, length) != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
for (i = 0; i < length; ++i)
|
||||
{
|
||||
|
@ -57,48 +53,47 @@ static int test_copy8u_func(void)
|
|||
"data[%d]=0x%02x\n",
|
||||
doff, length, i + doff, dest[i + doff],
|
||||
i + soff, data[i + soff]);
|
||||
failed = 1;
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!failed) printf("All copy8 tests passed (%s).\n", testStr);
|
||||
|
||||
return (failed > 0) ? FAILURE : SUCCESS;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(copy8u_speed_test, BYTE, BYTE, dst = dst,
|
||||
TRUE, memcpy(dst, src1, size),
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
TRUE, ippsCopy_8u(src1, dst, size));
|
||||
|
||||
int test_copy8u_speed(void)
|
||||
static BOOL test_copy8u_speed(void)
|
||||
{
|
||||
BYTE ALIGN(src[MAX_TEST_SIZE + 4]);
|
||||
BYTE ALIGN(dst[MAX_TEST_SIZE + 4]);
|
||||
copy8u_speed_test("copy8u", "aligned", src, NULL, 0, dst,
|
||||
test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME);
|
||||
copy8u_speed_test("copy8u", "unaligned", src + 1, NULL, 0, dst,
|
||||
test_sizes, NUM_TEST_SIZES, MEMCPY_PRETEST_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
if (!speed_test("copy_8u", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->copy_8u,
|
||||
(speed_test_fkt)optimized->copy_8u,
|
||||
src, dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
if (!speed_test("copy_8u", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->copy_8u,
|
||||
(speed_test_fkt)optimized->copy_8u,
|
||||
src+1, dst+1, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int TestPrimitivesCopy(int argc, char* argv[])
|
||||
{
|
||||
int status;
|
||||
status = test_copy8u_func();
|
||||
prim_test_setup(FALSE);
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_copy8u_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_copy8u_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_copy8u_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -53,14 +53,14 @@ static BOOL test_set8u_func(void)
|
|||
{
|
||||
UINT32 len;
|
||||
|
||||
memset(dest, 0, sizeof(dest));
|
||||
memset(dest, 3, sizeof(dest));
|
||||
for (len = 1; len < 48 - off; ++len)
|
||||
{
|
||||
status = generic->set_8u(0xa5, dest + off, len);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
if (!check8(dest, len, off, 0xa8))
|
||||
if (!check8(dest, len, off, 0xa5))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -69,14 +69,14 @@ static BOOL test_set8u_func(void)
|
|||
{
|
||||
UINT32 len;
|
||||
|
||||
memset(dest, 0, sizeof(dest));
|
||||
memset(dest, 3, sizeof(dest));
|
||||
for (len = 1; len < 48 - off; ++len)
|
||||
{
|
||||
status = optimized->set_8u(0xa5, dest + off, len);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
if (!check8(dest, len, off, 0xa8))
|
||||
if (!check8(dest, len, off, 0xa5))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -95,8 +95,9 @@ static BOOL test_set8u_speed(void)
|
|||
{
|
||||
winpr_RAND(&value, sizeof(value));
|
||||
if (!speed_test("set_8u", "", g_Iterations,
|
||||
generic->set_8u, optimized->set_8u,
|
||||
value, dest + x, len))
|
||||
(speed_test_fkt)generic->set_8u,
|
||||
(speed_test_fkt)optimized->set_8u,
|
||||
value, dest + x, x))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -232,8 +233,9 @@ static BOOL test_set32u_speed(void)
|
|||
{
|
||||
winpr_RAND(&value, sizeof(value));
|
||||
if (!speed_test("set_32u", "", g_Iterations,
|
||||
generic->set_32u, optimized->set_32u,
|
||||
value, dest + x, len))
|
||||
(speed_test_fkt)generic->set_32u,
|
||||
(speed_test_fkt)optimized->set_32u,
|
||||
value, dest + x, x))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -251,8 +253,9 @@ static BOOL test_set32s_speed(void)
|
|||
{
|
||||
winpr_RAND(&value, sizeof(value));
|
||||
if (!speed_test("set_32s", "", g_Iterations,
|
||||
generic->set_32s, optimized->set_32s,
|
||||
value, dest + x, len))
|
||||
(speed_test_fkt)generic->set_32s,
|
||||
(speed_test_fkt)optimized->set_32s,
|
||||
value, dest + x, x))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -265,21 +268,20 @@ int TestPrimitivesSet(int argc, char* argv[])
|
|||
|
||||
if (!test_set8u_func())
|
||||
return -1;
|
||||
|
||||
if (!test_set8u_speed())
|
||||
return -1;
|
||||
|
||||
if (!test_set32s_func())
|
||||
return -1;
|
||||
|
||||
if (!test_set32s_speed())
|
||||
return -1;
|
||||
|
||||
if (!test_set32u_func())
|
||||
return -1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
if (!test_set8u_speed())
|
||||
return -1;
|
||||
if (!test_set32s_speed())
|
||||
return -1;
|
||||
if (!test_set32u_speed())
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -20,207 +20,361 @@
|
|||
#include "prim_test.h"
|
||||
|
||||
#define FUNC_TEST_SIZE 65536
|
||||
static const int SHIFT_PRETEST_ITERATIONS = 50000;
|
||||
static const float TEST_TIME = 1.0;
|
||||
|
||||
#ifdef WITH_SSE2
|
||||
#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
|
||||
int _name_(void) \
|
||||
{ \
|
||||
_type_ ALIGN(src[FUNC_TEST_SIZE+3]), \
|
||||
ALIGN(d1[FUNC_TEST_SIZE+3]), ALIGN(d2[FUNC_TEST_SIZE+3]); \
|
||||
int failed = 0; \
|
||||
int i; \
|
||||
char testStr[256]; \
|
||||
testStr[0] = '\0'; \
|
||||
get_random_data(src, sizeof(src)); \
|
||||
_f1_(src+1, 3, d1+1, FUNC_TEST_SIZE); \
|
||||
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) \
|
||||
{ \
|
||||
strcat(testStr, " SSE3"); \
|
||||
/* Aligned */ \
|
||||
_f2_(src+1, 3, d2+1, FUNC_TEST_SIZE); \
|
||||
for (i=1; i<=FUNC_TEST_SIZE; ++i) \
|
||||
{ \
|
||||
if (d1[i] != d2[i]) \
|
||||
{ \
|
||||
printf("%s-SSE-aligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \
|
||||
_str_, i, src[i], d1[i], d2[i]); \
|
||||
++failed; \
|
||||
} \
|
||||
} \
|
||||
/* Unaligned */ \
|
||||
_f2_(src+1, 3, d2+2, FUNC_TEST_SIZE); \
|
||||
for (i=1; i<=FUNC_TEST_SIZE; ++i) \
|
||||
{ \
|
||||
if (d1[i] != d2[i+1]) \
|
||||
{ \
|
||||
printf("%s-SSE-unaligned FAIL[%d]: 0x%x>>3=0x%x, got 0x%x\n", \
|
||||
_str_, i, src[i], d1[i], d2[i+1]); \
|
||||
++failed; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (!failed) printf("All %s tests passed (%s).\n", _str_, testStr); \
|
||||
return (failed > 0) ? FAILURE : SUCCESS; \
|
||||
static BOOL test_lShift_16s_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
INT16 ALIGN(src[FUNC_TEST_SIZE+3]);
|
||||
INT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
|
||||
UINT32 val;
|
||||
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
val = (val % (FUNC_TEST_SIZE - 1)) + 1;
|
||||
|
||||
/* Aligned */
|
||||
status = generic->lShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->lShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
/* Unaligned */
|
||||
status = generic->lShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->lShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
#else
|
||||
#define SHIFT_TEST_FUNC(_name_, _type_, _str_, _f1_, _f2_) \
|
||||
int _name_(void) \
|
||||
{ \
|
||||
return SUCCESS; \
|
||||
|
||||
static BOOL test_lShift_16u_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
UINT16 ALIGN(src[FUNC_TEST_SIZE+3]);
|
||||
UINT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
|
||||
UINT32 val;
|
||||
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
val = (val % (FUNC_TEST_SIZE - 1)) + 1;
|
||||
|
||||
/* Aligned */
|
||||
status = generic->lShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->lShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
/* Unaligned */
|
||||
status = generic->lShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->lShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
#endif /* i386 */
|
||||
|
||||
SHIFT_TEST_FUNC(test_lShift_16s_func, INT16, "lshift_16s", general_lShiftC_16s,
|
||||
sse2_lShiftC_16s)
|
||||
SHIFT_TEST_FUNC(test_lShift_16u_func, UINT16, "lshift_16u", general_lShiftC_16u,
|
||||
sse2_lShiftC_16u)
|
||||
SHIFT_TEST_FUNC(test_rShift_16s_func, INT16, "rshift_16s", general_rShiftC_16s,
|
||||
sse2_rShiftC_16s)
|
||||
SHIFT_TEST_FUNC(test_rShift_16u_func, UINT16, "rshift_16u", general_rShiftC_16u,
|
||||
sse2_rShiftC_16u)
|
||||
static BOOL test_rShift_16s_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
INT16 ALIGN(src[FUNC_TEST_SIZE+3]);
|
||||
INT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
|
||||
UINT32 val;
|
||||
|
||||
/* ========================================================================= */
|
||||
STD_SPEED_TEST(speed_lShift_16s, INT16, INT16, dst = dst,
|
||||
TRUE, general_lShiftC_16s(src1, constant, dst, size),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, sse2_lShiftC_16s(src1, constant, dst, size),
|
||||
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
TRUE, ippsLShiftC_16s(src1, constant, dst, size));
|
||||
STD_SPEED_TEST(speed_lShift_16u, UINT16, UINT16, dst = dst,
|
||||
TRUE, general_lShiftC_16u(src1, constant, dst, size),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, sse2_lShiftC_16u(src1, constant, dst, size),
|
||||
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
TRUE, ippsLShiftC_16u(src1, constant, dst, size));
|
||||
STD_SPEED_TEST(speed_rShift_16s, INT16, INT16, dst = dst,
|
||||
TRUE, general_rShiftC_16s(src1, constant, dst, size),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, sse2_rShiftC_16s(src1, constant, dst, size),
|
||||
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
TRUE, ippsRShiftC_16s(src1, constant, dst, size));
|
||||
STD_SPEED_TEST(speed_rShift_16u, UINT16, UINT16, dst = dst,
|
||||
TRUE, general_rShiftC_16u(src1, constant, dst, size),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, sse2_rShiftC_16u(src1, constant, dst, size),
|
||||
PF_SSE2_INSTRUCTIONS_AVAILABLE, FALSE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
TRUE, ippsRShiftC_16u(src1, constant, dst, size));
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
val = (val % (FUNC_TEST_SIZE - 1)) + 1;
|
||||
|
||||
/* Aligned */
|
||||
status = generic->rShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->rShiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
/* Unaligned */
|
||||
status = generic->rShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->rShiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_rShift_16u_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
UINT16 ALIGN(src[FUNC_TEST_SIZE+3]);
|
||||
UINT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
|
||||
UINT32 val;
|
||||
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
val = (val % (FUNC_TEST_SIZE - 1)) + 1;
|
||||
|
||||
/* Aligned */
|
||||
status = generic->rShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->rShiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
/* Unaligned */
|
||||
status = generic->rShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->rShiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_ShiftWrapper_16s_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
INT16 ALIGN(src[FUNC_TEST_SIZE+3]);
|
||||
INT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
|
||||
UINT32 tmp;
|
||||
INT32 val;
|
||||
|
||||
winpr_RAND((BYTE*)&tmp, sizeof(tmp));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
val = (tmp % (FUNC_TEST_SIZE - 1)) + 1;
|
||||
|
||||
/* Aligned */
|
||||
status = generic->shiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16s(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
status = generic->shiftC_16s(src+1, -val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16s(src+1, -val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
/* Unaligned */
|
||||
status = generic->shiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16s(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = generic->shiftC_16s(src+1, -val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16s(src+1, -val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static BOOL test_ShiftWrapper_16u_func(void)
|
||||
{
|
||||
pstatus_t status;
|
||||
UINT16 ALIGN(src[FUNC_TEST_SIZE+3]);
|
||||
UINT16 ALIGN(d1[FUNC_TEST_SIZE+3]);
|
||||
UINT32 tmp;
|
||||
INT32 val;
|
||||
|
||||
winpr_RAND((BYTE*)&tmp, sizeof(tmp));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
val = (tmp % (FUNC_TEST_SIZE - 1)) + 1;
|
||||
|
||||
/* Aligned */
|
||||
status = generic->shiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16u(src+1, val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
status = generic->shiftC_16u(src+1, -val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16u(src+1, -val, d1+1, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
/* Unaligned */
|
||||
status = generic->shiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16u(src+1, val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = generic->shiftC_16u(src+1, -val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->shiftC_16u(src+1, -val, d1+2, FUNC_TEST_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_lShift_16s_speed(void)
|
||||
static BOOL test_lShift_16s_speed(void)
|
||||
{
|
||||
UINT32 val;
|
||||
INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
|
||||
winpr_RAND(src, sizeof(src));
|
||||
speed_lShift_16s("lShift_16s", "aligned", src, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
speed_lShift_16s("lShift_16s", "unaligned", src + 1, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
|
||||
if (!speed_test("lShift_16s", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->lShiftC_16s,
|
||||
(speed_test_fkt)optimized->lShiftC_16s, src, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
if (!speed_test("lShift_16s", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->lShiftC_16s,
|
||||
(speed_test_fkt)optimized->lShiftC_16s, src + 1, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_lShift_16u_speed(void)
|
||||
static BOOL test_lShift_16u_speed(void)
|
||||
{
|
||||
UINT32 val;
|
||||
UINT16 ALIGN(src[MAX_TEST_SIZE + 1]), ALIGN(dst[MAX_TEST_SIZE + 1]);
|
||||
winpr_RAND(src, sizeof(src));
|
||||
speed_lShift_16u("lShift_16u", "aligned", src, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
speed_lShift_16u("lShift_16u", "unaligned", src + 1, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
if (!speed_test("lShift_16u", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->lShiftC_16u,
|
||||
(speed_test_fkt)optimized->lShiftC_16u, src, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
if (!speed_test("lShift_16u", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->lShiftC_16u,
|
||||
(speed_test_fkt)optimized->lShiftC_16u, src + 1, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_rShift_16s_speed(void)
|
||||
static BOOL test_rShift_16s_speed(void)
|
||||
{
|
||||
UINT32 val;
|
||||
INT16 ALIGN(src[MAX_TEST_SIZE+1]), ALIGN(dst[MAX_TEST_SIZE+1]);
|
||||
winpr_RAND(src, sizeof(src));
|
||||
speed_rShift_16s("rShift_16s", "aligned", src, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
speed_rShift_16s("rShift_16s", "unaligned", src + 1, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
if (!speed_test("rShift_16s", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->rShiftC_16s,
|
||||
(speed_test_fkt)optimized->rShiftC_16s, src, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
if (!speed_test("rShift_16s", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->rShiftC_16s,
|
||||
(speed_test_fkt)optimized->rShiftC_16s, src + 1, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_rShift_16u_speed(void)
|
||||
static BOOL test_rShift_16u_speed(void)
|
||||
{
|
||||
UINT32 val;
|
||||
UINT16 ALIGN(src[MAX_TEST_SIZE + 1]), ALIGN(dst[MAX_TEST_SIZE + 1]);
|
||||
winpr_RAND(src, sizeof(src));
|
||||
speed_rShift_16u("rShift_16u", "aligned", src, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
speed_rShift_16u("rShift_16u", "unaligned", src + 1, NULL, 3, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SHIFT_PRETEST_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
winpr_RAND((BYTE*)&val, sizeof(val));
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
if (!speed_test("rShift_16u", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->rShiftC_16u,
|
||||
(speed_test_fkt)optimized->rShiftC_16u, src, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
if (!speed_test("rShift_16u", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->rShiftC_16u,
|
||||
(speed_test_fkt)optimized->rShiftC_16u, src + 1, val,
|
||||
dst, MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int TestPrimitivesShift(int argc, char* argv[])
|
||||
{
|
||||
int status;
|
||||
status = test_lShift_16s_func();
|
||||
prim_test_setup(FALSE);
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_lShift_16s_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_lShift_16s_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_lShift_16s_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = test_lShift_16u_func();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_lShift_16u_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_lShift_16u_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_lShift_16u_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = test_rShift_16s_func();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_rShift_16s_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_rShift_16s_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_rShift_16s_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
status = test_rShift_16u_func();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_rShift_16u_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_rShift_16u_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_rShift_16u_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!test_ShiftWrapper_16s_func())
|
||||
return 1;
|
||||
|
||||
if (!test_ShiftWrapper_16u_func())
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -19,103 +19,71 @@
|
|||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
static const int SIGN_PRETEST_ITERATIONS = 100000;
|
||||
static const float TEST_TIME = 1.0;
|
||||
#define TEST_BUFFER_SIZE 65535
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
static int test_sign16s_func(void)
|
||||
static BOOL test_sign16s_func(void)
|
||||
{
|
||||
INT16 ALIGN(src[65535]), ALIGN(d1[65535]);
|
||||
#ifdef WITH_SSE2
|
||||
INT16 ALIGN(d2[65535]);
|
||||
int i;
|
||||
#endif
|
||||
int failed = 0;
|
||||
char testStr[256];
|
||||
/* Test when we can reach 16-byte alignment */
|
||||
testStr[0] = '\0';
|
||||
winpr_RAND(src, sizeof(src));
|
||||
general_sign_16s(src + 1, d1 + 1, 65535);
|
||||
#ifdef WITH_SSE2
|
||||
pstatus_t status;
|
||||
INT16 ALIGN(src[TEST_BUFFER_SIZE]);
|
||||
INT16 ALIGN(d1[TEST_BUFFER_SIZE]);
|
||||
INT16 ALIGN(d2[TEST_BUFFER_SIZE]);
|
||||
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
|
||||
{
|
||||
strcat(testStr, " SSSE3");
|
||||
ssse3_sign_16s(src + 1, d2 + 1, 65535);
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
for (i = 1; i < 65535; ++i)
|
||||
{
|
||||
if (d1[i] != d2[i])
|
||||
{
|
||||
printf("SIGN16s-SSE-aligned FAIL[%d] of %d: want %d, got %d\n",
|
||||
i, src[i], d1[i], d2[i]);
|
||||
++failed;
|
||||
status = generic->sign_16s(src + 1, d1 + 1, TEST_BUFFER_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->sign_16s(src + 1, d2 + 1, TEST_BUFFER_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
if (memcmp(d1, d2, sizeof(d1)) != 0)
|
||||
return FALSE;
|
||||
|
||||
status = generic->sign_16s(src + 1, d1 + 2, TEST_BUFFER_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->sign_16s(src + 1, d2 + 2, TEST_BUFFER_SIZE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
if (memcmp(d1, d2, sizeof(d1)) != 0)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* i386 */
|
||||
/* Test when we cannot reach 16-byte alignment */
|
||||
winpr_RAND(src, sizeof(src));
|
||||
general_sign_16s(src + 1, d1 + 2, 65535);
|
||||
#ifdef WITH_SSE2
|
||||
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
|
||||
{
|
||||
ssse3_sign_16s(src + 1, d2 + 2, 65535);
|
||||
|
||||
for (i = 2; i < 65535; ++i)
|
||||
{
|
||||
if (d1[i] != d2[i])
|
||||
{
|
||||
printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n",
|
||||
i, src[i - 1], d1[i], d2[i]);
|
||||
++failed;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* i386 */
|
||||
|
||||
if (!failed) printf("All sign16s tests passed (%s).\n", testStr);
|
||||
|
||||
return (failed > 0) ? FAILURE : SUCCESS;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(sign16s_speed_test, INT16, INT16, dst = dst,
|
||||
TRUE, general_sign_16s(src1, dst, size),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, ssse3_sign_16s(src1, dst, size), PF_EX_SSSE3, TRUE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
FALSE, dst = dst);
|
||||
|
||||
static int test_sign16s_speed(void)
|
||||
{
|
||||
INT16 ALIGN(src[MAX_TEST_SIZE + 3]), ALIGN(dst[MAX_TEST_SIZE + 3]);
|
||||
winpr_RAND(src, sizeof(src));
|
||||
sign16s_speed_test("sign16s", "aligned", src, NULL, 0, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
|
||||
sign16s_speed_test("sign16s", "unaligned", src + 1, NULL, 0, dst,
|
||||
test_sizes, NUM_TEST_SIZES, SIGN_PRETEST_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
winpr_RAND((BYTE*)src, sizeof(src));
|
||||
|
||||
if (!speed_test("sign16s", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->sign_16s,
|
||||
(speed_test_fkt)optimized->sign_16s, src + 1, dst + 1,
|
||||
MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
if (!speed_test("sign16s", "unaligned", g_Iterations,
|
||||
(speed_test_fkt)generic->sign_16s,
|
||||
(speed_test_fkt)optimized->sign_16s, src + 1, dst + 2,
|
||||
MAX_TEST_SIZE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int TestPrimitivesSign(int argc, char* argv[])
|
||||
{
|
||||
int status;
|
||||
status = test_sign16s_func();
|
||||
prim_test_setup(FALSE);
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_sign16s_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_sign16s_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_sign16s_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -23,105 +23,103 @@
|
|||
#include <winpr/sysinfo.h>
|
||||
#include "prim_test.h"
|
||||
|
||||
static const int YCOCG_TRIAL_ITERATIONS = 20000;
|
||||
static const float TEST_TIME = 4.0;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
int test_YCoCgRToRGB_8u_AC4R_func(void)
|
||||
static BOOL test_YCoCgRToRGB_8u_AC4R_func(void)
|
||||
{
|
||||
#ifdef WITH_SSE2
|
||||
int i;
|
||||
BOOL result = TRUE;
|
||||
pstatus_t status;
|
||||
INT32 ALIGN(out_sse[4098]), ALIGN(out_sse_inv[4098]);
|
||||
#endif
|
||||
INT32 ALIGN(in[4098]);
|
||||
INT32 ALIGN(out_c[4098]), ALIGN(out_c_inv[4098]);
|
||||
char testStr[256];
|
||||
BOOL failed = FALSE;
|
||||
testStr[0] = '\0';
|
||||
winpr_RAND(in, sizeof(in));
|
||||
general_YCoCgToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_c, 63 * 4, 63, 61, 2, TRUE, FALSE);
|
||||
general_YCoCgToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_c_inv, 63 * 4, 63, 61, 2, TRUE, TRUE);
|
||||
#ifdef WITH_SSE2
|
||||
|
||||
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3))
|
||||
UINT32 i, x;
|
||||
const UINT32 formats[] = {
|
||||
PIXEL_FORMAT_ARGB32,
|
||||
PIXEL_FORMAT_ABGR32,
|
||||
PIXEL_FORMAT_RGBA32,
|
||||
PIXEL_FORMAT_RGBX32,
|
||||
PIXEL_FORMAT_BGRA32,
|
||||
PIXEL_FORMAT_BGRX32
|
||||
};
|
||||
|
||||
winpr_RAND((BYTE*)in, sizeof(in));
|
||||
|
||||
for (x=0; x<sizeof(formats)/sizeof(formats[0]); x++)
|
||||
{
|
||||
strcat(testStr, " SSSE3");
|
||||
ssse3_YCoCgRToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_sse, 63 * 4, 63, 61, 2, TRUE, FALSE);
|
||||
UINT32 format = formats[x];
|
||||
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
(const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_c, format, 63 * 4, 63, 61, 2, TRUE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = generic->YCoCgToRGB_8u_AC4R(
|
||||
(const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_c_inv, format, 63 * 4, 63, 61, 2, TRUE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
status = optimized->YCoCgToRGB_8u_AC4R(
|
||||
(const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_sse, format, 63 * 4, 63, 61, 2, TRUE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
status = optimized->YCoCgToRGB_8u_AC4R(
|
||||
(const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_sse_inv, format, 63 * 4, 63, 61, 2, TRUE);
|
||||
if (status != PRIMITIVES_SUCCESS)
|
||||
return FALSE;
|
||||
|
||||
for (i = 0; i < 63 * 61; ++i)
|
||||
{
|
||||
if (out_c[i] != out_sse[i])
|
||||
{
|
||||
printf("YCoCgRToRGB-SSE FAIL[%d]: 0x%08x -> C 0x%08x vs SSE 0x%08x\n", i,
|
||||
printf("optimized->YCoCgRToRGB FAIL[%d]: 0x%08x -> C 0x%08x vs optimized 0x%08x\n", i,
|
||||
in[i + 1], out_c[i], out_sse[i]);
|
||||
failed = TRUE;
|
||||
result = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
ssse3_YCoCgRToRGB_8u_AC4R((const BYTE*)(in + 1), 63 * 4,
|
||||
(BYTE*) out_sse_inv, 63 * 4, 63, 61, 2, TRUE, TRUE);
|
||||
|
||||
for (i = 0; i < 63 * 61; ++i)
|
||||
{
|
||||
if (out_c_inv[i] != out_sse_inv[i])
|
||||
{
|
||||
printf("YCoCgRToRGB-SSE inverted FAIL[%d]: 0x%08x -> C 0x%08x vs SSE 0x%08x\n",
|
||||
printf("optimized->YCoCgRToRGB inverted FAIL[%d]: 0x%08x -> C 0x%08x vs optimized 0x%08x\n",
|
||||
i,
|
||||
in[i + 1], out_c_inv[i], out_sse_inv[i]);
|
||||
failed = TRUE;
|
||||
result = FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* i386 */
|
||||
|
||||
if (!failed) printf("All YCoCgRToRGB_8u_AC4R tests passed (%s).\n", testStr);
|
||||
|
||||
return (failed > 0) ? FAILURE : SUCCESS;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
STD_SPEED_TEST(
|
||||
ycocg_to_rgb_speed, BYTE, BYTE, PRIM_NOP,
|
||||
TRUE, general_YCoCgToRGB_8u_AC4R(src1, 64 * 4, dst, 64 * 4, 64, 64, 2, FALSE,
|
||||
FALSE),
|
||||
#ifdef WITH_SSE2
|
||||
TRUE, ssse3_YCoCgRToRGB_8u_AC4R(src1, 64 * 4, dst, 64 * 4, 64, 64, 2, FALSE,
|
||||
FALSE),
|
||||
PF_EX_SSSE3, TRUE,
|
||||
#else
|
||||
FALSE, PRIM_NOP, 0, FALSE,
|
||||
#endif
|
||||
FALSE, PRIM_NOP);
|
||||
|
||||
static int test_YCoCgRToRGB_8u_AC4R_speed(void)
|
||||
{
|
||||
INT32 ALIGN(in[4096]);
|
||||
INT32 ALIGN(out[4096]);
|
||||
int size_array[] = { 64 };
|
||||
winpr_RAND(in, sizeof(in));
|
||||
ycocg_to_rgb_speed("YCoCgToRGB", "aligned", (const BYTE*) in,
|
||||
0, 0, (BYTE*) out,
|
||||
size_array, 1, YCOCG_TRIAL_ITERATIONS, TEST_TIME);
|
||||
return SUCCESS;
|
||||
|
||||
winpr_RAND((BYTE*)in, sizeof(in));
|
||||
|
||||
if (!speed_test("YCoCgToRGB_8u_AC4R", "aligned", g_Iterations,
|
||||
(speed_test_fkt)generic->YCoCgToRGB_8u_AC4R,
|
||||
(speed_test_fkt)optimized->YCoCgToRGB_8u_AC4R,
|
||||
in, 64 * 4, out, 64 * 4, 64, 64, 2, FALSE, FALSE))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int TestPrimitivesYCoCg(int argc, char* argv[])
|
||||
{
|
||||
int status;
|
||||
status = test_YCoCgRToRGB_8u_AC4R_func();
|
||||
prim_test_setup(FALSE);
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_YCoCgRToRGB_8u_AC4R_func())
|
||||
return 1;
|
||||
|
||||
if (g_TestPrimitivesPerformance)
|
||||
{
|
||||
status = test_YCoCgRToRGB_8u_AC4R_speed();
|
||||
|
||||
if (status != SUCCESS)
|
||||
if (!test_YCoCgRToRGB_8u_AC4R_speed())
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,8 +38,8 @@ static void get_size(UINT32* width, UINT32* height)
|
|||
winpr_RAND((BYTE*)width, sizeof(*width));
|
||||
winpr_RAND((BYTE*)height, sizeof(*height));
|
||||
// TODO: Algorithm only works on even resolutions...
|
||||
*width = (*width % 4000) << 1;
|
||||
*height = (*height % 4000 << 1);
|
||||
*width = (*width % 64) << 1;
|
||||
*height = (*height % 64 << 1);
|
||||
}
|
||||
|
||||
static BOOL check_padding(const BYTE* psrc, size_t size, size_t padding,
|
||||
|
@ -370,11 +370,13 @@ static BOOL TestPrimitiveYUV(BOOL use444)
|
|||
|
||||
if (use444)
|
||||
{
|
||||
if (prims->RGBToYUV444_8u_P3AC4R(rgb, stride, yuv, yuv_step,
|
||||
if (prims->RGBToYUV444_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
|
||||
stride, yuv, yuv_step,
|
||||
&roi) != PRIMITIVES_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
else if (prims->RGBToYUV420_8u_P3AC4R(rgb, stride, yuv, yuv_step,
|
||||
else if (prims->RGBToYUV420_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
|
||||
stride, yuv, yuv_step,
|
||||
&roi) != PRIMITIVES_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
|
@ -429,16 +431,16 @@ int TestPrimitivesYUV(int argc, char* argv[])
|
|||
UINT32 x;
|
||||
int rc = -1;
|
||||
|
||||
prim_test_setup(FALSE);
|
||||
|
||||
for (x = 0; x < 10; x++)
|
||||
{
|
||||
/* TODO: This test fails on value comparison,
|
||||
* there seems to be some issue left with encoder / decoder pass.
|
||||
if (!TestPrimitiveYUV(FALSE))
|
||||
goto end;
|
||||
*/
|
||||
if (!TestPrimitiveYUV(TRUE))
|
||||
goto end;
|
||||
|
||||
if (!TestPrimitiveYUV(FALSE))
|
||||
goto end;
|
||||
|
||||
if (!TestPrimitiveYUVCombine())
|
||||
goto end;
|
||||
}
|
||||
|
|
|
@ -43,13 +43,6 @@
|
|||
extern int test_sizes[];
|
||||
#define NUM_TEST_SIZES 10
|
||||
|
||||
#ifndef SUCCESS
|
||||
#define SUCCESS 0
|
||||
#endif
|
||||
#ifndef FAILURE
|
||||
#define FAILURE 1
|
||||
#endif
|
||||
|
||||
extern BOOL g_TestPrimitivesPerformance;
|
||||
extern UINT32 g_Iterations;
|
||||
|
||||
|
@ -58,8 +51,10 @@ extern primitives_t* optimized;
|
|||
|
||||
void prim_test_setup(BOOL performance);
|
||||
|
||||
typedef pstatus_t (*speed_test_fkt)();
|
||||
|
||||
BOOL speed_test(const char* name, const char* dsc, UINT32 iterations,
|
||||
pstatus_t (*generic)(), pstatus_t (*optimised)(),
|
||||
speed_test_fkt generic, speed_test_fkt optimized,
|
||||
...);
|
||||
|
||||
#endif // !__PRIMTEST_H_INCLUDED__
|
||||
|
|
Loading…
Reference in New Issue