Merge pull request #3626 from akallabeth/primitives_speedup

Added optimized pixel write functions for colors.
This commit is contained in:
Bernhard Miklautz 2016-12-06 14:34:35 +01:00 committed by GitHub
commit d1e0d44f22
4 changed files with 266 additions and 199 deletions

View File

@ -32,81 +32,6 @@
#endif /* !MINMAX */
/* ------------------------------------------------------------------------- */
/* Store one pixel as the four consecutive bytes B, G, R, A starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writePixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	BYTE* out = dst;
	*out++ = B;
	*out++ = G;
	*out++ = R;
	*out = A;
	return dst + formatSize;
}
/* Store one pixel as the four consecutive bytes R, G, B, A starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writePixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	BYTE* out = dst;
	*out++ = R;
	*out++ = G;
	*out++ = B;
	*out = A;
	return dst + formatSize;
}
/* Store one pixel as the four consecutive bytes A, B, G, R starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writePixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	BYTE* out = dst;
	*out++ = A;
	*out++ = B;
	*out++ = G;
	*out = R;
	return dst + formatSize;
}
/* Store one pixel as the four consecutive bytes A, R, G, B starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writePixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	BYTE* out = dst;
	*out++ = A;
	*out++ = R;
	*out++ = G;
	*out = B;
	return dst + formatSize;
}
/* Fallback pixel writer: packs R, G, B, A with GetColor() for the given
 * format and stores the result at dst through WriteColor().
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writePixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format,
                                      BYTE R, BYTE G, BYTE B, BYTE A)
{
	WriteColor(dst, format, GetColor(format, R, G, B, A));
	return dst + formatSize;
}
/* Pixel writer signature: (dst, formatSize, format, R, G, B, A) -> next dst. */
typedef BYTE* (*fkt_writePixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE, BYTE);

/* Pick the pixel writer matching a destination format.
 * The eight 32-bit formats listed below get a dedicated byte-order writer;
 * every other format falls back to writePixelGeneric. */
static INLINE fkt_writePixel getWriteFunction(DWORD format)
{
	fkt_writePixel writer = writePixelGeneric;

	switch (format)
	{
		case PIXEL_FORMAT_ARGB32:
		case PIXEL_FORMAT_XRGB32:
			writer = writePixelXRGB;
			break;

		case PIXEL_FORMAT_ABGR32:
		case PIXEL_FORMAT_XBGR32:
			writer = writePixelXBGR;
			break;

		case PIXEL_FORMAT_RGBA32:
		case PIXEL_FORMAT_RGBX32:
			writer = writePixelRGBX;
			break;

		case PIXEL_FORMAT_BGRA32:
		case PIXEL_FORMAT_BGRX32:
			writer = writePixelBGRX;
			break;

		default:
			break;
	}

	return writer;
}
static pstatus_t general_YCoCgToRGB_8u_AC4R(
const BYTE* pSrc, INT32 srcStep,
BYTE* pDst, UINT32 DstFormat, INT32 dstStep,
@ -120,7 +45,7 @@ static pstatus_t general_YCoCgToRGB_8u_AC4R(
const BYTE* sptr = pSrc;
INT16 Cg, Co, Y, T, R, G, B;
const DWORD formatSize = GetBytesPerPixel(DstFormat);
fkt_writePixel writePixel = getWriteFunction(DstFormat);
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
int cll = shift - 1; /* -1 builds in the /2's */
UINT32 srcPad = srcStep - (width * 4);
UINT32 dstPad = dstStep - (width * formatSize);

View File

@ -23,6 +23,7 @@
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <freerdp/codec/color.h>
#include "prim_internal.h"
static INLINE BYTE CLIP(INT32 X)
{
@ -326,101 +327,6 @@ static INLINE BYTE YUV2B(INT32 Y, INT32 U, INT32 V)
return CLIP(b8);
}
/* Convert one Y/U/V sample with YUV2R/YUV2G/YUV2B and store it as the four
 * consecutive bytes B, G, R, 0xFF starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writeYUVPixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
                                      BYTE V)
{
	BYTE* out = dst;
	const BYTE red = YUV2R(Y, U, V);
	const BYTE green = YUV2G(Y, U, V);
	const BYTE blue = YUV2B(Y, U, V);
	*out++ = blue;
	*out++ = green;
	*out++ = red;
	*out = 0xFF;
	return dst + formatSize;
}
/* Convert one Y/U/V sample with YUV2R/YUV2G/YUV2B and store it as the four
 * consecutive bytes R, G, B, 0xFF starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writeYUVPixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
                                      BYTE V)
{
	BYTE* out = dst;
	const BYTE red = YUV2R(Y, U, V);
	const BYTE green = YUV2G(Y, U, V);
	const BYTE blue = YUV2B(Y, U, V);
	*out++ = red;
	*out++ = green;
	*out++ = blue;
	*out = 0xFF;
	return dst + formatSize;
}
/* Convert one Y/U/V sample with YUV2R/YUV2G/YUV2B and store it as the four
 * consecutive bytes 0xFF, B, G, R starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writeYUVPixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
                                      BYTE V)
{
	BYTE* out = dst;
	const BYTE red = YUV2R(Y, U, V);
	const BYTE green = YUV2G(Y, U, V);
	const BYTE blue = YUV2B(Y, U, V);
	*out++ = 0xFF;
	*out++ = blue;
	*out++ = green;
	*out = red;
	return dst + formatSize;
}
/* Convert one Y/U/V sample with YUV2R/YUV2G/YUV2B and store it as the four
 * consecutive bytes 0xFF, R, G, B starting at dst.
 * The format argument is not consulted.
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writeYUVPixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
                                      BYTE V)
{
	BYTE* out = dst;
	const BYTE red = YUV2R(Y, U, V);
	const BYTE green = YUV2G(Y, U, V);
	const BYTE blue = YUV2B(Y, U, V);
	*out++ = 0xFF;
	*out++ = red;
	*out++ = green;
	*out = blue;
	return dst + formatSize;
}
/* Fallback YUV pixel writer: converts the sample with YUV2R/YUV2G/YUV2B,
 * packs it (alpha 0xFF) with GetColor() for the given format and stores it
 * at dst through WriteColor().
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writeYUVPixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format, BYTE Y, BYTE U,
                                         BYTE V)
{
	const BYTE red = YUV2R(Y, U, V);
	const BYTE green = YUV2G(Y, U, V);
	const BYTE blue = YUV2B(Y, U, V);
	const BYTE alpha = 0xFF;
	WriteColor(dst, format, GetColor(format, red, green, blue, alpha));
	return dst + formatSize;
}
/* YUV pixel writer signature: (dst, formatSize, format, Y, U, V) -> next dst. */
typedef BYTE* (*fkt_writeYUVPixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE);

/* Pick the YUV pixel writer matching a destination format.
 * The eight 32-bit formats listed below get a dedicated byte-order writer;
 * every other format falls back to writeYUVPixelGeneric. */
static INLINE fkt_writeYUVPixel getWriteFunction(DWORD format)
{
	fkt_writeYUVPixel writer = writeYUVPixelGeneric;

	switch (format)
	{
		case PIXEL_FORMAT_ARGB32:
		case PIXEL_FORMAT_XRGB32:
			writer = writeYUVPixelXRGB;
			break;

		case PIXEL_FORMAT_ABGR32:
		case PIXEL_FORMAT_XBGR32:
			writer = writeYUVPixelXBGR;
			break;

		case PIXEL_FORMAT_RGBA32:
		case PIXEL_FORMAT_RGBX32:
			writer = writeYUVPixelRGBX;
			break;

		case PIXEL_FORMAT_BGRA32:
		case PIXEL_FORMAT_BGRX32:
			writer = writeYUVPixelBGRX;
			break;

		default:
			break;
	}

	return writer;
}
static pstatus_t general_YUV444ToRGB_8u_P3AC4R(
const BYTE* pSrc[3], const UINT32 srcStep[3],
BYTE* pDst, UINT32 dstStep, UINT32 DstFormat,
@ -429,7 +335,7 @@ static pstatus_t general_YUV444ToRGB_8u_P3AC4R(
UINT32 x, y;
UINT32 nWidth, nHeight;
const DWORD formatSize = GetBytesPerPixel(DstFormat);
fkt_writeYUVPixel writeYUVPixel = getWriteFunction(DstFormat);
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
nWidth = roi->width;
nHeight = roi->height;
@ -445,7 +351,10 @@ static pstatus_t general_YUV444ToRGB_8u_P3AC4R(
const BYTE Y = pY[x];
const INT32 U = pU[x];
const INT32 V = pV[x];
pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
const BYTE r = YUV2R(Y, U, V);
const BYTE g = YUV2G(Y, U, V);
const BYTE b = YUV2B(Y, U, V);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
}
}
@ -475,7 +384,7 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
UINT32 nWidth, nHeight;
UINT32 lastRow, lastCol;
const DWORD formatSize = GetBytesPerPixel(DstFormat);
fkt_writeYUVPixel writeYUVPixel = getWriteFunction(DstFormat);
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
pY = pSrc[0];
pU = pSrc[1];
pV = pSrc[2];
@ -497,6 +406,10 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
for (x = 0; x < halfWidth;)
{
BYTE r;
BYTE g;
BYTE b;
if (++x == halfWidth)
lastCol <<= 1;
@ -504,13 +417,19 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
V = *pV++;
/* 1st pixel */
Y = *pY++;
pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
r = YUV2R(Y, U, V);
g = YUV2G(Y, U, V);
b = YUV2B(Y, U, V);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
/* 2nd pixel */
if (!(lastCol & 0x02))
{
Y = *pY++;
pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
r = YUV2R(Y, U, V);
g = YUV2G(Y, U, V);
b = YUV2B(Y, U, V);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
}
else
{
@ -530,6 +449,10 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
for (x = 0; x < halfWidth;)
{
BYTE r;
BYTE g;
BYTE b;
if (++x == halfWidth)
lastCol <<= 1;
@ -537,13 +460,19 @@ static pstatus_t general_YUV420ToRGB_8u_P3AC4R(
V = *pV++;
/* 3rd pixel */
Y = *pY++;
pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
r = YUV2R(Y, U, V);
g = YUV2G(Y, U, V);
b = YUV2B(Y, U, V);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
/* 4th pixel */
if (!(lastCol & 0x02))
{
Y = *pY++;
pRGB = (*writeYUVPixel)(pRGB, formatSize, DstFormat, Y, U, V);
r = YUV2R(Y, U, V);
g = YUV2G(Y, U, V);
b = YUV2B(Y, U, V);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, r, g, b, 0xFF);
}
else
{

View File

@ -33,14 +33,6 @@
#endif /* !MINMAX */
/* ------------------------------------------------------------------------- */
/* Pack r, g, b into the given pixel format and store the result at dst.
 *
 * dst    - destination for one pixel; written through WriteColor()
 * format - destination pixel format, passed to GetColor()/WriteColor()
 * r,g,b  - colour components of the pixel
 *
 * Returns dst advanced by GetBytesPerPixel(format) bytes. */
static INLINE BYTE* writePixel(BYTE* dst, UINT32 format, BYTE r, BYTE g, BYTE b)
{
	/* Pass 0xFF as alpha: packing with alpha 0 would make every pixel fully
	 * transparent in formats that carry an alpha channel. 0xFF matches the
	 * other pixel writers in this module. */
	const UINT32 color = GetColor(format, r, g, b, 0xFF);
	WriteColor(dst, format, color);
	return dst + GetBytesPerPixel(format);
}
static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
const INT16* pSrc[3], UINT32 srcStep,
BYTE* pDst, UINT32 DstFormat, UINT32 dstStep,
@ -55,6 +47,8 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
const INT16* pCr = pSrc[2];
int srcPad = (srcStep - (roi->width * 2)) / 2;
int dstPad = (dstStep - (roi->width * 4)) / 4;
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
const DWORD formatSize = GetBytesPerPixel(DstFormat);
for (y = 0; y < roi->height; y++)
{
@ -82,7 +76,7 @@ static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(
else if (B > 255)
B = 255;
pRGB = writePixel(pRGB, DstFormat, R, G, B);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF);
pY++;
pCb++;
pCr++;
@ -111,6 +105,8 @@ static pstatus_t general_yCbCrToBGR_16s8u_P3AC4R(
const INT16* pCr = pSrc[2];
UINT32 srcPad = (srcStep - (roi->width * 2)) / 2;
UINT32 dstPad = (dstStep - (roi->width * 4)) / 4;
fkt_writePixel writePixel = getPixelWriteFunction(DstFormat);
const DWORD formatSize = GetBytesPerPixel(DstFormat);
for (y = 0; y < roi->height; y++)
{
@ -138,7 +134,7 @@ static pstatus_t general_yCbCrToBGR_16s8u_P3AC4R(
else if (B > 255)
B = 255;
pRGB = writePixel(pRGB, DstFormat, R, G, B);
pRGB = (*writePixel)(pRGB, formatSize, DstFormat, R, G, B, 0xFF);
pY++;
pCb++;
pCr++;
@ -303,6 +299,150 @@ static pstatus_t general_RGBToYCbCr_16s16s_P3P3(
return PRIMITIVES_SUCCESS;
}
/* Fallback scanline writer: looks up the per-pixel writer for DstFormat once
 * and calls it for each of the width samples, with alpha 0xFF.
 * The INT16 samples are passed straight to the writer's BYTE parameters. */
static INLINE void writeScanlineGeneric(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
                                        const INT16* r, const INT16* g, const INT16* b, DWORD width)
{
	const fkt_writePixel put = getPixelWriteFunction(DstFormat);
	BYTE* out = dst;
	DWORD i;

	for (i = 0; i < width; i++)
	{
		out = put(out, formatSize, DstFormat, r[i], g[i], b[i], 0xFF);
	}
}
/* Write width pixels as 3 bytes each, in R, G, B order.
 * Each INT16 sample is narrowed to BYTE by plain conversion (no explicit
 * clamping here); formatSize and DstFormat are not consulted. */
static INLINE void writeScanlineRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
                                    const INT16* r, const INT16* g, const INT16* b, DWORD width)
{
	BYTE* out = dst;
	DWORD i;

	for (i = 0; i < width; i++)
	{
		const BYTE red = (BYTE)r[i];
		const BYTE green = (BYTE)g[i];
		const BYTE blue = (BYTE)b[i];
		out[0] = red;
		out[1] = green;
		out[2] = blue;
		out += 3;
	}
}
/* Write width pixels as 3 bytes each, in B, G, R order.
 * Each INT16 sample is narrowed to BYTE by plain conversion (no explicit
 * clamping here); formatSize and DstFormat are not consulted. */
static INLINE void writeScanlineBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
                                    const INT16* r, const INT16* g, const INT16* b, DWORD width)
{
	BYTE* out = dst;
	DWORD i;

	for (i = 0; i < width; i++)
	{
		const BYTE red = (BYTE)r[i];
		const BYTE green = (BYTE)g[i];
		const BYTE blue = (BYTE)b[i];
		out[0] = blue;
		out[1] = green;
		out[2] = red;
		out += 3;
	}
}
/* Write width pixels as 4 bytes each, in B, G, R, 0xFF order.
 * Each INT16 sample is narrowed to BYTE by plain conversion (no explicit
 * clamping here); formatSize and DstFormat are not consulted. */
static INLINE void writeScanlineBGRX(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
{
	BYTE* out = dst;
	DWORD i;

	for (i = 0; i < width; i++)
	{
		const BYTE red = (BYTE)r[i];
		const BYTE green = (BYTE)g[i];
		const BYTE blue = (BYTE)b[i];
		out[0] = blue;
		out[1] = green;
		out[2] = red;
		out[3] = 0xFF;
		out += 4;
	}
}
/* Write width pixels as 4 bytes each, in R, G, B, 0xFF order.
 * Each INT16 sample is narrowed to BYTE by plain conversion (no explicit
 * clamping here); formatSize and DstFormat are not consulted. */
static INLINE void writeScanlineRGBX(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
{
	BYTE* out = dst;
	DWORD i;

	for (i = 0; i < width; i++)
	{
		const BYTE red = (BYTE)r[i];
		const BYTE green = (BYTE)g[i];
		const BYTE blue = (BYTE)b[i];
		out[0] = red;
		out[1] = green;
		out[2] = blue;
		out[3] = 0xFF;
		out += 4;
	}
}
/* Write width pixels as 4 bytes each, in 0xFF, B, G, R order.
 * Each INT16 sample is narrowed to BYTE by plain conversion (no explicit
 * clamping here); formatSize and DstFormat are not consulted. */
static INLINE void writeScanlineXBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
{
	BYTE* out = dst;
	DWORD i;

	for (i = 0; i < width; i++)
	{
		const BYTE red = (BYTE)r[i];
		const BYTE green = (BYTE)g[i];
		const BYTE blue = (BYTE)b[i];
		out[0] = 0xFF;
		out[1] = blue;
		out[2] = green;
		out[3] = red;
		out += 4;
	}
}
/* Write width pixels as 4 bytes each, in 0xFF, R, G, B order.
 * Each INT16 sample is narrowed to BYTE by plain conversion (no explicit
 * clamping here); formatSize and DstFormat are not consulted. */
static INLINE void writeScanlineXRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
                                     const INT16* r, const INT16* g, const INT16* b, DWORD width)
{
	BYTE* out = dst;
	DWORD i;

	for (i = 0; i < width; i++)
	{
		const BYTE red = (BYTE)r[i];
		const BYTE green = (BYTE)g[i];
		const BYTE blue = (BYTE)b[i];
		out[0] = 0xFF;
		out[1] = red;
		out[2] = green;
		out[3] = blue;
		out += 4;
	}
}
/* Scanline writer signature:
 * (dst, formatSize, DstFormat, r, g, b, width); writes width pixels at dst. */
typedef void (*fkt_writeScanline)(BYTE*, DWORD, UINT32, const INT16*,
                                  const INT16*, const INT16*, DWORD);

/* Pick the scanline writer matching a destination format.
 * The eight 32-bit formats and the two 24-bit formats listed below get a
 * dedicated writer; every other format falls back to writeScanlineGeneric. */
static INLINE fkt_writeScanline getScanlineWriteFunction(DWORD format)
{
	fkt_writeScanline writer = writeScanlineGeneric;

	switch (format)
	{
		case PIXEL_FORMAT_ARGB32:
		case PIXEL_FORMAT_XRGB32:
			writer = writeScanlineXRGB;
			break;

		case PIXEL_FORMAT_ABGR32:
		case PIXEL_FORMAT_XBGR32:
			writer = writeScanlineXBGR;
			break;

		case PIXEL_FORMAT_RGBA32:
		case PIXEL_FORMAT_RGBX32:
			writer = writeScanlineRGBX;
			break;

		case PIXEL_FORMAT_BGRA32:
		case PIXEL_FORMAT_BGRX32:
			writer = writeScanlineBGRX;
			break;

		case PIXEL_FORMAT_BGR24:
			writer = writeScanlineBGR;
			break;

		case PIXEL_FORMAT_RGB24:
			writer = writeScanlineRGB;
			break;

		default:
			break;
	}

	return writer;
}
/* ------------------------------------------------------------------------- */
static pstatus_t general_RGBToRGB_16s8u_P3AC4R(
const INT16* const pSrc[3], /* 16-bit R,G, and B arrays */
@ -315,20 +455,18 @@ static pstatus_t general_RGBToRGB_16s8u_P3AC4R(
const INT16* r = pSrc[0];
const INT16* g = pSrc[1];
const INT16* b = pSrc[2];
BYTE* dst = pDst;
UINT32 x, y;
UINT32 srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
UINT32 dstbump = (dstStep - (roi->width * sizeof(UINT32)));
UINT32 y;
const DWORD srcAdd = srcStep / sizeof(INT16);
fkt_writeScanline writeScanline = getScanlineWriteFunction(DstFormat);
const DWORD formatSize = GetBytesPerPixel(DstFormat);
for (y = 0; y < roi->height; ++y)
{
for (x = 0; x < roi->width; ++x)
dst = writePixel(dst, DstFormat, *r++, *g++, *b++);
dst += dstbump;
r += srcbump;
g += srcbump;
b += srcbump;
(*writeScanline)(pDst, formatSize, DstFormat, r, g, b, roi->width);
pDst += dstStep;
r += srcAdd;
g += srcAdd;
b += srcAdd;
}
return PRIMITIVES_SUCCESS;

View File

@ -34,6 +34,81 @@
? _mm_lddqu_si128((__m128i *) (_ptr_)) \
: _mm_load_si128((__m128i *) (_ptr_)))
/* Store one pixel as the four consecutive bytes B, G, R, A starting at dst.
 * formatSize and format are not consulted.
 * Returns the address just past the four bytes written. */
static INLINE BYTE* writePixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	dst[0] = B;
	dst[1] = G;
	dst[2] = R;
	dst[3] = A;
	return dst + 4;
}
/* Store one pixel as the four consecutive bytes R, G, B, A starting at dst.
 * formatSize and format are not consulted.
 * Returns the address just past the four bytes written. */
static INLINE BYTE* writePixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	dst[0] = R;
	dst[1] = G;
	dst[2] = B;
	dst[3] = A;
	return dst + 4;
}
/* Store one pixel as the four consecutive bytes A, B, G, R starting at dst.
 * formatSize and format are not consulted.
 * Returns the address just past the four bytes written. */
static INLINE BYTE* writePixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	dst[0] = A;
	dst[1] = B;
	dst[2] = G;
	dst[3] = R;
	return dst + 4;
}
/* Store one pixel as the four consecutive bytes A, R, G, B starting at dst.
 * formatSize and format are not consulted.
 * Returns the address just past the four bytes written. */
static INLINE BYTE* writePixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format,
                                   BYTE R, BYTE G, BYTE B, BYTE A)
{
	dst[0] = A;
	dst[1] = R;
	dst[2] = G;
	dst[3] = B;
	return dst + 4;
}
/* Fallback pixel writer for formats without a dedicated routine: packs
 * R, G, B, A with GetColor() for the given format and stores the result at
 * dst through WriteColor().
 * Returns dst advanced by formatSize bytes. */
static INLINE BYTE* writePixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format,
                                      BYTE R, BYTE G, BYTE B, BYTE A)
{
	const UINT32 packed = GetColor(format, R, G, B, A);

	WriteColor(dst, format, packed);
	dst += formatSize;
	return dst;
}
/* Pixel writer signature: (dst, formatSize, format, R, G, B, A) -> next dst. */
typedef BYTE* (*fkt_writePixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE, BYTE);

/* Pick the pixel writer matching a destination format.
 * The eight 32-bit formats listed below get a dedicated byte-order writer;
 * every other format falls back to writePixelGeneric. */
static INLINE fkt_writePixel getPixelWriteFunction(DWORD format)
{
	fkt_writePixel writer = writePixelGeneric;

	switch (format)
	{
		case PIXEL_FORMAT_ARGB32:
		case PIXEL_FORMAT_XRGB32:
			writer = writePixelXRGB;
			break;

		case PIXEL_FORMAT_ABGR32:
		case PIXEL_FORMAT_XBGR32:
			writer = writePixelXBGR;
			break;

		case PIXEL_FORMAT_RGBA32:
		case PIXEL_FORMAT_RGBX32:
			writer = writePixelRGBX;
			break;

		case PIXEL_FORMAT_BGRA32:
		case PIXEL_FORMAT_BGRX32:
			writer = writePixelBGRX;
			break;

		default:
			break;
	}

	return writer;
}
/* Function prototypes for all the init/deinit routines. */
FREERDP_LOCAL void primitives_init_copy(primitives_t* prims);
FREERDP_LOCAL void primitives_init_set(primitives_t* prims);