[primitives] add image copy primitive
* move freerdp_image_copy_no_overlap implementation to primitives * add SSE4.1 and AVX2 optimizations
This commit is contained in:
parent
2ee987e665
commit
311068e605
@ -104,6 +104,12 @@ typedef pstatus_t (*__add_16s_t)(const INT16* WINPR_RESTRICT pSrc1,
|
||||
UINT32 len);
|
||||
typedef pstatus_t (*__add_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst1,
|
||||
INT16* WINPR_RESTRICT pSrcDst2, UINT32 len);
|
||||
typedef pstatus_t (*__copy_no_overlap_t)(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
|
||||
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
|
||||
UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData,
|
||||
DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc,
|
||||
UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
|
||||
UINT32 flags);
|
||||
typedef pstatus_t (*__lShiftC_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst, UINT32 val, UINT32 len);
|
||||
typedef pstatus_t (*__lShiftC_16s_t)(const INT16* pSrc, UINT32 val, INT16* pSrcDst, UINT32 len);
|
||||
typedef pstatus_t (*__lShiftC_16u_t)(const UINT16* pSrc, UINT32 val, UINT16* pSrcDst, UINT32 len);
|
||||
@ -222,6 +228,7 @@ typedef struct
|
||||
*/
|
||||
__add_16s_inplace_t add_16s_inplace;
|
||||
__lShiftC_16s_inplace_t lShiftC_16s_inplace;
|
||||
__copy_no_overlap_t copy_no_overlap;
|
||||
} primitives_t;
|
||||
|
||||
typedef enum
|
||||
|
@ -5,6 +5,7 @@ set(CODEC_SRCS
|
||||
bulk.h
|
||||
dsp.c
|
||||
color.c
|
||||
color.h
|
||||
audio.c
|
||||
planar.c
|
||||
bitmap.c
|
||||
|
@ -39,17 +39,9 @@
|
||||
#include <libswscale/swscale.h>
|
||||
#endif
|
||||
|
||||
#define TAG FREERDP_TAG("color")
|
||||
#include "color.h"
|
||||
|
||||
static INLINE BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format,
|
||||
UINT32 color);
|
||||
static INLINE BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color);
|
||||
static INLINE UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format);
|
||||
/* Compare two pixel format codes while ignoring bit 15 (8UL << 12UL) of each.
 * NOTE(review): judging by the function name that bit carries the alpha
 * information of the format code - confirm against the format macros.
 * Returns non-zero when all remaining bits are identical. */
static INLINE DWORD FreeRDPAreColorFormatsEqualNoAlpha_int(DWORD first, DWORD second)
{
	const DWORD ignored = (DWORD)(8UL << 12UL);
	const DWORD differing = (first ^ second) & (DWORD)~ignored;
	return differing == 0;
}
|
||||
#define TAG FREERDP_TAG("color")
|
||||
|
||||
BYTE* freerdp_glyph_convert(UINT32 width, UINT32 height, const BYTE* WINPR_RESTRICT data)
|
||||
{
|
||||
@ -733,102 +725,6 @@ static INLINE BOOL freerdp_image_copy_no_overlap_dst_alpha(
|
||||
srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
|
||||
}
|
||||
|
||||
BOOL freerdp_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep,
|
||||
UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight,
|
||||
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
|
||||
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
|
||||
const gdiPalette* WINPR_RESTRICT palette, UINT32 flags)
|
||||
{
|
||||
const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
|
||||
const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
|
||||
const SSIZE_T copyDstWidth = nWidth * dstByte;
|
||||
const SSIZE_T xSrcOffset = nXSrc * srcByte;
|
||||
const SSIZE_T xDstOffset = nXDst * dstByte;
|
||||
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
|
||||
SSIZE_T srcVOffset = 0;
|
||||
SSIZE_T srcVMultiplier = 1;
|
||||
SSIZE_T dstVOffset = 0;
|
||||
SSIZE_T dstVMultiplier = 1;
|
||||
|
||||
if ((nWidth == 0) || (nHeight == 0))
|
||||
return TRUE;
|
||||
|
||||
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
|
||||
return FALSE;
|
||||
|
||||
if (!pDstData || !pSrcData)
|
||||
return FALSE;
|
||||
|
||||
if (nDstStep == 0)
|
||||
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
|
||||
|
||||
if (nSrcStep == 0)
|
||||
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
|
||||
|
||||
if (vSrcVFlip)
|
||||
{
|
||||
srcVOffset = (nHeight - 1ll) * nSrcStep;
|
||||
srcVMultiplier = -1;
|
||||
}
|
||||
|
||||
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
|
||||
return freerdp_image_copy_no_overlap_dst_alpha(
|
||||
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
|
||||
nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier,
|
||||
dstVOffset);
|
||||
else if (FreeRDPAreColorFormatsEqualNoAlpha_int(SrcFormat, DstFormat))
|
||||
{
|
||||
if (!vSrcVFlip && (nDstStep == nSrcStep) && (xSrcOffset == 0) && (xDstOffset == 0))
|
||||
{
|
||||
const void* src = &pSrcData[1ull * nYSrc * nSrcStep];
|
||||
void* dst = &pDstData[1ull * nYDst * nDstStep];
|
||||
memcpy(dst, src, 1ull * nDstStep * nHeight);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (SSIZE_T y = 0; y < nHeight; y++)
|
||||
{
|
||||
const BYTE* WINPR_RESTRICT srcLine =
|
||||
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
|
||||
BYTE* WINPR_RESTRICT dstLine =
|
||||
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
|
||||
memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], copyDstWidth);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (SSIZE_T y = 0; y < nHeight; y++)
|
||||
{
|
||||
const BYTE* WINPR_RESTRICT srcLine =
|
||||
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
|
||||
BYTE* WINPR_RESTRICT dstLine =
|
||||
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
|
||||
|
||||
UINT32 color = FreeRDPReadColor_int(&srcLine[nXSrc * srcByte], SrcFormat);
|
||||
UINT32 oldColor = color;
|
||||
UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
|
||||
FreeRDPWriteColor_int(&dstLine[nXDst * dstByte], DstFormat, dstColor);
|
||||
for (SSIZE_T x = 1; x < nWidth; x++)
|
||||
{
|
||||
color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
|
||||
if (color == oldColor)
|
||||
{
|
||||
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
|
||||
}
|
||||
else
|
||||
{
|
||||
oldColor = color;
|
||||
dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
|
||||
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
BOOL freerdp_image_copy_overlap(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst,
|
||||
UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* pSrcData,
|
||||
DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
|
||||
@ -1608,124 +1504,16 @@ BOOL FreeRDPWriteColorIgnoreAlpha(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT3
|
||||
return FreeRDPWriteColorIgnoreAlpha_int(dst, format, color);
|
||||
}
|
||||
|
||||
/* Write color to dst but, for the eight listed 32 bit formats, keep one byte
 * already stored at dst: dst[0] for the XBGR/XRGB/ABGR/ARGB group, dst[3] for
 * the BGRX/RGBX/BGRA/RGBA group. Every other format is written unchanged.
 * Returns the result of FreeRDPWriteColor_int. */
BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
	UINT32 merged = color;

	switch (format)
	{
		case PIXEL_FORMAT_XBGR32:
		case PIXEL_FORMAT_XRGB32:
		case PIXEL_FORMAT_ABGR32:
		case PIXEL_FORMAT_ARGB32:
			/* preserved byte is the most significant one of the written value */
			merged = ((UINT32)dst[0] << 24ULL) | (color & 0x00FFFFFFULL);
			break;

		case PIXEL_FORMAT_BGRX32:
		case PIXEL_FORMAT_RGBX32:
		case PIXEL_FORMAT_BGRA32:
		case PIXEL_FORMAT_RGBA32:
			/* preserved byte is the least significant one of the written value */
			merged = ((UINT32)dst[3]) | (color & 0xFFFFFF00ULL);
			break;

		default:
			break;
	}

	return FreeRDPWriteColor_int(dst, format, merged);
}
|
||||
|
||||
/* Exported entry point: forwards unchanged to FreeRDPWriteColor_int and
 * returns its result. */
BOOL FreeRDPWriteColor(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
	const BOOL written = FreeRDPWriteColor_int(dst, format, color);
	return written;
}
|
||||
/* Store color at dst using the bit depth reported by FreeRDPGetBitsPerPixel:
 *  - 32 and 24 bit values are written most significant byte first,
 *  - 16 and 15 bit values are written least significant byte first,
 *    15 bit formats without alpha are masked to 0x7FFF beforehand,
 *  - 8 bit values occupy a single byte.
 * Any other depth is logged and FALSE is returned; otherwise TRUE. */
BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
	const UINT32 bpp = FreeRDPGetBitsPerPixel(format);

	if (bpp == 32)
	{
		dst[0] = (BYTE)(color >> 24);
		dst[1] = (BYTE)(color >> 16);
		dst[2] = (BYTE)(color >> 8);
		dst[3] = (BYTE)color;
	}
	else if (bpp == 24)
	{
		dst[0] = (BYTE)(color >> 16);
		dst[1] = (BYTE)(color >> 8);
		dst[2] = (BYTE)color;
	}
	else if ((bpp == 16) || (bpp == 15))
	{
		UINT32 value = color;

		if ((bpp == 15) && !FreeRDPColorHasAlpha(format))
			value = value & 0x7FFF;

		dst[1] = (BYTE)(value >> 8);
		dst[0] = (BYTE)value;
	}
	else if (bpp == 8)
	{
		dst[0] = (BYTE)color;
	}
	else
	{
		WLog_ERR(TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
		return FALSE;
	}

	return TRUE;
}
|
||||
|
||||
/* Exported entry point: forwards unchanged to FreeRDPReadColor_int and
 * returns the color it read. */
UINT32 FreeRDPReadColor(const BYTE* WINPR_RESTRICT src, UINT32 format)
{
	const UINT32 value = FreeRDPReadColor_int(src, format);
	return value;
}
|
||||
|
||||
/* Load one pixel from src using the bit depth reported by
 * FreeRDPGetBitsPerPixel:
 *  - 32 and 24 bit values are assembled most significant byte first,
 *  - 16 and 15 bit values are assembled least significant byte first,
 *    15 bit formats without alpha are masked to 0x7FFF,
 *  - 8, 4 and 1 bit formats return the first byte as is.
 * Any other depth is logged and 0 is returned. */
UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format)
{
	const UINT32 bpp = FreeRDPGetBitsPerPixel(format);

	if (bpp == 32)
		return ((UINT32)src[0] << 24) | ((UINT32)src[1] << 16) | ((UINT32)src[2] << 8) | src[3];

	if (bpp == 24)
		return ((UINT32)src[0] << 16) | ((UINT32)src[1] << 8) | src[2];

	if ((bpp == 16) || (bpp == 15))
	{
		UINT32 value = ((UINT32)src[1] << 8) | src[0];

		if ((bpp == 15) && !FreeRDPColorHasAlpha(format))
			value = value & 0x7FFF;

		return value;
	}

	if ((bpp == 8) || (bpp == 4) || (bpp == 1))
		return *src;

	WLog_ERR(TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
	return 0;
}
|
||||
|
||||
UINT32 FreeRDPGetColor(UINT32 format, BYTE r, BYTE g, BYTE b, BYTE a)
|
||||
{
|
||||
UINT32 _r = r;
|
||||
@ -1817,3 +1605,20 @@ UINT32 FreeRDPGetColor(UINT32 format, BYTE r, BYTE g, BYTE b, BYTE a)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy a pixel rectangle between two non-overlapping buffers by delegating to
 * the copy_no_overlap primitive. The primitives table is looked up on first
 * use and cached in a function-local static.
 * Returns TRUE when the primitive reports PRIMITIVES_SUCCESS. */
BOOL freerdp_image_copy_no_overlap(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst,
                                   UINT32 nYDst, UINT32 nWidth, UINT32 nHeight,
                                   const BYTE* pSrcData, DWORD SrcFormat, UINT32 nSrcStep,
                                   UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* palette,
                                   UINT32 flags)
{
	static primitives_t* prims = NULL;

	if (prims == NULL)
		prims = primitives_get();

	WINPR_ASSERT(prims);
	WINPR_ASSERT(prims->copy_no_overlap);

	const pstatus_t status =
	    prims->copy_no_overlap(pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight,
	                           pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette, flags);
	return (status == PRIMITIVES_SUCCESS) ? TRUE : FALSE;
}
|
||||
|
147
libfreerdp/codec/color.h
Normal file
147
libfreerdp/codec/color.h
Normal file
@ -0,0 +1,147 @@
|
||||
/**
|
||||
* FreeRDP: A Remote Desktop Protocol Implementation
|
||||
* codec color
|
||||
*
|
||||
* Copyright 2024 Armin Novak <anovak@thincast.com>
|
||||
* Copyright 2024 Thincast Technologies GmbH
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef FREERDP_LIB_CODEC_COLOR_H
|
||||
#define FREERDP_LIB_CODEC_COLOR_H
|
||||
|
||||
#include <winpr/winpr.h>
|
||||
#include <winpr/wtypes.h>
|
||||
|
||||
#include <freerdp/codec/color.h>
|
||||
#include <freerdp/log.h>
|
||||
|
||||
#define INT_COLOR_TAG FREERDP_TAG("codec.color.h")
|
||||
|
||||
/* Compare two pixel format codes while ignoring bit 15 (8UL << 12UL) of each.
 * NOTE(review): judging by the function name that bit carries the alpha
 * information of the format code - confirm against the format macros.
 * Returns non-zero when all remaining bits are identical. */
static INLINE DWORD FreeRDPAreColorFormatsEqualNoAlpha_int(DWORD first, DWORD second)
{
	const DWORD ignoredBit = (DWORD)(8UL << 12UL);
	const DWORD keep = (DWORD)~ignoredBit;
	const DWORD lhs = first & keep;
	const DWORD rhs = second & keep;
	return lhs == rhs;
}
|
||||
|
||||
/* Store color at dst using the bit depth reported by FreeRDPGetBitsPerPixel:
 *  - 32 and 24 bit values are written most significant byte first,
 *  - 16 and 15 bit values are written least significant byte first,
 *    15 bit formats without alpha are masked to 0x7FFF beforehand,
 *  - 8 bit values occupy a single byte.
 * Any other depth is logged and FALSE is returned; otherwise TRUE. */
static INLINE BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
	const UINT32 depth = FreeRDPGetBitsPerPixel(format);

	if (depth == 32)
	{
		dst[0] = (BYTE)(color >> 24);
		dst[1] = (BYTE)(color >> 16);
		dst[2] = (BYTE)(color >> 8);
		dst[3] = (BYTE)color;
		return TRUE;
	}

	if (depth == 24)
	{
		dst[0] = (BYTE)(color >> 16);
		dst[1] = (BYTE)(color >> 8);
		dst[2] = (BYTE)color;
		return TRUE;
	}

	if ((depth == 16) || (depth == 15))
	{
		UINT32 packed = color;

		if ((depth == 15) && !FreeRDPColorHasAlpha(format))
			packed = packed & 0x7FFF;

		dst[1] = (BYTE)(packed >> 8);
		dst[0] = (BYTE)packed;
		return TRUE;
	}

	if (depth == 8)
	{
		dst[0] = (BYTE)color;
		return TRUE;
	}

	WLog_ERR(INT_COLOR_TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
	return FALSE;
}
|
||||
|
||||
/* Write color to dst but, for the eight listed 32 bit formats, keep one byte
 * already stored at dst: dst[0] for the XBGR/XRGB/ABGR/ARGB group, dst[3] for
 * the BGRX/RGBX/BGRA/RGBA group. Every other format is written unchanged.
 * Returns the result of FreeRDPWriteColor_int. */
static INLINE BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format,
                                                    UINT32 color)
{
	UINT32 value = color;

	switch (format)
	{
		case PIXEL_FORMAT_XBGR32:
		case PIXEL_FORMAT_XRGB32:
		case PIXEL_FORMAT_ABGR32:
		case PIXEL_FORMAT_ARGB32:
			/* preserved byte ends up as the most significant one */
			value = ((UINT32)dst[0] << 24ULL) | (color & 0x00FFFFFFULL);
			break;

		case PIXEL_FORMAT_BGRX32:
		case PIXEL_FORMAT_RGBX32:
		case PIXEL_FORMAT_BGRA32:
		case PIXEL_FORMAT_RGBA32:
			/* preserved byte ends up as the least significant one */
			value = ((UINT32)dst[3]) | (color & 0xFFFFFF00ULL);
			break;

		default:
			break;
	}

	return FreeRDPWriteColor_int(dst, format, value);
}
|
||||
|
||||
/* Load one pixel from src using the bit depth reported by
 * FreeRDPGetBitsPerPixel:
 *  - 32 and 24 bit values are assembled most significant byte first,
 *  - 16 and 15 bit values are assembled least significant byte first,
 *    15 bit formats without alpha are masked to 0x7FFF,
 *  - 8, 4 and 1 bit formats return the first byte as is.
 * Any other depth is logged and 0 is returned. */
static INLINE UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format)
{
	const UINT32 depth = FreeRDPGetBitsPerPixel(format);
	UINT32 result = 0;

	if (depth == 32)
	{
		result = ((UINT32)src[0] << 24) | ((UINT32)src[1] << 16) | ((UINT32)src[2] << 8) | src[3];
	}
	else if (depth == 24)
	{
		result = ((UINT32)src[0] << 16) | ((UINT32)src[1] << 8) | src[2];
	}
	else if ((depth == 16) || (depth == 15))
	{
		result = ((UINT32)src[1] << 8) | src[0];

		if ((depth == 15) && !FreeRDPColorHasAlpha(format))
			result = result & 0x7FFF;
	}
	else if ((depth == 8) || (depth == 4) || (depth == 1))
	{
		result = *src;
	}
	else
	{
		WLog_ERR(INT_COLOR_TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
		result = 0;
	}

	return result;
}
|
||||
|
||||
#endif
|
@ -6,6 +6,7 @@ set(PRIMITIVES_SRCS
|
||||
prim_alphaComp.c
|
||||
prim_colors.c
|
||||
prim_copy.c
|
||||
prim_copy.h
|
||||
prim_set.c
|
||||
prim_shift.c
|
||||
prim_sign.c
|
||||
@ -17,6 +18,8 @@ set(PRIMITIVES_SRCS
|
||||
if (WITH_SSE2 OR WITH_NEON)
|
||||
set(PRIMITIVES_SSE2_SRCS
|
||||
prim_colors_opt.c
|
||||
prim_copy_sse.c
|
||||
prim_copy_avx2.c
|
||||
prim_set_opt.c)
|
||||
|
||||
set(PRIMITIVES_SSE3_SRCS
|
||||
@ -71,6 +74,8 @@ if(WITH_SSE2)
|
||||
if (PRIMITIVES_SSSE3_SRCS)
|
||||
set_source_files_properties(${PRIMITIVES_SSSE3_SRCS} PROPERTIES COMPILE_FLAGS "-mssse3" )
|
||||
endif()
|
||||
set_source_files_properties(prim_copy_sse.c PROPERTIES COMPILE_FLAGS "-msse4.1" )
|
||||
set_source_files_properties(prim_copy_avx2.c PROPERTIES COMPILE_FLAGS "-mavx2" )
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
|
@ -18,7 +18,15 @@
|
||||
#include <string.h>
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <freerdp/log.h>
|
||||
|
||||
#include "prim_internal.h"
|
||||
#include "prim_copy.h"
|
||||
#include "../codec/color.h"
|
||||
|
||||
#include <freerdp/codec/color.h>
|
||||
|
||||
#define TAG FREERDP_TAG("primitives.copy")
|
||||
|
||||
static primitives_t* generic = NULL;
|
||||
|
||||
@ -128,6 +136,247 @@ static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDs
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
/* Copy a rectangle of 3-byte source pixels into 4-byte destination pixels.
 * Only the first three bytes of every destination pixel are written, the
 * fourth byte keeps its previous content. Row addresses are derived from
 * (multiplier * row * step + offset) for both buffers.
 * Always returns PRIMITIVES_SUCCESS. */
static INLINE pstatus_t generic_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData,
                                                        UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
                                                        UINT32 nWidth, UINT32 nHeight,
                                                        const BYTE* WINPR_RESTRICT pSrcData,
                                                        UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
                                                        SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
                                                        SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T srcPixelSize = 3;
	const SSIZE_T dstPixelSize = 4;

	for (SSIZE_T row = 0; row < nHeight; row++)
	{
		const BYTE* WINPR_RESTRICT srcRow =
		    &pSrcData[srcVMultiplier * (row + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstRow =
		    &pDstData[dstVMultiplier * (row + nYDst) * nDstStep + dstVOffset];

		for (SSIZE_T col = 0; col < nWidth; col++)
		{
			const SSIZE_T srcPos = (col + nXSrc) * srcPixelSize;
			const SSIZE_T dstPos = (col + nXDst) * dstPixelSize;

			for (SSIZE_T channel = 0; channel < 3; channel++)
				dstRow[dstPos + channel] = srcRow[srcPos + channel];
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/* Copy a rectangle between two buffers with 4-byte pixels. Only the first
 * three bytes of every pixel are copied, the fourth destination byte keeps
 * its previous content. Row addresses are derived from
 * (multiplier * row * step + offset) for both buffers.
 * Always returns PRIMITIVES_SUCCESS. */
static INLINE pstatus_t generic_image_copy_bgrx32_bgrx32(
    BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
    UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 nXSrc,
    UINT32 nYSrc, SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier,
    SSIZE_T dstVOffset)
{
	const SSIZE_T srcPixelSize = 4;
	const SSIZE_T dstPixelSize = 4;

	for (SSIZE_T row = 0; row < nHeight; row++)
	{
		const BYTE* WINPR_RESTRICT srcRow =
		    &pSrcData[srcVMultiplier * (row + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstRow =
		    &pDstData[dstVMultiplier * (row + nYDst) * nDstStep + dstVOffset];

		for (SSIZE_T col = 0; col < nWidth; col++)
		{
			const SSIZE_T srcPos = (col + nXSrc) * srcPixelSize;
			const SSIZE_T dstPos = (col + nXDst) * dstPixelSize;

			for (SSIZE_T channel = 0; channel < 3; channel++)
				dstRow[dstPos + channel] = srcRow[srcPos + channel];
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/* Copy a rectangle while converting every pixel from SrcFormat to DstFormat
 * (read, FreeRDPConvertColor with the given palette, write).
 * Each row is processed in two passes: the leading multiple-of-8 pixels in a
 * loop tagged with WINPR_PRAGMA_UNROLL_LOOP, then the remaining pixels.
 * Always returns PRIMITIVES_SUCCESS. */
pstatus_t generic_image_copy_no_overlap_convert(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T srcPixelSize = FreeRDPGetBytesPerPixel(SrcFormat);
	const SSIZE_T dstPixelSize = FreeRDPGetBytesPerPixel(DstFormat);
	const UINT32 blockWidth = nWidth - nWidth % 8;

	for (SSIZE_T row = 0; row < nHeight; row++)
	{
		const BYTE* WINPR_RESTRICT srcRow =
		    &pSrcData[srcVMultiplier * (row + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstRow =
		    &pDstData[dstVMultiplier * (row + nYDst) * nDstStep + dstVOffset];
		SSIZE_T col = 0;

		WINPR_PRAGMA_UNROLL_LOOP
		for (; col < blockWidth; col++)
		{
			const BYTE* srcPixel = &srcRow[(col + nXSrc) * srcPixelSize];
			BYTE* dstPixel = &dstRow[(col + nXDst) * dstPixelSize];
			const UINT32 srcColor = FreeRDPReadColor_int(srcPixel, SrcFormat);
			FreeRDPWriteColor_int(dstPixel, DstFormat,
			                      FreeRDPConvertColor(srcColor, SrcFormat, DstFormat, palette));
		}

		/* trailing pixels that did not fit a block of eight */
		for (; col < nWidth; col++)
		{
			const BYTE* srcPixel = &srcRow[(col + nXSrc) * srcPixelSize];
			BYTE* dstPixel = &dstRow[(col + nXDst) * dstPixelSize];
			const UINT32 srcColor = FreeRDPReadColor_int(srcPixel, SrcFormat);
			FreeRDPWriteColor_int(dstPixel, DstFormat,
			                      FreeRDPConvertColor(srcColor, SrcFormat, DstFormat, palette));
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/**
 * Copy a rectangle between two buffers row by row with memcpy, without any
 * pixel conversion. nWidth * bytes-per-pixel(DstFormat) bytes are copied per
 * row. The palette parameter is not used by this function.
 *
 * When the source is not flipped, both strides are equal, both x offsets are
 * zero and every copied row spans the whole stride, the rectangle is one
 * contiguous range and is copied with a single memcpy.
 *
 * @param srcVMultiplier, srcVOffset  source row address is
 *        multiplier * (y + nYSrc) * nSrcStep + offset
 * @param dstVMultiplier, dstVOffset  same for the destination
 * @param flags  only FREERDP_FLIP_VERTICAL is inspected here
 *
 * @return always PRIMITIVES_SUCCESS
 */
pstatus_t generic_image_copy_no_overlap_memcpy(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
    UINT32 flags)
{
	const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
	const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
	const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
	const SSIZE_T copyDstWidth = nWidth * dstByte;
	const SSIZE_T xSrcOffset = nXSrc * srcByte;
	const SSIZE_T xDstOffset = nXDst * dstByte;

	/* A single memcpy of nDstStep * nHeight bytes is only correct when each
	 * copied row fills the complete stride. Otherwise it would overwrite
	 * destination bytes right of the rectangle and could access memory past
	 * the end of either buffer. */
	const BOOL contiguous = !vSrcVFlip && (nDstStep == nSrcStep) && (xSrcOffset == 0) &&
	                        (xDstOffset == 0) && (copyDstWidth == (SSIZE_T)nDstStep);

	if (contiguous)
	{
		const void* src = &pSrcData[1ull * nYSrc * nSrcStep];
		void* dst = &pDstData[1ull * nYDst * nDstStep];
		memcpy(dst, src, 1ull * nDstStep * nHeight);
	}
	else
	{
		for (SSIZE_T y = 0; y < nHeight; y++)
		{
			const BYTE* WINPR_RESTRICT srcLine =
			    &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
			BYTE* WINPR_RESTRICT dstLine =
			    &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
			memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], copyDstWidth);
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/* Dispatch a copy that must not touch the fourth byte of each destination
 * pixel. BGR24 -> BGRX32/BGRA32 and BGRX32/BGRA32 -> BGRX32/BGRA32 use the
 * dedicated three-byte copy routines; every other format pair goes through
 * generic_image_copy_no_overlap_convert.
 * Returns the status of the selected routine. */
static INLINE pstatus_t generic_image_copy_no_overlap_dst_alpha(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	WINPR_ASSERT(pDstData);
	WINPR_ASSERT(pSrcData);

	const BOOL dstIsBgrx32 =
	    (DstFormat == PIXEL_FORMAT_BGRX32) || (DstFormat == PIXEL_FORMAT_BGRA32);

	if (dstIsBgrx32)
	{
		if (SrcFormat == PIXEL_FORMAT_BGR24)
			return generic_image_copy_bgr24_bgrx32(
			    pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, nXSrc,
			    nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);

		if ((SrcFormat == PIXEL_FORMAT_BGRX32) || (SrcFormat == PIXEL_FORMAT_BGRA32))
			return generic_image_copy_bgrx32_bgrx32(
			    pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep, nXSrc,
			    nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
	}

	/* no specialised routine for this format pair */
	return generic_image_copy_no_overlap_convert(
	    pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
	    nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
}
|
||||
|
||||
/* Copy a rectangle when the destination bytes may be overwritten completely:
 * formats that FreeRDPAreColorFormatsEqualNoAlpha reports as equal are copied
 * with memcpy, all other pairs are converted pixel by pixel.
 * Returns the status of the selected routine. */
static INLINE pstatus_t generic_image_copy_no_overlap_no_alpha(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
    UINT32 flags)
{
	if (!FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
	{
		return generic_image_copy_no_overlap_convert(
		    pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
		    nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier,
		    dstVOffset);
	}

	return generic_image_copy_no_overlap_memcpy(
	    pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
	    nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset, flags);
}
|
||||
|
||||
static pstatus_t generic_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
|
||||
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
|
||||
UINT32 nWidth, UINT32 nHeight,
|
||||
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
|
||||
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
|
||||
const gdiPalette* WINPR_RESTRICT palette,
|
||||
UINT32 flags)
|
||||
{
|
||||
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
|
||||
SSIZE_T srcVOffset = 0;
|
||||
SSIZE_T srcVMultiplier = 1;
|
||||
SSIZE_T dstVOffset = 0;
|
||||
SSIZE_T dstVMultiplier = 1;
|
||||
|
||||
if ((nWidth == 0) || (nHeight == 0))
|
||||
return PRIMITIVES_SUCCESS;
|
||||
|
||||
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
|
||||
return -1;
|
||||
|
||||
if (!pDstData || !pSrcData)
|
||||
return -1;
|
||||
|
||||
if (nDstStep == 0)
|
||||
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
|
||||
|
||||
if (nSrcStep == 0)
|
||||
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
|
||||
|
||||
if (vSrcVFlip)
|
||||
{
|
||||
srcVOffset = (nHeight - 1ll) * nSrcStep;
|
||||
srcVMultiplier = -1;
|
||||
}
|
||||
|
||||
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
|
||||
return generic_image_copy_no_overlap_dst_alpha(
|
||||
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
|
||||
nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier,
|
||||
dstVOffset);
|
||||
else
|
||||
return generic_image_copy_no_overlap_no_alpha(
|
||||
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
|
||||
nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset,
|
||||
flags);
|
||||
|
||||
return PRIMITIVES_SUCCESS;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
void primitives_init_copy(primitives_t* prims)
|
||||
{
|
||||
@ -136,6 +385,7 @@ void primitives_init_copy(primitives_t* prims)
|
||||
prims->copy_8u_AC4r = general_copy_8u_AC4r;
|
||||
/* This is just an alias with void* parameters */
|
||||
prims->copy = (__copy_t)(prims->copy_8u);
|
||||
prims->copy_no_overlap = generic_image_copy_no_overlap;
|
||||
}
|
||||
|
||||
#if defined(WITH_SSE2) || defined(WITH_NEON)
|
||||
@ -153,5 +403,7 @@ void primitives_init_copy_opt(primitives_t* prims)
|
||||
*/
|
||||
/* This is just an alias with void* parameters */
|
||||
prims->copy = (__copy_t)(prims->copy_8u);
|
||||
primitives_init_copy_sse(prims);
|
||||
primitives_init_copy_avx2(prims);
|
||||
}
|
||||
#endif
|
||||
|
42
libfreerdp/primitives/prim_copy.h
Normal file
42
libfreerdp/primitives/prim_copy.h
Normal file
@ -0,0 +1,42 @@
|
||||
/**
|
||||
* FreeRDP: A Remote Desktop Protocol Implementation
|
||||
* Primitives copy
|
||||
*
|
||||
* Copyright 2024 Armin Novak <anovak@thincast.com>
|
||||
* Copyright 2024 Thincast Technologies GmbH
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FREERDP_LIB_PRIM_COPY_H
|
||||
#define FREERDP_LIB_PRIM_COPY_H
|
||||
|
||||
#include <winpr/wtypes.h>
|
||||
#include <freerdp/primitives.h>
|
||||
|
||||
pstatus_t generic_image_copy_no_overlap_convert(
|
||||
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
|
||||
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
|
||||
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
|
||||
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset);
|
||||
|
||||
pstatus_t generic_image_copy_no_overlap_memcpy(
|
||||
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
|
||||
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
|
||||
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
|
||||
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
|
||||
UINT32 flags);
|
||||
|
||||
extern void primitives_init_copy_sse(primitives_t* prims);
|
||||
extern void primitives_init_copy_avx2(primitives_t* prims);
|
||||
#endif
|
276
libfreerdp/primitives/prim_copy_avx2.c
Normal file
276
libfreerdp/primitives/prim_copy_avx2.c
Normal file
@ -0,0 +1,276 @@
|
||||
/* FreeRDP: A Remote Desktop Protocol Client
|
||||
* Copy operations.
|
||||
* vi:ts=4 sw=4:
|
||||
*
|
||||
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License. You may obtain
|
||||
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*/
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <freerdp/log.h>
|
||||
|
||||
#include "prim_internal.h"
|
||||
#include "prim_copy.h"
|
||||
#include "../codec/color.h"
|
||||
|
||||
#include <freerdp/codec/color.h>
|
||||
|
||||
#define TAG FREERDP_TAG("primitives.copy")
|
||||
|
||||
#if defined(WITH_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
/**
 * AVX2 copy of a rectangle of packed 3-byte source pixels into 4-byte
 * destination pixels. The first three bytes of each destination pixel are
 * replaced, the fourth byte keeps its previous content.
 *
 * Eight pixels are handled per vector step:
 *  - two unaligned 16-byte loads fetch pixels 0..3 (offset 0) and pixels
 *    4..7 (offset 12) into the low and high 128-bit lane,
 *  - a per-lane byte shuffle spreads the 3-byte pixels over 4-byte slots,
 *  - a byte blend merges them with the current destination content so the
 *    fourth byte of every pixel is preserved.
 * Remaining pixels are copied by the scalar tail loop.
 *
 * Row addresses are multiplier * (y + nY) * step + offset for both buffers.
 *
 * @return always PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t avx2_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep,
                                                     UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
                                                     UINT32 nHeight,
                                                     const BYTE* WINPR_RESTRICT pSrcData,
                                                     UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
                                                     SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
                                                     SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T srcByte = 3;
	const SSIZE_T dstByte = 4;

	/* Per 128-bit lane: bytes 0..11 (four packed pixels) -> four 4-byte slots.
	 * Control bytes with the top bit set yield zero; those slots are taken
	 * from the destination by the blend below. */
	const __m256i expand = _mm256_setr_epi8(
	    0, 1, 2, (char)0x80, 3, 4, 5, (char)0x80, 6, 7, 8, (char)0x80, 9, 10, 11, (char)0x80,
	    0, 1, 2, (char)0x80, 3, 4, 5, (char)0x80, 6, 7, 8, (char)0x80, 9, 10, 11, (char)0x80);
	/* 0xFF for bytes 0..2 of every pixel (take source), 0x00 for byte 3 (keep destination) */
	const __m256i colorMask = _mm256_set1_epi32(0x00FFFFFF);

	/* A vector step touches 28 source bytes (16 at +0, 16 at +12) but only
	 * consumes 24. Leave at least two pixels for the scalar tail so the loads
	 * never leave the requested part of the row. */
	const SSIZE_T width = (nWidth >= 10) ? (SSIZE_T)(((nWidth - 2) / 8) * 8) : 0;

	for (SSIZE_T y = 0; y < nHeight; y++)
	{
		const BYTE* WINPR_RESTRICT srcLine =
		    &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstLine =
		    &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];

		SSIZE_T x = 0;
		for (; x < width; x += 8)
		{
			const BYTE* src = &srcLine[(x + nXSrc) * srcByte];
			__m256i* dst = (__m256i*)&dstLine[(x + nXDst) * dstByte];

			const __m128i lo = _mm_loadu_si128((const __m128i*)src);
			const __m128i hi = _mm_loadu_si128((const __m128i*)(src + 4 * srcByte));
			const __m256i packed = _mm256_inserti128_si256(_mm256_castsi128_si256(lo), hi, 1);
			const __m256i color = _mm256_shuffle_epi8(packed, expand);
			const __m256i old = _mm256_loadu_si256(dst);

			_mm256_storeu_si256(dst, _mm256_blendv_epi8(old, color, colorMask));
		}

		for (; x < nWidth; x++)
		{
			const BYTE* src = &srcLine[(x + nXSrc) * srcByte];
			BYTE* dst = &dstLine[(x + nXDst) * dstByte];
			*dst++ = *src++;
			*dst++ = *src++;
			*dst++ = *src++;
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/**
 * Copy a rectangle between two 4 byte per pixel buffers. Bytes 0-2 of each
 * pixel are taken from the source, byte 3 of each destination pixel is left
 * unchanged. Eight pixels are handled per vector step, the rest of each row
 * is copied byte by byte.
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t avx2_image_copy_bgrx32_bgrx32(BYTE* WINPR_RESTRICT pDstData,
                                                      UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
                                                      UINT32 nWidth, UINT32 nHeight,
                                                      const BYTE* WINPR_RESTRICT pSrcData,
                                                      UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
                                                      SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
                                                      SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T bytesPerPixel = 4;

	/* 0xFF in bytes 0-2 and 0x00 in byte 3 of every pixel: blendv picks the
	 * source where the high bit of the mask byte is set. */
	const __m256i colorBytes = _mm256_set1_epi32(0x00FFFFFF);

	/* number of pixels per row covered by full 8 pixel vector steps */
	const SSIZE_T vecWidth = (SSIZE_T)(nWidth - (nWidth % 8));

	for (SSIZE_T row = 0; row < nHeight; row++)
	{
		const BYTE* WINPR_RESTRICT srcRow =
		    &pSrcData[srcVMultiplier * (row + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstRow =
		    &pDstData[dstVMultiplier * (row + nYDst) * nDstStep + dstVOffset];

		SSIZE_T col = 0;
		while (col < vecWidth)
		{
			const __m256i* in = (const __m256i*)&srcRow[(col + nXSrc) * bytesPerPixel];
			__m256i* out = (__m256i*)&dstRow[(col + nXDst) * bytesPerPixel];
			const __m256i fresh = _mm256_loadu_si256(in);
			const __m256i current = _mm256_loadu_si256(out);
			_mm256_storeu_si256(out, _mm256_blendv_epi8(current, fresh, colorBytes));
			col += 8;
		}

		/* leftover pixels of the row */
		while (col < nWidth)
		{
			const BYTE* in = &srcRow[(col + nXSrc) * bytesPerPixel];
			BYTE* out = &dstRow[(col + nXDst) * bytesPerPixel];
			out[0] = in[0];
			out[1] = in[1];
			out[2] = in[2];
			col++;
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/**
 * Select the AVX2 copy routine for a given source/destination format pair.
 *
 * Handled combinations:
 *  - BGR24            -> BGRX32 / BGRA32
 *  - BGRX32 / BGRA32  -> BGRX32 / BGRA32
 *
 * @return the result of the selected routine, -1 (after a debug log entry)
 *         for every other format combination
 */
static pstatus_t avx2_image_copy_no_overlap_dst_alpha(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	WINPR_ASSERT(pDstData);
	WINPR_ASSERT(pSrcData);

	const BOOL dstIs32 =
	    (DstFormat == PIXEL_FORMAT_BGRX32) || (DstFormat == PIXEL_FORMAT_BGRA32);

	if (dstIs32)
	{
		if (SrcFormat == PIXEL_FORMAT_BGR24)
			return avx2_image_copy_bgr24_bgrx32(pDstData, nDstStep, nXDst, nYDst, nWidth,
			                                    nHeight, pSrcData, nSrcStep, nXSrc, nYSrc,
			                                    srcVMultiplier, srcVOffset, dstVMultiplier,
			                                    dstVOffset);

		if ((SrcFormat == PIXEL_FORMAT_BGRX32) || (SrcFormat == PIXEL_FORMAT_BGRA32))
			return avx2_image_copy_bgrx32_bgrx32(pDstData, nDstStep, nXDst, nYDst, nWidth,
			                                     nHeight, pSrcData, nSrcStep, nXSrc, nYSrc,
			                                     srcVMultiplier, srcVOffset, dstVMultiplier,
			                                     dstVOffset);
	}

	/* no specialized routine for this pair of formats */
	WLog_DBG(TAG, "unsupported format src %s --> dst %s", FreeRDPGetColorFormatName(SrcFormat),
	         FreeRDPGetColorFormatName(DstFormat));
	return -1;
}
|
||||
|
||||
/**
 * Copy a rectangle pixel by pixel while converting from SrcFormat to
 * DstFormat: every pixel is read, passed through FreeRDPConvertColor (with
 * the supplied palette) and written to the destination.
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t avx2_image_copy_no_overlap_convert(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T srcBpp = FreeRDPGetBytesPerPixel(SrcFormat);
	const SSIZE_T dstBpp = FreeRDPGetBytesPerPixel(DstFormat);

	/* part of each row that is a multiple of 8 pixels (unroll candidate) */
	const UINT32 blockWidth = nWidth - nWidth % 8;

	for (SSIZE_T row = 0; row < nHeight; row++)
	{
		const BYTE* WINPR_RESTRICT srcRow =
		    &pSrcData[srcVMultiplier * (row + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstRow =
		    &pDstData[dstVMultiplier * (row + nYDst) * nDstStep + dstVOffset];

		SSIZE_T col = 0;
		WINPR_PRAGMA_UNROLL_LOOP
		for (; col < blockWidth; col++)
		{
			const BYTE* in = &srcRow[(col + nXSrc) * srcBpp];
			BYTE* out = &dstRow[(col + nXDst) * dstBpp];
			const UINT32 pixel = FreeRDPReadColor_int(in, SrcFormat);
			FreeRDPWriteColor_int(out, DstFormat,
			                      FreeRDPConvertColor(pixel, SrcFormat, DstFormat, palette));
		}

		/* remaining (nWidth % 8) pixels */
		for (; col < nWidth; col++)
		{
			const BYTE* in = &srcRow[(col + nXSrc) * srcBpp];
			BYTE* out = &dstRow[(col + nXDst) * dstBpp];
			const UINT32 pixel = FreeRDPReadColor_int(in, SrcFormat);
			FreeRDPWriteColor_int(out, DstFormat,
			                      FreeRDPConvertColor(pixel, SrcFormat, DstFormat, palette));
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
static pstatus_t avx2_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
|
||||
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
|
||||
UINT32 nWidth, UINT32 nHeight,
|
||||
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
|
||||
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
|
||||
const gdiPalette* WINPR_RESTRICT palette, UINT32 flags)
|
||||
{
|
||||
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
|
||||
SSIZE_T srcVOffset = 0;
|
||||
SSIZE_T srcVMultiplier = 1;
|
||||
SSIZE_T dstVOffset = 0;
|
||||
SSIZE_T dstVMultiplier = 1;
|
||||
|
||||
if ((nWidth == 0) || (nHeight == 0))
|
||||
return PRIMITIVES_SUCCESS;
|
||||
|
||||
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
|
||||
return -1;
|
||||
|
||||
if (!pDstData || !pSrcData)
|
||||
return -1;
|
||||
|
||||
if (nDstStep == 0)
|
||||
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
|
||||
|
||||
if (nSrcStep == 0)
|
||||
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
|
||||
|
||||
if (vSrcVFlip)
|
||||
{
|
||||
srcVOffset = (nHeight - 1ll) * nSrcStep;
|
||||
srcVMultiplier = -1;
|
||||
}
|
||||
|
||||
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
|
||||
return avx2_image_copy_no_overlap_dst_alpha(pDstData, DstFormat, nDstStep, nXDst, nYDst,
|
||||
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
|
||||
nXSrc, nYSrc, palette, srcVMultiplier,
|
||||
srcVOffset, dstVMultiplier, dstVOffset);
|
||||
else if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
|
||||
return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst,
|
||||
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
|
||||
nXSrc, nYSrc, palette, srcVMultiplier,
|
||||
srcVOffset, dstVMultiplier, dstVOffset, flags);
|
||||
else
|
||||
return avx2_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst,
|
||||
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
|
||||
nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset,
|
||||
dstVMultiplier, dstVOffset);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/**
 * Install the AVX2 copy_no_overlap implementation in prims when the build
 * has WITH_SSE2 defined and the processor reports AVX2 support. In every
 * other case prims is left unchanged.
 */
void primitives_init_copy_avx2(primitives_t* prims)
{
#if defined(WITH_SSE2)
	/* keep the currently installed implementation without AVX2 */
	if (!IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE))
		return;

	prims->copy_no_overlap = avx2_image_copy_no_overlap;
#else
	WINPR_UNUSED(prims);
#endif
}
|
274
libfreerdp/primitives/prim_copy_sse.c
Normal file
274
libfreerdp/primitives/prim_copy_sse.c
Normal file
@ -0,0 +1,274 @@
|
||||
/* FreeRDP: A Remote Desktop Protocol Client
|
||||
* Copy operations.
|
||||
* vi:ts=4 sw=4:
|
||||
*
|
||||
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License. You may obtain
|
||||
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing
|
||||
* permissions and limitations under the License.
|
||||
*/
|
||||
|
||||
#include <winpr/sysinfo.h>
|
||||
|
||||
#include <freerdp/config.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <freerdp/types.h>
|
||||
#include <freerdp/primitives.h>
|
||||
#include <freerdp/log.h>
|
||||
|
||||
#include "prim_internal.h"
|
||||
#include "prim_copy.h"
|
||||
#include "../codec/color.h"
|
||||
|
||||
#include <freerdp/codec/color.h>
|
||||
|
||||
#define TAG FREERDP_TAG("primitives.copy")
|
||||
|
||||
#if defined(WITH_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
/**
 * Copy a rectangle of packed 3 byte (BGR24) pixels into a 4 byte per pixel
 * (BGRX32/BGRA32) destination. Only the first three bytes of every
 * destination pixel are written, the fourth byte keeps its previous value.
 *
 * @param pDstData       destination buffer
 * @param nDstStep       destination stride in bytes
 * @param nXDst          destination x offset in pixels
 * @param nYDst          destination y offset in rows
 * @param nWidth         width of the rectangle in pixels
 * @param nHeight        height of the rectangle in rows
 * @param pSrcData       source buffer
 * @param nSrcStep       source stride in bytes
 * @param nXSrc          source x offset in pixels
 * @param nYSrc          source y offset in rows
 * @param srcVMultiplier factor applied to the source row index
 * @param srcVOffset     byte offset added to every source row address
 * @param dstVMultiplier factor applied to the destination row index
 * @param dstVOffset     byte offset added to every destination row address
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t sse_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep,
                                                    UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
                                                    UINT32 nHeight,
                                                    const BYTE* WINPR_RESTRICT pSrcData,
                                                    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
                                                    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
                                                    SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T srcByte = 3;
	const SSIZE_T dstByte = 4;

	/* Spread four packed 3 byte pixels over four 4 byte slots. An index with
	 * the high bit set (-1) produces a zero byte. */
	const __m128i expand = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1);
	/* High bit set on byte 3 of every pixel: that byte is taken from the
	 * current destination contents. */
	const __m128i alpha = _mm_slli_epi32(_mm_set1_epi32(0xFF), 24);

	/* A 16 byte load covers four pixels (12 bytes) plus 4 extra bytes, i.e.
	 * it touches the two following pixels. Keep two pixels of every row for
	 * the scalar loop so the vector loads never leave the row. */
	const SSIZE_T width = (nWidth > 2) ? (SSIZE_T)(((nWidth - 2) / 4) * 4) : 0;

	for (SSIZE_T y = 0; y < nHeight; y++)
	{
		const BYTE* WINPR_RESTRICT srcLine =
		    &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstLine =
		    &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];

		SSIZE_T x = 0;
		for (; x < width; x += 4)
		{
			const __m128i* src = (const __m128i*)&srcLine[(x + nXSrc) * srcByte];
			__m128i* dst = (__m128i*)&dstLine[(x + nXDst) * dstByte];
			const __m128i packed = _mm_loadu_si128(src);
			const __m128i bgrx = _mm_shuffle_epi8(packed, expand);
			const __m128i old = _mm_loadu_si128(dst);
			const __m128i d0 = _mm_blendv_epi8(bgrx, old, alpha);
			_mm_storeu_si128(dst, d0);
		}

		/* remaining pixels: copy the three color bytes, skip the fourth */
		for (; x < nWidth; x++)
		{
			const BYTE* src = &srcLine[(x + nXSrc) * srcByte];
			BYTE* dst = &dstLine[(x + nXDst) * dstByte];
			*dst++ = *src++;
			*dst++ = *src++;
			*dst++ = *src++;
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/**
 * Copy a rectangle between two 4 byte per pixel buffers. Bytes 0-2 of each
 * pixel are taken from the source, byte 3 of each destination pixel is left
 * unchanged. Four pixels are handled per vector step, the rest of each row
 * is copied byte by byte.
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t sse_image_copy_bgrx32_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep,
                                                     UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
                                                     UINT32 nHeight,
                                                     const BYTE* WINPR_RESTRICT pSrcData,
                                                     UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
                                                     SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
                                                     SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T bytesPerPixel = 4;

	/* 0xFF in bytes 0-2 and 0x00 in byte 3 of every pixel: blendv picks the
	 * source where the high bit of the mask byte is set. */
	const __m128i colorBytes = _mm_set1_epi32(0x00FFFFFF);

	/* number of pixels per row covered by full 4 pixel vector steps */
	const SSIZE_T vecWidth = (SSIZE_T)(nWidth - (nWidth % 4));

	for (SSIZE_T row = 0; row < nHeight; row++)
	{
		const BYTE* WINPR_RESTRICT srcRow =
		    &pSrcData[srcVMultiplier * (row + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstRow =
		    &pDstData[dstVMultiplier * (row + nYDst) * nDstStep + dstVOffset];

		SSIZE_T col = 0;
		while (col < vecWidth)
		{
			const __m128i* in = (const __m128i*)&srcRow[(col + nXSrc) * bytesPerPixel];
			__m128i* out = (__m128i*)&dstRow[(col + nXDst) * bytesPerPixel];
			const __m128i fresh = _mm_loadu_si128(in);
			const __m128i current = _mm_loadu_si128(out);
			_mm_storeu_si128(out, _mm_blendv_epi8(current, fresh, colorBytes));
			col += 4;
		}

		/* leftover pixels of the row */
		while (col < nWidth)
		{
			const BYTE* in = &srcRow[(col + nXSrc) * bytesPerPixel];
			BYTE* out = &dstRow[(col + nXDst) * bytesPerPixel];
			out[0] = in[0];
			out[1] = in[1];
			out[2] = in[2];
			col++;
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
/**
 * Select the SSE copy routine for a given source/destination format pair.
 *
 * Handled combinations:
 *  - BGR24            -> BGRX32 / BGRA32
 *  - BGRX32 / BGRA32  -> BGRX32 / BGRA32
 *
 * @return the result of the selected routine, -1 (after a debug log entry)
 *         for every other format combination
 */
static pstatus_t sse_image_copy_no_overlap_dst_alpha(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	WINPR_ASSERT(pDstData);
	WINPR_ASSERT(pSrcData);

	const BOOL dstIs32 =
	    (DstFormat == PIXEL_FORMAT_BGRX32) || (DstFormat == PIXEL_FORMAT_BGRA32);

	if (dstIs32)
	{
		if (SrcFormat == PIXEL_FORMAT_BGR24)
			return sse_image_copy_bgr24_bgrx32(pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight,
			                                   pSrcData, nSrcStep, nXSrc, nYSrc, srcVMultiplier,
			                                   srcVOffset, dstVMultiplier, dstVOffset);

		if ((SrcFormat == PIXEL_FORMAT_BGRX32) || (SrcFormat == PIXEL_FORMAT_BGRA32))
			return sse_image_copy_bgrx32_bgrx32(pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight,
			                                    pSrcData, nSrcStep, nXSrc, nYSrc, srcVMultiplier,
			                                    srcVOffset, dstVMultiplier, dstVOffset);
	}

	/* no specialized routine for this pair of formats */
	WLog_DBG(TAG, "unsupported format src %s --> dst %s", FreeRDPGetColorFormatName(SrcFormat),
	         FreeRDPGetColorFormatName(DstFormat));
	return -1;
}
|
||||
|
||||
/**
 * Copy a rectangle pixel by pixel while converting from SrcFormat to
 * DstFormat: every pixel is read, passed through FreeRDPConvertColor (with
 * the supplied palette) and written to the destination.
 *
 * Pixels are read with FreeRDPReadColor_int, matching the use of
 * FreeRDPWriteColor_int for writing and the AVX2 variant of this routine.
 *
 * @return PRIMITIVES_SUCCESS
 */
static INLINE pstatus_t sse_image_copy_no_overlap_convert(
    BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
    UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
    UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
    SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
	const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
	const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);

	/* part of each row that is a multiple of 8 pixels (unroll candidate) */
	const UINT32 width = nWidth - nWidth % 8;

	for (SSIZE_T y = 0; y < nHeight; y++)
	{
		const BYTE* WINPR_RESTRICT srcLine =
		    &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
		BYTE* WINPR_RESTRICT dstLine =
		    &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];

		SSIZE_T x = 0;
		WINPR_PRAGMA_UNROLL_LOOP
		for (; x < width; x++)
		{
			const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
			const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
			FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
		}

		/* remaining (nWidth % 8) pixels */
		for (; x < nWidth; x++)
		{
			const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
			const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
			FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
		}
	}

	return PRIMITIVES_SUCCESS;
}
|
||||
|
||||
static pstatus_t sse_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
|
||||
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
|
||||
UINT32 nWidth, UINT32 nHeight,
|
||||
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
|
||||
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
|
||||
const gdiPalette* WINPR_RESTRICT palette, UINT32 flags)
|
||||
{
|
||||
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
|
||||
SSIZE_T srcVOffset = 0;
|
||||
SSIZE_T srcVMultiplier = 1;
|
||||
SSIZE_T dstVOffset = 0;
|
||||
SSIZE_T dstVMultiplier = 1;
|
||||
|
||||
if ((nWidth == 0) || (nHeight == 0))
|
||||
return PRIMITIVES_SUCCESS;
|
||||
|
||||
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
|
||||
return -1;
|
||||
|
||||
if (!pDstData || !pSrcData)
|
||||
return -1;
|
||||
|
||||
if (nDstStep == 0)
|
||||
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
|
||||
|
||||
if (nSrcStep == 0)
|
||||
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
|
||||
|
||||
if (vSrcVFlip)
|
||||
{
|
||||
srcVOffset = (nHeight - 1ll) * nSrcStep;
|
||||
srcVMultiplier = -1;
|
||||
}
|
||||
|
||||
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
|
||||
return sse_image_copy_no_overlap_dst_alpha(pDstData, DstFormat, nDstStep, nXDst, nYDst,
|
||||
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
|
||||
nXSrc, nYSrc, palette, srcVMultiplier,
|
||||
srcVOffset, dstVMultiplier, dstVOffset);
|
||||
else if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
|
||||
return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst,
|
||||
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
|
||||
nXSrc, nYSrc, palette, srcVMultiplier,
|
||||
srcVOffset, dstVMultiplier, dstVOffset, flags);
|
||||
else
|
||||
return sse_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst,
|
||||
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
|
||||
nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset,
|
||||
dstVMultiplier, dstVOffset);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/**
 * Install the SSE copy_no_overlap implementation in prims when the build has
 * WITH_SSE2 defined and the processor reports SSE4.1 support. In every other
 * case prims is left unchanged.
 */
void primitives_init_copy_sse(primitives_t* prims)
{
#if defined(WITH_SSE2)
	/* keep the currently installed implementation without SSE4.1 */
	if (!IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
		return;

	prims->copy_no_overlap = sse_image_copy_no_overlap;
#else
	WINPR_UNUSED(prims);
#endif
}
|
Loading…
Reference in New Issue
Block a user