Merge pull request #10269 from akallabeth/image_copy_sse

Image copy sse
This commit is contained in:
akallabeth 2024-06-12 16:45:57 +02:00 committed by GitHub
commit bcd663b7d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 1501 additions and 527 deletions

View File

@ -8,8 +8,9 @@ set (WITH_SANITIZE_ADDRESS ON CACHE BOOL "build with address sanitizer")
set (WITH_CLIENT OFF CACHE BOOL "disable iOS client")
set (WITH_SERVER OFF CACHE BOOL "disable iOS server")
set (WITH_KRB5 OFF CACHE BOOL "Kerberos support")
set (WITH_CLIENT_SDL OFF CACHE BOOL "SDL client")
set (WITH_FFMPEG OFF CACHE BOOL "SDL client")
set (WITH_SWSCALE OFF CACHE BOOL "SDL client")
set (WITH_NEON ON CACHE BOOL "SDL client")
set (BUILD_SHARED_LIBS OFF CACHE BOOL "SDL client")
set (WITH_CLIENT_SDL OFF CACHE BOOL "iOS preload")
set (WITH_FFMPEG OFF CACHE BOOL "iOS preload")
set (WITH_SWSCALE OFF CACHE BOOL "iOS preload")
set (WITH_NEON ON CACHE BOOL "iOS preload")
set (WITH_OPUS OFF CACHE BOOL "iOS preload")
set (BUILD_SHARED_LIBS OFF CACHE BOOL "iOS preload")

View File

@ -13,4 +13,5 @@ set (WITH_FREERDP_DEPRECATED_COMMANDLINE ON CACHE BOOL "Enable deprecated comman
set (WITH_KRB5 OFF CACHE BOOL "Kerberos support")
set (WITH_WEBVIEW OFF CACHE BOOL "ci default")
set (WITH_FFMPEG OFF CACHE BOOL "ci default")
set (WITH_OPUS OFF CACHE BOOL "ci default")
set (WITH_SWSCALE OFF CACHE BOOL "ci default")

View File

@ -104,6 +104,12 @@ typedef pstatus_t (*__add_16s_t)(const INT16* WINPR_RESTRICT pSrc1,
UINT32 len);
typedef pstatus_t (*__add_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst1,
INT16* WINPR_RESTRICT pSrcDst2, UINT32 len);
typedef pstatus_t (*__copy_no_overlap_t)(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData,
DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc,
UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
UINT32 flags);
typedef pstatus_t (*__lShiftC_16s_inplace_t)(INT16* WINPR_RESTRICT pSrcDst, UINT32 val, UINT32 len);
typedef pstatus_t (*__lShiftC_16s_t)(const INT16* pSrc, UINT32 val, INT16* pSrcDst, UINT32 len);
typedef pstatus_t (*__lShiftC_16u_t)(const UINT16* pSrc, UINT32 val, UINT16* pSrcDst, UINT32 len);
@ -222,6 +228,7 @@ typedef struct
*/
__add_16s_inplace_t add_16s_inplace;
__lShiftC_16s_inplace_t lShiftC_16s_inplace;
__copy_no_overlap_t copy_no_overlap;
} primitives_t;
typedef enum

View File

@ -23,9 +23,11 @@ include(FindCairo)
set(LIBFREERDP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(LIBFREERDP_SRCS "")
set(LIBFREERDP_OBJECT_LIBS "")
set(LIBFREERDP_LIBS "")
set(LIBFREERDP_INCLUDES "")
set(LIBFREERDP_DEFINITIONS "")
set(LIBFREERDP_COMPILE_OPTIONS "")
macro (freerdp_module_add)
file (RELATIVE_PATH _relPath "${LIBFREERDP_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}")
@ -66,6 +68,13 @@ macro (freerdp_library_add_public)
set (LIBFREERDP_PUB_LIBS ${LIBFREERDP_PUB_LIBS} PARENT_SCOPE)
endmacro()
macro (freerdp_object_library_add)
foreach (_lib ${ARGN})
list (APPEND LIBFREERDP_OBJECT_LIBS "$<TARGET_OBJECTS:${_lib}>")
endforeach()
set (LIBFREERDP_OBJECT_LIBS ${LIBFREERDP_OBJECT_LIBS} PARENT_SCOPE)
endmacro()
macro (freerdp_library_add)
foreach (_lib ${ARGN})
list (APPEND LIBFREERDP_LIBS "${_lib}")
@ -80,6 +89,13 @@ macro (freerdp_definition_add)
set (LIBFREERDP_DEFINITIONS ${LIBFREERDP_DEFINITIONS} PARENT_SCOPE)
endmacro()
macro (freerdp_compile_options_add)
foreach (_lib ${ARGN})
list (APPEND LIBFREERDP_COMPILE_OPTIONS "${_lib}")
endforeach()
set (LIBFREERDP_COMPILE_OPTIONS ${LIBFREERDP_COMPILE_OPTIONS} PARENT_SCOPE)
endmacro()
if (WITH_SWSCALE)
find_package(SWScale REQUIRED)
endif(WITH_SWSCALE)
@ -118,134 +134,8 @@ if (NOT WITH_DSP_FFMPEG AND NOT WITH_FAAC)
message(WARNING "Compiling without WITH_DSP_FFMPEG and WITH_FAAC, AAC encoder support disabled")
endif ()
## cmake source properties are only seen by targets in the same CMakeLists.txt
## therefore primitives and codecs need to be defined here
# codec
set(CODEC_SRCS
codec/bulk.c
codec/bulk.h
codec/dsp.c
codec/color.c
codec/audio.c
codec/planar.c
codec/bitmap.c
codec/interleaved.c
codec/progressive.c
codec/rfx_bitstream.h
codec/rfx_constants.h
codec/rfx_decode.c
codec/rfx_decode.h
codec/rfx_differential.h
codec/rfx_dwt.c
codec/rfx_dwt.h
codec/rfx_encode.c
codec/rfx_encode.h
codec/rfx_quantization.c
codec/rfx_quantization.h
codec/rfx_rlgr.c
codec/rfx_rlgr.h
codec/rfx_types.h
codec/rfx.c
codec/region.c
codec/nsc.c
codec/nsc_encode.c
codec/nsc_encode.h
codec/nsc_types.h
codec/ncrush.c
codec/xcrush.c
codec/mppc.c
codec/zgfx.c
codec/clear.c
codec/jpeg.c
codec/h264.c
codec/yuv.c)
set(CODEC_SSE2_SRCS
codec/rfx_sse2.c
codec/rfx_sse2.h
codec/nsc_sse2.c
codec/nsc_sse2.h)
set(CODEC_NEON_SRCS
codec/rfx_neon.c
codec/rfx_neon.h)
if(WITH_SSE2)
set(CODEC_SRCS ${CODEC_SRCS} ${CODEC_SSE2_SRCS})
if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "-msse2" )
endif()
if(MSVC)
set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "/arch:SSE2" )
endif()
endif()
if (WITH_DSP_FFMPEG)
set(CODEC_SRCS
${CODEC_SRCS}
codec/dsp_ffmpeg.c
codec/dsp_ffmpeg.h)
freerdp_include_directory_add(${FFMPEG_INCLUDE_DIRS})
freerdp_library_add(${FFMPEG_LIBRARIES})
endif (WITH_DSP_FFMPEG)
if (WITH_SOXR)
freerdp_library_add(${SOXR_LIBRARIES})
include_directories(${SOXR_INCLUDE_DIR})
endif(WITH_SOXR)
if(GSM_FOUND)
freerdp_library_add(${GSM_LIBRARIES})
include_directories(${GSM_INCLUDE_DIRS})
endif()
if(LAME_FOUND)
freerdp_library_add(${LAME_LIBRARIES})
include_directories(${LAME_INCLUDE_DIRS})
endif()
set(OPUS_DEFAULT OFF)
if (NOT WITH_DSP_FFMPEG)
find_package(Opus)
if (Opus_FOUND)
set(OPUS_DEFAULT ${OPUS_FOUND})
else()
find_package(PkgConfig)
if (PkgConfig_FOUND)
pkg_check_modules(OPUS opus)
set(OPUS_DEFAULT ${OPUS_FOUND})
endif()
endif()
endif()
option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT})
if (WITH_OPUS)
find_package(Opus)
if (Opus_FOUND)
freerdp_library_add(Opus::opus)
else()
find_package(PkgConfig REQUIRED)
pkg_check_modules(OPUS REQUIRED opus)
if(OPUS_FOUND)
freerdp_library_add(${OPUS_LIBRARIES})
include_directories(${OPUS_INCLUDE_DIRS})
link_directories(${OPUS_LIBRARY_DIRS})
endif()
endif()
endif()
if(FAAD2_FOUND)
freerdp_library_add(${FAAD2_LIBRARIES})
include_directories(${FAAD2_INCLUDE_DIRS})
endif()
if(FAAC_FOUND)
freerdp_library_add(${FAAC_LIBRARIES})
include_directories(${FAAC_INCLUDE_DIRS})
endif()
add_subdirectory(codec)
add_subdirectory(primitives)
if (WITH_AAD)
if (NOT WITH_WINPR_JSON)
@ -253,141 +143,6 @@ if (WITH_AAD)
endif()
endif()
if(WITH_NEON)
check_symbol_exists("_M_AMD64" "" MSVC_ARM64)
check_symbol_exists("__aarch64__" "" ARCH_ARM64)
if (NOT MSVC_ARM64 AND NOT ARCH_ARM64)
set_source_files_properties(${CODEC_NEON_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon" )
endif()
set(CODEC_SRCS ${CODEC_SRCS} ${CODEC_NEON_SRCS})
endif()
if(WITH_OPENH264)
set(CODEC_SRCS ${CODEC_SRCS} codec/h264_openh264.c)
freerdp_include_directory_add(${OPENH264_INCLUDE_DIR})
if (NOT WITH_OPENH264_LOADING)
freerdp_library_add(${OPENH264_LIBRARIES})
endif (NOT WITH_OPENH264_LOADING)
endif()
if(WITH_VIDEO_FFMPEG)
set(CODEC_SRCS ${CODEC_SRCS} codec/h264_ffmpeg.c)
freerdp_include_directory_add(${FFMPEG_INCLUDE_DIRS})
freerdp_library_add(${FFMPEG_LIBRARIES})
endif()
if(WIN32 AND WITH_MEDIA_FOUNDATION)
set(CODEC_SRCS ${CODEC_SRCS} codec/h264_mf.c)
endif()
if(ANDROID AND WITH_MEDIACODEC)
list(APPEND CODEC_SRCS codec/h264_mediacodec.c)
find_library(MEDIACODEC mediandk REQUIRED)
freerdp_library_add(${MEDIACODEC})
endif()
freerdp_module_add(${CODEC_SRCS})
if(BUILD_TESTING)
add_subdirectory(codec/test)
endif()
# /codec
# primitives
set(PRIMITIVES_SRCS
primitives/prim_add.c
primitives/prim_andor.c
primitives/prim_alphaComp.c
primitives/prim_colors.c
primitives/prim_copy.c
primitives/prim_set.c
primitives/prim_shift.c
primitives/prim_sign.c
primitives/prim_YUV.c
primitives/prim_YCoCg.c
primitives/primitives.c
primitives/prim_internal.h)
if (WITH_SSE2 OR WITH_NEON)
set(PRIMITIVES_SSE2_SRCS
primitives/prim_colors_opt.c
primitives/prim_set_opt.c)
set(PRIMITIVES_SSE3_SRCS
primitives/prim_add_opt.c
primitives/prim_alphaComp_opt.c
primitives/prim_andor_opt.c
primitives/prim_shift_opt.c)
set(PRIMITIVES_SSSE3_SRCS
primitives/prim_sign_opt.c
primitives/prim_YCoCg_opt.c)
if (WITH_SSE2)
set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS}
primitives/prim_YUV_ssse3.c)
endif()
if (WITH_NEON)
set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS}
primitives/prim_YUV_neon.c)
endif()
endif()
if (WITH_OPENCL)
set(PRIMITIVES_OPENCL_SRCS primitives/prim_YUV_opencl.c)
freerdp_include_directory_add(${OpenCL_INCLUDE_DIRS})
freerdp_library_add(OpenCL::OpenCL)
endif()
set(PRIMITIVES_OPT_SRCS
${PRIMITIVES_SSE2_SRCS}
${PRIMITIVES_SSE3_SRCS}
${PRIMITIVES_SSSE3_SRCS}
${PRIMITIVES_OPENCL_SRCS})
if(WITH_SSE2)
if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set_source_files_properties(${PRIMITIVES_SSE2_SRCS}
PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -msse2")
set_source_files_properties(${PRIMITIVES_SSE3_SRCS}
PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -msse3")
set_source_files_properties(${PRIMITIVES_SSSE3_SRCS}
PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -mssse3")
endif()
if(MSVC)
set_source_files_properties(${PRIMITIVES_OPT_SRCS}
PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} /arch:SSE2")
endif()
elseif(WITH_NEON)
if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
if (NOT MSVC_ARM64 AND NOT ARCH_ARM64)
set_source_files_properties(${PRIMITIVES_OPT_SRCS}
PROPERTIES COMPILE_FLAGS "${OPTIMIZATION} -mfpu=neon")
endif()
endif()
# TODO: Add MSVC equivalent
endif()
set(PRIMITIVES_SRCS ${PRIMITIVES_SRCS} ${PRIMITIVES_OPT_SRCS})
freerdp_module_add(${PRIMITIVES_SRCS})
if(BUILD_TESTING AND NOT WIN32 AND NOT APPLE)
add_subdirectory(primitives/test)
endif()
# /primitives
list(APPEND LIBFREERDP_PUB_LIBS winpr)
list(REMOVE_DUPLICATES LIBFREERDP_DEFINITIONS)
@ -398,6 +153,16 @@ AddTargetWithResourceFile(${MODULE_NAME} FALSE "${FREERDP_VERSION}" LIBFREERDP_S
add_definitions(${LIBFREERDP_DEFINITIONS})
if (LIBFREERDP_COMPILE_OPTIONS)
list(REMOVE_DUPLICATES LIBFREERDP_COMPILE_OPTIONS)
target_compile_options(${MODULE_NAME} PRIVATE ${LIBFREERDP_COMPILE_OPTIONS})
endif()
if (LIBFREERDP_OBJECT_LIBS)
list(REMOVE_DUPLICATES LIBFREERDP_OBJECT_LIBS)
target_link_libraries(${MODULE_NAME} PRIVATE ${LIBFREERDP_OBJECT_LIBS})
endif()
if (WITH_FULL_CONFIG_PATH)
add_definitions(-DWITH_FULL_CONFIG_PATH)
endif()

View File

@ -0,0 +1,185 @@
# codec
set(CODEC_SRCS
bulk.c
bulk.h
dsp.c
color.c
color.h
audio.c
planar.c
bitmap.c
interleaved.c
progressive.c
rfx_bitstream.h
rfx_constants.h
rfx_decode.c
rfx_decode.h
rfx_differential.h
rfx_dwt.c
rfx_dwt.h
rfx_encode.c
rfx_encode.h
rfx_quantization.c
rfx_quantization.h
rfx_rlgr.c
rfx_rlgr.h
rfx_types.h
rfx.c
region.c
nsc.c
nsc_encode.c
nsc_encode.h
nsc_types.h
ncrush.c
xcrush.c
mppc.c
zgfx.c
clear.c
jpeg.c
h264.c
yuv.c)
set(CODEC_SSE2_SRCS
sse/rfx_sse2.c
sse/rfx_sse2.h
sse/nsc_sse2.c
sse/nsc_sse2.h
)
set(CODEC_NEON_SRCS
neon/rfx_neon.c
neon/rfx_neon.h
neon/nsc_neon.c
neon/nsc_neon.h
)
# Append initializers
set(CODEC_LIBS "")
list(APPEND CODEC_SRCS ${CODEC_SSE2_SRCS})
list(APPEND CODEC_SRCS ${CODEC_NEON_SRCS})
if(WITH_SSE2)
if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
if (CODEC_SSE2_SRCS)
set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "-msse2" )
endif()
endif()
if(MSVC)
if (CODEC_SSE2_SRCS)
set_source_files_properties(${CODEC_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "/arch:SSE2" )
endif()
endif()
endif()
if(WITH_NEON)
check_symbol_exists("_M_AMD64" "" MSVC_ARM64)
check_symbol_exists("__aarch64__" "" ARCH_ARM64)
if (NOT MSVC_ARM64 AND NOT ARCH_ARM64)
if (CODEC_SSE2_SRCS)
set_source_files_properties(${CODEC_NEON_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon" )
endif()
endif()
endif()
if (WITH_DSP_FFMPEG)
set(CODEC_SRCS
${CODEC_SRCS}
dsp_ffmpeg.c
dsp_ffmpeg.h)
include_directories(${FFMPEG_INCLUDE_DIRS})
list(APPEND CODEC_LIBS ${FFMPEG_LIBRARIES})
endif (WITH_DSP_FFMPEG)
if (WITH_SOXR)
list(APPEND CODEC_LIBS ${SOXR_LIBRARIES})
include_directories(${SOXR_INCLUDE_DIR})
endif(WITH_SOXR)
if(GSM_FOUND)
list(APPEND CODEC_LIBS ${GSM_LIBRARIES})
include_directories(${GSM_INCLUDE_DIRS})
endif()
if(LAME_FOUND)
list(APPEND CODEC_LIBS ${LAME_LIBRARIES})
include_directories(${LAME_INCLUDE_DIRS})
endif()
set(OPUS_DEFAULT OFF)
if (NOT WITH_DSP_FFMPEG)
find_package(Opus)
if (Opus_FOUND)
set(OPUS_DEFAULT ${OPUS_FOUND})
else()
find_package(PkgConfig)
if (PkgConfig_FOUND)
pkg_check_modules(OPUS opus)
set(OPUS_DEFAULT ${OPUS_FOUND})
endif()
endif()
message("Using OPUS: ${OPUS_DEFAULT}")
endif()
option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT})
if (WITH_OPUS)
find_package(Opus)
if (Opus_FOUND)
list(APPEND CODEC_LIBS ${OPUS_LIBRARIES})
else()
find_package(PkgConfig REQUIRED)
pkg_check_modules(OPUS REQUIRED opus)
if(OPUS_FOUND)
list(APPEND CODEC_LIBS ${OPUS_LIBRARIES})
include_directories(${OPUS_INCLUDE_DIRS})
link_directories(${OPUS_LIBRARY_DIRS})
endif()
endif()
endif()
if(FAAD2_FOUND)
list(APPEND CODEC_LIBS ${FAAD2_LIBRARIES})
include_directories(${FAAD2_INCLUDE_DIRS})
endif()
if(FAAC_FOUND)
list(APPEND CODEC_LIBS ${FAAC_LIBRARIES})
include_directories(${FAAC_INCLUDE_DIRS})
endif()
if(WITH_OPENH264)
set(CODEC_SRCS ${CODEC_SRCS} h264_openh264.c)
include_directories(${OPENH264_INCLUDE_DIR})
if (NOT WITH_OPENH264_LOADING)
list(APPEND CODEC_LIBS ${OPENH264_LIBRARIES})
endif (NOT WITH_OPENH264_LOADING)
endif()
if(WITH_VIDEO_FFMPEG)
set(CODEC_SRCS ${CODEC_SRCS} h264_ffmpeg.c)
include_directories(${FFMPEG_INCLUDE_DIRS})
list(APPEND CODEC_LIBS ${FFMPEG_LIBRARIES})
endif()
if(WIN32 AND WITH_MEDIA_FOUNDATION)
set(CODEC_SRCS ${CODEC_SRCS} h264_mf.c)
endif()
if(ANDROID AND WITH_MEDIACODEC)
list(APPEND CODEC_SRCS h264_mediacodec.c)
find_library(MEDIACODEC mediandk REQUIRED)
list(APPEND CODEC_LIBS ${MEDIACODEC})
endif()
add_library(freerdp-codecs OBJECT
${CODEC_SRCS}
)
freerdp_library_add(${CODEC_LIBS})
freerdp_object_library_add(freerdp-codecs)
if(BUILD_TESTING)
add_subdirectory(test)
endif()

View File

@ -39,17 +39,9 @@
#include <libswscale/swscale.h>
#endif
#define TAG FREERDP_TAG("color")
#include "color.h"
static INLINE BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format,
UINT32 color);
static INLINE BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color);
static INLINE UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format);
static INLINE DWORD FreeRDPAreColorFormatsEqualNoAlpha_int(DWORD first, DWORD second)
{
const DWORD mask = (DWORD) ~(8UL << 12UL);
return (first & mask) == (second & mask);
}
#define TAG FREERDP_TAG("color")
BYTE* freerdp_glyph_convert(UINT32 width, UINT32 height, const BYTE* WINPR_RESTRICT data)
{
@ -733,102 +725,6 @@ static INLINE BOOL freerdp_image_copy_no_overlap_dst_alpha(
srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
}
BOOL freerdp_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep,
UINT32 nXDst, UINT32 nYDst, UINT32 nWidth, UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
const gdiPalette* WINPR_RESTRICT palette, UINT32 flags)
{
const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
const SSIZE_T copyDstWidth = nWidth * dstByte;
const SSIZE_T xSrcOffset = nXSrc * srcByte;
const SSIZE_T xDstOffset = nXDst * dstByte;
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
SSIZE_T srcVOffset = 0;
SSIZE_T srcVMultiplier = 1;
SSIZE_T dstVOffset = 0;
SSIZE_T dstVMultiplier = 1;
if ((nWidth == 0) || (nHeight == 0))
return TRUE;
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
return FALSE;
if (!pDstData || !pSrcData)
return FALSE;
if (nDstStep == 0)
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
if (nSrcStep == 0)
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
if (vSrcVFlip)
{
srcVOffset = (nHeight - 1ll) * nSrcStep;
srcVMultiplier = -1;
}
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
return freerdp_image_copy_no_overlap_dst_alpha(
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier,
dstVOffset);
else if (FreeRDPAreColorFormatsEqualNoAlpha_int(SrcFormat, DstFormat))
{
if (!vSrcVFlip && (nDstStep == nSrcStep) && (xSrcOffset == 0) && (xDstOffset == 0))
{
const void* src = &pSrcData[1ull * nYSrc * nSrcStep];
void* dst = &pDstData[1ull * nYDst * nDstStep];
memcpy(dst, src, 1ull * nDstStep * nHeight);
}
else
{
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], copyDstWidth);
}
}
}
else
{
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
UINT32 color = FreeRDPReadColor_int(&srcLine[nXSrc * srcByte], SrcFormat);
UINT32 oldColor = color;
UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[nXDst * dstByte], DstFormat, dstColor);
for (SSIZE_T x = 1; x < nWidth; x++)
{
color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
if (color == oldColor)
{
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
else
{
oldColor = color;
dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
}
}
}
return TRUE;
}
BOOL freerdp_image_copy_overlap(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst,
UINT32 nYDst, UINT32 nWidth, UINT32 nHeight, const BYTE* pSrcData,
DWORD SrcFormat, UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
@ -1608,124 +1504,16 @@ BOOL FreeRDPWriteColorIgnoreAlpha(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT3
return FreeRDPWriteColorIgnoreAlpha_int(dst, format, color);
}
BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
switch (format)
{
case PIXEL_FORMAT_XBGR32:
case PIXEL_FORMAT_XRGB32:
case PIXEL_FORMAT_ABGR32:
case PIXEL_FORMAT_ARGB32:
{
const UINT32 tmp = ((UINT32)dst[0] << 24ULL) | (color & 0x00FFFFFFULL);
return FreeRDPWriteColor_int(dst, format, tmp);
}
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_RGBX32:
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_RGBA32:
{
const UINT32 tmp = ((UINT32)dst[3]) | (color & 0xFFFFFF00ULL);
return FreeRDPWriteColor_int(dst, format, tmp);
}
default:
return FreeRDPWriteColor_int(dst, format, color);
}
}
BOOL FreeRDPWriteColor(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
return FreeRDPWriteColor_int(dst, format, color);
}
BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
switch (FreeRDPGetBitsPerPixel(format))
{
case 32:
dst[0] = (BYTE)(color >> 24);
dst[1] = (BYTE)(color >> 16);
dst[2] = (BYTE)(color >> 8);
dst[3] = (BYTE)color;
break;
case 24:
dst[0] = (BYTE)(color >> 16);
dst[1] = (BYTE)(color >> 8);
dst[2] = (BYTE)color;
break;
case 16:
dst[1] = (BYTE)(color >> 8);
dst[0] = (BYTE)color;
break;
case 15:
if (!FreeRDPColorHasAlpha(format))
color = color & 0x7FFF;
dst[1] = (BYTE)(color >> 8);
dst[0] = (BYTE)color;
break;
case 8:
dst[0] = (BYTE)color;
break;
default:
WLog_ERR(TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
return FALSE;
}
return TRUE;
}
UINT32 FreeRDPReadColor(const BYTE* WINPR_RESTRICT src, UINT32 format)
{
return FreeRDPReadColor_int(src, format);
}
UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format)
{
UINT32 color = 0;
switch (FreeRDPGetBitsPerPixel(format))
{
case 32:
color =
((UINT32)src[0] << 24) | ((UINT32)src[1] << 16) | ((UINT32)src[2] << 8) | src[3];
break;
case 24:
color = ((UINT32)src[0] << 16) | ((UINT32)src[1] << 8) | src[2];
break;
case 16:
color = ((UINT32)src[1] << 8) | src[0];
break;
case 15:
color = ((UINT32)src[1] << 8) | src[0];
if (!FreeRDPColorHasAlpha(format))
color = color & 0x7FFF;
break;
case 8:
case 4:
case 1:
color = *src;
break;
default:
WLog_ERR(TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
color = 0;
break;
}
return color;
}
UINT32 FreeRDPGetColor(UINT32 format, BYTE r, BYTE g, BYTE b, BYTE a)
{
UINT32 _r = r;
@ -1817,3 +1605,20 @@ UINT32 FreeRDPGetColor(UINT32 format, BYTE r, BYTE g, BYTE b, BYTE a)
return 0;
}
}
BOOL freerdp_image_copy_no_overlap(BYTE* pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst,
UINT32 nYDst, UINT32 nWidth, UINT32 nHeight,
const BYTE* pSrcData, DWORD SrcFormat, UINT32 nSrcStep,
UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* palette,
UINT32 flags)
{
static primitives_t* prims = NULL;
if (!prims)
prims = primitives_get();
WINPR_ASSERT(prims);
WINPR_ASSERT(prims->copy_no_overlap);
return prims->copy_no_overlap(pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight,
pSrcData, SrcFormat, nSrcStep, nXSrc, nYSrc, palette,
flags) == PRIMITIVES_SUCCESS;
}

147
libfreerdp/codec/color.h Normal file
View File

@ -0,0 +1,147 @@
/**
* FreeRDP: A Remote Desktop Protocol Implementation
* codec color
*
* Copyright 2024 Armin Novak <anovak@thincast.com>
* Copyright 2024 Thincast Technologies GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FREERDP_LIB_CODEC_COLOR_H
#define FREERDP_LIB_CODEC_COLOR_H
#include <winpr/winpr.h>
#include <winpr/wtypes.h>
#include <freerdp/codec/color.h>
#include <freerdp/log.h>
#define INT_COLOR_TAG FREERDP_TAG("codec.color.h")
static INLINE DWORD FreeRDPAreColorFormatsEqualNoAlpha_int(DWORD first, DWORD second)
{
const DWORD mask = (DWORD) ~(8UL << 12UL);
return (first & mask) == (second & mask);
}
static INLINE BOOL FreeRDPWriteColor_int(BYTE* WINPR_RESTRICT dst, UINT32 format, UINT32 color)
{
switch (FreeRDPGetBitsPerPixel(format))
{
case 32:
dst[0] = (BYTE)(color >> 24);
dst[1] = (BYTE)(color >> 16);
dst[2] = (BYTE)(color >> 8);
dst[3] = (BYTE)color;
break;
case 24:
dst[0] = (BYTE)(color >> 16);
dst[1] = (BYTE)(color >> 8);
dst[2] = (BYTE)color;
break;
case 16:
dst[1] = (BYTE)(color >> 8);
dst[0] = (BYTE)color;
break;
case 15:
if (!FreeRDPColorHasAlpha(format))
color = color & 0x7FFF;
dst[1] = (BYTE)(color >> 8);
dst[0] = (BYTE)color;
break;
case 8:
dst[0] = (BYTE)color;
break;
default:
WLog_ERR(INT_COLOR_TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
return FALSE;
}
return TRUE;
}
static INLINE BOOL FreeRDPWriteColorIgnoreAlpha_int(BYTE* WINPR_RESTRICT dst, UINT32 format,
UINT32 color)
{
switch (format)
{
case PIXEL_FORMAT_XBGR32:
case PIXEL_FORMAT_XRGB32:
case PIXEL_FORMAT_ABGR32:
case PIXEL_FORMAT_ARGB32:
{
const UINT32 tmp = ((UINT32)dst[0] << 24ULL) | (color & 0x00FFFFFFULL);
return FreeRDPWriteColor_int(dst, format, tmp);
}
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_RGBX32:
case PIXEL_FORMAT_BGRA32:
case PIXEL_FORMAT_RGBA32:
{
const UINT32 tmp = ((UINT32)dst[3]) | (color & 0xFFFFFF00ULL);
return FreeRDPWriteColor_int(dst, format, tmp);
}
default:
return FreeRDPWriteColor_int(dst, format, color);
}
}
static INLINE UINT32 FreeRDPReadColor_int(const BYTE* WINPR_RESTRICT src, UINT32 format)
{
UINT32 color = 0;
switch (FreeRDPGetBitsPerPixel(format))
{
case 32:
color =
((UINT32)src[0] << 24) | ((UINT32)src[1] << 16) | ((UINT32)src[2] << 8) | src[3];
break;
case 24:
color = ((UINT32)src[0] << 16) | ((UINT32)src[1] << 8) | src[2];
break;
case 16:
color = ((UINT32)src[1] << 8) | src[0];
break;
case 15:
color = ((UINT32)src[1] << 8) | src[0];
if (!FreeRDPColorHasAlpha(format))
color = color & 0x7FFF;
break;
case 8:
case 4:
case 1:
color = *src;
break;
default:
WLog_ERR(INT_COLOR_TAG, "Unsupported format %s", FreeRDPGetColorFormatName(format));
color = 0;
break;
}
return color;
}
#endif

View File

@ -0,0 +1,38 @@
/**
* FreeRDP: A Remote Desktop Protocol Implementation
* NSCodec Library - SSE2 Optimizations
*
* Copyright 2024 Armin Novak <anovak@thincast.com>
* Copyright 2024 Thincast Technologies GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <freerdp/log.h>
#include "../nsc_types.h"
#include "nsc_neon.h"
#define TAG FREERDP_TAG("codec.nsc.neon")
void nsc_init_neon(NSC_CONTEXT* context)
{
#if defined(WITH_NEON)
if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
return;
WLog_WARN(TAG, "TODO: Implement neon optimized version of this function");
#endif
}

View File

@ -0,0 +1,28 @@
/**
* FreeRDP: A Remote Desktop Protocol Implementation
* NSCodec Library - NEON Optimizations
*
* Copyright 2012 Vic Lee
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FREERDP_LIB_CODEC_NSC_NEON_H
#define FREERDP_LIB_CODEC_NSC_NEON_H
#include <freerdp/codec/nsc.h>
#include <freerdp/api.h>
FREERDP_LOCAL void nsc_init_neon(NSC_CONTEXT* context);
#endif /* FREERDP_LIB_CODEC_NSC_NEON_H */

View File

@ -18,6 +18,12 @@
*/
#include <freerdp/config.h>
#include <freerdp/log.h>
#include "../rfx_types.h"
#include "rfx_neon.h"
#define TAG FREERDP_TAG("codec.rfx.neon")
#if defined(WITH_NEON)
@ -27,9 +33,6 @@
#include <arm_neon.h>
#include <winpr/sysinfo.h>
#include "rfx_types.h"
#include "rfx_neon.h"
/* rfx_decode_YCbCr_to_RGB_NEON code now resides in the primitives library. */
static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@ -517,9 +520,11 @@ static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp)
rfx_dwt_2d_decode_extrapolate_block_neon(&buffer[3007], temp, 2);
rfx_dwt_2d_decode_extrapolate_block_neon(&buffer[0], temp, 1);
}
#endif // WITH_NEON
void rfx_init_neon(RFX_CONTEXT* context)
{
#if defined(WITH_NEON)
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
DEBUG_RFX("Using NEON optimizations");
@ -531,6 +536,7 @@ void rfx_init_neon(RFX_CONTEXT* context)
context->dwt_2d_decode = rfx_dwt_2d_decode_NEON;
context->dwt_2d_extrapolate_decode = rfx_dwt_2d_extrapolate_decode_neon;
}
#else
WINPR_UNUSED(context);
#endif
}
#endif // WITH_NEON

View File

@ -25,10 +25,4 @@
FREERDP_LOCAL void rfx_init_neon(RFX_CONTEXT* context);
#ifndef RFX_INIT_SIMD
#if defined(WITH_NEON)
#define RFX_INIT_SIMD(_rfx_context) rfx_init_neon(_rfx_context)
#endif
#endif
#endif /* FREERDP_LIB_CODEC_RFX_NEON_H */

View File

@ -34,7 +34,8 @@
#include "nsc_types.h"
#include "nsc_encode.h"
#include "nsc_sse2.h"
#include "sse/nsc_sse2.h"
#include "neon/nsc_neon.h"
#include <freerdp/log.h>
#define TAG FREERDP_TAG("codec.nsc")
@ -368,7 +369,8 @@ NSC_CONTEXT* nsc_context_new(void)
context->ColorLossLevel = 3;
context->ChromaSubsamplingLevel = 1;
/* init optimized methods */
NSC_INIT_SIMD(context);
nsc_init_sse2(context);
nsc_init_neon(context);
return context;
error:
WINPR_PRAGMA_DIAG_PUSH

View File

@ -47,18 +47,11 @@
#include "rfx_dwt.h"
#include "rfx_rlgr.h"
#include "rfx_sse2.h"
#include "rfx_neon.h"
#include "sse/rfx_sse2.h"
#include "neon/rfx_neon.h"
#define TAG FREERDP_TAG("codec")
#ifndef RFX_INIT_SIMD
#define RFX_INIT_SIMD(_rfx_context) \
do \
{ \
} while (0)
#endif
#define RFX_KEY "Software\\" FREERDP_VENDOR_STRING "\\" FREERDP_PRODUCT_STRING "\\RemoteFX"
/**
@ -338,7 +331,8 @@ RFX_CONTEXT* rfx_context_new_ex(BOOL encoder, UINT32 ThreadingFlags)
context->dwt_2d_encode = rfx_dwt_2d_encode;
context->rlgr_decode = rfx_rlgr_decode;
context->rlgr_encode = rfx_rlgr_encode;
RFX_INIT_SIMD(context);
rfx_init_sse2(context);
rfx_init_neon(context);
context->state = RFX_STATE_SEND_HEADERS;
context->expectedDataBlockType = WBT_FRAME_BEGIN;
return context;

View File

@ -18,7 +18,14 @@
*/
#include <freerdp/config.h>
#include <freerdp/log.h>
#include "../nsc_types.h"
#include "nsc_sse2.h"
#define TAG FREERDP_TAG("codec.nsc.sse2")
#if defined(WITH_SSE2)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -30,9 +37,6 @@
#include <winpr/crt.h>
#include <winpr/sysinfo.h>
#include "nsc_types.h"
#include "nsc_sse2.h"
static BOOL nsc_encode_argb_to_aycocg_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanline)
{
UINT16 y = 0;
@ -373,12 +377,17 @@ static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanl
return TRUE;
}
#endif
void nsc_init_sse2(NSC_CONTEXT* context)
{
#if defined(WITH_SSE2)
if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
return;
PROFILER_RENAME(context->priv->prof_nsc_encode, "nsc_encode_sse2")
context->encode = nsc_encode_sse2;
#else
WINPR_UNUSED(context);
#endif
}

View File

@ -25,10 +25,4 @@
FREERDP_LOCAL void nsc_init_sse2(NSC_CONTEXT* context);
#ifdef WITH_SSE2
#ifndef NSC_INIT_SIMD
#define NSC_INIT_SIMD(_context) nsc_init_sse2(_context)
#endif
#endif
#endif /* FREERDP_LIB_CODEC_NSC_SSE2_H */

View File

@ -19,7 +19,14 @@
*/
#include <freerdp/config.h>
#include <freerdp/log.h>
#include "../rfx_types.h"
#include "rfx_sse2.h"
#define TAG FREERDP_TAG("codec.rfx.sse2")
#if defined(WITH_SSE2)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -28,9 +35,6 @@
#include <xmmintrin.h>
#include <emmintrin.h>
#include "rfx_types.h"
#include "rfx_sse2.h"
#ifdef _MSC_VER
#define __attribute__(...)
#endif
@ -477,9 +481,11 @@ static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RE
rfx_dwt_2d_encode_block_sse2(buffer + 3072, dwt_buffer, 16);
rfx_dwt_2d_encode_block_sse2(buffer + 3840, dwt_buffer, 8);
}
#endif
void rfx_init_sse2(RFX_CONTEXT* context)
{
#if defined(WITH_SSE2)
if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
return;
@ -491,4 +497,7 @@ void rfx_init_sse2(RFX_CONTEXT* context)
context->quantization_encode = rfx_quantization_encode_sse2;
context->dwt_2d_decode = rfx_dwt_2d_decode_sse2;
context->dwt_2d_encode = rfx_dwt_2d_encode_sse2;
#else
WINPR_UNUSED(context);
#endif
}

View File

@ -25,10 +25,4 @@
FREERDP_LOCAL void rfx_init_sse2(RFX_CONTEXT* context);
#ifdef WITH_SSE2
#ifndef RFX_INIT_SIMD
#define RFX_INIT_SIMD(_rfx_context) rfx_init_sse2(_rfx_context)
#endif
#endif
#endif /* FREERDP_LIB_CODEC_RFX_SSE2_H */

View File

@ -0,0 +1,97 @@
# primitives
set(PRIMITIVES_SRCS
prim_add.c
prim_andor.c
prim_alphaComp.c
prim_colors.c
prim_copy.c
prim_copy.h
prim_set.c
prim_shift.c
prim_sign.c
prim_YUV.c
prim_YCoCg.c
primitives.c
prim_internal.h)
if (WITH_SSE2 OR WITH_NEON)
set(PRIMITIVES_SSE2_SRCS
prim_colors_opt.c
prim_copy_sse.c
prim_copy_avx2.c
prim_set_opt.c)
set(PRIMITIVES_SSE3_SRCS
prim_add_opt.c
prim_alphaComp_opt.c
prim_andor_opt.c
prim_shift_opt.c)
set(PRIMITIVES_SSSE3_SRCS
prim_sign_opt.c
prim_YCoCg_opt.c)
if (WITH_SSE2)
set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS}
prim_YUV_ssse3.c)
endif()
if (WITH_NEON)
set(PRIMITIVES_SSSE3_SRCS ${PRIMITIVES_SSSE3_SRCS}
prim_YUV_neon.c)
endif()
endif()
if (WITH_OPENCL)
set(PRIMITIVES_OPENCL_SRCS prim_YUV_opencl.c)
freerdp_include_directory_add(${OpenCL_INCLUDE_DIRS})
freerdp_library_add(OpenCL::OpenCL)
endif()
set(PRIMITIVES_OPT_SRCS
${PRIMITIVES_SSE2_SRCS}
${PRIMITIVES_SSE3_SRCS}
${PRIMITIVES_SSSE3_SRCS}
${PRIMITIVES_OPENCL_SRCS})
set(PRIMITIVES_SRCS ${PRIMITIVES_SRCS} ${PRIMITIVES_OPT_SRCS})
add_library(freerdp-primitives OBJECT
${PRIMITIVES_SRCS}
)
if(WITH_SSE2)
if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
if(PRIMITIVES_SSE2_SRCS)
set_source_files_properties(${PRIMITIVES_SSE2_SRCS} PROPERTIES COMPILE_FLAGS "-msse2" )
endif()
if (PRIMITIVES_SSE3_SRCS)
set_source_files_properties(${PRIMITIVES_SSE3_SRCS} PROPERTIES COMPILE_FLAGS "-msse3" )
endif()
if (PRIMITIVES_SSSE3_SRCS)
set_source_files_properties(${PRIMITIVES_SSSE3_SRCS} PROPERTIES COMPILE_FLAGS "-mssse3" )
endif()
set_source_files_properties(prim_copy_sse.c PROPERTIES COMPILE_FLAGS "-msse4.1" )
set_source_files_properties(prim_copy_avx2.c PROPERTIES COMPILE_FLAGS "-mavx2" )
endif()
if(MSVC)
set_source_files_properties(${PRIMITIVES_OPT_SRCS} PROPERTIES COMPILE_FLAGS "/arch:SSE2")
endif()
elseif(WITH_NEON)
if(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
if (NOT MSVC_ARM64 AND NOT ARCH_ARM64)
set_source_files_properties(${PRIMITIVES_OPT_SRCS} PROPERTIES COMPILE_FLAGS "-mfpu=neon")
endif()
endif()
# TODO: Add MSVC equivalent
endif()
freerdp_object_library_add(freerdp-primitives)
if(BUILD_TESTING AND NOT WIN32 AND NOT APPLE)
add_subdirectory(test)
endif()

View File

@ -18,7 +18,15 @@
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <freerdp/log.h>
#include "prim_internal.h"
#include "prim_copy.h"
#include "../codec/color.h"
#include <freerdp/codec/color.h>
#define TAG FREERDP_TAG("primitives.copy")
static primitives_t* generic = NULL;
@ -128,6 +136,247 @@ static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDs
return PRIMITIVES_SUCCESS;
}
static INLINE pstatus_t generic_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData,
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = 3;
const SSIZE_T dstByte = 4;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
for (SSIZE_T x = 0; x < nWidth; x++)
{
dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
}
}
return PRIMITIVES_SUCCESS;
}
static INLINE pstatus_t generic_image_copy_bgrx32_bgrx32(
BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 nXSrc,
UINT32 nYSrc, SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier,
SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = 4;
const SSIZE_T dstByte = 4;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
for (SSIZE_T x = 0; x < nWidth; x++)
{
dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
}
}
return PRIMITIVES_SUCCESS;
}
pstatus_t generic_image_copy_no_overlap_convert(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
const UINT32 width = nWidth - nWidth % 8;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
SSIZE_T x = 0;
WINPR_PRAGMA_UNROLL_LOOP
for (; x < width; x++)
{
const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
for (; x < nWidth; x++)
{
const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
}
return PRIMITIVES_SUCCESS;
}
pstatus_t generic_image_copy_no_overlap_memcpy(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
UINT32 flags)
{
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
const SSIZE_T copyDstWidth = nWidth * dstByte;
const SSIZE_T xSrcOffset = nXSrc * srcByte;
const SSIZE_T xDstOffset = nXDst * dstByte;
if (!vSrcVFlip && (nDstStep == nSrcStep) && (xSrcOffset == 0) && (xDstOffset == 0))
{
const void* src = &pSrcData[1ull * nYSrc * nSrcStep];
void* dst = &pDstData[1ull * nYDst * nDstStep];
memcpy(dst, src, 1ull * nDstStep * nHeight);
}
else
{
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], copyDstWidth);
}
}
return PRIMITIVES_SUCCESS;
}
static INLINE pstatus_t generic_image_copy_no_overlap_dst_alpha(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
WINPR_ASSERT(pDstData);
WINPR_ASSERT(pSrcData);
switch (SrcFormat)
{
case PIXEL_FORMAT_BGR24:
switch (DstFormat)
{
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return generic_image_copy_bgr24_bgrx32(
pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
default:
break;
}
break;
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
switch (DstFormat)
{
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return generic_image_copy_bgrx32_bgrx32(
pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
default:
break;
}
break;
default:
break;
}
return generic_image_copy_no_overlap_convert(
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
}
static INLINE pstatus_t generic_image_copy_no_overlap_no_alpha(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
UINT32 flags)
{
if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier,
srcVOffset, dstVMultiplier, dstVOffset, flags);
else
return generic_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier,
srcVOffset, dstVMultiplier, dstVOffset);
}
static pstatus_t generic_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
const gdiPalette* WINPR_RESTRICT palette,
UINT32 flags)
{
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
SSIZE_T srcVOffset = 0;
SSIZE_T srcVMultiplier = 1;
SSIZE_T dstVOffset = 0;
SSIZE_T dstVMultiplier = 1;
if ((nWidth == 0) || (nHeight == 0))
return PRIMITIVES_SUCCESS;
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
return -1;
if (!pDstData || !pSrcData)
return -1;
if (nDstStep == 0)
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
if (nSrcStep == 0)
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
if (vSrcVFlip)
{
srcVOffset = (nHeight - 1ll) * nSrcStep;
srcVMultiplier = -1;
}
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
return generic_image_copy_no_overlap_dst_alpha(
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier,
dstVOffset);
else
return generic_image_copy_no_overlap_no_alpha(
pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset,
flags);
return PRIMITIVES_SUCCESS;
}
/* ------------------------------------------------------------------------- */
void primitives_init_copy(primitives_t* prims)
{
@ -136,6 +385,7 @@ void primitives_init_copy(primitives_t* prims)
prims->copy_8u_AC4r = general_copy_8u_AC4r;
/* This is just an alias with void* parameters */
prims->copy = (__copy_t)(prims->copy_8u);
prims->copy_no_overlap = generic_image_copy_no_overlap;
}
#if defined(WITH_SSE2) || defined(WITH_NEON)
@ -153,5 +403,7 @@ void primitives_init_copy_opt(primitives_t* prims)
*/
/* This is just an alias with void* parameters */
prims->copy = (__copy_t)(prims->copy_8u);
primitives_init_copy_sse(prims);
primitives_init_copy_avx2(prims);
}
#endif

View File

@ -0,0 +1,42 @@
/**
* FreeRDP: A Remote Desktop Protocol Implementation
* Primitives copy
*
* Copyright 2024 Armin Novak <anovak@thincast.com>
* Copyright 2024 Thincast Technologies GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FREERDP_LIB_PRIM_COPY_H
#define FREERDP_LIB_PRIM_COPY_H
#include <winpr/wtypes.h>
#include <freerdp/primitives.h>
pstatus_t generic_image_copy_no_overlap_convert(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset);
pstatus_t generic_image_copy_no_overlap_memcpy(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
UINT32 flags);
extern void primitives_init_copy_sse(primitives_t* prims);
extern void primitives_init_copy_avx2(primitives_t* prims);
#endif

View File

@ -0,0 +1,276 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Copy operations.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <freerdp/log.h>
#include "prim_internal.h"
#include "prim_copy.h"
#include "../codec/color.h"
#include <freerdp/codec/color.h>
#define TAG FREERDP_TAG("primitives.copy")
#if defined(WITH_SSE2)
#include <emmintrin.h>
#include <immintrin.h>
static INLINE pstatus_t avx2_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep,
UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = 3;
const SSIZE_T dstByte = 4;
const __m256i mask = _mm256_set_epi32(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
const SSIZE_T rem = nWidth % 8;
const SSIZE_T width = nWidth - rem;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
SSIZE_T x = 0;
for (; x < width; x += 8)
{
const __m256i* src = (const __m256i*)&srcLine[(x + nXSrc) * srcByte];
__m256i* dst = (__m256i*)&dstLine[(x + nXDst) * dstByte];
const __m256i s0 = _mm256_loadu_si256(src);
const __m256i s1 = _mm256_loadu_si256(dst);
const __m256i s2 = _mm256_shuffle_epi8(s1, mask);
__m256i d0 = _mm256_blendv_epi8(s2, s0, mask);
_mm256_storeu_si256(dst, d0);
}
for (; x < nWidth; x++)
{
const BYTE* src = &srcLine[(x + nXSrc) * srcByte];
BYTE* dst = &dstLine[(x + nXDst) * dstByte];
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
return PRIMITIVES_SUCCESS;
}
static INLINE pstatus_t avx2_image_copy_bgrx32_bgrx32(BYTE* WINPR_RESTRICT pDstData,
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = 4;
const SSIZE_T dstByte = 4;
const __m256i mask =
_mm256_setr_epi8(0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00,
0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00,
0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00);
const SSIZE_T rem = nWidth % 8;
const SSIZE_T width = nWidth - rem;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
SSIZE_T x = 0;
for (; x < width; x += 8)
{
const __m256i* src = (const __m256i*)&srcLine[(x + nXSrc) * srcByte];
__m256i* dst = (__m256i*)&dstLine[(x + nXDst) * dstByte];
const __m256i s0 = _mm256_loadu_si256(src);
const __m256i s1 = _mm256_loadu_si256(dst);
__m256i d0 = _mm256_blendv_epi8(s1, s0, mask);
_mm256_storeu_si256(dst, d0);
}
for (; x < nWidth; x++)
{
const BYTE* src = &srcLine[(x + nXSrc) * srcByte];
BYTE* dst = &dstLine[(x + nXDst) * dstByte];
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
return PRIMITIVES_SUCCESS;
}
static pstatus_t avx2_image_copy_no_overlap_dst_alpha(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
WINPR_ASSERT(pDstData);
WINPR_ASSERT(pSrcData);
switch (SrcFormat)
{
case PIXEL_FORMAT_BGR24:
switch (DstFormat)
{
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return avx2_image_copy_bgr24_bgrx32(
pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
default:
break;
}
break;
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
switch (DstFormat)
{
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return avx2_image_copy_bgrx32_bgrx32(
pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
default:
break;
}
break;
default:
break;
}
WLog_DBG(TAG, "unsupported format src %s --> dst %s", FreeRDPGetColorFormatName(SrcFormat),
FreeRDPGetColorFormatName(DstFormat));
return -1;
}
static INLINE pstatus_t avx2_image_copy_no_overlap_convert(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
const UINT32 width = nWidth - nWidth % 8;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
SSIZE_T x = 0;
WINPR_PRAGMA_UNROLL_LOOP
for (; x < width; x++)
{
const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
for (; x < nWidth; x++)
{
const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
}
return PRIMITIVES_SUCCESS;
}
static pstatus_t avx2_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
const gdiPalette* WINPR_RESTRICT palette, UINT32 flags)
{
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
SSIZE_T srcVOffset = 0;
SSIZE_T srcVMultiplier = 1;
SSIZE_T dstVOffset = 0;
SSIZE_T dstVMultiplier = 1;
if ((nWidth == 0) || (nHeight == 0))
return PRIMITIVES_SUCCESS;
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
return -1;
if (!pDstData || !pSrcData)
return -1;
if (nDstStep == 0)
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
if (nSrcStep == 0)
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
if (vSrcVFlip)
{
srcVOffset = (nHeight - 1ll) * nSrcStep;
srcVMultiplier = -1;
}
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
return avx2_image_copy_no_overlap_dst_alpha(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier,
srcVOffset, dstVMultiplier, dstVOffset);
else if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier,
srcVOffset, dstVMultiplier, dstVOffset, flags);
else
return avx2_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset,
dstVMultiplier, dstVOffset);
}
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_copy_avx2(primitives_t* prims)
{
#if defined(WITH_SSE2)
if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE))
{
prims->copy_no_overlap = avx2_image_copy_no_overlap;
}
#else
WINPR_UNUSED(prims);
#endif
}

View File

@ -0,0 +1,274 @@
/* FreeRDP: A Remote Desktop Protocol Client
* Copy operations.
* vi:ts=4 sw=4:
*
* (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
#include <winpr/sysinfo.h>
#include <freerdp/config.h>
#include <string.h>
#include <freerdp/types.h>
#include <freerdp/primitives.h>
#include <freerdp/log.h>
#include "prim_internal.h"
#include "prim_copy.h"
#include "../codec/color.h"
#include <freerdp/codec/color.h>
#define TAG FREERDP_TAG("primitives.copy")
#if defined(WITH_SSE2)
#include <emmintrin.h>
#include <immintrin.h>
static INLINE pstatus_t sse_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep,
UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = 3;
const SSIZE_T dstByte = 4;
const __m128i mask = _mm_set_epi32(0xFF, 0xFF, 0xFF, 0xFF);
const SSIZE_T rem = nWidth % 4;
const SSIZE_T width = nWidth - rem;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
SSIZE_T x = 0;
for (; x < width; x += 4)
{
const __m128i* src = (const __m128i*)&srcLine[(x + nXSrc) * srcByte];
__m128i* dst = (__m128i*)&dstLine[(x + nXDst) * dstByte];
const __m128i s0 = _mm_loadu_si128(src);
const __m128i s1 = _mm_loadu_si128(dst);
const __m128i s2 = _mm_shuffle_epi8(s1, mask);
__m128i d0 = _mm_blendv_epi8(s2, s0, mask);
_mm_storeu_si128(dst, d0);
}
for (; x < nWidth; x++)
{
const BYTE* src = &srcLine[(x + nXSrc) * srcByte];
BYTE* dst = &dstLine[(x + nXDst) * dstByte];
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
return PRIMITIVES_SUCCESS;
}
static INLINE pstatus_t sse_image_copy_bgrx32_bgrx32(BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep,
UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = 4;
const SSIZE_T dstByte = 4;
const __m128i mask = _mm_setr_epi8(0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF,
0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00);
const SSIZE_T rem = nWidth % 4;
const SSIZE_T width = nWidth - rem;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
SSIZE_T x = 0;
for (; x < width; x += 4)
{
const __m128i* src = (const __m128i*)&srcLine[(x + nXSrc) * srcByte];
__m128i* dst = (__m128i*)&dstLine[(x + nXDst) * dstByte];
const __m128i s0 = _mm_loadu_si128(src);
const __m128i s1 = _mm_loadu_si128(dst);
__m128i d0 = _mm_blendv_epi8(s1, s0, mask);
_mm_storeu_si128(dst, d0);
}
for (; x < nWidth; x++)
{
const BYTE* src = &srcLine[(x + nXSrc) * srcByte];
BYTE* dst = &dstLine[(x + nXDst) * dstByte];
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
return PRIMITIVES_SUCCESS;
}
static pstatus_t sse_image_copy_no_overlap_dst_alpha(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
WINPR_ASSERT(pDstData);
WINPR_ASSERT(pSrcData);
switch (SrcFormat)
{
case PIXEL_FORMAT_BGR24:
switch (DstFormat)
{
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return sse_image_copy_bgr24_bgrx32(
pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
default:
break;
}
break;
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
switch (DstFormat)
{
case PIXEL_FORMAT_BGRX32:
case PIXEL_FORMAT_BGRA32:
return sse_image_copy_bgrx32_bgrx32(
pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
default:
break;
}
break;
default:
break;
}
WLog_DBG(TAG, "unsupported format src %s --> dst %s", FreeRDPGetColorFormatName(SrcFormat),
FreeRDPGetColorFormatName(DstFormat));
return -1;
}
static INLINE pstatus_t sse_image_copy_no_overlap_convert(
BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
{
const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
const UINT32 width = nWidth - nWidth % 8;
for (SSIZE_T y = 0; y < nHeight; y++)
{
const BYTE* WINPR_RESTRICT srcLine =
&pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
BYTE* WINPR_RESTRICT dstLine =
&pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
SSIZE_T x = 0;
WINPR_PRAGMA_UNROLL_LOOP
for (; x < width; x++)
{
const UINT32 color = FreeRDPReadColor(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
for (; x < nWidth; x++)
{
const UINT32 color = FreeRDPReadColor(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
}
}
return PRIMITIVES_SUCCESS;
}
static pstatus_t sse_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
UINT32 nWidth, UINT32 nHeight,
const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
const gdiPalette* WINPR_RESTRICT palette, UINT32 flags)
{
const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
SSIZE_T srcVOffset = 0;
SSIZE_T srcVMultiplier = 1;
SSIZE_T dstVOffset = 0;
SSIZE_T dstVMultiplier = 1;
if ((nWidth == 0) || (nHeight == 0))
return PRIMITIVES_SUCCESS;
if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
return -1;
if (!pDstData || !pSrcData)
return -1;
if (nDstStep == 0)
nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
if (nSrcStep == 0)
nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
if (vSrcVFlip)
{
srcVOffset = (nHeight - 1ll) * nSrcStep;
srcVMultiplier = -1;
}
if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
return sse_image_copy_no_overlap_dst_alpha(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier,
srcVOffset, dstVMultiplier, dstVOffset);
else if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier,
srcVOffset, dstVMultiplier, dstVOffset, flags);
else
return sse_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst,
nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset,
dstVMultiplier, dstVOffset);
}
#endif
/* ------------------------------------------------------------------------- */
void primitives_init_copy_sse(primitives_t* prims)
{
#if defined(WITH_SSE2)
if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
{
prims->copy_no_overlap = sse_image_copy_no_overlap;
}
#else
WINPR_UNUSED(prims);
#endif
}

View File

@ -40,6 +40,7 @@
#define WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC \
_Pragma("clang diagnostic ignored \"-Wmismatched-dealloc\"")
#define WINPR_PRAGMA_DIAG_POP _Pragma("clang diagnostic pop")
#define WINPR_PRAGMA_UNROLL_LOOP _Pragma("clang loop vectorize_width(8) interleave_count(8)")
#elif defined(__GNUC__)
#define WINPR_PRAGMA_DIAG_PUSH _Pragma("GCC diagnostic push")
#define WINPR_PRAGMA_DIAG_IGNORED_PEDANTIC _Pragma("GCC diagnostic ignored \"-Wpedantic\"")
@ -59,6 +60,7 @@
#define WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC \
_Pragma("GCC diagnostic ignored \"-Wmismatched-dealloc\"")
#define WINPR_PRAGMA_DIAG_POP _Pragma("GCC diagnostic pop")
#define WINPR_PRAGMA_UNROLL_LOOP _Pragma("GCC unroll 8") _Pragma("GCC ivdep")
#else
#define WINPR_PRAGMA_DIAG_PUSH
#define WINPR_PRAGMA_DIAG_IGNORED_PEDANTIC
@ -70,6 +72,12 @@
#define WINPR_PRAGMA_DIAG_IGNORED_FORMAT_SECURITY
#define WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC
#define WINPR_PRAGMA_DIAG_POP
#define WINPR_PRAGMA_UNROLL_LOOP
#endif
#if defined(MSVC)
#undef WINPR_PRAGMA_UNROLL_LOOP
#define WINPR_PRAGMA_UNROLL_LOOP _Pragma("loop ( ivdep )")
#endif
WINPR_PRAGMA_DIAG_PUSH

View File

@ -227,6 +227,19 @@ extern "C"
#define PF_ARM_64BIT_LOADSTORE_ATOMIC 25
#define PF_ARM_EXTERNAL_CACHE_AVAILABLE 26
#define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE 27
#define PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
#define PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
#define PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
#define PF_AVX_INSTRUCTIONS_AVAILABLE 39
#define PF_AVX2_INSTRUCTIONS_AVAILABLE 40
#define PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
#define PF_ARM_V8_INSTRUCTIONS_AVAILABLE 29
#define PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE 30
#define PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE 31
#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
#define PF_ARM_V4 0x80000001
#define PF_ARM_V5 0x80000002

View File

@ -788,6 +788,7 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
return features & ANDROID_CPU_ARM_FEATURE_NEON;
default:
WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature);
return FALSE;
}
@ -857,8 +858,15 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
ret = TRUE;
break;
case PF_ARM_V8_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE:
default:
WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature);
break;
}
@ -872,7 +880,15 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
ret = TRUE;
#endif
break;
case PF_ARM_V8_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE:
case PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE:
default:
WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature);
break;
}
@ -912,12 +928,29 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
break;
case PF_SSE3_INSTRUCTIONS_AVAILABLE:
if (c & C_BIT_SSE3)
ret = TRUE;
ret = __builtin_cpu_supports("sse3");
break;
case PF_SSSE3_INSTRUCTIONS_AVAILABLE:
ret = __builtin_cpu_supports("ssse3");
break;
case PF_SSE4_1_INSTRUCTIONS_AVAILABLE:
ret = __builtin_cpu_supports("sse4.1");
break;
case PF_SSE4_2_INSTRUCTIONS_AVAILABLE:
ret = __builtin_cpu_supports("sse4.2");
break;
case PF_AVX_INSTRUCTIONS_AVAILABLE:
ret = __builtin_cpu_supports("avx");
break;
case PF_AVX2_INSTRUCTIONS_AVAILABLE:
ret = __builtin_cpu_supports("avx2");
break;
case PF_AVX512F_INSTRUCTIONS_AVAILABLE:
ret = __builtin_cpu_supports("avx512f");
break;
default:
WLog_WARN(TAG, "feature 0x%08" PRIx32 " check not implemented", ProcessorFeature);
break;
}