From 896ea3c44577cb03aee00caaf1ba59528c2985af Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 25 Jun 2024 09:40:09 +0200 Subject: [PATCH 1/5] [primitives,codec] guard SSE code with platform SSE optimized code might be used in multiarch/universal builds. So not only guard with WITH_SSE2 but also with architecture defines from winpr/platform.h --- libfreerdp/codec/sse/nsc_sse2.c | 9 ++++++++- libfreerdp/codec/sse/rfx_sse2.c | 9 ++++++++- libfreerdp/primitives/prim_internal.h | 13 ++++++++++--- libfreerdp/primitives/sse/prim_YCoCg_ssse3.c | 14 ++++++-------- libfreerdp/primitives/sse/prim_YUV_ssse3.c | 4 ++-- libfreerdp/primitives/sse/prim_add_sse3.c | 13 +++++-------- libfreerdp/primitives/sse/prim_alphaComp_sse3.c | 12 +++++------- libfreerdp/primitives/sse/prim_andor_sse3.c | 13 ++++++------- libfreerdp/primitives/sse/prim_colors_sse2.c | 12 +++++------- libfreerdp/primitives/sse/prim_copy_avx2.c | 5 ++--- libfreerdp/primitives/sse/prim_copy_sse4_1.c | 5 ++--- libfreerdp/primitives/sse/prim_set_sse2.c | 14 +++++--------- libfreerdp/primitives/sse/prim_shift_sse3.c | 13 +++++-------- libfreerdp/primitives/sse/prim_sign_ssse3.c | 15 +++++++-------- winpr/include/winpr/platform.h | 14 +++++++++++--- 15 files changed, 87 insertions(+), 78 deletions(-) diff --git a/libfreerdp/codec/sse/nsc_sse2.c b/libfreerdp/codec/sse/nsc_sse2.c index 1c88139f8..327f6ed99 100644 --- a/libfreerdp/codec/sse/nsc_sse2.c +++ b/libfreerdp/codec/sse/nsc_sse2.c @@ -17,6 +17,7 @@ * limitations under the License. */ +#include #include #include @@ -26,6 +27,12 @@ #define TAG FREERDP_TAG("codec.nsc.sse2") #if defined(WITH_SSE2) +#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_IX86_AMD64) +#define SSE2_ENABLED +#endif +#endif + +#if defined(SSE2_ENABLED) #include #include #include @@ -381,7 +388,7 @@ static BOOL nsc_encode_sse2(NSC_CONTEXT* context, const BYTE* data, UINT32 scanl void nsc_init_sse2(NSC_CONTEXT* context) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) return; diff --git a/libfreerdp/codec/sse/rfx_sse2.c b/libfreerdp/codec/sse/rfx_sse2.c index 4c83a3b8a..4762e7aad 100644 --- a/libfreerdp/codec/sse/rfx_sse2.c +++ b/libfreerdp/codec/sse/rfx_sse2.c @@ -18,6 +18,7 @@ * limitations under the License. */ +#include #include #include @@ -27,6 +28,12 @@ #define TAG FREERDP_TAG("codec.rfx.sse2") #if defined(WITH_SSE2) +#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_IX86_AMD64) +#define SSE2_ENABLED +#endif +#endif + +#if defined(SSE2_ENABLED) #include #include #include @@ -485,7 +492,7 @@ static void rfx_dwt_2d_encode_sse2(INT16* WINPR_RESTRICT buffer, INT16* WINPR_RE void rfx_init_sse2(RFX_CONTEXT* context) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) if (!IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) return; diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h index 94556e4b3..70d13192a 100644 --- a/libfreerdp/primitives/prim_internal.h +++ b/libfreerdp/primitives/prim_internal.h @@ -17,6 +17,7 @@ #ifndef FREERDP_LIB_PRIM_INTERNAL_H #define FREERDP_LIB_PRIM_INTERNAL_H +#include #include #include @@ -33,15 +34,21 @@ #endif #endif -#if defined(WITH_SSE2) || defined(WITH_NEON) || defined(WITH_OPENCL) +#if defined(WITH_SSE2) +#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_IX86_AMD64) +#define SSE2_ENABLED +#endif +#endif + +#if defined(SSE2_ENABLED) || defined(WITH_NEON) || defined(WITH_OPENCL) #define HAVE_OPTIMIZED_PRIMITIVES 1 #endif -#if defined(WITH_SSE2) || defined(WITH_NEON) +#if defined(SSE2_ENABLED) || defined(WITH_NEON) #define HAVE_CPU_OPTIMIZED_PRIMITIVES 1 #endif -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) /* Use lddqu for unaligned; load for 16-byte aligned. */ #define LOAD_SI128(_ptr_) \ (((const ULONG_PTR)(_ptr_)&0x0f) ? _mm_lddqu_si128((const __m128i*)(_ptr_)) \ diff --git a/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c b/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c index e87e10eda..fb739f66f 100644 --- a/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c +++ b/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c @@ -25,15 +25,13 @@ #include "prim_YCoCg.h" -#ifdef WITH_SSE2 -#include -#include -#endif - #include "prim_internal.h" #include "prim_templates.h" -#ifdef WITH_SSE2 +#if defined(SSE2_ENABLED) +#include +#include + static primitives_t* generic = NULL; /* ------------------------------------------------------------------------- */ @@ -436,12 +434,12 @@ static pstatus_t ssse3_YCoCgRToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT3 } } -#endif /* WITH_SSE2 */ +#endif /* ------------------------------------------------------------------------- */ void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_YCoCg(prims); diff --git a/libfreerdp/primitives/sse/prim_YUV_ssse3.c b/libfreerdp/primitives/sse/prim_YUV_ssse3.c index 5ca0f6339..063a27a4b 100644 --- a/libfreerdp/primitives/sse/prim_YUV_ssse3.c +++ b/libfreerdp/primitives/sse/prim_YUV_ssse3.c @@ -31,7 +31,7 @@ #include "prim_internal.h" #include "prim_YUV.h" -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) #include #include @@ -1498,7 +1498,7 @@ static pstatus_t ssse3_YUV420CombineToYUV444(avc444_frame_type type, void primitives_init_YUV_ssse3(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_YUV(prims); diff --git a/libfreerdp/primitives/sse/prim_add_sse3.c b/libfreerdp/primitives/sse/prim_add_sse3.c index 9d368b258..e235aee8c 100644 --- a/libfreerdp/primitives/sse/prim_add_sse3.c +++ b/libfreerdp/primitives/sse/prim_add_sse3.c @@ -22,15 +22,13 @@ #include "prim_add.h" -#ifdef WITH_SSE2 -#include -#include -#endif /* WITH_SSE2 */ - #include "prim_internal.h" #include "prim_templates.h" -#ifdef WITH_SSE2 +#if defined(SSE2_ENABLED) +#include +#include + static primitives_t* generic = NULL; /* ------------------------------------------------------------------------- */ @@ -172,13 +170,12 @@ static pstatus_t sse3_add_16s_inplace(INT16* WINPR_RESTRICT pSrcDst1, return PRIMITIVES_SUCCESS; } - #endif /* ------------------------------------------------------------------------- */ void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_add(prims); diff --git a/libfreerdp/primitives/sse/prim_alphaComp_sse3.c b/libfreerdp/primitives/sse/prim_alphaComp_sse3.c index beee2cc4c..61537bcb9 100644 --- a/libfreerdp/primitives/sse/prim_alphaComp_sse3.c +++ b/libfreerdp/primitives/sse/prim_alphaComp_sse3.c @@ -28,15 +28,13 @@ #include "prim_alphaComp.h" -#ifdef WITH_SSE2 -#include -#include -#endif /* WITH_SSE2 */ - #include "prim_internal.h" /* ------------------------------------------------------------------------- */ -#ifdef WITH_SSE2 +#if defined(SSE2_ENABLED) +#include +#include + static primitives_t* generic = NULL; static pstatus_t sse2_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step, @@ -211,7 +209,7 @@ static pstatus_t sse2_alphaComp_argb(const BYTE* WINPR_RESTRICT pSrc1, UINT32 sr /* ------------------------------------------------------------------------- */ void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_alphaComp(prims); diff --git a/libfreerdp/primitives/sse/prim_andor_sse3.c b/libfreerdp/primitives/sse/prim_andor_sse3.c index 155228426..122fdc2d6 100644 --- a/libfreerdp/primitives/sse/prim_andor_sse3.c +++ b/libfreerdp/primitives/sse/prim_andor_sse3.c @@ -21,27 +21,26 @@ #include "prim_andor.h" -#ifdef WITH_SSE2 -#include -#include -#endif /* WITH_SSE2 */ - #include "prim_internal.h" #include "prim_templates.h" -#ifdef WITH_SSE2 +#if defined(SSE2_ENABLED) +#include +#include + static primitives_t* generic = NULL; /* ------------------------------------------------------------------------- */ SSE3_SCD_PRE_ROUTINE(sse3_andC_32u, UINT32, generic->andC_32u, _mm_and_si128, *dptr++ = *sptr++ & val) SSE3_SCD_PRE_ROUTINE(sse3_orC_32u, UINT32, generic->orC_32u, _mm_or_si128, *dptr++ = *sptr++ | val) + #endif /* ------------------------------------------------------------------------- */ void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_andor(prims); diff --git a/libfreerdp/primitives/sse/prim_colors_sse2.c b/libfreerdp/primitives/sse/prim_colors_sse2.c index 19e59f484..32a569588 100644 --- a/libfreerdp/primitives/sse/prim_colors_sse2.c +++ b/libfreerdp/primitives/sse/prim_colors_sse2.c @@ -25,14 +25,12 @@ #include "prim_colors.h" -#ifdef WITH_SSE2 -#include -#endif /* WITH_SSE2 */ - #include "prim_internal.h" #include "prim_templates.h" -#ifdef WITH_SSE2 +#if defined(SSE2_ENABLED) +#include + static primitives_t* generic = NULL; #ifdef __GNUC__ @@ -1243,11 +1241,11 @@ sse2_RGBToRGB_16s8u_P3AC4R(const INT16* const WINPR_RESTRICT pSrc[3], /* 16-bit return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); } } -#endif /* WITH_SSE2 */ +#endif void primitives_init_colors_sse2(primitives_t* prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_colors(prims); diff --git a/libfreerdp/primitives/sse/prim_copy_avx2.c b/libfreerdp/primitives/sse/prim_copy_avx2.c index 2a7478149..7a09f8871 100644 --- a/libfreerdp/primitives/sse/prim_copy_avx2.c +++ b/libfreerdp/primitives/sse/prim_copy_avx2.c @@ -30,7 +30,7 @@ #define TAG FREERDP_TAG("primitives.copy") -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) #include #include @@ -265,13 +265,12 @@ static pstatus_t avx2_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); } - #endif /* ------------------------------------------------------------------------- */ void primitives_init_copy_avx2(primitives_t* prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE)) { WLog_VRB(PRIM_TAG, "AVX2 optimizations"); diff --git a/libfreerdp/primitives/sse/prim_copy_sse4_1.c b/libfreerdp/primitives/sse/prim_copy_sse4_1.c index 853b6461c..2f0092a17 100644 --- a/libfreerdp/primitives/sse/prim_copy_sse4_1.c +++ b/libfreerdp/primitives/sse/prim_copy_sse4_1.c @@ -30,7 +30,7 @@ #define TAG FREERDP_TAG("primitives.copy") -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) #include #include @@ -264,13 +264,12 @@ static pstatus_t sse_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset); } - #endif /* ------------------------------------------------------------------------- */ void primitives_init_copy_sse41(primitives_t* prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE)) { WLog_VRB(PRIM_TAG, "SSE4.1 optimizations"); diff --git a/libfreerdp/primitives/sse/prim_set_sse2.c b/libfreerdp/primitives/sse/prim_set_sse2.c index ef8e3a147..cdc483097 100644 --- a/libfreerdp/primitives/sse/prim_set_sse2.c +++ b/libfreerdp/primitives/sse/prim_set_sse2.c @@ -21,15 +21,13 @@ #include #include -#ifdef WITH_SSE2 -#include -#endif /* WITH_SSE2 */ - #include "prim_internal.h" #include "prim_set.h" /* ========================================================================= */ -#ifdef WITH_SSE2 +#if defined(SSE2_ENABLED) +#include + static primitives_t* generic = NULL; static pstatus_t sse2_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len) @@ -113,10 +111,8 @@ static pstatus_t sse2_set_8u(BYTE val, BYTE* WINPR_RESTRICT pDst, UINT32 len) return PRIMITIVES_SUCCESS; } -#endif /* WITH_SSE2 */ /* ------------------------------------------------------------------------- */ -#ifdef WITH_SSE2 static pstatus_t sse2_set_32u(UINT32 val, UINT32* WINPR_RESTRICT pDst, UINT32 len) { const primitives_t* prim = primitives_get_generic(); @@ -214,12 +210,12 @@ static pstatus_t sse2_set_32s(INT32 val, INT32* WINPR_RESTRICT pDst, UINT32 len) UINT32 uval = *((UINT32*)&val); return sse2_set_32u(uval, (UINT32*)pDst, len); } -#endif /* WITH_SSE2 */ +#endif /* ------------------------------------------------------------------------- */ void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_set(prims); /* Pick tuned versions if possible. */ diff --git a/libfreerdp/primitives/sse/prim_shift_sse3.c b/libfreerdp/primitives/sse/prim_shift_sse3.c index a6c993fa2..342311282 100644 --- a/libfreerdp/primitives/sse/prim_shift_sse3.c +++ b/libfreerdp/primitives/sse/prim_shift_sse3.c @@ -21,15 +21,13 @@ #include "prim_shift.h" -#ifdef WITH_SSE2 -#include -#include -#endif /* WITH_SSE2 */ - #include "prim_internal.h" #include "prim_templates.h" -#ifdef WITH_SSE2 +#if defined(SSE2_ENABLED) +#include +#include + static primitives_t* generic = NULL; /* ------------------------------------------------------------------------- */ @@ -135,7 +133,6 @@ static pstatus_t sse2_lShiftC_16s_inplace(INT16* WINPR_RESTRICT pSrcDst, UINT32 return PRIMITIVES_SUCCESS; } - #endif /* Note: the IPP version will have to call ippLShiftC_16s or ippRShiftC_16s @@ -146,7 +143,7 @@ static pstatus_t sse2_lShiftC_16s_inplace(INT16* WINPR_RESTRICT pSrcDst, UINT32 /* ------------------------------------------------------------------------- */ void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_shift(prims); diff --git a/libfreerdp/primitives/sse/prim_sign_ssse3.c b/libfreerdp/primitives/sse/prim_sign_ssse3.c index 41abcbea9..6b2fde664 100644 --- a/libfreerdp/primitives/sse/prim_sign_ssse3.c +++ b/libfreerdp/primitives/sse/prim_sign_ssse3.c @@ -21,14 +21,12 @@ #include "prim_sign.h" -#if defined(WITH_SSE2) -#include -#include -#endif - #include "prim_internal.h" -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) +#include +#include + static primitives_t* generic = NULL; /* ------------------------------------------------------------------------- */ @@ -166,12 +164,13 @@ static pstatus_t ssse3_sign_16s(const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_R return PRIMITIVES_SUCCESS; } -#endif /* WITH_SSE2 */ + +#endif /* SSE2_ENABLED */ /* ------------------------------------------------------------------------- */ void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_SSE2) +#if defined(SSE2_ENABLED) generic = primitives_get_generic(); primitives_init_sign(prims); /* Pick tuned versions if possible. */ diff --git a/winpr/include/winpr/platform.h b/winpr/include/winpr/platform.h index ca1761fa8..0628270ef 100644 --- a/winpr/include/winpr/platform.h +++ b/winpr/include/winpr/platform.h @@ -94,9 +94,10 @@ WINPR_PRAGMA_DIAG_IGNORED_RESERVED_ID_MACRO /* Intel x86 (_M_IX86) */ -#if defined(i386) || defined(__i386) || defined(__i386__) || defined(__i486__) || \ - defined(__i586__) || defined(__i686__) || defined(__X86__) || defined(_X86_) || \ - defined(__I86__) || defined(__IA32__) || defined(__THW_INTEL__) || defined(__INTEL__) +#if defined(i386) || defined(__i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(__X86__) || defined(_X86_) || \ + defined(__I86__) || defined(__IA32__) || defined(__THW_INTEL__) || defined(__INTEL__) || \ + defined(_M_IX86) #ifndef _M_IX86 #define _M_IX86 1 #endif @@ -111,6 +112,13 @@ WINPR_PRAGMA_DIAG_IGNORED_RESERVED_ID_MACRO #endif #endif +/* Intel ia64 */ +#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) +#ifndef _M_IA64 +#define _M_IA64 1 +#endif +#endif + /* Intel x86 or AMD64 (_M_IX86_AMD64) */ #if defined(_M_IX86) || defined(_M_AMD64) From 8020efcd0bdc0d850a243f64e805569f1a394470 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 25 Jun 2024 10:00:41 +0200 Subject: [PATCH 2/5] [primitives,codec] guard NEON code with platform NEON optimized code might be used in multiarch/universal builds. So not only guard with WITH_NEON but also with architecture defines from winpr/platform.h --- libfreerdp/codec/neon/nsc_neon.c | 9 ++++++++- libfreerdp/codec/neon/rfx_neon.c | 11 +++++++++-- libfreerdp/primitives/neon/prim_YCoCg_neon.c | 10 ++++------ libfreerdp/primitives/neon/prim_YUV_neon.c | 4 ++-- libfreerdp/primitives/neon/prim_colors_neon.c | 12 +++++------- libfreerdp/primitives/prim_internal.h | 10 ++++++++-- 6 files changed, 36 insertions(+), 20 deletions(-) diff --git a/libfreerdp/codec/neon/nsc_neon.c b/libfreerdp/codec/neon/nsc_neon.c index 5d9887b9a..9526752a8 100644 --- a/libfreerdp/codec/neon/nsc_neon.c +++ b/libfreerdp/codec/neon/nsc_neon.c @@ -18,6 +18,7 @@ * limitations under the License. */ +#include #include #include #include @@ -27,9 +28,15 @@ #define TAG FREERDP_TAG("codec.nsc.neon") +#if defined(WITH_NEON) +#if defined(_M_ARM64) || defined(_M_ARM) +#define NEON_ENABLED +#endif +#endif + void nsc_init_neon(NSC_CONTEXT* context) { -#if defined(WITH_NEON) +#if defined(NEON_ENABLED) if (!IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) return; diff --git a/libfreerdp/codec/neon/rfx_neon.c b/libfreerdp/codec/neon/rfx_neon.c index 677b55856..709610b06 100644 --- a/libfreerdp/codec/neon/rfx_neon.c +++ b/libfreerdp/codec/neon/rfx_neon.c @@ -17,6 +17,7 @@ limitations under the License. */ +#include #include #include @@ -26,6 +27,12 @@ #define TAG FREERDP_TAG("codec.rfx.neon") #if defined(WITH_NEON) +#if defined(_M_ARM64) || defined(_M_ARM) +#define NEON_ENABLED +#endif +#endif + +#if defined(NEON_ENABLED) #include #include @@ -520,11 +527,11 @@ static void rfx_dwt_2d_extrapolate_decode_neon(INT16* buffer, INT16* temp) rfx_dwt_2d_decode_extrapolate_block_neon(&buffer[3007], temp, 2); rfx_dwt_2d_decode_extrapolate_block_neon(&buffer[0], temp, 1); } -#endif // WITH_NEON +#endif // NEON_ENABLED void rfx_init_neon(RFX_CONTEXT* context) { -#if defined(WITH_NEON) +#if defined(NEON_ENABLED) if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { DEBUG_RFX("Using NEON optimizations"); diff --git a/libfreerdp/primitives/neon/prim_YCoCg_neon.c b/libfreerdp/primitives/neon/prim_YCoCg_neon.c index 604553447..d33b4e3b6 100644 --- a/libfreerdp/primitives/neon/prim_YCoCg_neon.c +++ b/libfreerdp/primitives/neon/prim_YCoCg_neon.c @@ -23,15 +23,13 @@ #include #include -#if defined(WITH_NEON) -#include -#endif - #include "prim_internal.h" #include "prim_templates.h" #include "prim_YCoCg.h" -#if defined(WITH_NEON) +#if defined(NEON_ENABLED) +#include + static primitives_t* generic = NULL; static pstatus_t neon_YCoCgToRGB_8u_X(const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep, @@ -159,7 +157,7 @@ static pstatus_t neon_YCoCgToRGB_8u_AC4R(const BYTE* WINPR_RESTRICT pSrc, INT32 /* ------------------------------------------------------------------------- */ void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims) { -#if defined(WITH_NEON) +#if defined(NEON_ENABLED) generic = primitives_get_generic(); primitives_init_YCoCg(prims); diff --git a/libfreerdp/primitives/neon/prim_YUV_neon.c b/libfreerdp/primitives/neon/prim_YUV_neon.c index 206c83abf..8c1a2dade 100644 --- a/libfreerdp/primitives/neon/prim_YUV_neon.c +++ b/libfreerdp/primitives/neon/prim_YUV_neon.c @@ -30,7 +30,7 @@ #include "prim_internal.h" #include "prim_YUV.h" -#if defined(WITH_NEON) +#if defined(NEON_ENABLED) #include static primitives_t* generic = NULL; @@ -744,7 +744,7 @@ static pstatus_t neon_YUV420CombineToYUV444(avc444_frame_type type, void primitives_init_YUV_neon(primitives_t* prims) { -#if defined(WITH_NEON) +#if defined(NEON_ENABLED) generic = primitives_get_generic(); primitives_init_YUV(prims); diff --git a/libfreerdp/primitives/neon/prim_colors_neon.c b/libfreerdp/primitives/neon/prim_colors_neon.c index d0401cb6e..cb76823ef 100644 --- a/libfreerdp/primitives/neon/prim_colors_neon.c +++ b/libfreerdp/primitives/neon/prim_colors_neon.c @@ -23,16 +23,14 @@ #include #include -#if defined(WITH_NEON) -#include -#endif - #include "prim_internal.h" #include "prim_templates.h" #include "prim_colors.h" /*---------------------------------------------------------------------------*/ -#ifdef WITH_NEON +#if defined(NEON_ENABLED) +#include + static primitives_t* generic = NULL; static pstatus_t @@ -344,12 +342,12 @@ neon_RGBToRGB_16s8u_P3AC4R(const INT16* const WINPR_RESTRICT pSrc[3], /* 16-bit return generic->RGBToRGB_16s8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi); } } -#endif /* WITH_NEON */ +#endif /* NEON_ENABLED */ /* ------------------------------------------------------------------------- */ void primitives_init_colors_neon(primitives_t* prims) { -#if defined(WITH_NEON) +#if defined(NEON_ENABLED) generic = primitives_get_generic(); primitives_init_colors(prims); diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h index 70d13192a..6bafc59d8 100644 --- a/libfreerdp/primitives/prim_internal.h +++ b/libfreerdp/primitives/prim_internal.h @@ -40,11 +40,17 @@ #endif #endif -#if defined(SSE2_ENABLED) || defined(WITH_NEON) || defined(WITH_OPENCL) +#if defined(WITH_NEON) +#if defined(_M_ARM64) || defined(_M_ARM) +#define NEON_ENABLED +#endif +#endif + +#if defined(SSE2_ENABLED) || defined(NEON_ENABLED) || defined(WITH_OPENCL) #define HAVE_OPTIMIZED_PRIMITIVES 1 #endif -#if defined(SSE2_ENABLED) || defined(WITH_NEON) +#if defined(SSE2_ENABLED) || defined(NEON_ENABLED) #define HAVE_CPU_OPTIMIZED_PRIMITIVES 1 #endif From 4cd5fe48ed7b9b379e5071ff49b5ed74dbb9eadd Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 25 Jun 2024 10:11:20 +0200 Subject: [PATCH 3/5] [build,mac] update dependency versions update bundle-mac-os.sh versions checked out --- scripts/bundle-mac-os.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/bundle-mac-os.sh b/scripts/bundle-mac-os.sh index 8bd387d19..6c0d79459 100755 --- a/scripts/bundle-mac-os.sh +++ b/scripts/bundle-mac-os.sh @@ -143,17 +143,17 @@ if [ ! -d $SRC ]; then mkdir -p $SRC cd $SRC - git clone -b openssl-3.2.0 https://github.com/openssl/openssl.git - git clone --depth 1 -b v1.3 https://github.com/madler/zlib.git - git clone --depth 1 -b uriparser-0.9.7 https://github.com/uriparser/uriparser.git - git clone --depth 1 -b v1.7.16 https://github.com/DaveGamble/cJSON.git - git clone --depth 1 -b release-2.28.1 https://github.com/libsdl-org/SDL.git - git clone --depth 1 --shallow-submodules --recurse-submodules -b release-2.20.2 https://github.com/libsdl-org/SDL_ttf.git - git clone --depth 1 --shallow-submodules --recurse-submodules -b release-2.8.1 https://github.com/libsdl-org/SDL_image.git - git clone --depth 1 --shallow-submodules --recurse-submodules -b v1.0.26 https://github.com/libusb/libusb-cmake.git - git clone --depth 1 -b n6.0 https://github.com/FFmpeg/FFmpeg.git - git clone --depth 1 -b v2.4.0 https://github.com/cisco/openh264.git - git clone --depth 1 -b v1.4 https://gitlab.xiph.org/xiph/opus.git + git clone --depth 1 -b openssl-3.3.1 https://github.com/openssl/openssl.git + git clone --depth 1 -b v1.3.1 https://github.com/madler/zlib.git + git clone --depth 1 -b uriparser-0.9.8 https://github.com/uriparser/uriparser.git + git clone --depth 1 -b v1.7.18 https://github.com/DaveGamble/cJSON.git + git clone --depth 1 -b release-2.30.4 https://github.com/libsdl-org/SDL.git + git clone --depth 1 --shallow-submodules --recurse-submodules -b release-2.22.0 https://github.com/libsdl-org/SDL_ttf.git + git clone --depth 1 --shallow-submodules --recurse-submodules -b release-2.8.2 https://github.com/libsdl-org/SDL_image.git + git clone --depth 1 --shallow-submodules --recurse-submodules -b v1.0.27-1 https://github.com/libusb/libusb-cmake.git + git clone --depth 1 -b n7.0.1 https://github.com/FFmpeg/FFmpeg.git + git clone --depth 1 -b v2.4.1 https://github.com/cisco/openh264.git + git clone --depth 1 -b v1.5.2 https://gitlab.xiph.org/xiph/opus.git git clone --depth 1 -b 2.11.1 https://github.com/knik0/faad2.git git clone --depth 1 -b 1.18.0 https://gitlab.freedesktop.org/cairo/cairo.git git clone --depth 1 -b 1_30 https://github.com/knik0/faac.git From 9b65c1109f72d7d062c72b1d3369d08572db57c3 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 25 Jun 2024 10:11:42 +0200 Subject: [PATCH 4/5] [build,mac] enable sse+neon enable SSE and NEON optimized code for universal build --- scripts/bundle-mac-os.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/bundle-mac-os.sh b/scripts/bundle-mac-os.sh index 6c0d79459..6a39fbee0 100755 --- a/scripts/bundle-mac-os.sh +++ b/scripts/bundle-mac-os.sh @@ -268,8 +268,8 @@ cd $BUILD cmake -GNinja -Bfreerdp -S"$SCRIPT_PATH/.." \ $CMAKE_ARGS \ -DWITH_PLATFORM_SERVER=OFF \ - -DWITH_NEON=OFF \ - -DWITH_SSE=OFF \ + -DWITH_NEON=ON \ + -DWITH_SSE=ON \ -DWITH_FFMPEG=OFF \ -DWITH_SWSCALE=ON \ -DWITH_OPUS=ON \ From 7abf5eb3a71a06c57899884acbd0a7fc4183c8ce Mon Sep 17 00:00:00 2001 From: akallabeth Date: Tue, 25 Jun 2024 11:29:47 +0200 Subject: [PATCH 5/5] [cmake,codec] fix WITH_OPUS detect Opus in libfreerdp directly. The target definition is there and if we run detection in a subdirectory the required symbols are not visible --- libfreerdp/CMakeLists.txt | 30 ++++++++++++++++++++++++++++++ libfreerdp/codec/CMakeLists.txt | 32 -------------------------------- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/libfreerdp/CMakeLists.txt b/libfreerdp/CMakeLists.txt index 53e757da0..cda1ce768 100644 --- a/libfreerdp/CMakeLists.txt +++ b/libfreerdp/CMakeLists.txt @@ -96,6 +96,36 @@ macro (freerdp_compile_options_add) set (LIBFREERDP_COMPILE_OPTIONS ${LIBFREERDP_COMPILE_OPTIONS} PARENT_SCOPE) endmacro() +set(OPUS_DEFAULT OFF) +if (NOT WITH_DSP_FFMPEG) + find_package(Opus) + if (Opus_FOUND) + set(OPUS_DEFAULT ${OPUS_FOUND}) + else() + find_package(PkgConfig) + if (PkgConfig_FOUND) + pkg_check_modules(OPUS opus) + set(OPUS_DEFAULT ${OPUS_FOUND}) + endif() + endif() + + message("Using OPUS: ${OPUS_DEFAULT}") +endif() + +option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT}) +if (WITH_OPUS) + find_package(Opus) + if (Opus_FOUND) + freerdp_library_add(${OPUS_LIBRARIES}) + else() + find_package(PkgConfig REQUIRED) + pkg_check_modules(OPUS REQUIRED opus) + freerdp_library_add(${OPUS_LIBRARIES}) + freerdp_include_directory_add(${OPUS_INCLUDE_DIRS}) + link_directories(${OPUS_LIBRARY_DIRS}) + endif() +endif() + if (WITH_SWSCALE) find_package(SWScale REQUIRED) endif(WITH_SWSCALE) diff --git a/libfreerdp/codec/CMakeLists.txt b/libfreerdp/codec/CMakeLists.txt index 811009525..fcdf43464 100644 --- a/libfreerdp/codec/CMakeLists.txt +++ b/libfreerdp/codec/CMakeLists.txt @@ -107,38 +107,6 @@ if(LAME_FOUND) include_directories(${LAME_INCLUDE_DIRS}) endif() -set(OPUS_DEFAULT OFF) -if (NOT WITH_DSP_FFMPEG) - find_package(Opus) - if (Opus_FOUND) - set(OPUS_DEFAULT ${OPUS_FOUND}) - else() - find_package(PkgConfig) - if (PkgConfig_FOUND) - pkg_check_modules(OPUS opus) - set(OPUS_DEFAULT ${OPUS_FOUND}) - endif() - endif() - - message("Using OPUS: ${OPUS_DEFAULT}") -endif() - -option(WITH_OPUS "compile with opus codec support" ${OPUS_DEFAULT}) -if (WITH_OPUS) - find_package(Opus) - if (Opus_FOUND) - list(APPEND CODEC_LIBS ${OPUS_LIBRARIES}) - else() - find_package(PkgConfig REQUIRED) - pkg_check_modules(OPUS REQUIRED opus) - if(OPUS_FOUND) - list(APPEND CODEC_LIBS ${OPUS_LIBRARIES}) - include_directories(${OPUS_INCLUDE_DIRS}) - link_directories(${OPUS_LIBRARY_DIRS}) - endif() - endif() -endif() - if(FAAD2_FOUND) list(APPEND CODEC_LIBS ${FAAD2_LIBRARIES}) include_directories(${FAAD2_INCLUDE_DIRS})