From fe2053c26da25c5700b819bc608a6f6f6a2163d0 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Mon, 17 Jun 2024 20:47:24 +0200 Subject: [PATCH 1/2] [winpr,sysinfo] fix linux arm cpu feature detect --- winpr/libwinpr/sysinfo/sysinfo.c | 23 ++++++++++++++----- winpr/libwinpr/sysinfo/test/TestCPUFeatures.c | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/winpr/libwinpr/sysinfo/sysinfo.c b/winpr/libwinpr/sysinfo/sysinfo.c index 89bfb3798..659f79d60 100644 --- a/winpr/libwinpr/sysinfo/sysinfo.c +++ b/winpr/libwinpr/sysinfo/sysinfo.c @@ -710,7 +710,7 @@ static void cpuid(unsigned info, unsigned* eax, unsigned* ebx, unsigned* ecx, un *edx = a[3]; #endif } -#elif defined(_M_ARM) +#elif defined(_M_ARM) || defined(_M_ARM64) #if defined(__linux__) // HWCAP flags from linux kernel - uapi/asm/hwcap.h #define HWCAP_SWP (1 << 0) @@ -775,6 +775,12 @@ static unsigned GetARMCPUCaps(void) #ifndef _WIN32 +#if defined(_M_ARM) || defined(_M_ARM64) +#ifdef __linux__ +#include <sys/auxv.h> +#endif +#endif + BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) { BOOL ret = FALSE; @@ -792,14 +798,15 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) return FALSE; } -#elif defined(_M_ARM) +#elif defined(_M_ARM) || defined(_M_ARM64) #ifdef __linux__ - const unsigned caps = GetARMCPUCaps(); + const unsigned long caps = getauxval(AT_HWCAP); switch (ProcessorFeature) { case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: case PF_ARM_NEON: + if (caps & HWCAP_NEON) ret = TRUE; @@ -893,7 +900,9 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) } #endif // __linux__ -#elif defined(_M_IX86_AMD64) +#endif + +#if defined(_M_IX86_AMD64) #ifdef __GNUC__ unsigned a = 0; unsigned b = 0; @@ -955,7 +964,9 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature) } #endif // __GNUC__ -#elif defined(_M_E2K) +#endif + +#if defined(_M_E2K) /* compiler flags on e2k arch determine CPU features */ switch (ProcessorFeature) { @@ -1003,7 +1014,7 @@ DWORD GetTickCountPrecise(void) BOOL
IsProcessorFeaturePresentEx(DWORD ProcessorFeature) { BOOL ret = FALSE; -#ifdef _M_ARM +#if defined(_M_ARM) || defined(_M_ARM64) #ifdef __linux__ unsigned caps; caps = GetARMCPUCaps(); diff --git a/winpr/libwinpr/sysinfo/test/TestCPUFeatures.c b/winpr/libwinpr/sysinfo/test/TestCPUFeatures.c index 8a596dd32..75903c5e7 100644 --- a/winpr/libwinpr/sysinfo/test/TestCPUFeatures.c +++ b/winpr/libwinpr/sysinfo/test/TestCPUFeatures.c @@ -30,7 +30,7 @@ int TestCPUFeatures(int argc, char* argv[]) TEST_FEATURE_EX(PF_EX_FMA); TEST_FEATURE_EX(PF_EX_AVX_AES); TEST_FEATURE_EX(PF_EX_AVX_PCLMULQDQ); -#elif defined(_M_ARM) +#elif defined(_M_ARM) || defined(_M_ARM64) TEST_FEATURE(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); TEST_FEATURE(PF_ARM_THUMB); TEST_FEATURE(PF_ARM_VFP_32_REGISTERS_AVAILABLE); From 889dff4f55722a2d6e38824c2850e56ef2ab23e2 Mon Sep 17 00:00:00 2001 From: akallabeth Date: Mon, 17 Jun 2024 20:42:57 +0200 Subject: [PATCH 2/2] [primitives] log initialization --- libfreerdp/primitives/neon/prim_YCoCg_neon.c | 2 ++ libfreerdp/primitives/neon/prim_YUV_neon.c | 2 ++ libfreerdp/primitives/neon/prim_colors_neon.c | 2 ++ libfreerdp/primitives/prim_internal.h | 3 +++ libfreerdp/primitives/sse/prim_YCoCg_ssse3.c | 2 ++ libfreerdp/primitives/sse/prim_YUV_ssse3.c | 2 ++ libfreerdp/primitives/sse/prim_add_sse3.c | 2 ++ libfreerdp/primitives/sse/prim_alphaComp_sse3.c | 2 ++ libfreerdp/primitives/sse/prim_andor_sse3.c | 2 ++ libfreerdp/primitives/sse/prim_colors_sse2.c | 2 ++ libfreerdp/primitives/sse/prim_copy_avx2.c | 2 ++ libfreerdp/primitives/sse/prim_copy_sse4_1.c | 2 ++ libfreerdp/primitives/sse/prim_set_sse2.c | 2 ++ libfreerdp/primitives/sse/prim_shift_sse3.c | 2 ++ libfreerdp/primitives/sse/prim_sign_ssse3.c | 2 ++ 15 files changed, 31 insertions(+) diff --git a/libfreerdp/primitives/neon/prim_YCoCg_neon.c b/libfreerdp/primitives/neon/prim_YCoCg_neon.c index ff1ff002d..604553447 100644 --- a/libfreerdp/primitives/neon/prim_YCoCg_neon.c +++ 
b/libfreerdp/primitives/neon/prim_YCoCg_neon.c @@ -165,9 +165,11 @@ void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "NEON optimizations"); prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_NEON"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/neon/prim_YUV_neon.c b/libfreerdp/primitives/neon/prim_YUV_neon.c index fd1cafac4..206c83abf 100644 --- a/libfreerdp/primitives/neon/prim_YUV_neon.c +++ b/libfreerdp/primitives/neon/prim_YUV_neon.c @@ -750,11 +750,13 @@ void primitives_init_YUV_neon(primitives_t* prims) if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "NEON optimizations"); prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R; prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R; prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_NEON"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/neon/prim_colors_neon.c b/libfreerdp/primitives/neon/prim_colors_neon.c index cf61c4055..d0401cb6e 100644 --- a/libfreerdp/primitives/neon/prim_colors_neon.c +++ b/libfreerdp/primitives/neon/prim_colors_neon.c @@ -355,11 +355,13 @@ void primitives_init_colors_neon(primitives_t* prims) if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "NEON optimizations"); prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_NEON"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/prim_internal.h b/libfreerdp/primitives/prim_internal.h index b3a2a5cf0..94556e4b3 100644 --- a/libfreerdp/primitives/prim_internal.h +++ b/libfreerdp/primitives/prim_internal.h @@ -22,6 +22,9 @@ 
#include #include +#include +#define PRIM_TAG FREERDP_TAG("primitives") + #ifdef __GNUC__ #define PRIM_ALIGN_128 __attribute__((aligned(16))) #else diff --git a/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c b/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c index 8408d50ef..e87e10eda 100644 --- a/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c +++ b/libfreerdp/primitives/sse/prim_YCoCg_ssse3.c @@ -448,9 +448,11 @@ void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_YUV_ssse3.c b/libfreerdp/primitives/sse/prim_YUV_ssse3.c index c204b74e5..5ca0f6339 100644 --- a/libfreerdp/primitives/sse/prim_YUV_ssse3.c +++ b/libfreerdp/primitives/sse/prim_YUV_ssse3.c @@ -1505,6 +1505,7 @@ void primitives_init_YUV_ssse3(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); prims->RGBToYUV420_8u_P3AC4R = ssse3_RGBToYUV420; prims->RGBToAVC444YUV = ssse3_RGBToAVC444YUV; prims->RGBToAVC444YUVv2 = ssse3_RGBToAVC444YUVv2; @@ -1513,6 +1514,7 @@ void primitives_init_YUV_ssse3(primitives_t* WINPR_RESTRICT prims) prims->YUV420CombineToYUV444 = ssse3_YUV420CombineToYUV444; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_add_sse3.c b/libfreerdp/primitives/sse/prim_add_sse3.c index 0a97440a2..9d368b258 100644 --- a/libfreerdp/primitives/sse/prim_add_sse3.c +++ b/libfreerdp/primitives/sse/prim_add_sse3.c @@ -185,11 +185,13 @@ void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims) if 
(IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); prims->add_16s = sse3_add_16s; prims->add_16s_inplace = sse3_add_16s_inplace; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_alphaComp_sse3.c b/libfreerdp/primitives/sse/prim_alphaComp_sse3.c index 392f9d31b..beee2cc4c 100644 --- a/libfreerdp/primitives/sse/prim_alphaComp_sse3.c +++ b/libfreerdp/primitives/sse/prim_alphaComp_sse3.c @@ -218,10 +218,12 @@ void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */ { + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); prims->alphaComp_argb = sse2_alphaComp_argb; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_andor_sse3.c b/libfreerdp/primitives/sse/prim_andor_sse3.c index 57809b2ad..155228426 100644 --- a/libfreerdp/primitives/sse/prim_andor_sse3.c +++ b/libfreerdp/primitives/sse/prim_andor_sse3.c @@ -48,11 +48,13 @@ void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); prims->andC_32u = sse3_andC_32u; prims->orC_32u = sse3_orC_32u; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_colors_sse2.c b/libfreerdp/primitives/sse/prim_colors_sse2.c index 40eea9ddc..19e59f484 100644 --- a/libfreerdp/primitives/sse/prim_colors_sse2.c +++ b/libfreerdp/primitives/sse/prim_colors_sse2.c @@ -1253,6 +1253,7 @@ void primitives_init_colors_sse2(primitives_t* prims) if 
(IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "SSE2 optimizations"); prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R; prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3; prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R; @@ -1260,6 +1261,7 @@ void primitives_init_colors_sse2(primitives_t* prims) } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_copy_avx2.c b/libfreerdp/primitives/sse/prim_copy_avx2.c index ec8e8f5ea..2a7478149 100644 --- a/libfreerdp/primitives/sse/prim_copy_avx2.c +++ b/libfreerdp/primitives/sse/prim_copy_avx2.c @@ -274,9 +274,11 @@ void primitives_init_copy_avx2(primitives_t* prims) #if defined(WITH_SSE2) if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "AVX2 optimizations"); prims->copy_no_overlap = avx2_image_copy_no_overlap; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_copy_sse4_1.c b/libfreerdp/primitives/sse/prim_copy_sse4_1.c index d073928a3..853b6461c 100644 --- a/libfreerdp/primitives/sse/prim_copy_sse4_1.c +++ b/libfreerdp/primitives/sse/prim_copy_sse4_1.c @@ -273,9 +273,11 @@ void primitives_init_copy_sse41(primitives_t* prims) #if defined(WITH_SSE2) if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "SSE4.1 optimizations"); prims->copy_no_overlap = sse_image_copy_no_overlap; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_set_sse2.c b/libfreerdp/primitives/sse/prim_set_sse2.c index b4c1949f0..ef8e3a147 100644 --- a/libfreerdp/primitives/sse/prim_set_sse2.c +++ b/libfreerdp/primitives/sse/prim_set_sse2.c @@ -226,12 +226,14 @@ void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE)) { + 
WLog_VRB(PRIM_TAG, "SSE2 optimizations"); prims->set_8u = sse2_set_8u; prims->set_32s = sse2_set_32s; prims->set_32u = sse2_set_32u; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_shift_sse3.c b/libfreerdp/primitives/sse/prim_shift_sse3.c index ea50eb6a4..a6c993fa2 100644 --- a/libfreerdp/primitives/sse/prim_shift_sse3.c +++ b/libfreerdp/primitives/sse/prim_shift_sse3.c @@ -153,6 +153,7 @@ void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations"); prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace; prims->lShiftC_16s = sse2_lShiftC_16s; prims->rShiftC_16s = sse2_rShiftC_16s; @@ -161,6 +162,7 @@ void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims) } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif } diff --git a/libfreerdp/primitives/sse/prim_sign_ssse3.c b/libfreerdp/primitives/sse/prim_sign_ssse3.c index c430c827d..41abcbea9 100644 --- a/libfreerdp/primitives/sse/prim_sign_ssse3.c +++ b/libfreerdp/primitives/sse/prim_sign_ssse3.c @@ -180,10 +180,12 @@ void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims) if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) && IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) { + WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations"); prims->sign_16s = ssse3_sign_16s; } #else + WLog_VRB(PRIM_TAG, "undefined WITH_SSE2"); WINPR_UNUSED(prims); #endif }