Merge pull request #10284 from akallabeth/neon_detect

[winpr,sysinfo] fix Linux ARM CPU feature detection
This commit is contained in:
akallabeth 2024-06-18 09:06:12 +02:00 committed by GitHub
commit a2a0cc0792
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 49 additions and 7 deletions

View File

@ -165,9 +165,11 @@ void primitives_init_YCoCg_neon(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_NEON");
WINPR_UNUSED(prims);
#endif
}

View File

@ -750,11 +750,13 @@ void primitives_init_YUV_neon(primitives_t* prims)
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->YUV420ToRGB_8u_P3AC4R = neon_YUV420ToRGB_8u_P3AC4R;
prims->YUV444ToRGB_8u_P3AC4R = neon_YUV444ToRGB_8u_P3AC4R;
prims->YUV420CombineToYUV444 = neon_YUV420CombineToYUV444;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_NEON");
WINPR_UNUSED(prims);
#endif
}

View File

@ -355,11 +355,13 @@ void primitives_init_colors_neon(primitives_t* prims)
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "NEON optimizations");
prims->RGBToRGB_16s8u_P3AC4R = neon_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s8u_P3AC4R = neon_yCbCrToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = neon_yCbCrToRGB_16s16s_P3P3;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_NEON");
WINPR_UNUSED(prims);
#endif
}

View File

@ -22,6 +22,9 @@
#include <freerdp/primitives.h>
#include <freerdp/api.h>
#include <freerdp/log.h>
#define PRIM_TAG FREERDP_TAG("primitives")
#ifdef __GNUC__
#define PRIM_ALIGN_128 __attribute__((aligned(16)))
#else

View File

@ -448,9 +448,11 @@ void primitives_init_YCoCg_ssse3(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
prims->YCoCgToRGB_8u_AC4R = ssse3_YCoCgRToRGB_8u_AC4R;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -1505,6 +1505,7 @@ void primitives_init_YUV_ssse3(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
prims->RGBToYUV420_8u_P3AC4R = ssse3_RGBToYUV420;
prims->RGBToAVC444YUV = ssse3_RGBToAVC444YUV;
prims->RGBToAVC444YUVv2 = ssse3_RGBToAVC444YUVv2;
@ -1513,6 +1514,7 @@ void primitives_init_YUV_ssse3(primitives_t* WINPR_RESTRICT prims)
prims->YUV420CombineToYUV444 = ssse3_YUV420CombineToYUV444;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -185,11 +185,13 @@ void primitives_init_add_sse3(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->add_16s = sse3_add_16s;
prims->add_16s_inplace = sse3_add_16s_inplace;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -218,10 +218,12 @@ void primitives_init_alphaComp_sse3(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) /* for LDDQU */
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->alphaComp_argb = sse2_alphaComp_argb;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -48,11 +48,13 @@ void primitives_init_andor_sse3(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->andC_32u = sse3_andC_32u;
prims->orC_32u = sse3_orC_32u;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -1253,6 +1253,7 @@ void primitives_init_colors_sse2(primitives_t* prims)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
prims->RGBToRGB_16s8u_P3AC4R = sse2_RGBToRGB_16s8u_P3AC4R;
prims->yCbCrToRGB_16s16s_P3P3 = sse2_yCbCrToRGB_16s16s_P3P3;
prims->yCbCrToRGB_16s8u_P3AC4R = sse2_yCbCrToRGB_16s8u_P3AC4R;
@ -1260,6 +1261,7 @@ void primitives_init_colors_sse2(primitives_t* prims)
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -274,9 +274,11 @@ void primitives_init_copy_avx2(primitives_t* prims)
#if defined(WITH_SSE2)
if (IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "AVX2 optimizations");
prims->copy_no_overlap = avx2_image_copy_no_overlap;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -273,9 +273,11 @@ void primitives_init_copy_sse41(primitives_t* prims)
#if defined(WITH_SSE2)
if (IsProcessorFeaturePresent(PF_SSE4_1_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE4.1 optimizations");
prims->copy_no_overlap = sse_image_copy_no_overlap;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -226,12 +226,14 @@ void primitives_init_set_sse2(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2 optimizations");
prims->set_8u = sse2_set_8u;
prims->set_32s = sse2_set_32s;
prims->set_32u = sse2_set_32u;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -153,6 +153,7 @@ void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresent(PF_SSE2_INSTRUCTIONS_AVAILABLE) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE2/SSE3 optimizations");
prims->lShiftC_16s_inplace = sse2_lShiftC_16s_inplace;
prims->lShiftC_16s = sse2_lShiftC_16s;
prims->rShiftC_16s = sse2_rShiftC_16s;
@ -161,6 +162,7 @@ void primitives_init_shift_sse3(primitives_t* WINPR_RESTRICT prims)
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -180,10 +180,12 @@ void primitives_init_sign_ssse3(primitives_t* WINPR_RESTRICT prims)
if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
{
WLog_VRB(PRIM_TAG, "SSE3/SSSE3 optimizations");
prims->sign_16s = ssse3_sign_16s;
}
#else
WLog_VRB(PRIM_TAG, "undefined WITH_SSE2");
WINPR_UNUSED(prims);
#endif
}

View File

@ -710,7 +710,7 @@ static void cpuid(unsigned info, unsigned* eax, unsigned* ebx, unsigned* ecx, un
*edx = a[3];
#endif
}
#elif defined(_M_ARM)
#elif defined(_M_ARM) || defined(_M_ARM64)
#if defined(__linux__)
// HWCAP flags from linux kernel - uapi/asm/hwcap.h
#define HWCAP_SWP (1 << 0)
@ -775,6 +775,12 @@ static unsigned GetARMCPUCaps(void)
#ifndef _WIN32
#if defined(_M_ARM) || defined(_M_ARM64)
#ifdef __linux__
#include <sys/auxv.h>
#endif
#endif
BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
{
BOOL ret = FALSE;
@ -792,14 +798,15 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
return FALSE;
}
#elif defined(_M_ARM)
#elif defined(_M_ARM) || defined(_M_ARM64)
#ifdef __linux__
const unsigned caps = GetARMCPUCaps();
const unsigned long caps = getauxval(AT_HWCAP);
switch (ProcessorFeature)
{
case PF_ARM_NEON_INSTRUCTIONS_AVAILABLE:
case PF_ARM_NEON:
if (caps & HWCAP_NEON)
ret = TRUE;
@ -893,7 +900,9 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
}
#endif // __linux__
#elif defined(_M_IX86_AMD64)
#endif
#if defined(_M_IX86_AMD64)
#ifdef __GNUC__
unsigned a = 0;
unsigned b = 0;
@ -955,7 +964,9 @@ BOOL IsProcessorFeaturePresent(DWORD ProcessorFeature)
}
#endif // __GNUC__
#elif defined(_M_E2K)
#endif
#if defined(_M_E2K)
/* compiler flags on e2k arch determine CPU features */
switch (ProcessorFeature)
{
@ -1003,7 +1014,7 @@ DWORD GetTickCountPrecise(void)
BOOL IsProcessorFeaturePresentEx(DWORD ProcessorFeature)
{
BOOL ret = FALSE;
#ifdef _M_ARM
#if defined(_M_ARM) || defined(_M_ARM64)
#ifdef __linux__
unsigned caps;
caps = GetARMCPUCaps();

View File

@ -30,7 +30,7 @@ int TestCPUFeatures(int argc, char* argv[])
TEST_FEATURE_EX(PF_EX_FMA);
TEST_FEATURE_EX(PF_EX_AVX_AES);
TEST_FEATURE_EX(PF_EX_AVX_PCLMULQDQ);
#elif defined(_M_ARM)
#elif defined(_M_ARM) || defined(_M_ARM64)
TEST_FEATURE(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
TEST_FEATURE(PF_ARM_THUMB);
TEST_FEATURE(PF_ARM_VFP_32_REGISTERS_AVAILABLE);