diff --git a/src/system/libroot/posix/string/arch/x86_64/arch_string.cpp b/src/system/libroot/posix/string/arch/x86_64/arch_string.cpp index b83376c331..33fca22474 100644 --- a/src/system/libroot/posix/string/arch/x86_64/arch_string.cpp +++ b/src/system/libroot/posix/string/arch/x86_64/arch_string.cpp @@ -5,6 +5,9 @@ #include +#include + +#include extern "C" void* @@ -18,14 +21,77 @@ memcpy(void* destination, const void* source, size_t length) } -extern "C" void* -memset(void* destination, int value, size_t length) +static inline void +memset_repstos(uint8_t* destination, uint8_t value, size_t length) { - auto returnValue = destination; __asm__ __volatile__("rep stosb" : "+D" (destination), "+c" (length) : "a" (value) : "memory"); - return returnValue; +} + + +static inline void +memset_sse(uint8_t* destination, uint8_t value, size_t length) +{ + __m128i packed = _mm_set1_epi8(value); + auto end = reinterpret_cast<__m128i*>(destination + length - 16); + auto diff = reinterpret_cast(destination) % 16; + if (diff) { + diff = 16 - diff; + length -= diff; + _mm_storeu_si128(reinterpret_cast<__m128i*>(destination), packed); + } + auto ptr = reinterpret_cast<__m128i*>(destination + diff); + while (length >= 64) { + _mm_store_si128(ptr++, packed); + _mm_store_si128(ptr++, packed); + _mm_store_si128(ptr++, packed); + _mm_store_si128(ptr++, packed); + length -= 64; + } + while (length >= 16) { + _mm_store_si128(ptr++, packed); + length -= 16; + } + _mm_storeu_si128(end, packed); +} + + +static inline void +memset_small(uint8_t* destination, uint8_t value, size_t length) +{ + if (length >= 8) { + auto packed = value * 0x101010101010101ul; + auto ptr = reinterpret_cast(destination); + auto end = reinterpret_cast(destination + length - 8); + while (length >= 8) { + *ptr++ = packed; + length -= 8; + } + *end = packed; + } else { + while (length--) { + *destination++ = value; + } + } +} + + +extern "C" void* +memset(void* ptr, int chr, size_t length) +{ + auto value = static_cast(chr); + auto destination = static_cast(ptr); + if (length < 32) { + memset_small(destination, value, length); + return ptr; + } + if (length < 2048) { + memset_sse(destination, value, length); + return ptr; + } + memset_repstos(destination, value, length); + return ptr; }