From 0b682c54171d2923b6f3a6c0f4cae05b707ee711 Mon Sep 17 00:00:00 2001 From: Anurag Thakur Date: Fri, 16 Sep 2022 00:45:17 +0530 Subject: [PATCH] Add SIMD --- .vscode/settings.json | 5 +++-- src/dense/ftdense.c | 47 ++++++++++++++++++++++++++++++------------- src/dense/rules.mk | 5 +++-- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 1a2f8af47..7f56b4bc5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,7 @@ "files.associations": { "ftoutln.h": "c", "svprop.h": "c", - "ftdebug.h": "c" + "ftdebug.h": "c", + "tmmintrin.h": "c" } -} \ No newline at end of file +} diff --git a/src/dense/ftdense.c b/src/dense/ftdense.c index dfd0f7417..4b0a20b28 100644 --- a/src/dense/ftdense.c +++ b/src/dense/ftdense.c @@ -11,6 +11,7 @@ #include "ftdense.h" #include <math.h> +#include <tmmintrin.h> #include "ftdenseerrs.h" #define PIXEL_BITS 8 @@ -372,22 +373,40 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target ) unsigned char* dest = target->buffer; unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h; - float value = 0.0f; - while ( dest < dest_end ) - { - value += *source++; - if ( value > 0.0f ) - { - int n = (int)( fabs( value ) * 255.0f + 0.5f ); - if ( n > 255 ) - n = 255; - *dest = (unsigned char)n; - } - else - *dest = 0; - dest++; + + __m128 offset = _mm_setzero_ps(); + __m128i mask = _mm_set1_epi32(0x0c080400); + __m128 sign_mask = _mm_set1_ps(-0.f); + for (int i = 0; i < worker->m_h*worker->m_w; i += 4) { + __m128 x = _mm_load_ps(&source[i]); + x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4))); + x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40)); + x = _mm_add_ps(x, offset); + __m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x) + y = _mm_min_ps(y, _mm_set1_ps(1.0f)); + y = _mm_mul_ps(y, _mm_set1_ps(255.0f)); + __m128i z = _mm_cvtps_epi32(y); + z = _mm_shuffle_epi8(z, mask); + _mm_store_ss((float *)&dest[i], (__m128)z); + offset = 
_mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3)); } + // float value = 0.0f; + // while ( dest < dest_end ) + // { + // value += *source++; + // if ( value > 0.0f ) + // { + // int n = (int)( fabs( value ) * 255.0f + 0.5f ); + // if ( n > 255 ) + // n = 255; + // *dest = (unsigned char)n; + // } + // else + // *dest = 0; + // dest++; + // } + free(worker->m_a); return error; } diff --git a/src/dense/rules.mk b/src/dense/rules.mk index 005116873..38874f28e 100644 --- a/src/dense/rules.mk +++ b/src/dense/rules.mk @@ -22,8 +22,9 @@ DENSE_DIR := $(SRC_DIR)/dense # DENSE_COMPILE := $(CC) $(ANSIFLAGS) \ $I$(subst /,$(COMPILER_SEP),$(DENSE_DIR)) \ - $(INCLUDE_FLAGS) \ - $(FT_CFLAGS) + $(INCLUDE_FLAGS) \ + $(FT_CFLAGS) \ + "-msse4.1" # DENSE driver sources (i.e., C files)