lib: put SSE intrinsics behind an ifdef
This commit is contained in:
parent
f1892b70dd
commit
ef149df1e2
|
@ -14,8 +14,10 @@
|
||||||
|
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <kernel/video.h>
|
#include <kernel/video.h>
|
||||||
|
|
||||||
|
@ -517,6 +519,7 @@ _cleanup_sprite:
|
||||||
free(bufferb);
|
free(bufferb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
static __m128i mask00ff;
|
static __m128i mask00ff;
|
||||||
static __m128i mask0080;
|
static __m128i mask0080;
|
||||||
static __m128i mask0101;
|
static __m128i mask0101;
|
||||||
|
@ -526,6 +529,7 @@ __attribute__((constructor)) static void _masks(void) {
|
||||||
mask0080 = _mm_set1_epi16(0x0080);
|
mask0080 = _mm_set1_epi16(0x0080);
|
||||||
mask0101 = _mm_set1_epi16(0x0101);
|
mask0101 = _mm_set1_epi16(0x0101);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
__attribute__((__force_align_arg_pointer__))
|
__attribute__((__force_align_arg_pointer__))
|
||||||
void draw_sprite(gfx_context_t * ctx, sprite_t * sprite, int32_t x, int32_t y) {
|
void draw_sprite(gfx_context_t * ctx, sprite_t * sprite, int32_t x, int32_t y) {
|
||||||
|
@ -547,7 +551,7 @@ void draw_sprite(gfx_context_t * ctx, sprite_t * sprite, int32_t x, int32_t y) {
|
||||||
/* Alpha embedded is the most important step. */
|
/* Alpha embedded is the most important step. */
|
||||||
for (uint16_t _y = 0; _y < sprite->height; ++_y) {
|
for (uint16_t _y = 0; _y < sprite->height; ++_y) {
|
||||||
if (!_is_in_clip(ctx, y + _y)) continue;
|
if (!_is_in_clip(ctx, y + _y)) continue;
|
||||||
#if 0
|
#ifdef NO_SSE
|
||||||
for (uint16_t _x = 0; _x < sprite->width; ++_x) {
|
for (uint16_t _x = 0; _x < sprite->width; ++_x) {
|
||||||
if (x + _x < _left || x + _x > _right || y + _y < _top || y + _y > _bottom)
|
if (x + _x < _left || x + _x > _right || y + _y < _top || y + _y > _bottom)
|
||||||
continue;
|
continue;
|
||||||
|
|
10
lib/jpeg.c
10
lib/jpeg.c
|
@ -38,8 +38,10 @@
|
||||||
|
|
||||||
#include <toaru/graphics.h>
|
#include <toaru/graphics.h>
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
#include <toaru/trace.h>
|
#include <toaru/trace.h>
|
||||||
|
@ -226,6 +228,13 @@ static float cosines[8][8] = {
|
||||||
static float premul[8][8][8][8]= {{{{0}}}};
|
static float premul[8][8][8][8]= {{{{0}}}};
|
||||||
|
|
||||||
static void add_idc(struct idct * self, int n, int m, int coeff) {
|
static void add_idc(struct idct * self, int n, int m, int coeff) {
|
||||||
|
#ifdef NO_SSE
|
||||||
|
for (int y = 0; y < 8; ++y) {
|
||||||
|
for (int x = 0; x < 8; ++x) {
|
||||||
|
self->base[xy_to_lin(x, y)] += premul[n][m][y][x] * coeff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
__m128 c = _mm_set_ps(coeff,coeff,coeff,coeff);
|
__m128 c = _mm_set_ps(coeff,coeff,coeff,coeff);
|
||||||
for (int y = 0; y < 8; ++y) {
|
for (int y = 0; y < 8; ++y) {
|
||||||
__m128 a, b;
|
__m128 a, b;
|
||||||
|
@ -245,6 +254,7 @@ static void add_idc(struct idct * self, int n, int m, int coeff) {
|
||||||
a = _mm_add_ps(a,b);
|
a = _mm_add_ps(a,b);
|
||||||
_mm_store_ps(&self->base[xy_to_lin(4,y)], a);
|
_mm_store_ps(&self->base[xy_to_lin(4,y)], a);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_zigzag(struct idct * self, int zi, int coeff) {
|
static void add_zigzag(struct idct * self, int zi, int coeff) {
|
||||||
|
|
Loading…
Reference in New Issue