162 lines
4.4 KiB
Diff
From 9c3a4420900970e200962e5bb20492e0a4a43fdf Mon Sep 17 00:00:00 2001
|
|
Date: Mon, 30 Oct 2017 17:33:39 +0300
|
|
Subject: [PATCH] Add fbBlt function optimizations for the sparc v9 arch
|
|
|
|
---
|
|
fb/fb.h | 4 +++
|
|
fb/fbblt.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
2 files changed, 110 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/fb/fb.h b/fb/fb.h
|
|
index b64a5d1..8d87082 100644
|
|
--- a/fb/fb.h
|
|
+++ b/fb/fb.h
|
|
@@ -41,6 +41,10 @@
|
|
#include "migc.h"
|
|
#include "picturestr.h"
|
|
|
|
+#ifdef __sparc_v9__
|
|
+#include <vis.h>
|
|
+#endif
|
|
+
|
|
#ifdef FB_ACCESS_WRAPPER
|
|
|
|
#include "wfbrename.h"
|
|
diff --git a/fb/fbblt.c b/fb/fbblt.c
|
|
index 03c4904..45613bf 100644
|
|
--- a/fb/fbblt.c
|
|
+++ b/fb/fbblt.c
|
|
@@ -39,6 +39,98 @@
|
|
} \
|
|
}
|
|
|
|
+#if defined(__sparc_v9__)
|
|
+/*
+ * fbBlt_memcpy_vis - copy n bytes from sorc to dest, returning dest.
+ *
+ * dest: destination buffer; the same pointer is returned to the caller.
+ * sorc: source buffer; the const qualifier is cast away below, but the
+ *       function only reads through the resulting pointer.
+ * n:    number of bytes to copy, as a signed int.
+ *
+ * Structure: prefetch the source range, copy single bytes until dst is
+ * 8-byte aligned, then move vis_d64 words (64 bytes per iteration when src
+ * is 8-byte aligned too, otherwise 32 bytes per iteration through
+ * vis_faligndata()), and finish with a byte-wise tail loop.
+ *
+ * Counting invariant: the head loop's condition post-decrements n even on
+ * the evaluation that ends the loop, so from then on n holds
+ * (bytes remaining - 1). The tail loop's "n-- >= 0" test compensates for
+ * that, so the head and tail loops must be changed together.
+ *
+ * NOTE(review): with a negative n, "n--" is non-zero in the head loop, so
+ * up to 7 bytes would be copied while dst is unaligned - confirm callers
+ * never pass a negative width.
+ * NOTE(review): vis_d64, vis_alignaddr(), vis_faligndata() and
+ * vis_prefetch_write() are presumably provided by <vis.h>; their exact
+ * semantics are not visible here - confirm against that header.
+ */
|
|
+static inline void *
|
|
+fbBlt_memcpy_vis(void *dest, const void *sorc, int n)
|
|
+{
|
|
+ uint8_t *dst, *src, *ps; /* write cursor, read cursor, prefetch cursor */
|
|
+ int32_t i; /* word index into addr_aligned[] (unaligned-source path only) */
|
|
+
|
|
+ vis_d64 *addr_aligned; /* result of vis_alignaddr(src, 0); base for 8-byte loads */
|
|
+ vis_d64 data_hi, data_lo, data; /* data_hi is carried from one iteration to the next */
|
|
+ vis_d64 data_lo2, data2;
|
|
+ vis_d64 data_lo3, data3;
|
|
+ vis_d64 data_lo4, data4;
|
|
+
|
|
+ dst = (uint8_t *) dest;
|
|
+ src = (uint8_t *) sorc;
|
|
+ /* Issue a prefetch (rw argument 0, i.e. for reading) at every 64th byte
+  * of the source range before copying anything. */
|
|
+ for (ps = src; ps < src + n; ps += 64) {
|
|
+ __builtin_prefetch(ps, 0);
|
|
+ }
|
|
+ /* Head: copy byte by byte until dst is 8-byte aligned or n runs out.
+  * "&" binds tighter than "&&", so the test is (n--) && (dst & 7).
+  * The "#pragma loop count" lines are presumably trip-count hints for the
+  * compiler - TODO confirm which compiler honours them. */
|
|
+#pragma loop count(1)
|
|
+ while (n-- && (uintptr_t) dst & 7) {
|
|
+ *dst++ = *src++;
|
|
+ }
|
|
+ /* If src is 8-byte aligned as well, copy whole vis_d64 words directly.
+  * (If n ran out in the head loop, n is -1 and none of the loops below
+  * execute.) */
|
|
+ if (!((uintptr_t) src & 7)) {
|
|
+#pragma loop count(1000)
|
|
+ while (n >= 64) {
|
|
+ __builtin_prefetch(dst + 128, 1, 3); /* write prefetch 128 bytes ahead of dst */
|
|
+ *(vis_d64 *) dst = *(vis_d64 *) src;
|
|
+ *(vis_d64 *) (dst + 8) = *(vis_d64 *) (src + 8);
|
|
+ *(vis_d64 *) (dst + 16) = *(vis_d64 *) (src + 16);
|
|
+ *(vis_d64 *) (dst + 24) = *(vis_d64 *) (src + 24);
|
|
+ *(vis_d64 *) (dst + 32) = *(vis_d64 *) (src + 32);
|
|
+ *(vis_d64 *) (dst + 40) = *(vis_d64 *) (src + 40);
|
|
+ *(vis_d64 *) (dst + 48) = *(vis_d64 *) (src + 48);
|
|
+ *(vis_d64 *) (dst + 56) = *(vis_d64 *) (src + 56);
|
|
+
|
|
+ dst += 64;
|
|
+ src += 64;
|
|
+ n -= 64;
|
|
+
|
|
+ }
|
|
+ /* Remaining whole 8-byte words. Because n is one less than the bytes
+  * remaining, a final exact 8-byte word is left to the tail loop. */
|
|
+#pragma loop count(4)
|
|
+ while (n >= 8) {
|
|
+ *(vis_d64 *) dst = *(vis_d64 *) src;
|
|
+ dst += 8;
|
|
+ src += 8;
|
|
+ n -= 8;
|
|
+ }
|
|
+ }
|
|
+ else { /* src is not 8-byte aligned */
|
|
+ i = 0;
|
|
+ addr_aligned = (vis_d64 *) vis_alignaddr(src, 0); /* presumably src rounded down to 8 bytes, with the
+  * byte offset recorded for vis_faligndata() - confirm */
|
|
+ data_hi = addr_aligned[i]; /* loaded even when the loop below runs zero times */
|
|
+#pragma loop count(1000)
|
|
+ while (n >= 32) {
|
|
+ vis_prefetch_write((vis_d64 *) dst + 16); /* cast binds before "+": 16 vis_d64 elements past dst */
|
|
+ data_lo = addr_aligned[i + 1];
|
|
+ data_lo2 = addr_aligned[i + 2];
|
|
+ data_lo3 = addr_aligned[i + 3];
|
|
+ data_lo4 = addr_aligned[i + 4];
|
|
+ /* Each output word is built from two adjacent loaded source words. */
|
|
+ data = vis_faligndata(data_hi, data_lo);
|
|
+ data2 = vis_faligndata(data_lo, data_lo2);
|
|
+ data3 = vis_faligndata(data_lo2, data_lo3);
|
|
+ data4 = vis_faligndata(data_lo3, data_lo4);
|
|
+
|
|
+ *(vis_d64 *) dst = data;
|
|
+ *(vis_d64 *) (dst + 8) = data2;
|
|
+ *(vis_d64 *) (dst + 16) = data3;
|
|
+ *(vis_d64 *) (dst + 24) = data4;
|
|
+ /* The last word loaded becomes the first word of the next iteration. */
|
|
+ data_hi = data_lo4;
|
|
+
|
|
+ dst += 32;
|
|
+ src += 32; /* keeps src in step for the tail loop; the loads above index by i */
|
|
+ n -= 32;
|
|
+ i += 4;
|
|
+ }
|
|
+ }
|
|
+ /* Tail: n is (bytes remaining - 1) here, so "n-- >= 0" copies exactly
+  * the bytes that are left. */
|
|
+#pragma loop count(16)
|
|
+ while (n-- >= 0) {
|
|
+ *dst++ = *src++;
|
|
+ }
|
|
+ return dest;
|
|
+}
|
|
+#endif // __sparc_v9__
|
|
+
|
|
void
|
|
fbBlt(FbBits * srcLine,
|
|
FbStride srcStride,
|
|
@@ -78,16 +170,29 @@ fbBlt(FbBits * srcLine,
|
|
int i;
|
|
|
|
if (!upsidedown)
|
|
+ #pragma loop count(1000)
|
|
for (i = 0; i < height; i++)
|
|
+#if defined(__sparc_v9__)
|
|
+ fbBlt_memcpy_vis(dst_byte + i * dst_byte_stride,
|
|
+ src_byte + i * src_byte_stride,
|
|
+ width_byte);
|
|
+#else
|
|
MEMCPY_WRAPPED(dst_byte + i * dst_byte_stride,
|
|
src_byte + i * src_byte_stride,
|
|
width_byte);
|
|
+#endif
|
|
else
|
|
+ #pragma loop count(1000)
|
|
for (i = height - 1; i >= 0; i--)
|
|
+#if defined(__sparc_v9__)
|
|
+ fbBlt_memcpy_vis(dst_byte + i * dst_byte_stride,
|
|
+ src_byte + i * src_byte_stride,
|
|
+ width_byte);
|
|
+#else
|
|
MEMCPY_WRAPPED(dst_byte + i * dst_byte_stride,
|
|
src_byte + i * src_byte_stride,
|
|
width_byte);
|
|
-
|
|
+#endif
|
|
return;
|
|
}
|
|
}
|
|
--
|
|
2.16.4
|
|
|