mcst-linux-kernel/patches-2024.06.26/xorg-server-21.1.8/0011-Add-fbBlt-function-opt...

162 lines
4.4 KiB
Diff

From 9c3a4420900970e200962e5bb20492e0a4a43fdf Mon Sep 17 00:00:00 2001
Date: Mon, 30 Oct 2017 17:33:39 +0300
Subject: [PATCH] Add fbBlt function optimizations for the sparc v9 arch
---
fb/fb.h | 4 +++
fb/fbblt.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 110 insertions(+), 1 deletion(-)
diff --git a/fb/fb.h b/fb/fb.h
index b64a5d1..8d87082 100644
--- a/fb/fb.h
+++ b/fb/fb.h
@@ -41,6 +41,10 @@
#include "migc.h"
#include "picturestr.h"
+#ifdef __sparc_v9__
+#include <vis.h>
+#endif
+
#ifdef FB_ACCESS_WRAPPER
#include "wfbrename.h"
diff --git a/fb/fbblt.c b/fb/fbblt.c
index 03c4904..45613bf 100644
--- a/fb/fbblt.c
+++ b/fb/fbblt.c
@@ -39,6 +39,98 @@
} \
}
+#if defined(__sparc_v9__)
+
+/*
+ * Copy n bytes from sorc to dest, front to back, and return dest.
+ * dest is first byte-copied up to an 8-byte boundary; the bulk then moves as
+ * 64-bit words, directly when sorc is then 8-byte aligned too and through
+ * vis_alignaddr()/vis_faligndata() otherwise. A non-positive n copies nothing.
+ */
+static inline void *
+fbBlt_memcpy_vis(void *dest, const void *sorc, int n)
+{
+    uint8_t *dst = (uint8_t *) dest;
+    const uint8_t *src = (const uint8_t *) sorc;
+    const uint8_t *ps;
+
+    if (n <= 0)
+        return dest;
+
+    for (ps = src; ps < src + n; ps += 64) {
+        __builtin_prefetch(ps, 0);
+    }
+
+    /* Align dst. n drops only when a byte is copied, so it stays the exact
+     * remaining count and exact multiples of 8/32/64 use the wide loops. */
+#pragma loop count(1)
+    while (n > 0 && ((uintptr_t) dst & 7)) {
+        *dst++ = *src++;
+        n--;
+    }
+
+    if (!((uintptr_t) src & 7)) {
+        /* src is 8-byte aligned as well: plain 64-bit moves. */
+#pragma loop count(1000)
+        while (n >= 64) {
+            __builtin_prefetch(dst + 128, 1, 3);
+            *(vis_d64 *) dst = *(const vis_d64 *) src;
+            *(vis_d64 *) (dst + 8) = *(const vis_d64 *) (src + 8);
+            *(vis_d64 *) (dst + 16) = *(const vis_d64 *) (src + 16);
+            *(vis_d64 *) (dst + 24) = *(const vis_d64 *) (src + 24);
+            *(vis_d64 *) (dst + 32) = *(const vis_d64 *) (src + 32);
+            *(vis_d64 *) (dst + 40) = *(const vis_d64 *) (src + 40);
+            *(vis_d64 *) (dst + 48) = *(const vis_d64 *) (src + 48);
+            *(vis_d64 *) (dst + 56) = *(const vis_d64 *) (src + 56);
+            dst += 64;
+            src += 64;
+            n -= 64;
+        }
+
+#pragma loop count(4)
+        while (n >= 8) {
+            *(vis_d64 *) dst = *(const vis_d64 *) src;
+            dst += 8;
+            src += 8;
+            n -= 8;
+        }
+    }
+    else {
+        /* Misaligned src: load aligned words and merge each adjacent pair. */
+        vis_d64 *addr_aligned = (vis_d64 *) vis_alignaddr((void *) src, 0);
+        vis_d64 data_hi = addr_aligned[0];
+        vis_d64 data_lo, data_lo2, data_lo3, data_lo4;
+        int32_t i = 0;
+
+#pragma loop count(1000)
+        while (n >= 32) {
+            vis_prefetch_write((vis_d64 *) dst + 16);
+            data_lo = addr_aligned[i + 1];
+            data_lo2 = addr_aligned[i + 2];
+            data_lo3 = addr_aligned[i + 3];
+            data_lo4 = addr_aligned[i + 4];
+
+            *(vis_d64 *) dst = vis_faligndata(data_hi, data_lo);
+            *(vis_d64 *) (dst + 8) = vis_faligndata(data_lo, data_lo2);
+            *(vis_d64 *) (dst + 16) = vis_faligndata(data_lo2, data_lo3);
+            *(vis_d64 *) (dst + 24) = vis_faligndata(data_lo3, data_lo4);
+            data_hi = data_lo4;
+            dst += 32;
+            src += 32;
+            n -= 32;
+            i += 4;
+        }
+    }
+
+    /* Byte-copy whatever the word loops left over. */
+#pragma loop count(16)
+    while (n-- > 0) {
+        *dst++ = *src++;
+    }
+    return dest;
+}
+#endif // __sparc_v9__
+
void
fbBlt(FbBits * srcLine,
FbStride srcStride,
@@ -78,16 +170,29 @@ fbBlt(FbBits * srcLine,
int i;
if (!upsidedown)
+ #pragma loop count(1000)
for (i = 0; i < height; i++)
+#if defined(__sparc_v9__)
+ fbBlt_memcpy_vis(dst_byte + i * dst_byte_stride,
+ src_byte + i * src_byte_stride,
+ width_byte);
+#else
MEMCPY_WRAPPED(dst_byte + i * dst_byte_stride,
src_byte + i * src_byte_stride,
width_byte);
+#endif
else
+ #pragma loop count(1000)
for (i = height - 1; i >= 0; i--)
+#if defined(__sparc_v9__)
+ fbBlt_memcpy_vis(dst_byte + i * dst_byte_stride,
+ src_byte + i * src_byte_stride,
+ width_byte);
+#else
MEMCPY_WRAPPED(dst_byte + i * dst_byte_stride,
src_byte + i * src_byte_stride,
width_byte);
-
+#endif
return;
}
}
--
2.16.4