mcst-linux-kernel/patches-2024.06.26/xf86-video-ati-19.1.0/0001-Added-memcpy-supportin...

155 lines
5.0 KiB
Diff

From 4b695b498fd9f7e2212a2b3b51f9edd062052b30 Mon Sep 17 00:00:00 2001
Date: Fri, 21 Sep 2018 16:20:35 +0300
Subject: [PATCH] Added memcpy supporting VIS
---
src/evergreen_exa.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 109 insertions(+), 1 deletion(-)
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 447ed21..cfb93fb 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -40,6 +40,12 @@
#include "radeon_exa_shared.h"
#include "radeon_vbo.h"
+#if defined(__sparc_v9__)
+ #include "vis.h"
+ inline void * EVERGREEN_memcpy_vis(void * dest, const void *sorc, int n) __attribute__((always_inline));
+#endif
+
+
extern int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
extern int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
@@ -1645,6 +1651,100 @@ static void EVERGREENComposite(PixmapPtr pDst,
}
+#ifdef __sparc_v9__
+/* memcpy()-style copy of n bytes from sorc to dest using 64-bit VIS accesses.
+ * Returns dest. Overlapping regions are not handled; n <= 0 copies nothing. */
+inline void * EVERGREEN_memcpy_vis(void * dest, const void *sorc, int n)
+{
+    uint8_t *dst = (uint8_t *)dest;
+    uint8_t *src = (uint8_t *)sorc;
+    if (n <= 0)
+    {
+        return dest;
+    }
+    /* Read-prefetch hint for every 64-byte stride of the source. */
+    for (uint8_t *ps = src; ps < src + n; ps += 64)
+    {
+        __builtin_prefetch(ps, 0);
+    }
+    /* Byte-copy until dst is 8-byte aligned. n drops only when a byte is
+     * really copied, so afterwards it is the exact number of bytes left. */
+#pragma loop count(1)
+    while (n > 0 && ((uintptr_t)dst & 7))
+    {
+        *dst++ = *src++;
+        n--;
+    }
+    if (!((uintptr_t)src & 7))
+    {
+        /* src is 8-byte aligned too: straight 64-bit copies, 64 bytes a round. */
+#pragma loop count(1000)
+        while (n >= 64)
+        {
+            __builtin_prefetch(dst, 1);
+            *(vis_d64*)dst = *(vis_d64*)src;
+            *(vis_d64*)(dst+8) = *(vis_d64*)(src+8);
+            *(vis_d64*)(dst+16) = *(vis_d64*)(src+16);
+            *(vis_d64*)(dst+24) = *(vis_d64*)(src+24);
+            *(vis_d64*)(dst+32) = *(vis_d64*)(src+32);
+            *(vis_d64*)(dst+40) = *(vis_d64*)(src+40);
+            *(vis_d64*)(dst+48) = *(vis_d64*)(src+48);
+            *(vis_d64*)(dst+56) = *(vis_d64*)(src+56);
+            dst += 64;
+            src += 64;
+            n -= 64;
+        }
+#pragma loop count(4)
+        while (n >= 8)
+        {
+            *(vis_d64*)dst = *(vis_d64*)src;
+            dst += 8;
+            src += 8;
+            n -= 8;
+        }
+    }
+    else
+    {
+        /* src is not 8-byte aligned: load aligned doublewords from the address
+         * vis_alignaddr() returns; vis_faligndata() merges neighbouring pairs. */
+        vis_d64 *addr_aligned = (vis_d64*) vis_alignaddr(src, 0);
+        vis_d64 data_hi = addr_aligned[0];
+        vis_d64 data_lo, data_lo2, data_lo3, data_lo4, data, data2, data3, data4;
+        int32_t i = 0;
+#pragma loop count(1000)
+        while (n >= 32)
+        {
+            vis_prefetch_write ((vis_d64*)dst + 16);
+            data_lo = addr_aligned[i + 1];
+            data_lo2 = addr_aligned[i + 2];
+            data_lo3 = addr_aligned[i + 3];
+            data_lo4 = addr_aligned[i + 4];
+            data = vis_faligndata(data_hi, data_lo);
+            data2 = vis_faligndata(data_lo, data_lo2);
+            data3 = vis_faligndata(data_lo2, data_lo3);
+            data4 = vis_faligndata(data_lo3, data_lo4);
+            *(vis_d64*)dst = data;
+            *(vis_d64*)(dst+8) = data2;
+            *(vis_d64*)(dst+16) = data3;
+            *(vis_d64*)(dst+24) = data4;
+            data_hi = data_lo4;
+            dst += 32;
+            src += 32;
+            n -= 32;
+            i += 4;
+        }
+    }
+    /* Tail: bytes the doubleword loops left over (< 8 aligned, < 32 otherwise). */
+#pragma loop count(16)
+    while (n-- > 0)
+    {
+        *dst++ = *src++;
+    }
+    return dest;
+}
+
+#endif
+
static Bool
EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
char *src, int src_pitch)
@@ -1742,7 +1842,11 @@ copy:
if (copy_dst == driver_priv->bo->bo.radeon)
dst += y * copy_pitch + x * bpp / 8;
for (i = 0; i < h; i++) {
- memcpy(dst + i * copy_pitch, src, size);
+#ifdef __sparc_v9__
+ EVERGREEN_memcpy_vis(dst + i * copy_pitch, src, size);
+#else
+ memcpy(dst + i * copy_pitch, src, size);
+#endif
src += src_pitch;
}
radeon_bo_unmap(copy_dst);
@@ -1888,7 +1992,11 @@ copy:
else
size = 0;
while (h--) {
+#if defined __sparc_v9__
+ EVERGREEN_memcpy_vis(dst, copy_src->ptr + size, w);
+#else
memcpy(dst, copy_src->ptr + size, w);
+#endif
size += copy_pitch;
dst += dst_pitch;
}
--
2.16.4