155 lines
5.0 KiB
Diff
155 lines
5.0 KiB
Diff
From 4b695b498fd9f7e2212a2b3b51f9edd062052b30 Mon Sep 17 00:00:00 2001
|
|
Date: Fri, 21 Sep 2018 16:20:35 +0300
|
|
Subject: [PATCH] Added memcpy supporting VIS
|
|
|
|
---
|
|
src/evergreen_exa.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 109 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
|
|
index 447ed21..cfb93fb 100644
|
|
--- a/src/evergreen_exa.c
|
|
+++ b/src/evergreen_exa.c
|
|
@@ -40,6 +40,12 @@
|
|
#include "radeon_exa_shared.h"
|
|
#include "radeon_vbo.h"
|
|
|
|
+#if defined(__sparc_v9__)
|
|
+ #include "vis.h"
|
|
+ inline void * EVERGREEN_memcpy_vis(void * dest, const void *sorc, int n) __attribute__((always_inline));
|
|
+#endif
|
|
+
|
|
+
|
|
extern int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
|
|
extern int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
|
|
|
|
@@ -1645,6 +1651,100 @@ static void EVERGREENComposite(PixmapPtr pDst,
|
|
|
|
}
|
|
|
|
+#ifdef __sparc_v9__
|
|
+inline void * EVERGREEN_memcpy_vis(void * dest, const void *sorc, int n) /* Copy n bytes from sorc to dest, moving the bulk as 8-byte vis_d64 units; returns dest. n is not checked for negative values. */
|
|
+{
|
|
+ uint8_t *dst, *src; /* byte cursors that advance through dest and sorc */
|
|
+ int32_t i; /* doubleword index into addr_aligned (misaligned-source path only) */
|
|
+
|
|
+ vis_d64 *addr_aligned; /* result of vis_alignaddr(src, 0); indexed as an array of doublewords */
|
|
+ vis_d64 data_hi, data_lo, data; /* data_hi/data_lo*: loaded source doublewords; data*: vis_faligndata results stored to dst */
|
|
+ vis_d64 data_lo2, data2;
|
|
+ vis_d64 data_lo3, data3;
|
|
+ vis_d64 data_lo4, data4;
|
|
+
|
|
+ dst = (uint8_t *)dest;
|
|
+ src = (uint8_t *)sorc; /* the cast drops const; src is only read through below */
|
|
+
|
|
+ for(uint8_t *ps = src; ps < src + n; ps +=64) /* walk the whole source range in 64-byte steps before copying anything */
|
|
+ {
|
|
+ __builtin_prefetch(ps,0); /* second argument 0 requests a prefetch for reading */
|
|
+ }
|
|
+#pragma loop count(1)
|
|
+ while (n-- && (uintptr_t)dst & 7) /* head: byte copy until dst is 8-byte aligned or n runs out; n-- is evaluated first, so on exit n == (bytes remaining - 1) */
|
|
+ {
|
|
+ *dst++ = *src++;
|
|
+ }
|
|
+ if(!((uintptr_t)src & 7)) /* src is 8-byte aligned as well: plain doubleword loads and stores */
|
|
+ {
|
|
+#pragma loop count(1000)
|
|
+ while (n >=64) /* n is (remaining - 1), so this runs only while more than 64 bytes remain */
|
|
+ {
|
|
+ __builtin_prefetch(dst, 1); /* second argument 1 requests a prefetch for writing */
|
|
+ *(vis_d64*)dst = *(vis_d64*)src; /* eight 8-byte copies = 64 bytes per iteration */
|
|
+ *(vis_d64*)(dst+8) = *(vis_d64*)(src+8);
|
|
+ *(vis_d64*)(dst+16) = *(vis_d64*)(src+16);
|
|
+ *(vis_d64*)(dst+24) = *(vis_d64*)(src+24);
|
|
+ *(vis_d64*)(dst+32) = *(vis_d64*)(src+32);
|
|
+ *(vis_d64*)(dst+40) = *(vis_d64*)(src+40);
|
|
+ *(vis_d64*)(dst+48) = *(vis_d64*)(src+48);
|
|
+ *(vis_d64*)(dst+56) = *(vis_d64*)(src+56);
|
|
+
|
|
+ dst += 64;
|
|
+ src += 64;
|
|
+ n -= 64;
|
|
+ }
|
|
+#pragma loop count(4)
|
|
+ while (n >= 8) /* one doubleword at a time; whatever is left goes to the byte tail below */
|
|
+ {
|
|
+ *(vis_d64*)dst = *(vis_d64*)src;
|
|
+ dst += 8;
|
|
+ src += 8;
|
|
+ n -= 8;
|
|
+ }
|
|
+ }
|
|
+ else /* src is not 8-byte aligned: build each output doubleword from two neighbouring source doublewords */
|
|
+ {
|
|
+ i = 0;
|
|
+ addr_aligned = (vis_d64*) vis_alignaddr(src, 0); /* presumably src rounded down to 8 bytes, with the offset latched for vis_faligndata - confirm against vis.h */
|
|
+ data_hi = addr_aligned[i]; /* first source doubleword; loaded even if the loop below never runs */
|
|
+ #pragma loop count(1000)
|
|
+ while (n >= 32) /* 32 bytes per iteration; n is (remaining - 1), so more than 32 bytes must remain */
|
|
+ {
|
|
+ vis_prefetch_write ((vis_d64*)dst + 16); /* address is 16 vis_d64 elements past dst */
|
|
+ data_lo = addr_aligned[i + 1]; /* load the next four source doublewords */
|
|
+ data_lo2 = addr_aligned[i + 2];
|
|
+ data_lo3 = addr_aligned[i + 3];
|
|
+ data_lo4 = addr_aligned[i + 4]; /* NOTE(review): may cover a few bytes past src+n inside the last doubleword - confirm acceptable */
|
|
+
|
|
+ data = vis_faligndata(data_hi, data_lo); /* each result is built from a consecutive pair of loaded doublewords */
|
|
+ data2 = vis_faligndata(data_lo, data_lo2);
|
|
+ data3 = vis_faligndata(data_lo2, data_lo3);
|
|
+ data4 = vis_faligndata(data_lo3, data_lo4);
|
|
+
|
|
+ *(vis_d64*)dst = data; /* four 8-byte stores to dst */
|
|
+ *(vis_d64*)(dst+8) = data2;
|
|
+ *(vis_d64*)(dst+16) = data3;
|
|
+ *(vis_d64*)(dst+24) = data4;
|
|
+
|
|
+ data_hi = data_lo4; /* the last doubleword loaded becomes the first input of the next iteration */
|
|
+
|
|
+ dst += 32;
|
|
+ src += 32; /* kept in step so the byte tail below resumes at the right source byte */
|
|
+ n -= 32;
|
|
+ i +=4; /* four doublewords consumed */
|
|
+ }
|
|
+ }
|
|
+#pragma loop count(16)
|
|
+ while (n-- >= 0) /* tail: n is (remaining - 1), so ">= 0" copies exactly the remaining bytes */
|
|
+ {
|
|
+ *dst++ = *src++;
|
|
+ }
|
|
+ return dest; /* the original destination pointer, as memcpy would return */
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
static Bool
|
|
EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
|
|
char *src, int src_pitch)
|
|
@@ -1742,7 +1842,11 @@ copy:
|
|
if (copy_dst == driver_priv->bo->bo.radeon)
|
|
dst += y * copy_pitch + x * bpp / 8;
|
|
for (i = 0; i < h; i++) {
|
|
- memcpy(dst + i * copy_pitch, src, size);
|
|
+#ifdef __sparc_v9__
|
|
+ EVERGREEN_memcpy_vis(dst + i * copy_pitch, src, size);
|
|
+#else
|
|
+ memcpy(dst + i * copy_pitch, src, size);
|
|
+#endif
|
|
src += src_pitch;
|
|
}
|
|
radeon_bo_unmap(copy_dst);
|
|
@@ -1888,7 +1992,11 @@ copy:
|
|
else
|
|
size = 0;
|
|
while (h--) {
|
|
+#if defined __sparc_v9__
|
|
+ EVERGREEN_memcpy_vis(dst, copy_src->ptr + size, w);
|
|
+#else
|
|
memcpy(dst, copy_src->ptr + size, w);
|
|
+#endif
|
|
size += copy_pitch;
|
|
dst += dst_pitch;
|
|
}
|
|
--
|
|
2.16.4
|
|
|