mcst-linux-kernel/patches-2024.06.26/zfs-2.1.5/0007-perf-fletcher-supersca...

84 lines
2.9 KiB
Diff

Subject: fletcher4 superscalar4: add performance patch that prefetches input to L2 in 8 KiB windows on e2k
Bug: 110247
Tags: perf e2k
diff -rupN a/module/zcommon/zfs_fletcher_superscalar4.c b/module/zcommon/zfs_fletcher_superscalar4.c
--- a/module/zcommon/zfs_fletcher_superscalar4.c 2021-02-25 22:22:29.255056812 +0300
+++ b/module/zcommon/zfs_fletcher_superscalar4.c 2021-02-28 20:26:02.983118452 +0300
@@ -82,6 +82,27 @@ fletcher_4_superscalar4_fini(fletcher_4_
ZIO_SET_CHECKSUM(zcp, A, B, C, D);
}
+#define MAS_CACHE_1_D 1UL /* cache mode value: DCACHE1 disabled only */
+#define MAS_DCACHE_SHIFT 0x05 /* left shift applied to the cache mode to form a MAS value */
+#define MAS_BYPASS_L1_CACHE (MAS_CACHE_1_D << MAS_DCACHE_SHIFT) /* passed as the mas= operand of the loads below */
+/* E2K_PREFETCH_L2_256(addr): issue four byte loads at addr + 0/64/128/192 into %empty (result discarded) using MAS_BYPASS_L1_CACHE. */
+#define E2K_PREFETCH_L2_256(addr) \
+ ({ /* NOTE(review): the sm suffix is presumably the speculative load mode and ,0/,2/,3/,5 the channel numbers - confirm against the e2k ISA */ \
+ int unused; /* dummy output operand; never read afterwards */ \
+ asm ( "ldb,0,sm %1, 0, %%empty, mas=%2\n" \
+ "ldb,2,sm %1, 64, %%empty, mas=%2\n" \
+ "ldb,3,sm %1, 128, %%empty, mas=%2\n" \
+ "ldb,5,sm %1, 192, %%empty, mas=%2" \
+ : "=r" (unused) \
+ : "r" (addr), /* %1: base address of the 256-byte span */ \
+ "i" (MAS_BYPASS_L1_CACHE)); /* %2: immediate MAS value */ \
+ })
+
+#if 0 /* never compiled: usage sketch only; i, len and addr are not defined at this point in the file */
+ for (i = 0; i <= (s64) len - 256; i += 256) /* full 256-unit steps only; a shorter tail is not prefetched */
+ E2K_PREFETCH_L2_256(addr + i);
+#endif
+
static void
fletcher_4_superscalar4_native(fletcher_4_ctx_t *ctx,
const void *buf, uint64_t size)
@@ -110,6 +131,39 @@ fletcher_4_superscalar4_native(fletcher_
c4 = ctx->superscalar[2].v[3];
d4 = ctx->superscalar[3].v[3];
+#if defined(__e2k__)
+ const uint32_t *ip_range_start, *ip_range_end; /* bounds of the window currently being processed */
+ /* e2k path: walk buf in windows of (8*1024)/4 uint32_t words (8 KiB); prefetch each window, then checksum it */
+ for (ip_range_start=buf; ip_range_start < ipend; ip_range_start+=(8*1024)/4) {
+ ip = ip_range_start;
+ ip_range_end = ip_range_start+ (8*1024)/4;
+ if(ip_range_end > ipend) ip_range_end = ipend; /* clamp the final, possibly shorter, window to ipend */
+ /* prefetch the window in steps of 256/4 words, i.e. 256 bytes (4 L2 lines) per macro call */
+ for(;ip < ip_range_end; ip += 256/4)
+ E2K_PREFETCH_L2_256(ip);
+ /* NOTE(review): when a window is not a multiple of 256 bytes the last call loads past ip_range_end - confirm these loads cannot fault */
+ /* rewind to the window start and accumulate the checksum over the same range */
+ ip = ip_range_start;
+ for (; ip < ip_range_end; ip += 4) { /* four words per step, one per accumulator lane */
+ a += ip[0];
+ a2 += ip[1];
+ a3 += ip[2];
+ a4 += ip[3];
+ b += a;
+ b2 += a2;
+ b3 += a3;
+ b4 += a4;
+ c += b;
+ c2 += b2;
+ c3 += b3;
+ c4 += b4;
+ d += c;
+ d2 += c2;
+ d3 += c3;
+ d4 += c4;
+ }
+ }
+#else
for (; ip < ipend; ip += 4) {
a += ip[0];
a2 += ip[1];
@@ -128,6 +182,7 @@ fletcher_4_superscalar4_native(fletcher_
d3 += c3;
d4 += c4;
}
+#endif
ctx->superscalar[0].v[0] = a;
ctx->superscalar[1].v[0] = b;