84 lines
2.9 KiB
Diff
84 lines
2.9 KiB
Diff
Subject: Add fletcher4 performance patch for e2k (prefetch to L2 in 8 KiB chunks)
|
|
Bug: 110247
|
|
Tags: perf e2k
|
|
|
|
diff -rupN a/module/zcommon/zfs_fletcher_superscalar4.c b/module/zcommon/zfs_fletcher_superscalar4.c
|
|
--- a/module/zcommon/zfs_fletcher_superscalar4.c 2021-02-25 22:22:29.255056812 +0300
|
|
+++ b/module/zcommon/zfs_fletcher_superscalar4.c 2021-02-28 20:26:02.983118452 +0300
|
|
@@ -82,6 +82,27 @@ fletcher_4_superscalar4_fini(fletcher_4_
|
|
ZIO_SET_CHECKSUM(zcp, A, B, C, D);
|
|
}
|
|
|
|
+#define MAS_CACHE_1_D 1UL /* cache-field value: only DCACHE1 disabled; combined below into MAS_BYPASS_L1_CACHE */
|
|
+#define MAS_DCACHE_SHIFT 0x05 /* left-shift applied to the cache-field value to form the mas operand */
|
|
+#define MAS_BYPASS_L1_CACHE (MAS_CACHE_1_D << MAS_DCACHE_SHIFT)
|
|
+/* E2K_PREFETCH_L2_256(addr): issues four ldb loads at addr+0, +64, +128 and +192 into %empty with mas=MAS_BYPASS_L1_CACHE (per the name, a 256-byte prefetch to L2). NOTE(review): the asm is not volatile and its only output is unused - confirm the compiler does not discard it. */
|
|
+#define E2K_PREFETCH_L2_256(addr) \
|
|
+ ({ \
|
|
+ int unused; \
|
|
+ asm ( "ldb,0,sm %1, 0, %%empty, mas=%2\n" \
|
|
+ "ldb,2,sm %1, 64, %%empty, mas=%2\n" \
|
|
+ "ldb,3,sm %1, 128, %%empty, mas=%2\n" \
|
|
+ "ldb,5,sm %1, 192, %%empty, mas=%2" \
|
|
+ : "=r" (unused) \
|
|
+ : "r" (addr), \
|
|
+ "i" (MAS_BYPASS_L1_CACHE)); \
|
|
+ })
|
|
+/* Disabled usage sketch: walks a buffer in 256-byte steps and prefetches each step; i, len and addr are not defined at this scope, so it must stay compiled out. */
|
|
+#if 0
|
|
+ for (i = 0; i <= (s64) len - 256; i += 256)
|
|
+ E2K_PREFETCH_L2_256(addr + i);
|
|
+#endif
|
|
+
|
|
static void
|
|
fletcher_4_superscalar4_native(fletcher_4_ctx_t *ctx,
|
|
const void *buf, uint64_t size)
|
|
@@ -110,6 +131,39 @@ fletcher_4_superscalar4_native(fletcher_
|
|
c4 = ctx->superscalar[2].v[3];
|
|
d4 = ctx->superscalar[3].v[3];
|
|
|
|
+#if defined(__e2k__)
|
|
+ const uint32_t *ip_range_start, *ip_range_end;
|
|
+ // prefetch 8 Kilobytes to L2
|
|
+ for (ip_range_start=buf; ip_range_start < ipend; ip_range_start+=(8*1024)/4) {
|
|
+ ip = ip_range_start;
|
|
+ ip_range_end = ip_range_start+ (8*1024)/4;
|
|
+ if(ip_range_end > ipend) ip_range_end = ipend;
|
|
+ // do prefetch of range by 256 bytes (4 L2 lines)
|
|
+ for(;ip < ip_range_end; ip += 256/4)
|
|
+ E2K_PREFETCH_L2_256(ip);
|
|
+
|
|
+ // compute over the range
|
|
+ ip = ip_range_start;
|
|
+ for (; ip < ip_range_end; ip += 4) {
|
|
+ a += ip[0];
|
|
+ a2 += ip[1];
|
|
+ a3 += ip[2];
|
|
+ a4 += ip[3];
|
|
+ b += a;
|
|
+ b2 += a2;
|
|
+ b3 += a3;
|
|
+ b4 += a4;
|
|
+ c += b;
|
|
+ c2 += b2;
|
|
+ c3 += b3;
|
|
+ c4 += b4;
|
|
+ d += c;
|
|
+ d2 += c2;
|
|
+ d3 += c3;
|
|
+ d4 += c4;
|
|
+ }
|
|
+ }
|
|
+#else
|
|
for (; ip < ipend; ip += 4) {
|
|
a += ip[0];
|
|
a2 += ip[1];
|
|
@@ -128,6 +182,7 @@ fletcher_4_superscalar4_native(fletcher_
|
|
d3 += c3;
|
|
d4 += c4;
|
|
}
|
|
+#endif
|
|
|
|
ctx->superscalar[0].v[0] = a;
|
|
ctx->superscalar[1].v[0] = b;
|