From d33271213437ed1834b0a50540d79e877e1cd894 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Thu, 1 Jun 2017 00:01:19 +0200
Subject: [PATCH] target/s390x: improve MOVE LONG and MOVE LONG EXTENDED

As MVCL and MVCLE only differ by their operands, use a common
do_mvcl helper. Optimize it calling fast_memmove and fast_memset.
Correctly write back addresses. Check that r1 and r2/r3 registers
are even.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <20170531220129.27724-21-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/s390x/mem_helper.c | 90 +++++++++++++++++----------------------
 target/s390x/translate.c  | 40 ++++++++++++-----
 2 files changed, 70 insertions(+), 60 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 2326f0bdb9..3b2ff09fec 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -576,6 +576,38 @@ void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
     }
 }
 
+/* move long helper */
+static inline uint32_t do_mvcl(CPUS390XState *env,
+                               uint64_t *dest, uint64_t *destlen,
+                               uint64_t *src, uint64_t *srclen,
+                               uint8_t pad, uintptr_t ra)
+{
+    uint64_t len = MIN(*srclen, *destlen);
+    uint32_t cc;
+
+    if (*destlen == *srclen) {
+        cc = 0;
+    } else if (*destlen < *srclen) {
+        cc = 1;
+    } else {
+        cc = 2;
+    }
+
+    /* Copy the src array */
+    fast_memmove(env, *dest, *src, len, ra);
+    *src += len;
+    *srclen -= len;
+    *dest += len;
+    *destlen -= len;
+
+    /* Pad the remaining area */
+    fast_memset(env, *dest, pad, *destlen, ra);
+    *dest += *destlen;
+    *destlen = 0;
+
+    return cc;
+}
+
 /* move long */
 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 {
@@ -585,40 +617,19 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
     uint64_t src = get_address(env, r2);
     uint8_t pad = env->regs[r2 + 1] >> 24;
-    uint8_t v;
     uint32_t cc;
 
-    if (destlen == srclen) {
-        cc = 0;
-    } else if (destlen < srclen) {
-        cc = 1;
-    } else {
-        cc = 2;
-    }
+    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, ra);
 
-    if (srclen > destlen) {
-        srclen = destlen;
-    }
-
-    for (; destlen && srclen; src++, dest++, destlen--, srclen--) {
-        v = cpu_ldub_data_ra(env, src, ra);
-        cpu_stb_data_ra(env, dest, v, ra);
-    }
-
-    for (; destlen; dest++, destlen--) {
-        cpu_stb_data_ra(env, dest, pad, ra);
-    }
-
-    env->regs[r1 + 1] = destlen;
-    /* can't use srclen here, we trunc'ed it */
-    env->regs[r2 + 1] -= src - env->regs[r2];
+    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
+    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
     set_address(env, r1, dest);
     set_address(env, r2, src);
 
     return cc;
 }
 
-/* move long extended another memcopy insn with more bells and whistles */
+/* move long extended */
 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                        uint32_t r3)
 {
@@ -627,34 +638,13 @@ uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
     uint64_t dest = get_address(env, r1);
     uint64_t srclen = get_length(env, r3 + 1);
     uint64_t src = get_address(env, r3);
-    uint8_t pad = a2 & 0xff;
-    uint8_t v;
+    uint8_t pad = a2;
     uint32_t cc;
 
-    if (destlen == srclen) {
-        cc = 0;
-    } else if (destlen < srclen) {
-        cc = 1;
-    } else {
-        cc = 2;
-    }
+    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, ra);
 
-    if (srclen > destlen) {
-        srclen = destlen;
-    }
-
-    for (; destlen && srclen; src++, dest++, destlen--, srclen--) {
-        v = cpu_ldub_data_ra(env, src, ra);
-        cpu_stb_data_ra(env, dest, v, ra);
-    }
-
-    for (; destlen; dest++, destlen--) {
-        cpu_stb_data_ra(env, dest, pad, ra);
-    }
-
-    set_length(env, r1 + 1 , destlen);
-    /* can't use srclen here, we trunc'ed it */
-    set_length(env, r3 + 1, env->regs[r3 + 1] - src - env->regs[r3]);
+    set_length(env, r1 + 1, destlen);
+    set_length(env, r3 + 1, srclen);
     set_address(env, r1, dest);
     set_address(env, r3, src);
 
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 2d47f1d2b4..c131f67a60 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2979,22 +2979,42 @@ static ExitStatus op_mvcin(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_mvcl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
-    TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
-    gen_helper_mvcl(cc_op, cpu_env, r1, r2);
-    tcg_temp_free_i32(r1);
-    tcg_temp_free_i32(r2);
+    int r1 = get_field(s->fields, r1);
+    int r2 = get_field(s->fields, r2);
+    TCGv_i32 t1, t2;
+
+    /* r1 and r2 must be even.  */
+    if (r1 & 1 || r2 & 1) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return EXIT_NORETURN;
+    }
+
+    t1 = tcg_const_i32(r1);
+    t2 = tcg_const_i32(r2);
+    gen_helper_mvcl(cc_op, cpu_env, t1, t2);
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t2);
     set_cc_static(s);
     return NO_EXIT;
 }
 
 static ExitStatus op_mvcle(DisasContext *s, DisasOps *o)
 {
-    TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
-    TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
-    gen_helper_mvcle(cc_op, cpu_env, r1, o->in2, r3);
-    tcg_temp_free_i32(r1);
-    tcg_temp_free_i32(r3);
+    int r1 = get_field(s->fields, r1);
+    int r3 = get_field(s->fields, r3);
+    TCGv_i32 t1, t3;
+
+    /* r1 and r3 must be even.  */
+    if (r1 & 1 || r3 & 1) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return EXIT_NORETURN;
+    }
+
+    t1 = tcg_const_i32(r1);
+    t3 = tcg_const_i32(r3);
+    gen_helper_mvcle(cc_op, cpu_env, t1, o->in2, t3);
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t3);
     set_cc_static(s);
     return NO_EXIT;
 }