target/loongarch: Remove cpu_fcsr0

All of the fpu operations are defined with TCG_CALL_NO_WG, but they all modify FCSR0. The most efficient way to fix this is to remove cpu_fcsr0, and instead use explicit load and store operations for the two instructions that manipulate that value.

Acked-by: Qi Hu <huqi@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Reported-by: Feiyang Chen <chenfeiyang@loongson.cn>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2022-08-06 10:04:44 -07:00 · 2022-08-06 10:04:44 -07:00 · 10dcb08b03
commit 10dcb08b03
parent 7b06148df8
6 changed files with 36 additions and 22 deletions
--- a/target/loongarch/fpu_helper.c
+++ b/target/loongarch/fpu_helper.c
@ -872,8 +872,8 @@ uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj)
    return fd;
 }
-void helper_set_rounding_mode(CPULoongArchState *env, uint32_t fcsr0)
+void helper_set_rounding_mode(CPULoongArchState *env)
 {
     /*
      * Apply to env->fp_status the rounding mode selected by the two-bit
      * field at bit position FCSR0_RM, translated through the ieee_rm table.
      * After this change the field is read from env->fcsr0 directly instead
      * of from a value passed in by the caller.
      */
-    set_float_rounding_mode(ieee_rm[(fcsr0 >> FCSR0_RM) & 0x3],
+    set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
                            &env->fp_status);
 }
--- a/target/loongarch/helper.h
+++ b/target/loongarch/helper.h
@ -91,7 +91,7 @@ DEF_HELPER_2(ftint_w_d, i64, env, i64)
 DEF_HELPER_2(frint_s, i64, env, i64)
 DEF_HELPER_2(frint_d, i64, env, i64)
-DEF_HELPER_FLAGS_2(set_rounding_mode, TCG_CALL_NO_RWG, void, env, i32)
+DEF_HELPER_FLAGS_1(set_rounding_mode, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_1(rdtime_d, i64, env)
--- a/target/loongarch/insn_trans/trans_fmov.c.inc
+++ b/target/loongarch/insn_trans/trans_fmov.c.inc
@ -60,38 +60,39 @@ static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a)
    TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE);
    if (mask == UINT32_MAX) {
-        tcg_gen_extrl_i64_i32(cpu_fcsr0, Rj);
+        tcg_gen_st32_i64(Rj, cpu_env, offsetof(CPULoongArchState, fcsr0));
    } else {
        TCGv_i32 fcsr0 = tcg_temp_new_i32();
        TCGv_i32 temp = tcg_temp_new_i32();
        tcg_gen_ld_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0));
        tcg_gen_extrl_i64_i32(temp, Rj);
        tcg_gen_andi_i32(temp, temp, mask);
-        tcg_gen_andi_i32(cpu_fcsr0, cpu_fcsr0, ~mask);
+        tcg_gen_andi_i32(fcsr0, fcsr0, ~mask);
-        tcg_gen_or_i32(cpu_fcsr0, cpu_fcsr0, temp);
+        tcg_gen_or_i32(fcsr0, fcsr0, temp);
        tcg_gen_st_i32(fcsr0, cpu_env, offsetof(CPULoongArchState, fcsr0));
        tcg_temp_free_i32(temp);
-
+        tcg_temp_free_i32(fcsr0);
        /*
         * Install the new rounding mode to fpu_status, if changed.
         * Note that FCSR3 is exactly the rounding mode field.
         */
        if (mask != FCSR0_M3) {
            return true;
        }
    }
    gen_helper_set_rounding_mode(cpu_env, cpu_fcsr0);
    /*
     * Install the new rounding mode to fpu_status, if changed.
     * Note that FCSR3 is exactly the rounding mode field.
     */
    if (mask & FCSR0_M3) {
        gen_helper_set_rounding_mode(cpu_env);
    }
    return true;
 }
 /*
  * Translate movfcsr2gr.  The new ('+') code loads the fcsr0 field of
  * CPULoongArchState into dest with tcg_gen_ld32u_i64, masks it with
  * fcsr_mask[a->fcsrs], and hands the result to gen_set_gpr for a->rd.
  * The old ('-') code read the cpu_fcsr0 TCG global instead.
  *
  * NOTE(review): as rendered here, 'temp' is still allocated and freed but
  * is not used by any '+' line; the '-' markers on those two lines appear
  * to have been lost -- confirm against the upstream commit.
  */
 static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a)
 {
    TCGv_i32 temp = tcg_temp_new_i32();
    TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
-    tcg_gen_andi_i32(temp, cpu_fcsr0, fcsr_mask[a->fcsrs]);
+    tcg_gen_ld32u_i64(dest, cpu_env, offsetof(CPULoongArchState, fcsr0));
-    tcg_gen_ext_i32_i64(dest, temp);
+    tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]);
    gen_set_gpr(a->rd, dest, EXT_NONE);
    tcg_temp_free_i32(temp);
    return true;
 }
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@ -22,7 +22,6 @@
 /* Global register indices */
 TCGv cpu_gpr[32], cpu_pc;
 static TCGv cpu_lladdr, cpu_llval;
 TCGv_i32 cpu_fcsr0;
 TCGv_i64 cpu_fpr[32];
 #include "exec/gen-icount.h"
@ -266,8 +265,6 @@ void loongarch_translate_init(void)
    }
    cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPULoongArchState, pc), "pc");
    cpu_fcsr0 = tcg_global_mem_new_i32(cpu_env,
                    offsetof(CPULoongArchState, fcsr0), "fcsr0");
    cpu_lladdr = tcg_global_mem_new(cpu_env,
                    offsetof(CPULoongArchState, lladdr), "lladdr");
    cpu_llval = tcg_global_mem_new(cpu_env,
--- a/tests/tcg/loongarch64/Makefile.target
+++ b/tests/tcg/loongarch64/Makefile.target
@ -15,5 +15,6 @@ LOONGARCH64_TESTS  += test_div
 LOONGARCH64_TESTS  += test_fclass
 LOONGARCH64_TESTS  += test_fpcom
 LOONGARCH64_TESTS  += test_pcadd
 LOONGARCH64_TESTS  += test_fcsr
 TESTS += $(LOONGARCH64_TESTS)
--- a/tests/tcg/loongarch64/test_fcsr.c
+++ b/tests/tcg/loongarch64/test_fcsr.c
@ -0,0 +1,15 @@
 #include <assert.h>
 /*
  * Check that a floating-point operation's status flags are visible when
  * the FCSR is read back afterwards.  Returns 0 on success; the assert
  * aborts the test if the expected flag bit is not set.
  */
 int main()
 {
    unsigned fcsr;
    /*
     * In order: write $r0 to the FCSR, move $r0 into $f0, divide $f0 by
     * itself, then read the FCSR into 'fcsr'.  $f0 is listed as clobbered.
     * NOTE(review): presumably $r0 is the always-zero register, making this
     * a cleared FCSR followed by 0.0 / 0.0 -- confirm against the ISA manual.
     */
    asm("movgr2fcsr $r0,$r0\n\t"
        "movgr2fr.d $f0,$r0\n\t"
        "fdiv.d     $f0,$f0,$f0\n\t"
        "movfcsr2gr %0,$r0"
        : "=r"(fcsr) : : "f0");
    /* The bit tested is 16 << 16, which the test labels "Invalid". */
    assert(fcsr & (16 << 16)); /* Invalid */
    return 0;
 }