From 2d6ac920837f558be214ad2ddd28cad7f3b15e5c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 24 Oct 2018 07:50:20 +0100 Subject: [PATCH] target/arm: Reorg NEON VLD/VST single element to one lane Instead of shifts and masks, use direct loads and stores from the neon register file. Signed-off-by: Richard Henderson Message-id: 20181011205206.3552-21-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- target/arm/translate.c | 92 +++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/target/arm/translate.c b/target/arm/translate.c index 83e8824208..7c4675ffd8 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -1611,6 +1611,25 @@ static TCGv_i32 neon_load_reg(int reg, int pass) return tmp; } +static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop) +{ + long offset = neon_element_offset(reg, ele, mop & MO_SIZE); + + switch (mop) { + case MO_UB: + tcg_gen_ld8u_i32(var, cpu_env, offset); + break; + case MO_UW: + tcg_gen_ld16u_i32(var, cpu_env, offset); + break; + case MO_UL: + tcg_gen_ld_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop) { long offset = neon_element_offset(reg, ele, mop & MO_SIZE); @@ -1639,6 +1658,25 @@ static void neon_store_reg(int reg, int pass, TCGv_i32 var) tcg_temp_free_i32(var); } +static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var) +{ + long offset = neon_element_offset(reg, ele, size); + + switch (size) { + case MO_8: + tcg_gen_st8_i32(var, cpu_env, offset); + break; + case MO_16: + tcg_gen_st16_i32(var, cpu_env, offset); + break; + case MO_32: + tcg_gen_st_i32(var, cpu_env, offset); + break; + default: + g_assert_not_reached(); + } +} + static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var) { long offset = neon_element_offset(reg, ele, size); @@ -4954,9 +4992,7 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) int stride; int size; int reg; - int pass; int load; - int shift; int n; int vec_size; int mmu_idx; @@ -5104,18 +5140,18 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } else { /* Single element. */ int idx = (insn >> 4) & 0xf; - pass = (insn >> 7) & 1; + int reg_idx; switch (size) { case 0: - shift = ((insn >> 5) & 3) * 8; + reg_idx = (insn >> 5) & 7; stride = 1; break; case 1: - shift = ((insn >> 6) & 1) * 16; + reg_idx = (insn >> 6) & 3; stride = (insn & (1 << 5)) ? 2 : 1; break; case 2: - shift = 0; + reg_idx = (insn >> 7) & 1; stride = (insn & (1 << 6)) ? 2 : 1; break; default: @@ -5155,52 +5191,24 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) */ return 1; } + tmp = tcg_temp_new_i32(); addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); for (reg = 0; reg < nregs; reg++) { if (load) { - tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: /* Avoid compiler warnings. */ - abort(); - } - if (size != 2) { - tmp2 = neon_load_reg(rd, pass); - tcg_gen_deposit_i32(tmp, tmp2, tmp, - shift, size ? 16 : 8); - tcg_temp_free_i32(tmp2); - } - neon_store_reg(rd, pass, tmp); + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); + neon_store_element(rd, reg_idx, size, tmp); } else { /* Store */ - tmp = neon_load_reg(rd, pass); - if (shift) - tcg_gen_shri_i32(tmp, tmp, shift); - switch (size) { - case 0: - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - break; - case 2: - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - break; - } - tcg_temp_free_i32(tmp); + neon_load_element(tmp, rd, reg_idx, size); + gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), + s->be_data | size); } rd += stride; tcg_gen_addi_i32(addr, addr, 1 << size); } tcg_temp_free_i32(addr); + tcg_temp_free_i32(tmp); stride = nregs * (1 << size); } }