diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 3ed03391a8..87235dae63 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3441,7 +3441,7 @@ typedef ARMCPU ArchCPU; * | TBFLAG_AM32 | +-----+----------+ * | | |TBFLAG_M32| * +-------------+----------------+----------+ - * 31 23 5 4 0 + * 31 23 6 5 0 * * Unless otherwise noted, these bits are cached in env->hflags. */ @@ -3499,6 +3499,8 @@ FIELD(TBFLAG_M32, LSPACT, 2, 1) /* Not cached. */ FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 3, 1) /* Not cached. */ /* Set if FPCCR.S does not match current security state */ FIELD(TBFLAG_M32, FPCCR_S_WRONG, 4, 1) /* Not cached. */ +/* Set if MVE insns are definitely not predicated by VPR or LTPSIZE */ +FIELD(TBFLAG_M32, MVE_NO_PRED, 5, 1) /* Not cached. */ /* * Bit usage when in AArch64 state diff --git a/target/arm/helper.c b/target/arm/helper.c index 21ee94ec2e..6274221447 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -13637,6 +13637,35 @@ static inline void assert_hflags_rebuild_correctly(CPUARMState *env) #endif } +static bool mve_no_pred(CPUARMState *env) +{ + /* + * Return true if there is definitely no predication of MVE + * instructions by VPR or LTPSIZE. (Returning false even if there + * isn't any predication is OK; generated code will just be + * a little worse.) + * If the CPU does not implement MVE then this TB flag is always 0. + * + * NOTE: if you change this logic, the "recalculate s->mve_no_pred" + * logic in gen_update_fp_context() needs to be updated to match. + * + * We do not include the effect of the ECI bits here -- they are + * tracked in other TB flags. This simplifies the logic for + * "when did we emit code that changes the MVE_NO_PRED TB flag + * and thus need to end the TB?". + */ + if (cpu_isar_feature(aa32_mve, env_archcpu(env))) { + return false; + } + if (env->v7m.vpr) { + return false; + } + if (env->v7m.ltpsize < 4) { + return false; + } + return true; +} + void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *pflags) { @@ -13676,6 +13705,10 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) { DP_TBFLAG_M32(flags, LSPACT, 1); } + + if (mve_no_pred(env)) { + DP_TBFLAG_M32(flags, MVE_NO_PRED, 1); + } } else { /* * Note that XSCALE_CPAR shares bits with VECSTRIDE. diff --git a/target/arm/translate-m-nocp.c b/target/arm/translate-m-nocp.c index 5eab04832c..d9e144e8eb 100644 --- a/target/arm/translate-m-nocp.c +++ b/target/arm/translate-m-nocp.c @@ -95,7 +95,10 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) clear_eci_state(s); - /* End the TB, because we have updated FP control bits */ + /* + * End the TB, because we have updated FP control bits, + * and possibly VPR or LTPSIZE. + */ s->base.is_jmp = DISAS_UPDATE_EXIT; return true; } @@ -397,6 +400,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, store_cpu_field(control, v7m.control[M_REG_S]); tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK); gen_helper_vfp_set_fpscr(cpu_env, tmp); + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; tcg_temp_free_i32(tmp); tcg_temp_free_i32(sfpa); break; @@ -409,6 +413,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, } tmp = loadfn(s, opaque, true); store_cpu_field(tmp, v7m.vpr); + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; break; case ARM_VFP_P0: { @@ -418,6 +423,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, tcg_gen_deposit_i32(vpr, vpr, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH); store_cpu_field(vpr, v7m.vpr); + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; tcg_temp_free_i32(tmp); break; } diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index 2ed91577ec..0eca96e29c 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -810,7 +810,12 @@ DO_LOGIC(VORR, gen_helper_mve_vorr) DO_LOGIC(VORN, gen_helper_mve_vorn) DO_LOGIC(VEOR, gen_helper_mve_veor) -DO_LOGIC(VPSEL, gen_helper_mve_vpsel) +static bool trans_VPSEL(DisasContext *s, arg_2op *a) +{ + /* This insn updates predication bits */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + return do_2op(s, a, gen_helper_mve_vpsel); +} #define DO_2OP(INSN, FN) \ static bool trans_##INSN(DisasContext *s, arg_2op *a) \ @@ -1366,6 +1371,8 @@ static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a) } gen_helper_mve_vpnot(cpu_env); + /* This insn updates predication bits */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; mve_update_eci(s); return true; } @@ -1852,6 +1859,8 @@ static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn) /* VPT */ gen_vpst(s, a->mask); } + /* This insn updates predication bits */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; mve_update_eci(s); return true; } @@ -1883,6 +1892,8 @@ static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a, /* VPT */ gen_vpst(s, a->mask); } + /* This insn updates predication bits */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; mve_update_eci(s); return true; } diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c index e2eb797c82..59bcaec5be 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/translate-vfp.c @@ -109,7 +109,7 @@ static inline long vfp_f16_offset(unsigned reg, bool top) * Generate code for M-profile lazy FP state preservation if needed; * this corresponds to the pseudocode PreserveFPState() function. */ -static void gen_preserve_fp_state(DisasContext *s) +static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update) { if (s->v7m_lspact) { /* @@ -128,6 +128,20 @@ static void gen_preserve_fp_state(DisasContext *s) * any further FP insns in this TB. */ s->v7m_lspact = false; + /* + * The helper might have zeroed VPR, so we do not know the + * correct value for the MVE_NO_PRED TB flag any more. + * If we're about to create a new fp context then that + * will precisely determine the MVE_NO_PRED value (see + * gen_update_fp_context()). Otherwise, we must: + * - set s->mve_no_pred to false, so this instruction + * is generated to use helper functions + * - end the TB now, without chaining to the next TB + */ + if (skip_context_update || !s->v7m_new_fp_ctxt_needed) { + s->mve_no_pred = false; + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + } } } @@ -169,12 +183,19 @@ static void gen_update_fp_context(DisasContext *s) TCGv_i32 z32 = tcg_const_i32(0); store_cpu_field(z32, v7m.vpr); } - /* - * We don't need to arrange to end the TB, because the only - * parts of FPSCR which we cache in the TB flags are the VECLEN - * and VECSTRIDE, and those don't exist for M-profile. + * We just updated the FPSCR and VPR. Some of this state is cached + * in the MVE_NO_PRED TB flag. We want to avoid having to end the + * TB here, which means we need the new value of the MVE_NO_PRED + * flag to be exactly known here and the same for all executions. + * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is + * always set to 0, so the new MVE_NO_PRED flag is always 1 + * if and only if we have MVE. + * + * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE, + * but those do not exist for M-profile, so are not relevant here.) */ + s->mve_no_pred = dc_isar_feature(aa32_mve, s); if (s->v8m_secure) { bits |= R_V7M_CONTROL_SFPA_MASK; @@ -238,7 +259,7 @@ bool vfp_access_check_m(DisasContext *s, bool skip_context_update) /* Handle M-profile lazy FP state mechanics */ /* Trigger lazy-state preservation if necessary */ - gen_preserve_fp_state(s); + gen_preserve_fp_state(s, skip_context_update); if (!skip_context_update) { /* Update ownership of FP context and create new FP context if needed */ diff --git a/target/arm/translate.c b/target/arm/translate.c index cf31e0237d..f7086c66a5 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -8496,6 +8496,7 @@ static bool trans_DLS(DisasContext *s, arg_DLS *a) /* DLSTP: set FPSCR.LTPSIZE */ tmp = tcg_const_i32(a->size); store_cpu_field(tmp, v7m.ltpsize); + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; } return true; } @@ -8561,6 +8562,10 @@ static bool trans_WLS(DisasContext *s, arg_WLS *a) assert(ok); tmp = tcg_const_i32(a->size); store_cpu_field(tmp, v7m.ltpsize); + /* + * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0) + * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK. + */ } gen_jmp_tb(s, s->base.pc_next, 1); @@ -8743,6 +8748,8 @@ static bool trans_VCTP(DisasContext *s, arg_VCTP *a) gen_helper_mve_vctp(cpu_env, masklen); tcg_temp_free_i32(masklen); tcg_temp_free_i32(rn_shifted); + /* This insn updates predication bits */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; mve_update_eci(s); return true; } @@ -9413,6 +9420,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->v7m_new_fp_ctxt_needed = EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED); dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT); + dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED); } else { dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL); dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B); diff --git a/target/arm/translate.h b/target/arm/translate.h index 605d1f2e33..3a0db801d3 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -100,6 +100,8 @@ typedef struct DisasContext { bool align_mem; /* True if PSTATE.IL is set */ bool pstate_il; + /* True if MVE insns are definitely not predicated by VPR or LTPSIZE */ + bool mve_no_pred; /* * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. * < 0, set by the current instruction.