target/ppc: Add recording of taken branches to BHRB

This commit continues adding support for the Branch History
Rolling Buffer (BHRB) as is provided starting with the P8
processor and continuing with its successors.  This commit
is limited to the recording and filtering of taken branches.

The following changes were made:

  - Enabled functionality on P10 processors only, due to the
    performance impact seen with P8 and P9, where recording is
    not disabled for non-problem-state branches.
  - Added a BHRB buffer for storing branch instruction and
    target addresses for taken branches
  - Renamed gen_update_cfar to gen_update_branch_history and
    added a 'target' parameter to hold the branch target
    address and 'inst_type' parameter to use for filtering
  - Added TCG code to gen_update_branch_history that stores
    data to the BHRB and updates the BHRB offset.
  - Added BHRB resource initialization and reset functions

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
This commit is contained in:
Glenn Miles 2024-03-28 20:41:33 +10:00 committed by Nicholas Piggin
parent a7138e28a2
commit 4de4a4705f
6 changed files with 185 additions and 8 deletions

View File

@ -548,6 +548,8 @@ FIELD(MSR, LE, MSR_LE, 1)
MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
#define MMCRA_BHRBRD PPC_BIT(26) /* BHRB Recording Disable */
#define MMCRA_IFM_MASK PPC_BITMASK(32, 33) /* BHRB Instruction Filtering */
#define MMCRA_IFM_SHIFT PPC_BIT_NR(33)
#define MMCR1_EVT_SIZE 8
/* extract64() does a right shift before extracting */
@ -774,6 +776,8 @@ enum {
POWERPC_FLAG_SMT = 0x00400000,
/* Using "LPAR per core" mode (as opposed to per-thread) */
POWERPC_FLAG_SMT_1LPAR = 0x00800000,
/* Has BHRB */
POWERPC_FLAG_BHRB = 0x01000000,
};
/*
@ -1215,6 +1219,9 @@ struct pnv_tod_tbst {
#define PPC_CPU_OPCODES_LEN 0x40
#define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
/*
 * Upper bound on the size of the Branch History Rolling Buffer (BHRB):
 * the log2 of the entry count, and the entry count itself (1 << 5 = 32).
 */
#define BHRB_MAX_NUM_ENTRIES_LOG2 (5)
#define BHRB_MAX_NUM_ENTRIES (1 << BHRB_MAX_NUM_ENTRIES_LOG2)
struct CPUArchState {
/* Most commonly used resources during translated code execution first */
target_ulong gpr[32]; /* general purpose registers */
@ -1311,6 +1318,16 @@ struct CPUArchState {
int dcache_line_size;
int icache_line_size;
#ifdef TARGET_PPC64
/* Branch History Rolling Buffer (BHRB) resources */
target_ulong bhrb_num_entries;
intptr_t bhrb_base;
target_ulong bhrb_filter;
target_ulong bhrb_offset;
target_ulong bhrb_offset_mask;
uint64_t bhrb[BHRB_MAX_NUM_ENTRIES];
#endif
/* These resources are used during exception processing */
/* CPU model definition */
target_ulong msr_mask;

View File

@ -6142,6 +6142,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
pcc->l1_icache_size = 0x8000;
}
/*
 * Set up the fixed BHRB geometry for a CPU model.
 *
 * Does nothing unless the model has POWERPC_FLAG_BHRB set.  The requested
 * size, given as the log2 of the entry count, is capped at
 * BHRB_MAX_NUM_ENTRIES_LOG2.  The resulting entry count, the address of
 * the first bhrb[] element and the byte-offset wrap mask are cached in env.
 */
static void bhrb_init_state(CPUPPCState *env, target_long num_entries_log2)
{
    target_long log2_entries;

    if (!(env->flags & POWERPC_FLAG_BHRB)) {
        return;
    }

    log2_entries = num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2 ?
                   BHRB_MAX_NUM_ENTRIES_LOG2 : num_entries_log2;

    env->bhrb_base = (intptr_t)&env->bhrb[0];
    env->bhrb_num_entries = 1 << log2_entries;
    /* Offsets are byte offsets, so the mask covers entries * 8 bytes. */
    env->bhrb_offset_mask = (env->bhrb_num_entries * sizeof(uint64_t)) - 1;
}
/*
 * Return the BHRB to its reset state: all entries zeroed, write offset
 * back at the start and the recording filter cleared.  Does nothing for
 * CPU models without POWERPC_FLAG_BHRB.
 */
static void bhrb_reset_state(CPUPPCState *env)
{
    if (!(env->flags & POWERPC_FLAG_BHRB)) {
        return;
    }

    memset(env->bhrb, 0, sizeof(env->bhrb));
    env->bhrb_filter = 0;
    env->bhrb_offset = 0;
}
/* log2 of the BHRB entry count that init_proc_POWER8() passes to bhrb_init_state() */
#define POWER8_BHRB_ENTRIES_LOG2 5
static void init_proc_POWER8(CPUPPCState *env)
{
/* Common Registers */
@ -6183,6 +6205,8 @@ static void init_proc_POWER8(CPUPPCState *env)
env->dcache_line_size = 128;
env->icache_line_size = 128;
bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2);
/* Allocate hardware IRQ controller */
init_excp_POWER8(env);
ppcPOWER7_irq_init(env_archcpu(env));
@ -6307,6 +6331,7 @@ static struct ppc_radix_page_info POWER9_radix_page_info = {
};
#endif /* CONFIG_USER_ONLY */
/* log2 of the BHRB entry count that init_proc_POWER9() passes to bhrb_init_state() */
#define POWER9_BHRB_ENTRIES_LOG2 5
static void init_proc_POWER9(CPUPPCState *env)
{
/* Common Registers */
@ -6357,6 +6382,8 @@ static void init_proc_POWER9(CPUPPCState *env)
env->dcache_line_size = 128;
env->icache_line_size = 128;
bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2);
/* Allocate hardware IRQ controller */
init_excp_POWER9(env);
ppcPOWER9_irq_init(env_archcpu(env));
@ -6497,6 +6524,7 @@ static struct ppc_radix_page_info POWER10_radix_page_info = {
};
#endif /* !CONFIG_USER_ONLY */
/* log2 of the BHRB entry count that init_proc_POWER10() passes to bhrb_init_state() */
#define POWER10_BHRB_ENTRIES_LOG2 5
static void init_proc_POWER10(CPUPPCState *env)
{
/* Common Registers */
@ -6546,6 +6574,8 @@ static void init_proc_POWER10(CPUPPCState *env)
env->dcache_line_size = 128;
env->icache_line_size = 128;
bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2);
/* Allocate hardware IRQ controller */
init_excp_POWER10(env);
ppcPOWER9_irq_init(env_archcpu(env));
@ -6650,7 +6680,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
POWERPC_FLAG_VSX | POWERPC_FLAG_SCV;
POWERPC_FLAG_VSX | POWERPC_FLAG_SCV |
POWERPC_FLAG_BHRB;
pcc->l1_dcache_size = 0x8000;
pcc->l1_icache_size = 0x8000;
}
@ -7222,6 +7253,10 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
}
env->spr[i] = spr->default_value;
}
#if defined(TARGET_PPC64)
bhrb_reset_state(env);
#endif
}
#ifndef CONFIG_USER_ONLY

View File

@ -82,6 +82,37 @@ static void pmu_update_summaries(CPUPPCState *env)
env->pmc_cyc_cnt = cyc_cnt;
}
/*
 * Recompute env->bhrb_filter from the current MMCR0 and MMCRA values.
 *
 * With MMCR0[PMAE] clear, recording is turned off (BHRB_TYPE_NORECORD).
 * Otherwise the MMCRA instruction filtering mode (IFM) field selects
 * which branch types are recorded.
 */
static void hreg_bhrb_filter_update(CPUPPCState *env)
{
    target_long ifm;

    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
        /* PMAE clear: nothing is recorded to the BHRB */
        env->bhrb_filter = BHRB_TYPE_NORECORD;
        return;
    }

    ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >> MMCRA_IFM_SHIFT;

    if (ifm == 0) {
        /* no filtering: every branch type matches */
        env->bhrb_filter = -1;
    } else if (ifm == 1) {
        /* calls only (LK = 1) */
        env->bhrb_filter = BHRB_TYPE_CALL;
    } else if (ifm == 2) {
        /* indirect branches only */
        env->bhrb_filter = BHRB_TYPE_INDIRECT;
    } else if (ifm == 3) {
        /* conditional branches only */
        env->bhrb_filter = BHRB_TYPE_COND;
    }
}
void pmu_mmcr01a_updated(CPUPPCState *env)
{
PowerPCCPU *cpu = env_archcpu(env);
@ -95,6 +126,8 @@ void pmu_mmcr01a_updated(CPUPPCState *env)
ppc_set_irq(cpu, PPC_INTERRUPT_PERFM, 0);
}
hreg_bhrb_filter_update(env);
/*
* Should this update overflow timers (if mmcr0 is updated) so they
* get set in cpu_post_load?

View File

@ -13,6 +13,13 @@
#ifndef POWER8_PMU_H
#define POWER8_PMU_H
/*
 * Branch type bits used for BHRB filtering.  The type mask of a taken
 * branch is ANDed with env->bhrb_filter and the branch is recorded only
 * if the result is non-zero; BHRB_TYPE_NORECORD is never recorded.
 * BHRB_TYPE_XL_FORM additionally causes the branch target address to be
 * recorded.
 */
#define BHRB_TYPE_NORECORD 0x00
#define BHRB_TYPE_CALL 0x01
#define BHRB_TYPE_INDIRECT 0x02
#define BHRB_TYPE_COND 0x04
#define BHRB_TYPE_OTHER 0x08
#define BHRB_TYPE_XL_FORM 0x10
#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
#define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL

View File

@ -180,6 +180,7 @@ struct DisasContext {
#if defined(TARGET_PPC64)
bool sf_mode;
bool has_cfar;
bool has_bhrb;
#endif
bool fpu_enabled;
bool altivec_enabled;
@ -3371,14 +3372,85 @@ static void gen_rvwinkle(DisasContext *ctx)
gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next);
#endif /* defined(CONFIG_USER_ONLY) */
}
/*
 * Emit TCG ops that store one 64-bit value into the BHRB slot at
 * base + offset, then advance offset by one entry (8 bytes) and wrap it
 * with mask.  The updated offset temp is returned so that calls can be
 * chained.
 */
static inline TCGv gen_write_bhrb(TCGv_ptr base, TCGv offset, TCGv mask,
                                  TCGv value)
{
    TCGv_ptr entry = tcg_temp_new_ptr();

    /* entry = base + offset */
    tcg_gen_add_ptr(entry, base, (TCGv_ptr)offset);

    /* *entry = value */
    tcg_gen_st_i64(value, entry, 0);

    /* offset = (offset + 8) & mask */
    tcg_gen_addi_tl(offset, offset, 8);
    tcg_gen_and_tl(offset, offset, mask);

    return offset;
}
#endif /* #if defined(TARGET_PPC64) */
static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
/*
 * Emit the branch-history bookkeeping for a taken branch at 'nip'.
 *
 * On TARGET_PPC64 this sets CFAR to nip when the CPU has a CFAR and then,
 * if the CPU has a BHRB, recording is enabled for this translation and
 * inst_type is not BHRB_TYPE_NORECORD, emits code that at run time:
 *   - skips recording when (env->bhrb_filter & inst_type) is zero;
 *   - otherwise writes nip into the BHRB;
 *   - for BHRB_TYPE_XL_FORM branches also writes 'target' with the 'T'
 *     bit (0x2) set as a second entry;
 *   - stores the advanced, wrapped offset back to env->bhrb_offset.
 * 'target' is only read for BHRB_TYPE_XL_FORM branches.
 * On other targets this generates nothing.
 */
static inline void gen_update_branch_history(DisasContext *ctx,
                                             target_ulong nip,
                                             TCGv target,
                                             target_long inst_type)
{
#if defined(TARGET_PPC64)
    TCGv_ptr bhrb_base;
    TCGv scratch;
    TCGv bhrb_offset;
    TCGv wrap_mask;
    TCGLabel *skip_record;

    if (ctx->has_cfar) {
        tcg_gen_movi_tl(cpu_cfar, nip);
    }

    /* Nothing to generate unless this branch can be recorded at all */
    if (!(ctx->has_bhrb &&
          ctx->bhrb_enable &&
          inst_type != BHRB_TYPE_NORECORD)) {
        return;
    }

    scratch = tcg_temp_new();
    skip_record = gen_new_label();

    /* Run-time filter: skip when no bit of inst_type is set in bhrb_filter */
    tcg_gen_ld_tl(scratch, tcg_env, offsetof(CPUPPCState, bhrb_filter));
    tcg_gen_andi_tl(scratch, scratch, inst_type);
    tcg_gen_brcondi_tl(TCG_COND_EQ, scratch, 0, skip_record);

    bhrb_base = tcg_temp_new_ptr();
    bhrb_offset = tcg_temp_new();
    wrap_mask = tcg_temp_new();

    /* Fetch the buffer base, the current write offset and the wrap mask */
    tcg_gen_ld_ptr(bhrb_base, tcg_env, offsetof(CPUPPCState, bhrb_base));
    tcg_gen_ld_tl(bhrb_offset, tcg_env, offsetof(CPUPPCState, bhrb_offset));
    tcg_gen_ld_tl(wrap_mask, tcg_env,
                  offsetof(CPUPPCState, bhrb_offset_mask));

    /* First entry: address of the branch instruction */
    bhrb_offset = gen_write_bhrb(bhrb_base, bhrb_offset, wrap_mask,
                                 tcg_constant_i64(nip));

    /* XL-Form branches get a second entry holding the target address */
    if (inst_type & BHRB_TYPE_XL_FORM) {
        /* The 'T' bit marks the entry as a target address */
        tcg_gen_ori_tl(scratch, target, 0x2);
        bhrb_offset = gen_write_bhrb(bhrb_base, bhrb_offset, wrap_mask,
                                     scratch);
    }

    /* Write the advanced offset back for the next recorded branch */
    tcg_gen_st_tl(bhrb_offset, tcg_env, offsetof(CPUPPCState, bhrb_offset));

    gen_set_label(skip_record);
#endif
}
@ -3508,8 +3580,10 @@ static void gen_b(DisasContext *ctx)
}
if (LK(ctx->opcode)) {
gen_setlr(ctx, ctx->base.pc_next);
gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_CALL);
} else {
gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_OTHER);
}
gen_update_cfar(ctx, ctx->cia);
gen_goto_tb(ctx, 0, target);
ctx->base.is_jmp = DISAS_NORETURN;
}
@ -3524,6 +3598,7 @@ static void gen_bcond(DisasContext *ctx, int type)
uint32_t bo = BO(ctx->opcode);
TCGLabel *l1;
TCGv target;
target_long bhrb_type = BHRB_TYPE_OTHER;
if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) {
target = tcg_temp_new();
@ -3534,11 +3609,16 @@ static void gen_bcond(DisasContext *ctx, int type)
} else {
tcg_gen_mov_tl(target, cpu_lr);
}
if (!LK(ctx->opcode)) {
bhrb_type |= BHRB_TYPE_INDIRECT;
}
bhrb_type |= BHRB_TYPE_XL_FORM;
} else {
target = NULL;
}
if (LK(ctx->opcode)) {
gen_setlr(ctx, ctx->base.pc_next);
bhrb_type |= BHRB_TYPE_CALL;
}
l1 = gen_new_label();
if ((bo & 0x4) == 0) {
@ -3589,6 +3669,7 @@ static void gen_bcond(DisasContext *ctx, int type)
tcg_gen_brcondi_tl(TCG_COND_EQ, temp, 0, l1);
}
}
bhrb_type |= BHRB_TYPE_COND;
}
if ((bo & 0x10) == 0) {
/* Test CR */
@ -3603,8 +3684,11 @@ static void gen_bcond(DisasContext *ctx, int type)
tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
}
bhrb_type |= BHRB_TYPE_COND;
}
gen_update_cfar(ctx, ctx->cia);
gen_update_branch_history(ctx, ctx->cia, target, bhrb_type);
if (type == BCOND_IM) {
target_ulong li = (target_long)((int16_t)(BD(ctx->opcode)));
if (likely(AA(ctx->opcode) == 0)) {
@ -3720,7 +3804,7 @@ static void gen_rfi(DisasContext *ctx)
/* Restore CPU state */
CHK_SV(ctx);
translator_io_start(&ctx->base);
gen_update_cfar(ctx, ctx->cia);
gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfi(tcg_env);
ctx->base.is_jmp = DISAS_EXIT;
#endif
@ -3735,7 +3819,7 @@ static void gen_rfid(DisasContext *ctx)
/* Restore CPU state */
CHK_SV(ctx);
translator_io_start(&ctx->base);
gen_update_cfar(ctx, ctx->cia);
gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfid(tcg_env);
ctx->base.is_jmp = DISAS_EXIT;
#endif
@ -3750,7 +3834,7 @@ static void gen_rfscv(DisasContext *ctx)
/* Restore CPU state */
CHK_SV(ctx);
translator_io_start(&ctx->base);
gen_update_cfar(ctx, ctx->cia);
gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfscv(tcg_env);
ctx->base.is_jmp = DISAS_EXIT;
#endif
@ -6330,6 +6414,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
#if defined(TARGET_PPC64)
ctx->sf_mode = (hflags >> HFLAGS_64) & 1;
ctx->has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
ctx->has_bhrb = !!(env->flags & POWERPC_FLAG_BHRB);
#endif
ctx->lazy_tlb_flush = env->mmu_model == POWERPC_MMU_32B
|| env->mmu_model & POWERPC_MMU_64;

View File

@ -17,7 +17,7 @@ static bool trans_RFEBB(DisasContext *ctx, arg_XL_s *arg)
REQUIRE_INSNS_FLAGS2(ctx, ISA207S);
translator_io_start(&ctx->base);
gen_update_cfar(ctx, ctx->cia);
gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfebb(tcg_env, cpu_gpr[arg->s]);
ctx->base.is_jmp = DISAS_CHAIN;