Hexagon (target/hexagon) Short-circuit packet register writes

In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr.  We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed.  If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.

We pass the ctx->need_commit to helpers when needed.

Finally, we can early-exit from gen_reg_writes during packet commit.

There are a few instructions whose semantics write to the result before
reading all the inputs.  Therefore, the idef-parser generated code is
incompatible with short-circuit.  We tell idef-parser to skip them.

For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.

Here's a simple example of the TCG generated for
0x004000b4:  0x7800c020 {       R0 = #0x1 }

BEFORE:
 ---- 004000b4
 movi_i32 new_r0,$0x1
 mov_i32 r0,new_r0

AFTER:
 ---- 004000b4
 movi_i32 r0,$0x1

This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
This commit is contained in:
Taylor Simpson 2023-04-27 16:00:02 -07:00
parent b9f0326bf7
commit d54c56156f
16 changed files with 128 additions and 30 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -224,6 +224,7 @@ void arch_fpop_start(CPUHexagonState *env)
void arch_fpop_end(CPUHexagonState *env)
{
const bool pkt_need_commit = true;
int flags = get_float_exception_flags(&env->fp_status);
if (flags != 0) {
SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);

View File

@ -54,6 +54,8 @@ static Property hexagon_lldb_compat_property =
static Property hexagon_lldb_stack_adjust_property =
DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust,
0, qdev_prop_uint32, target_ulong);
static Property hexagon_short_circuit_property =
DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true);
const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = {
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
@ -330,6 +332,7 @@ static void hexagon_cpu_init(Object *obj)
cpu_set_cpustate_pointers(cpu);
qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property);
qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property);
qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property);
}
#include "hw/core/tcg-cpu-ops.h"

View File

@ -150,6 +150,7 @@ struct ArchCPU {
bool lldb_compat;
target_ulong lldb_stack_adjust;
bool short_circuit;
};
#include "cpu_bits.h"

View File

@ -287,6 +287,8 @@ def gen_helper_function(f, tag, tagregs, tagimms):
if hex_common.need_pkt_has_multi_cof(tag):
f.write(", uint32_t pkt_has_multi_cof")
if (hex_common.need_pkt_need_commit(tag)):
f.write(", uint32_t pkt_need_commit")
if hex_common.need_PC(tag):
if i > 0:

View File

@ -86,6 +86,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1
if hex_common.need_pkt_has_multi_cof(tag):
def_helper_size += 1
if hex_common.need_pkt_need_commit(tag):
def_helper_size += 1
if hex_common.need_part1(tag):
def_helper_size += 1
if hex_common.need_slot(tag):
@ -103,6 +105,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
def_helper_size = len(regs) + len(imms) + numscalarreadwrite
if hex_common.need_pkt_has_multi_cof(tag):
def_helper_size += 1
if hex_common.need_pkt_need_commit(tag):
def_helper_size += 1
if hex_common.need_part1(tag):
def_helper_size += 1
if hex_common.need_slot(tag):
@ -156,10 +160,12 @@ def gen_helper_prototype(f, tag, tagregs, tagimms):
for immlett, bits, immshift in imms:
f.write(", s32")
## Add the arguments for the instruction pkt_has_multi_cof, slot and
## part1 (if needed)
## Add the arguments for the instruction pkt_has_multi_cof,
## pkt_needs_commit, PC, next_PC, slot, and part1 (if needed)
if hex_common.need_pkt_has_multi_cof(tag):
f.write(", i32")
if hex_common.need_pkt_need_commit(tag):
f.write(', i32')
if hex_common.need_PC(tag):
f.write(", i32")
if hex_common.helper_needs_next_PC(tag):

View File

@ -111,6 +111,13 @@ def main():
continue
if ( tag.startswith('R6_release_') ):
continue
## Skip instructions that are incompatible with short-circuit
## packet register writes
if ( tag == 'S2_insert' or
tag == 'S2_insert_rp' or
tag == 'S2_asr_r_svw_trun' or
tag == 'A2_swiz' ):
continue
regs = tagregs[tag]
imms = tagimms[tag]

View File

@ -592,7 +592,8 @@
#define fGEN_TCG_A5_ACS(SHORTCODE) \
do { \
gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \
gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \
gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \
tcg_constant_tl(ctx->need_commit)); \
} while (0)
#define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \

View File

@ -550,6 +550,9 @@ def gen_tcg_func(f, tag, regs, imms):
if hex_common.need_pkt_has_multi_cof(tag):
f.write(" TCGv pkt_has_multi_cof = ")
f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n")
if hex_common.need_pkt_need_commit(tag):
f.write(" TCGv pkt_need_commit = ")
f.write("tcg_constant_tl(ctx->need_commit);\n")
if hex_common.need_part1(tag):
f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n")
if hex_common.need_slot(tag):
@ -596,6 +599,8 @@ def gen_tcg_func(f, tag, regs, imms):
if hex_common.need_pkt_has_multi_cof(tag):
f.write(", pkt_has_multi_cof")
if hex_common.need_pkt_need_commit(tag):
f.write(", pkt_need_commit")
if hex_common.need_PC(tag):
f.write(", PC")
if hex_common.helper_needs_next_PC(tag):

View File

@ -45,7 +45,7 @@ TCGv gen_read_preg(TCGv pred, uint8_t num)
#define IMMUTABLE (~0)
static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = {
const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = {
[HEX_REG_USR] = 0xc13000c0,
[HEX_REG_PC] = IMMUTABLE,
[HEX_REG_GP] = 0x3f,
@ -70,14 +70,18 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val,
static TCGv get_result_gpr(DisasContext *ctx, int rnum)
{
return hex_new_value[rnum];
if (ctx->need_commit) {
return hex_new_value[rnum];
} else {
return hex_gpr[rnum];
}
}
static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum)
{
TCGv_i64 result = tcg_temp_new_i64();
tcg_gen_concat_i32_i64(result, hex_new_value[rnum],
hex_new_value[rnum + 1]);
tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum),
get_result_gpr(ctx, rnum + 1));
return result;
}
@ -86,7 +90,7 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val)
const target_ulong reg_mask = reg_immut_masks[rnum];
gen_masked_reg_write(val, hex_gpr[rnum], reg_mask);
tcg_gen_mov_tl(hex_new_value[rnum], val);
tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val);
if (HEX_DEBUG) {
/* Do this so HELPER(debug_commit_end) will know */
tcg_gen_movi_tl(hex_reg_written[rnum], 1);
@ -95,27 +99,15 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val)
static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val)
{
const target_ulong reg_mask_low = reg_immut_masks[rnum];
const target_ulong reg_mask_high = reg_immut_masks[rnum + 1];
TCGv val32 = tcg_temp_new();
/* Low word */
tcg_gen_extrl_i64_i32(val32, val);
gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low);
tcg_gen_mov_tl(hex_new_value[rnum], val32);
if (HEX_DEBUG) {
/* Do this so HELPER(debug_commit_end) will know */
tcg_gen_movi_tl(hex_reg_written[rnum], 1);
}
gen_log_reg_write(ctx, rnum, val32);
/* High word */
tcg_gen_extrh_i64_i32(val32, val);
gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high);
tcg_gen_mov_tl(hex_new_value[rnum + 1], val32);
if (HEX_DEBUG) {
/* Do this so HELPER(debug_commit_end) will know */
tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1);
}
gen_log_reg_write(ctx, rnum + 1, val32);
}
void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val)

View File

@ -58,4 +58,6 @@ void gen_set_half(int N, TCGv result, TCGv src);
void gen_set_half_i64(int N, TCGv_i64 result, TCGv src);
void probe_noshuf_load(TCGv va, int s, int mi);
extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS];
#endif

View File

@ -29,7 +29,7 @@ DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32)
DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32)
DEF_HELPER_3(sfrecipa, i64, env, f32, f32)
DEF_HELPER_2(sfinvsqrta, i64, env, f32)
DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64)
DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32)
DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64)
DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64)

View File

@ -276,6 +276,9 @@ def need_pkt_has_multi_cof(tag):
return "A_COF" in attribdict[tag]
def need_pkt_need_commit(tag):
return 'A_IMPLICIT_WRITES_USR' in attribdict[tag]
def need_condexec_reg(tag, regs):
if "A_CONDEXEC" in attribdict[tag]:
for regtype, regid, toss, numregs in regs:

View File

@ -44,8 +44,17 @@
reg_field_info[FIELD].offset)
#define SET_USR_FIELD(FIELD, VAL) \
fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \
reg_field_info[FIELD].offset, (VAL))
do { \
if (pkt_need_commit) { \
fINSERT_BITS(env->new_value[HEX_REG_USR], \
reg_field_info[FIELD].width, \
reg_field_info[FIELD].offset, (VAL)); \
} else { \
fINSERT_BITS(env->gpr[HEX_REG_USR], \
reg_field_info[FIELD].width, \
reg_field_info[FIELD].offset, (VAL)); \
} \
} while (0)
#endif
#ifdef QEMU_GENERATE

View File

@ -220,7 +220,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
reg_printed = true;
}
HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
i, env->new_value[i], env->new_value[i]);
i, env->gpr[i], env->gpr[i]);
}
}
@ -352,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
}
int64_t HELPER(vacsh_val)(CPUHexagonState *env,
int64_t RxxV, int64_t RssV, int64_t RttV)
int64_t RxxV, int64_t RssV, int64_t RttV,
uint32_t pkt_need_commit)
{
for (int i = 0; i < 4; i++) {
int xv = sextract64(RxxV, i * 16, 16);

View File

@ -27,6 +27,7 @@
#include "insn.h"
#include "decode.h"
#include "translate.h"
#include "genptr.h"
#include "printinsn.h"
#include "analyze_funcs_generated.c.inc"
@ -239,7 +240,7 @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
return nwords;
}
static G_GNUC_UNUSED bool check_for_attrib(Packet *pkt, int attrib)
static bool check_for_attrib(Packet *pkt, int attrib)
{
for (int i = 0; i < pkt->num_insns; i++) {
if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
@ -336,6 +337,58 @@ static void mark_implicit_pred_writes(DisasContext *ctx)
mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3);
}
static bool pkt_raises_exception(Packet *pkt)
{
if (check_for_attrib(pkt, A_LOAD) ||
check_for_attrib(pkt, A_STORE)) {
return true;
}
return false;
}
static bool need_commit(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
/*
* If the short-circuit property is set to false, we'll always do the commit
*/
if (!ctx->short_circuit) {
return true;
}
if (pkt_raises_exception(pkt)) {
return true;
}
/* Registers with immutability flags require new_value */
for (int i = 0; i < ctx->reg_log_idx; i++) {
int rnum = ctx->reg_log[i];
if (reg_immut_masks[rnum]) {
return true;
}
}
/* Floating point instructions are hard-coded to use new_value */
if (check_for_attrib(pkt, A_FPOP)) {
return true;
}
if (pkt->num_insns == 1) {
return false;
}
/* Check for overlap between register reads and writes */
for (int i = 0; i < ctx->reg_log_idx; i++) {
int rnum = ctx->reg_log[i];
if (test_bit(rnum, ctx->regs_read)) {
return true;
}
}
return false;
}
static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
{
if (GET_ATTRIB(ctx->insn->opcode, attrib)) {
@ -365,6 +418,8 @@ static void analyze_packet(DisasContext *ctx)
mark_implicit_pred_writes(ctx);
mark_implicit_pred_reads(ctx);
}
ctx->need_commit = need_commit(ctx);
}
static void gen_start_packet(DisasContext *ctx)
@ -434,7 +489,8 @@ static void gen_start_packet(DisasContext *ctx)
}
/* Preload the predicated registers into hex_new_value[i] */
if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
if (ctx->need_commit &&
!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
while (i < TOTAL_PER_THREAD_REGS) {
tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]);
@ -544,6 +600,11 @@ static void gen_reg_writes(DisasContext *ctx)
{
int i;
/* Early exit if not needed */
if (!ctx->need_commit) {
return;
}
for (i = 0; i < ctx->reg_log_idx; i++) {
int reg_num = ctx->reg_log[i];
@ -922,6 +983,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
CPUState *cs)
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr);
uint32_t hex_flags = dcbase->tb->flags;
ctx->mem_idx = MMU_USER_IDX;
@ -930,6 +992,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
ctx->num_hvx_insns = 0;
ctx->branch_cond = TCG_COND_NEVER;
ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
ctx->short_circuit = hex_cpu->short_circuit;
}
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)

View File

@ -62,10 +62,12 @@ typedef struct DisasContext {
int qreg_log_idx;
DECLARE_BITMAP(qregs_read, NUM_QREGS);
bool pre_commit;
bool need_commit;
TCGCond branch_cond;
target_ulong branch_dest;
bool is_tight_loop;
bool need_pkt_has_store_s1;
bool short_circuit;
} DisasContext;
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)