2021-02-08 08:46:19 +03:00
|
|
|
/*
|
2023-03-07 05:58:19 +03:00
|
|
|
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
2021-02-08 08:46:19 +03:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define QEMU_GENERATE
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#include "cpu.h"
|
|
|
|
#include "tcg/tcg-op.h"
|
2021-09-30 22:29:00 +03:00
|
|
|
#include "tcg/tcg-op-gvec.h"
|
2021-02-08 08:46:19 +03:00
|
|
|
#include "exec/cpu_ldst.h"
|
|
|
|
#include "exec/log.h"
|
|
|
|
#include "internal.h"
|
|
|
|
#include "attribs.h"
|
|
|
|
#include "insn.h"
|
|
|
|
#include "decode.h"
|
|
|
|
#include "translate.h"
|
Hexagon (target/hexagon) Short-circuit packet register writes
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr. We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed. If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
We pass the ctx->need_commit to helpers when needed.
Finally, we can early-exit from gen_reg_writes during packet commit.
There are a few instructions whose semantics write to the result before
reading all the inputs. Therefore, the idef-parser generated code is
incompatible with short-circuit. We tell idef-parser to skip them.
For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.
Here's a simple example of the TCG generated for
0x004000b4: 0x7800c020 { R0 = #0x1 }
BEFORE:
---- 004000b4
movi_i32 new_r0,$0x1
mov_i32 r0,new_r0
AFTER:
---- 004000b4
movi_i32 r0,$0x1
This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
2023-04-28 02:00:02 +03:00
|
|
|
#include "genptr.h"
|
2021-02-08 08:46:19 +03:00
|
|
|
#include "printinsn.h"
|
|
|
|
|
2023-03-07 05:58:19 +03:00
|
|
|
#include "analyze_funcs_generated.c.inc"
|
|
|
|
|
|
|
|
typedef void (*AnalyzeInsn)(DisasContext *ctx);
|
|
|
|
static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = {
|
|
|
|
#define OPCODE(X) [X] = analyze_##X
|
|
|
|
#include "opcodes_def_generated.h.inc"
|
|
|
|
#undef OPCODE
|
|
|
|
};
|
|
|
|
|
2021-02-08 08:46:19 +03:00
|
|
|
TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
|
|
|
|
TCGv hex_pred[NUM_PREGS];
|
|
|
|
TCGv hex_this_PC;
|
|
|
|
TCGv hex_slot_cancelled;
|
|
|
|
TCGv hex_branch_taken;
|
|
|
|
TCGv hex_new_value[TOTAL_PER_THREAD_REGS];
|
|
|
|
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
|
|
|
|
TCGv hex_new_pred_value[NUM_PREGS];
|
|
|
|
TCGv hex_pred_written;
|
|
|
|
TCGv hex_store_addr[STORES_MAX];
|
|
|
|
TCGv hex_store_width[STORES_MAX];
|
|
|
|
TCGv hex_store_val32[STORES_MAX];
|
|
|
|
TCGv_i64 hex_store_val64[STORES_MAX];
|
|
|
|
TCGv hex_pkt_has_store_s1;
|
|
|
|
TCGv hex_dczero_addr;
|
|
|
|
TCGv hex_llsc_addr;
|
|
|
|
TCGv hex_llsc_val;
|
|
|
|
TCGv_i64 hex_llsc_val_i64;
|
2021-09-30 22:29:00 +03:00
|
|
|
TCGv hex_vstore_addr[VSTORES_MAX];
|
|
|
|
TCGv hex_vstore_size[VSTORES_MAX];
|
|
|
|
TCGv hex_vstore_pending[VSTORES_MAX];
|
2021-02-08 08:46:19 +03:00
|
|
|
|
|
|
|
/* Printable names of the predicate registers, indexed by predicate number */
static const char * const hexagon_prednames[] = {
    "p0",
    "p1",
    "p2",
    "p3",
};
|
|
|
|
|
2021-09-30 22:29:00 +03:00
|
|
|
/*
 * Return the offset within CPUHexagonState of the future_VRegs entry
 * associated with vector register @regnum.
 *
 * If @regnum already has an entry, its offset is returned.  Otherwise
 * @num consecutive entries are allocated for @regnum, @regnum + 1, ...
 * and the offset of the first new entry is returned.
 *
 * @alloc_ok: when false, the lookup must succeed; a miss is a fatal
 *            assertion failure.
 */
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
                             int num, bool alloc_ok)
{
    intptr_t offset;

    /* See if it is already allocated */
    for (int i = 0; i < ctx->future_vregs_idx; i++) {
        if (ctx->future_vregs_num[i] == regnum) {
            return offsetof(CPUHexagonState, future_VRegs[i]);
        }
    }

    g_assert(alloc_ok);
    /*
     * Check the capacity before recording the new registers so that an
     * overflow is caught before anything is written past the end of
     * future_vregs_num[].
     */
    g_assert(ctx->future_vregs_idx + num <= VECTOR_TEMPS_MAX);

    offset = offsetof(CPUHexagonState, future_VRegs[ctx->future_vregs_idx]);
    for (int i = 0; i < num; i++) {
        ctx->future_vregs_num[ctx->future_vregs_idx + i] = regnum++;
    }
    ctx->future_vregs_idx += num;
    return offset;
}
|
|
|
|
|
|
|
|
/*
 * Return the offset within CPUHexagonState of the tmp_VRegs entry
 * associated with vector register @regnum.
 *
 * If @regnum already has an entry, its offset is returned.  Otherwise
 * @num consecutive entries are allocated for @regnum, @regnum + 1, ...
 * and the offset of the first new entry is returned.
 *
 * @alloc_ok: when false, the lookup must succeed; a miss is a fatal
 *            assertion failure.
 */
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
                          int num, bool alloc_ok)
{
    intptr_t offset;

    /* See if it is already allocated */
    for (int i = 0; i < ctx->tmp_vregs_idx; i++) {
        if (ctx->tmp_vregs_num[i] == regnum) {
            return offsetof(CPUHexagonState, tmp_VRegs[i]);
        }
    }

    g_assert(alloc_ok);
    /*
     * Check the capacity before recording the new registers so that an
     * overflow is caught before anything is written past the end of
     * tmp_vregs_num[].
     */
    g_assert(ctx->tmp_vregs_idx + num <= VECTOR_TEMPS_MAX);

    offset = offsetof(CPUHexagonState, tmp_VRegs[ctx->tmp_vregs_idx]);
    for (int i = 0; i < num; i++) {
        ctx->tmp_vregs_num[ctx->tmp_vregs_idx + i] = regnum++;
    }
    ctx->tmp_vregs_idx += num;
    return offset;
}
|
|
|
|
|
2021-04-09 04:07:33 +03:00
|
|
|
static void gen_exception_raw(int excp)
|
2021-02-08 08:46:19 +03:00
|
|
|
{
|
2021-10-03 03:47:50 +03:00
|
|
|
gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
|
2021-02-08 08:46:19 +03:00
|
|
|
}
|
|
|
|
|
2021-04-09 04:07:33 +03:00
|
|
|
/*
 * Emit ops that add the packet, instruction and HVX instruction counts
 * recorded in @ctx to the corresponding QEMU counter registers.
 */
static void gen_exec_counters(DisasContext *ctx)
{
    TCGv pkt_cnt = hex_gpr[HEX_REG_QEMU_PKT_CNT];
    TCGv insn_cnt = hex_gpr[HEX_REG_QEMU_INSN_CNT];
    TCGv hvx_cnt = hex_gpr[HEX_REG_QEMU_HVX_CNT];

    tcg_gen_addi_tl(pkt_cnt, pkt_cnt, ctx->num_packets);
    tcg_gen_addi_tl(insn_cnt, insn_cnt, ctx->num_insns);
    tcg_gen_addi_tl(hvx_cnt, hvx_cnt, ctx->num_hvx_insns);
}
|
|
|
|
|
2022-11-08 19:29:05 +03:00
|
|
|
/* Ask the common translator whether goto_tb may be used to reach @dest */
static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
{
    bool allowed = translator_use_goto_tb(&ctx->base, dest);

    return allowed;
}
|
|
|
|
|
Hexagon (translate.c): avoid redundant PC updates on COF
When there is a conditional change of flow or an endloop instruction, we
preload HEX_REG_PC with ctx->next_PC at gen_start_packet(). Nonetheless,
we still generate TCG code to do this update again at gen_goto_tb() when
the condition for the COF is not met, thus producing redundant
instructions. This can be seen with the following packet:
0x004002e4: 0x5c20d000 { if (!P0) jump:t PC+0 }
Which generates this TCG code:
---- 004002e4
-> mov_i32 pc,$0x4002e8
and_i32 loc9,p0,$0x1
mov_i32 branch_taken,loc9
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x2
brcond_i32 branch_taken,$0x0,ne,$L1
goto_tb $0x0
mov_i32 pc,$0x4002e4
exit_tb $0x7fb0c36e5200
set_label $L1
goto_tb $0x1
-> mov_i32 pc,$0x4002e8
exit_tb $0x7fb0c36e5201
set_label $L0
exit_tb $0x7fb0c36e5203
Note that even after optimizations, the redundant PC update is still
present:
---- 004002e4
-> mov_i32 pc,$0x4002e8 sync: 0 dead: 0 1 pref=0xffff
mov_i32 branch_taken,$0x1 sync: 0 dead: 0 1 pref=0xffff
add_i32 pkt_cnt,pkt_cnt,$0x2 sync: 0 dead: 0 1 pref=0xffff
add_i32 insn_cnt,insn_cnt,$0x2 sync: 0 dead: 0 1 2 pref=0xffff
goto_tb $0x1
-> mov_i32 pc,$0x4002e8 sync: 0 dead: 0 1 pref=0xffff
exit_tb $0x7fb0c36e5201
set_label $L0
exit_tb $0x7fb0c36e5203
With this patch, the second redundant update is properly discarded.
Note that we need the additional "move_to_pc" flag instead of just
avoiding the update whenever `dest == ctx->next_PC`, as that could
potentially skip updates from a COF with met condition, whose
ctx->branch_dest just happens to be equal to ctx->next_PC.
Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
Message-Id: <fc059153c3f0526d97b7f13450c02b276b0908e1.1679519341.git.quic_mathbern@quicinc.com>
2023-03-23 00:17:10 +03:00
|
|
|
/*
 * Emit the ops that leave the current TB and continue at @dest.
 *
 * When use_goto_tb() allows it, the exit is emitted as goto_tb/exit_tb
 * using slot @idx; otherwise lookup_and_goto_ptr is emitted.
 *
 * @move_to_pc: when true, HEX_REG_PC is set to @dest before leaving.
 *              Callers pass false when the PC already holds @dest, which
 *              avoids emitting a redundant PC update.
 */
static void gen_goto_tb(DisasContext *ctx, int idx, target_ulong dest,
                        bool move_to_pc)
{
    bool chain = use_goto_tb(ctx, dest);

    if (chain) {
        tcg_gen_goto_tb(idx);
    }

    /* In the goto_tb case the PC update sits between goto_tb and exit_tb */
    if (move_to_pc) {
        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], dest);
    }

    if (chain) {
        tcg_gen_exit_tb(ctx->base.tb, idx);
    } else {
        tcg_gen_lookup_and_goto_ptr();
    }
}
|
|
|
|
|
2021-04-09 04:07:33 +03:00
|
|
|
/*
 * Emit the ops that end the current TB: update the execution counters,
 * then leave the TB according to the kind of change of flow recorded in
 * @ctx.  Marks the TB as DISAS_NORETURN.
 */
static void gen_end_tb(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    gen_exec_counters(ctx);

    if (ctx->branch_cond == TCG_COND_ALWAYS) {
        /* Unconditional branch to a known destination */
        gen_goto_tb(ctx, 0, ctx->branch_dest, true);
    } else if (ctx->branch_cond != TCG_COND_NEVER) {
        /*
         * Conditional branch: test hex_branch_taken against zero and
         * emit one exit for branch_dest and one for next_PC.
         */
        TCGLabel *other_exit = gen_new_label();

        tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0,
                           other_exit);
        gen_goto_tb(ctx, 0, ctx->branch_dest, true);
        gen_set_label(other_exit);
        gen_goto_tb(ctx, 1, ctx->next_PC, false);
    } else if (ctx->is_tight_loop &&
               pkt->insn[pkt->num_insns - 1].opcode == J2_endloop0) {
        /*
         * When we're in a tight loop, we defer the endloop0 processing
         * to take advantage of direct block chaining
         */
        TCGLabel *loop_done = gen_new_label();

        tcg_gen_brcondi_tl(TCG_COND_LEU, hex_gpr[HEX_REG_LC0], 1, loop_done);
        tcg_gen_subi_tl(hex_gpr[HEX_REG_LC0], hex_gpr[HEX_REG_LC0], 1);
        /* Loop back to the start of this TB */
        gen_goto_tb(ctx, 0, ctx->base.tb->pc, true);
        gen_set_label(loop_done);
        gen_goto_tb(ctx, 1, ctx->next_PC, false);
    } else {
        tcg_gen_lookup_and_goto_ptr();
    }

    ctx->base.is_jmp = DISAS_NORETURN;
}
|
|
|
|
|
|
|
|
/*
 * End the TB by raising exception @excp: update the execution counters,
 * set the PC to ctx->next_PC, emit the exception and mark the TB as
 * DISAS_NORETURN.
 */
static void gen_exception_end_tb(DisasContext *ctx, int excp)
{
    TCGv pc = hex_gpr[HEX_REG_PC];

    gen_exec_counters(ctx);
    tcg_gen_movi_tl(pc, ctx->next_PC);
    gen_exception_raw(excp);

    ctx->base.is_jmp = DISAS_NORETURN;
}
|
|
|
|
|
|
|
|
#define PACKET_BUFFER_LEN 1028
|
|
|
|
static void print_pkt(Packet *pkt)
|
|
|
|
{
|
|
|
|
GString *buf = g_string_sized_new(PACKET_BUFFER_LEN);
|
|
|
|
snprint_a_pkt_debug(buf, pkt);
|
|
|
|
HEX_DEBUG_LOG("%s", buf->str);
|
|
|
|
g_string_free(buf, true);
|
|
|
|
}
|
2021-04-09 04:07:44 +03:00
|
|
|
#define HEX_DEBUG_PRINT_PKT(pkt) \
|
|
|
|
do { \
|
|
|
|
if (HEX_DEBUG) { \
|
|
|
|
print_pkt(pkt); \
|
|
|
|
} \
|
|
|
|
} while (0)
|
2021-02-08 08:46:19 +03:00
|
|
|
|
|
|
|
/*
 * Read the instruction words of the packet that starts at
 * ctx->base.pc_next into @words.
 *
 * @words must have room for PACKET_WORDS_MAX entries; it is zeroed first.
 * Words are read until is_packet_end() reports the end of the packet or
 * PACKET_WORDS_MAX words have been read.
 *
 * Returns the number of words in the packet, or 0 if no end of packet
 * was found.
 */
static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
                             uint32_t words[])
{
    bool found_end = false;
    int nwords = 0;
    int max_words;

    memset(words, 0, PACKET_WORDS_MAX * sizeof(uint32_t));
    while (!found_end && nwords < PACKET_WORDS_MAX) {
        words[nwords] =
            translator_ldl(env, &ctx->base,
                           ctx->base.pc_next + nwords * sizeof(uint32_t));
        found_end = is_packet_end(words[nwords]);
        nwords++;
    }
    if (!found_end) {
        /* Read too many words without finding the end */
        return 0;
    }

    /* Check for page boundary crossing */
    max_words = -(ctx->base.pc_next | TARGET_PAGE_MASK) / sizeof(uint32_t);
    if (nwords > max_words) {
        /* We can only cross a page boundary at the beginning of a TB */
        g_assert(ctx->base.num_insns == 1);
    }

    HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx->base.pc_next);
    HEX_DEBUG_LOG(" words = { ");
    for (int w = 0; w < nwords; w++) {
        HEX_DEBUG_LOG("0x%x, ", words[w]);
    }
    HEX_DEBUG_LOG("}\n");

    return nwords;
}
|
|
|
|
|
Hexagon (target/hexagon) Short-circuit packet register writes
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr. We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed. If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
We pass the ctx->need_commit to helpers when needed.
Finally, we can early-exit from gen_reg_writes during packet commit.
There are a few instructions whose semantics write to the result before
reading all the inputs. Therefore, the idef-parser generated code is
incompatible with short-circuit. We tell idef-parser to skip them.
For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.
Here's a simple example of the TCG generated for
0x004000b4: 0x7800c020 { R0 = #0x1 }
BEFORE:
---- 004000b4
movi_i32 new_r0,$0x1
mov_i32 r0,new_r0
AFTER:
---- 004000b4
movi_i32 r0,$0x1
This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
2023-04-28 02:00:02 +03:00
|
|
|
/* Return true if any instruction in @pkt has attribute @attrib */
static bool check_for_attrib(Packet *pkt, int attrib)
{
    bool found = false;

    for (int i = 0; !found && i < pkt->num_insns; i++) {
        if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) {
            found = true;
        }
    }
    return found;
}
|
|
|
|
|
|
|
|
/*
 * Return true if @pkt contains an instruction that is both conditionally
 * executed and a scalar store.
 */
static bool need_slot_cancelled(Packet *pkt)
{
    /* We only need slot_cancelled for conditional store instructions */
    for (int i = 0; i < pkt->num_insns; i++) {
        uint16_t opcode = pkt->insn[i].opcode;

        if (!GET_ATTRIB(opcode, A_CONDEXEC)) {
            continue;
        }
        if (GET_ATTRIB(opcode, A_SCALAR_STORE)) {
            return true;
        }
    }
    return false;
}
|
|
|
|
|
2022-11-08 19:29:01 +03:00
|
|
|
/*
 * Return true if the current packet contains a conditional change of
 * flow or an instruction that ends hardware loop 0 or 1.
 */
static bool need_next_PC(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;

    /* Check for conditional control flow or HW loop end */
    for (int i = 0; i < pkt->num_insns; i++) {
        uint16_t opcode = pkt->insn[i].opcode;

        if ((GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) ||
            GET_ATTRIB(opcode, A_HWLOOP0_END) ||
            GET_ATTRIB(opcode, A_HWLOOP1_END)) {
            return true;
        }
    }
    return false;
}
|
|
|
|
|
2023-03-07 05:58:19 +03:00
|
|
|
/*
|
|
|
|
* The opcode_analyze functions mark most of the writes in a packet
|
|
|
|
* However, there are some implicit writes marked as attributes
|
|
|
|
* of the applicable instructions.
|
|
|
|
*/
|
|
|
|
static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum)
|
|
|
|
{
|
|
|
|
uint16_t opcode = ctx->insn->opcode;
|
|
|
|
if (GET_ATTRIB(opcode, attrib)) {
|
|
|
|
/*
|
|
|
|
* USR is used to set overflow and FP exceptions,
|
|
|
|
* so treat it as conditional
|
|
|
|
*/
|
|
|
|
bool is_predicated = GET_ATTRIB(opcode, A_CONDEXEC) ||
|
|
|
|
rnum == HEX_REG_USR;
|
|
|
|
|
|
|
|
/* LC0/LC1 is conditionally written by endloop instructions */
|
|
|
|
if ((rnum == HEX_REG_LC0 || rnum == HEX_REG_LC1) &&
|
|
|
|
(opcode == J2_endloop0 ||
|
|
|
|
opcode == J2_endloop1 ||
|
|
|
|
opcode == J2_endloop01)) {
|
|
|
|
is_predicated = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx_log_reg_write(ctx, rnum, is_predicated);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Log every implicit general-register write of the current instruction.
 *
 * Each table entry pairs an instruction attribute with the register it
 * implies a write to.  Entries are visited in table order, so registers
 * are logged in the same sequence as listed here.  USR appears twice:
 * once for the explicit attribute and once for floating point operations.
 */
static void mark_implicit_reg_writes(DisasContext *ctx)
{
    static const struct {
        int attrib;
        int rnum;
    } implicit_writes[] = {
        { A_IMPLICIT_WRITES_FP,  HEX_REG_FP  },
        { A_IMPLICIT_WRITES_SP,  HEX_REG_SP  },
        { A_IMPLICIT_WRITES_LR,  HEX_REG_LR  },
        { A_IMPLICIT_WRITES_LC0, HEX_REG_LC0 },
        { A_IMPLICIT_WRITES_SA0, HEX_REG_SA0 },
        { A_IMPLICIT_WRITES_LC1, HEX_REG_LC1 },
        { A_IMPLICIT_WRITES_SA1, HEX_REG_SA1 },
        { A_IMPLICIT_WRITES_USR, HEX_REG_USR },
        { A_FPOP,                HEX_REG_USR },
    };
    const int count = sizeof(implicit_writes) / sizeof(implicit_writes[0]);

    for (int k = 0; k < count; k++) {
        mark_implicit_reg_write(ctx, implicit_writes[k].attrib,
                                implicit_writes[k].rnum);
    }
}
|
|
|
|
|
|
|
|
/*
 * Log a write to predicate register @pnum when the current instruction
 * (ctx->insn) carries @attrib; otherwise do nothing.
 */
static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum)
{
    uint16_t op = ctx->insn->opcode;

    if (!GET_ATTRIB(op, attrib)) {
        return;
    }
    ctx_log_pred_write(ctx, pnum);
}
|
|
|
|
|
|
|
|
/*
 * Log the implicit predicate writes of the current instruction.
 * The table index is the predicate register number (P0..P3).
 */
static void mark_implicit_pred_writes(DisasContext *ctx)
{
    static const int write_attribs[] = {
        A_IMPLICIT_WRITES_P0,
        A_IMPLICIT_WRITES_P1,
        A_IMPLICIT_WRITES_P2,
        A_IMPLICIT_WRITES_P3,
    };
    const int count = sizeof(write_attribs) / sizeof(write_attribs[0]);

    for (int pnum = 0; pnum < count; pnum++) {
        mark_implicit_pred_write(ctx, write_attribs[pnum], pnum);
    }
}
|
|
|
|
|
Hexagon (target/hexagon) Short-circuit packet register writes
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr. We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed. If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
We pass the ctx->need_commit to helpers when needed.
Finally, we can early-exit from gen_reg_writes during packet commit.
There are a few instructions whose semantics write to the result before
reading all the inputs. Therefore, the idef-parser generated code is
incompatible with short-circuit. We tell idef-parser to skip them.
For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.
Here's a simple example of the TCG generated for
0x004000b4: 0x7800c020 { R0 = #0x1 }
BEFORE:
---- 004000b4
movi_i32 new_r0,$0x1
mov_i32 r0,new_r0
AFTER:
---- 004000b4
movi_i32 r0,$0x1
This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
2023-04-28 02:00:02 +03:00
|
|
|
/*
 * A packet is treated as able to raise an exception when
 * check_for_attrib() reports a load or a store in it.
 */
static bool pkt_raises_exception(Packet *pkt)
{
    return check_for_attrib(pkt, A_LOAD) || check_for_attrib(pkt, A_STORE);
}
|
|
|
|
|
|
|
|
/*
 * Decide whether the end-of-packet register commit is required.
 *
 * Returns true (commit needed) when any of the following holds:
 *   - the short-circuit property is off
 *   - the packet can raise an exception
 *   - a logged register write targets a register with immutability flags
 *   - the packet contains a floating point operation
 *   - a multi-instruction packet both reads and writes the same general
 *     or predicate register
 * Otherwise returns false.
 */
static bool need_commit(DisasContext *ctx)
{
    Packet *packet = ctx->pkt;
    int k;

    /*
     * If the short-circuit property is set to false, we'll always do the
     * commit.  The same goes for packets that can raise an exception.
     */
    if (!ctx->short_circuit || pkt_raises_exception(packet)) {
        return true;
    }

    /* Registers with immutability flags require new_value */
    for (k = 0; k < ctx->reg_log_idx; k++) {
        if (reg_immut_masks[ctx->reg_log[k]]) {
            return true;
        }
    }

    /* Floating point instructions are hard-coded to use new_value */
    if (check_for_attrib(packet, A_FPOP)) {
        return true;
    }

    /* No further checks are made for single-instruction packets */
    if (packet->num_insns == 1) {
        return false;
    }

    /* Check for overlap between register reads and writes */
    for (k = 0; k < ctx->reg_log_idx; k++) {
        if (test_bit(ctx->reg_log[k], ctx->regs_read)) {
            return true;
        }
    }

    /* Check for overlap between predicate reads and writes */
    for (k = 0; k < ctx->preg_log_idx; k++) {
        if (test_bit(ctx->preg_log[k], ctx->pregs_read)) {
            return true;
        }
    }

    return false;
}
|
|
|
|
|
2023-04-28 02:00:01 +03:00
|
|
|
/*
 * Log a read of predicate register @pnum when the current instruction
 * (ctx->insn) carries @attrib; otherwise do nothing.
 */
static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum)
{
    uint16_t op = ctx->insn->opcode;

    if (!GET_ATTRIB(op, attrib)) {
        return;
    }
    ctx_log_pred_read(ctx, pnum);
}
|
|
|
|
|
|
|
|
/*
 * Log the implicit predicate reads of the current instruction.
 *
 * Each predicate register Pn is checked against its own
 * A_IMPLICIT_READS_Pn attribute, mirroring mark_implicit_pred_writes().
 * (P2 was previously tested with the P3 attribute, so an implicit P2 read
 * was never logged and an implicit P3 read was also logged as a P2 read.)
 */
static void mark_implicit_pred_reads(DisasContext *ctx)
{
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0);
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1);
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P2, 2);
    mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3);
}
|
|
|
|
|
2023-03-07 05:58:19 +03:00
|
|
|
/*
 * Analysis pass over the packet, run before code generation.
 *
 * For every instruction: make it the current one (ctx->insn), run its
 * opcode_analyze hook when one exists, then log its implicit register
 * writes, implicit predicate writes and implicit predicate reads.
 * Once all instructions are analyzed, record in ctx->need_commit whether
 * the end-of-packet commit is required.
 */
static void analyze_packet(DisasContext *ctx)
{
    Packet *packet = ctx->pkt;
    int idx;

    ctx->need_pkt_has_store_s1 = false;

    for (idx = 0; idx < packet->num_insns; idx++) {
        Insn *cur = &packet->insn[idx];

        ctx->insn = cur;
        if (opcode_analyze[cur->opcode]) {
            opcode_analyze[cur->opcode](ctx);
        }
        mark_implicit_reg_writes(ctx);
        mark_implicit_pred_writes(ctx);
        mark_implicit_pred_reads(ctx);
    }

    ctx->need_commit = need_commit(ctx);
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Prepare for translating one packet.
 *
 * Steps, in order:
 *   1. Reset the per-packet bookkeeping in the DisasContext.
 *   2. Run the analysis pass (analyze_packet).
 *   3. Emit the TCG ops that initialize runtime state for the packet:
 *      pkt_has_store_s1, debug hooks, slot_cancelled, branch_taken, PC.
 *   4. Preload the "new value" copies of every predicated destination
 *      (general registers, predicate registers, HVX future/tmp vectors)
 *      from the current register contents.
 */
static void gen_start_packet(DisasContext *ctx)
{
    Packet *packet = ctx->pkt;
    target_ulong following_pc =
        ctx->base.pc_next + packet->encod_pkt_size_in_bytes;
    int n;

    /* Clear out the disassembly context */
    ctx->next_PC = following_pc;
    ctx->reg_log_idx = 0;
    bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
    bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
    bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
    ctx->preg_log_idx = 0;
    bitmap_zero(ctx->pregs_written, NUM_PREGS);
    bitmap_zero(ctx->pregs_read, NUM_PREGS);
    ctx->future_vregs_idx = 0;
    ctx->tmp_vregs_idx = 0;
    ctx->vreg_log_idx = 0;
    bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
    bitmap_zero(ctx->vregs_updated, NUM_VREGS);
    bitmap_zero(ctx->vregs_select, NUM_VREGS);
    bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
    bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
    bitmap_zero(ctx->vregs_read, NUM_VREGS);
    bitmap_zero(ctx->qregs_read, NUM_QREGS);
    ctx->qreg_log_idx = 0;
    for (n = 0; n < STORES_MAX; n++) {
        ctx->store_width[n] = 0;
    }
    ctx->s1_store_processed = false;
    ctx->pre_commit = true;

    analyze_packet(ctx);

    if (ctx->need_pkt_has_store_s1) {
        tcg_gen_movi_tl(hex_pkt_has_store_s1, packet->pkt_has_store_s1);
    }

    /*
     * pregs_written is used both in the analyze phase as well as the code
     * gen phase, so clear it again.
     */
    bitmap_zero(ctx->pregs_written, NUM_PREGS);

    if (HEX_DEBUG) {
        /* Handy place to set a breakpoint before the packet executes */
        gen_helper_debug_start_packet(cpu_env);
        tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next);
    }

    /* Initialize the runtime state for packet semantics */
    if (need_slot_cancelled(packet)) {
        tcg_gen_movi_tl(hex_slot_cancelled, 0);
    }
    if (packet->pkt_has_cof) {
        if (packet->pkt_has_multi_cof) {
            tcg_gen_movi_tl(hex_branch_taken, 0);
        }
        if (need_next_PC(ctx)) {
            tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], following_pc);
        }
    }
    if (HEX_DEBUG) {
        tcg_gen_movi_tl(hex_pred_written, 0);
    }

    /* Preload the predicated registers into hex_new_value[i] */
    if (ctx->need_commit &&
        !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) {
        for (n = find_first_bit(ctx->predicated_regs,
                                TOTAL_PER_THREAD_REGS);
             n < TOTAL_PER_THREAD_REGS;
             n = find_next_bit(ctx->predicated_regs,
                               TOTAL_PER_THREAD_REGS, n + 1)) {
            tcg_gen_mov_tl(hex_new_value[n], hex_gpr[n]);
        }
    }

    /*
     * Preload the predicated pred registers into hex_new_pred_value[pred_num]
     * Only endloop instructions conditionally write to pred registers
     */
    if (ctx->need_commit && packet->pkt_has_endloop) {
        for (n = 0; n < ctx->preg_log_idx; n++) {
            int pnum = ctx->preg_log[n];

            tcg_gen_mov_tl(hex_new_pred_value[pnum], hex_pred[pnum]);
        }
    }

    /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
    if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) {
        for (n = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS);
             n < NUM_VREGS;
             n = find_next_bit(ctx->predicated_future_vregs, NUM_VREGS,
                               n + 1)) {
            const intptr_t dst_off = ctx_future_vreg_off(ctx, n, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[n]);

            tcg_gen_gvec_mov(MO_64, dst_off, src_off,
                             sizeof(MMVector), sizeof(MMVector));
        }
    }
    if (!bitmap_empty(ctx->predicated_tmp_vregs, NUM_VREGS)) {
        for (n = find_first_bit(ctx->predicated_tmp_vregs, NUM_VREGS);
             n < NUM_VREGS;
             n = find_next_bit(ctx->predicated_tmp_vregs, NUM_VREGS,
                               n + 1)) {
            const intptr_t dst_off = ctx_tmp_vreg_off(ctx, n, 1, true);
            intptr_t src_off = offsetof(CPUHexagonState, VRegs[n]);

            tcg_gen_gvec_mov(MO_64, dst_off, src_off,
                             sizeof(MMVector), sizeof(MMVector));
        }
    }
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Report whether the current instruction (ctx->insn) is the store half of
 * a gather: it must have the A_CVI_NEW attribute with its new-value
 * producer in slot 1, and the packet must contain an A_CVI_GATHER
 * instruction in slot 1.
 */
bool is_gather_store_insn(DisasContext *ctx)
{
    Packet *packet = ctx->pkt;
    Insn *cur = ctx->insn;
    int idx;

    if (!GET_ATTRIB(cur->opcode, A_CVI_NEW) ||
        cur->new_value_producer_slot != 1) {
        return false;
    }

    /* Look for gather instruction */
    for (idx = 0; idx < packet->num_insns; idx++) {
        Insn *candidate = &packet->insn[idx];

        if (GET_ATTRIB(candidate->opcode, A_CVI_GATHER) &&
            candidate->slot == 1) {
            return true;
        }
    }
    return false;
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Record the width in bytes of the scalar store performed by the current
 * instruction in ctx->store_width[slot].
 *
 * Nothing is recorded for instructions that are not scalar stores or that
 * are marked A_MEMSIZE_0B.  The width is assembled from the A_MEMSIZE_*
 * attributes and asserted (in debug builds) to be a power of two.
 */
static void mark_store_width(DisasContext *ctx)
{
    static const struct {
        int attrib;
        uint8_t bytes;
    } memsize[] = {
        { A_MEMSIZE_1B, 1 },
        { A_MEMSIZE_2B, 2 },
        { A_MEMSIZE_4B, 4 },
        { A_MEMSIZE_8B, 8 },
    };
    const int count = sizeof(memsize) / sizeof(memsize[0]);
    uint16_t op = ctx->insn->opcode;
    uint32_t slot_num = ctx->insn->slot;
    uint8_t width = 0;

    if (!GET_ATTRIB(op, A_SCALAR_STORE) || GET_ATTRIB(op, A_MEMSIZE_0B)) {
        return;
    }

    for (int k = 0; k < count; k++) {
        if (GET_ATTRIB(op, memsize[k].attrib)) {
            width |= memsize[k].bytes;
        }
    }
    tcg_debug_assert(is_power_of_2(width));
    ctx->store_width[slot_num] = width;
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Generate code for the current instruction (ctx->insn).
 *
 * When the instruction has a generate callback, invoke it and then record
 * the store width.  Otherwise end the TB with HEX_EXCP_INVALID_OPCODE.
 */
static void gen_insn(DisasContext *ctx)
{
    if (!ctx->insn->generate) {
        gen_exception_end_tb(ctx, HEX_EXCP_INVALID_OPCODE);
        return;
    }

    ctx->insn->generate(ctx);
    mark_store_width(ctx);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Helpers for generating the packet commit
|
|
|
|
*/
|
|
|
|
/*
 * Emit the end-of-packet commit of general registers: for every register
 * in the write log, copy hex_new_value[reg] into hex_gpr[reg].
 * Nothing is emitted when ctx->need_commit is false.
 */
static void gen_reg_writes(DisasContext *ctx)
{
    /* Early exit if not needed */
    if (!ctx->need_commit) {
        return;
    }

    for (int idx = 0; idx < ctx->reg_log_idx; idx++) {
        int rnum = ctx->reg_log[idx];

        tcg_gen_mov_tl(hex_gpr[rnum], hex_new_value[rnum]);

        /*
         * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
         * If we write to SA0, we have to turn off tight loop handling.
         */
        if (rnum == HEX_REG_SA0) {
            ctx->is_tight_loop = false;
        }
    }
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Emit the end-of-packet commit of predicate registers: for every
 * predicate in the write log, copy hex_new_pred_value[p] into hex_pred[p].
 */
static void gen_pred_writes(DisasContext *ctx)
{
    int idx;

    /* Only do the work when a commit is needed and the log is not empty */
    if (ctx->need_commit && ctx->preg_log_idx) {
        for (idx = 0; idx < ctx->preg_log_idx; idx++) {
            int pnum = ctx->preg_log[idx];

            tcg_gen_mov_tl(hex_pred[pnum], hex_new_pred_value[pnum]);
        }
    }
}
|
|
|
|
|
2021-03-15 07:53:04 +03:00
|
|
|
/*
 * In HEX_DEBUG builds, emit a call to the debug_check_store_width helper,
 * passing the slot number and the store width recorded for that slot in
 * the DisasContext.  Emits nothing when HEX_DEBUG is off.
 */
static void gen_check_store_width(DisasContext *ctx, int slot_num)
{
    TCGv slot_arg;
    TCGv width_arg;

    if (!HEX_DEBUG) {
        return;
    }

    slot_arg = tcg_constant_tl(slot_num);
    width_arg = tcg_constant_tl(ctx->store_width[slot_num]);
    gen_helper_debug_check_store_width(cpu_env, slot_arg, width_arg);
}
|
2021-02-08 08:46:19 +03:00
|
|
|
|
|
|
|
/*
 * Report whether the first instruction found in slot @slot_num of @pkt is
 * conditionally executed (A_CONDEXEC).
 *
 * The packet must contain an instruction in that slot; otherwise this
 * aborts via g_assert_not_reached().
 */
static bool slot_is_predicated(Packet *pkt, int slot_num)
{
    int idx = 0;

    while (idx < pkt->num_insns) {
        if (pkt->insn[idx].slot == slot_num) {
            return GET_ATTRIB(pkt->insn[idx].opcode, A_CONDEXEC);
        }
        idx++;
    }

    /* If we get to here, we didn't find an instruction in the requested slot */
    g_assert_not_reached();
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Emit the memory write for the scalar store logged in slot @slot_num.
 *
 * When the instruction in that slot is predicated, the emitted store is
 * skipped at runtime if the slot's bit is set in hex_slot_cancelled.
 *
 * When the store width was recorded in the DisasContext (1, 2, 4 or 8),
 * a direct qemu_st op is emitted from hex_store_addr/hex_store_val*.
 * Otherwise the commit_store helper is called with the slot number.
 *
 * A second request for slot 1 is ignored once a store has been processed
 * (see CHECK_NOSHUF in macros.h).
 */
void process_store(DisasContext *ctx, int slot_num)
{
    bool is_predicated = slot_is_predicated(ctx->pkt, slot_num);
    TCGLabel *label_end = NULL;

    /*
     * We may have already processed this store
     * See CHECK_NOSHUF in macros.h
     */
    if (slot_num == 1 && ctx->s1_store_processed) {
        return;
    }
    ctx->s1_store_processed = true;

    if (is_predicated) {
        TCGv cancelled = tcg_temp_new();
        label_end = gen_new_label();

        /* Don't do anything if the slot was cancelled */
        tcg_gen_extract_tl(cancelled, hex_slot_cancelled, slot_num, 1);
        tcg_gen_brcondi_tl(TCG_COND_NE, cancelled, 0, label_end);
    }

    /*
     * If we know the width from the DisasContext, we can
     * generate much cleaner code.
     * Unfortunately, not all instructions execute the fSTORE
     * macro during code generation.  Anything that uses the
     * generic helper will have this problem.  Instructions
     * that use fWRAP to generate proper TCG code will be OK.
     *
     * The address is taken straight from hex_store_addr[slot_num];
     * no intermediate copy is needed.
     */
    switch (ctx->store_width[slot_num]) {
    case 1:
        gen_check_store_width(ctx, slot_num);
        tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                           hex_store_addr[slot_num],
                           ctx->mem_idx, MO_UB);
        break;
    case 2:
        gen_check_store_width(ctx, slot_num);
        tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                           hex_store_addr[slot_num],
                           ctx->mem_idx, MO_TEUW);
        break;
    case 4:
        gen_check_store_width(ctx, slot_num);
        tcg_gen_qemu_st_tl(hex_store_val32[slot_num],
                           hex_store_addr[slot_num],
                           ctx->mem_idx, MO_TEUL);
        break;
    case 8:
        gen_check_store_width(ctx, slot_num);
        tcg_gen_qemu_st_i64(hex_store_val64[slot_num],
                            hex_store_addr[slot_num],
                            ctx->mem_idx, MO_TEUQ);
        break;
    default:
        {
            /*
             * If we get to here, we don't know the width at
             * TCG generation time, we'll use a helper to
             * avoid branching based on the width at runtime.
             */
            TCGv slot = tcg_constant_tl(slot_num);
            gen_helper_commit_store(cpu_env, slot);
        }
    }

    if (is_predicated) {
        gen_set_label(label_end);
    }
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Emit the scalar stores logged for the packet.
 *
 * When a packet has two stores, the hardware processes
 * slot 1 and then slot 0.  This will be important when
 * the memory accesses overlap.
 *
 * A packet with a scalar store must not also contain dczeroa (asserted).
 */
static void process_store_log(DisasContext *ctx)
{
    Packet *packet = ctx->pkt;

    for (int slot = 1; slot >= 0; slot--) {
        bool has_store = slot ? packet->pkt_has_store_s1
                              : packet->pkt_has_store_s0;

        if (has_store) {
            g_assert(!packet->pkt_has_dczeroa);
            process_store(ctx, slot);
        }
    }
}
|
|
|
|
|
|
|
|
/* Zero out a 32-bit cache line */
|
2022-11-08 19:28:56 +03:00
|
|
|
static void process_dczeroa(DisasContext *ctx)
|
2021-02-08 08:46:19 +03:00
|
|
|
{
|
2022-11-08 19:28:56 +03:00
|
|
|
if (ctx->pkt->pkt_has_dczeroa) {
|
2021-02-08 08:46:19 +03:00
|
|
|
/* Store 32 bytes of zero starting at (addr & ~0x1f) */
|
|
|
|
TCGv addr = tcg_temp_new();
|
2021-10-03 03:47:50 +03:00
|
|
|
TCGv_i64 zero = tcg_constant_i64(0);
|
2021-02-08 08:46:19 +03:00
|
|
|
|
|
|
|
tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f);
|
2023-05-02 16:57:35 +03:00
|
|
|
tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
|
2021-02-08 08:46:19 +03:00
|
|
|
tcg_gen_addi_tl(addr, addr, 8);
|
2023-05-02 16:57:35 +03:00
|
|
|
tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
|
2021-02-08 08:46:19 +03:00
|
|
|
tcg_gen_addi_tl(addr, addr, 8);
|
2023-05-02 16:57:35 +03:00
|
|
|
tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
|
2021-02-08 08:46:19 +03:00
|
|
|
tcg_gen_addi_tl(addr, addr, 8);
|
2023-05-02 16:57:35 +03:00
|
|
|
tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ);
|
2021-02-08 08:46:19 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-30 22:29:00 +03:00
|
|
|
/*
 * Report whether any single instruction in @pkt carries both the A_CVI
 * and the A_STORE attributes.
 */
static bool pkt_has_hvx_store(Packet *pkt)
{
    int idx = 0;

    while (idx < pkt->num_insns) {
        int op = pkt->insn[idx].opcode;

        if (GET_ATTRIB(op, A_CVI) && GET_ATTRIB(op, A_STORE)) {
            return true;
        }
        idx++;
    }
    return false;
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Emit the end-of-packet commit for HVX state: copy the staged vector and
 * Q register values into the architectural registers, then commit any HVX
 * stores through the commit_hvx_stores helper.
 */
static void gen_commit_hvx(DisasContext *ctx)
{
    const size_t vec_bytes = sizeof(MMVector);
    const size_t qreg_bytes = sizeof(MMQReg);
    int idx;

    /*
     * Vector registers, equivalent to:
     *
     *    for (i = 0; i < ctx->vreg_log_idx; i++) {
     *        int rnum = ctx->vreg_log[i];
     *        env->VRegs[rnum] = env->future_VRegs[rnum];
     *    }
     */
    for (idx = 0; idx < ctx->vreg_log_idx; idx++) {
        int vnum = ctx->vreg_log[idx];
        intptr_t dst_off = offsetof(CPUHexagonState, VRegs[vnum]);
        intptr_t src_off = ctx_future_vreg_off(ctx, vnum, 1, false);

        tcg_gen_gvec_mov(MO_64, dst_off, src_off, vec_bytes, vec_bytes);
    }

    /*
     * Q registers, equivalent to:
     *
     *    for (i = 0; i < ctx->qreg_log_idx; i++) {
     *        int rnum = ctx->qreg_log[i];
     *        env->QRegs[rnum] = env->future_QRegs[rnum];
     *    }
     */
    for (idx = 0; idx < ctx->qreg_log_idx; idx++) {
        int qnum = ctx->qreg_log[idx];
        intptr_t dst_off = offsetof(CPUHexagonState, QRegs[qnum]);
        intptr_t src_off = offsetof(CPUHexagonState, future_QRegs[qnum]);

        tcg_gen_gvec_mov(MO_64, dst_off, src_off, qreg_bytes, qreg_bytes);
    }

    if (pkt_has_hvx_store(ctx->pkt)) {
        gen_helper_commit_hvx_stores(cpu_env);
    }
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Add this packet to the translation-time execution counters in the
 * DisasContext.
 *
 * One packet is always counted.  An instruction counts as "real" unless
 * it is an endloop, a part1 sub-instruction, or has the A_IT_NOP
 * attribute.  Every instruction with the A_CVI attribute counts as HVX.
 */
static void update_exec_counters(DisasContext *ctx)
{
    const Packet *packet = ctx->pkt;
    int real_count = 0;
    int hvx_count = 0;
    int idx;

    for (idx = 0; idx < packet->num_insns; idx++) {
        const Insn *cur = &packet->insn[idx];
        bool skipped = cur->is_endloop ||
                       cur->part1 ||
                       GET_ATTRIB(cur->opcode, A_IT_NOP);

        if (!skipped) {
            real_count++;
        }
        if (GET_ATTRIB(cur->opcode, A_CVI)) {
            hvx_count++;
        }
    }

    ctx->num_packets++;
    ctx->num_insns += real_count;
    ctx->num_hvx_insns += hvx_count;
}
|
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Generate the end-of-packet commit for ctx->pkt.
 *
 * Store handling comes first.  When a packet holds more than one store,
 * all of them must be known to be OK before the rest of the commit is
 * generated:
 *   - dczeroa has to be the only store operation in its packet, so it is
 *     processed straight away.
 *   - An HVX store may be accompanied by a scalar store in slot 0 or
 *     slot 1; a mask tells the probe helper which work to do.
 *   - With two scalar stores only slot 0 is probed, because
 *     process_store_log executes the slot 1 store first.
 *   - A packet with only one store is never probed, which is why
 *     process_store_log is called before anything else in the commit.
 *
 * After the stores come gen_reg_writes, gen_pred_writes, gen_commit_hvx
 * (when pkt_has_hvx is set), the exec counters, the HEX_DEBUG hook, the
 * packet's vhist_insn (when present) and gen_end_tb (when pkt_has_cof is
 * set).
 */
static void gen_commit_packet(DisasContext *ctx)
{
    Packet *pkt = ctx->pkt;
    bool st0 = pkt->pkt_has_store_s0;
    /* Slot 1 only counts while ctx->s1_store_processed is still clear */
    bool st1 = pkt->pkt_has_store_s1 && !ctx->s1_store_processed;
    bool hvx_st = pkt_has_hvx_store(pkt);

    if (pkt->pkt_has_dczeroa) {
        /*
         * dczeroa is the store in slot 0; neither a slot 1 store nor an
         * HVX store may share the packet with it.
         */
        g_assert(!st1 && !hvx_st);
        process_dczeroa(ctx);
    } else if (hvx_st && !st0 && !st1) {
        /* HVX stores only: the helper just takes the MMU index */
        gen_helper_probe_hvx_stores(cpu_env, tcg_constant_tl(ctx->mem_idx));
    } else if (hvx_st) {
        /* HVX and scalar stores together: describe the work in a mask */
        int probe = 0;

        if (st0) {
            probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_HVX_STORES,
                               HAS_ST0, 1);
            if (slot_is_predicated(pkt, 0)) {
                probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_HVX_STORES,
                                   S0_IS_PRED, 1);
            }
        }
        if (st1) {
            probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_HVX_STORES,
                               HAS_ST1, 1);
            if (slot_is_predicated(pkt, 1)) {
                probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_HVX_STORES,
                                   S1_IS_PRED, 1);
            }
        }
        probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_HVX_STORES,
                           HAS_HVX_STORES, 1);
        probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,
                           ctx->mem_idx);
        gen_helper_probe_pkt_scalar_hvx_stores(cpu_env,
                                               tcg_constant_tl(probe));
    } else if (st0 && st1) {
        /*
         * process_store_log will execute the slot 1 store first,
         * so only the store in slot 0 has to be probed.
         */
        int probe = 0;

        probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX,
                           ctx->mem_idx);
        if (slot_is_predicated(pkt, 0)) {
            probe = FIELD_DP32(probe, PROBE_PKT_SCALAR_STORE_S0,
                               IS_PREDICATED, 1);
        }
        gen_helper_probe_pkt_scalar_store_s0(cpu_env,
                                             tcg_constant_tl(probe));
    }

    process_store_log(ctx);

    gen_reg_writes(ctx);
    gen_pred_writes(ctx);
    if (pkt->pkt_has_hvx) {
        gen_commit_hvx(ctx);
    }
    update_exec_counters(ctx);

    if (HEX_DEBUG) {
        /* A dczeroa packet reports no scalar stores to the debug helper */
        bool report_stores = !pkt->pkt_has_dczeroa;
        TCGv dbg_st0 = tcg_constant_tl(pkt->pkt_has_store_s0 && report_stores);
        TCGv dbg_st1 = tcg_constant_tl(pkt->pkt_has_store_s1 && report_stores);

        /* Handy place to set a breakpoint at the end of execution */
        gen_helper_debug_commit_end(cpu_env, dbg_st0, dbg_st1);
    }

    if (pkt->vhist_insn != NULL) {
        /* Generate the packet's vhist instruction with pre_commit cleared */
        ctx->pre_commit = false;
        ctx->insn = pkt->vhist_insn;
        pkt->vhist_insn->generate(ctx);
    }

    if (pkt->pkt_has_cof) {
        gen_end_tb(ctx);
    }
}
|
|
|
|
|
|
|
|
/*
 * Read, decode and translate the packet at ctx->base.pc_next.
 *
 * If no words can be read, or the words do not decode to a packet, an
 * HEX_EXCP_INVALID_PACKET exception is generated and pc_next is left
 * untouched.  Otherwise every instruction of the packet is generated,
 * the packet is committed and pc_next is advanced by the encoded size
 * of the packet.
 */
static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx)
{
    uint32_t words[PACKET_WORDS_MAX];
    Packet pkt;
    int nwords = read_packet_words(env, ctx, words);

    /* decode_packet is only attempted when at least one word was read */
    if (nwords == 0 || decode_packet(nwords, words, &pkt, false) <= 0) {
        gen_exception_end_tb(ctx, HEX_EXCP_INVALID_PACKET);
        return;
    }

    pkt.pc = ctx->base.pc_next;
    HEX_DEBUG_PRINT_PKT(&pkt);

    /* pkt lives on this stack frame; ctx->pkt refers to it from here on */
    ctx->pkt = &pkt;
    gen_start_packet(ctx);
    for (int idx = 0; idx < pkt.num_insns; idx++) {
        ctx->insn = &pkt.insn[idx];
        gen_insn(ctx);
    }
    gen_commit_packet(ctx);

    ctx->base.pc_next += pkt.encod_pkt_size_in_bytes;
}
|
|
|
|
|
|
|
|
static void hexagon_tr_init_disas_context(DisasContextBase *dcbase,
|
|
|
|
CPUState *cs)
|
|
|
|
{
|
|
|
|
DisasContext *ctx = container_of(dcbase, DisasContext, base);
|
Hexagon (target/hexagon) Short-circuit packet register writes
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr. We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed. If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
We pass the ctx->need_commit to helpers when needed.
Finally, we can early-exit from gen_reg_writes during packet commit.
There are a few instructions whose semantics write to the result before
reading all the inputs. Therefore, the idef-parser generated code is
incompatible with short-circuit. We tell idef-parser to skip them.
For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.
Here's a simple example of the TCG generated for
0x004000b4: 0x7800c020 { R0 = #0x1 }
BEFORE:
---- 004000b4
movi_i32 new_r0,$0x1
mov_i32 r0,new_r0
AFTER:
---- 004000b4
movi_i32 r0,$0x1
This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
2023-04-28 02:00:02 +03:00
|
|
|
HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr);
|
2022-11-10 20:49:35 +03:00
|
|
|
uint32_t hex_flags = dcbase->tb->flags;
|
2021-02-08 08:46:19 +03:00
|
|
|
|
|
|
|
ctx->mem_idx = MMU_USER_IDX;
|
|
|
|
ctx->num_packets = 0;
|
|
|
|
ctx->num_insns = 0;
|
2021-09-30 22:29:00 +03:00
|
|
|
ctx->num_hvx_insns = 0;
|
2022-11-08 19:29:05 +03:00
|
|
|
ctx->branch_cond = TCG_COND_NEVER;
|
2022-11-10 20:49:35 +03:00
|
|
|
ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP);
|
Hexagon (target/hexagon) Short-circuit packet register writes
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr. We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed. If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
We pass the ctx->need_commit to helpers when needed.
Finally, we can early-exit from gen_reg_writes during packet commit.
There are a few instructions whose semantics write to the result before
reading all the inputs. Therefore, the idef-parser generated code is
incompatible with short-circuit. We tell idef-parser to skip them.
For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.
Here's a simple example of the TCG generated for
0x004000b4: 0x7800c020 { R0 = #0x1 }
BEFORE:
---- 004000b4
movi_i32 new_r0,$0x1
mov_i32 r0,new_r0
AFTER:
---- 004000b4
movi_i32 r0,$0x1
This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
2023-04-28 02:00:02 +03:00
|
|
|
ctx->short_circuit = hex_cpu->short_circuit;
|
2021-02-08 08:46:19 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* TranslatorOps.tb_start hook: Hexagon generates nothing at TB start */
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
    /* Intentionally empty */
}
|
|
|
|
|
|
|
|
/*
 * TranslatorOps.insn_start hook: emit the insn_start op carrying the
 * address of the packet that is about to be translated.
 */
static void hexagon_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    /* dcbase is the base member embedded in the Hexagon DisasContext */
    tcg_gen_insn_start(dcbase->pc_next);
}
|
|
|
|
|
|
|
|
/*
 * Scan the code words starting at ctx->base.pc_next, at most
 * PACKET_WORDS_MAX of them, until a word that ends the packet is seen.
 *
 * Returns true when such a word was found and the address just past it
 * lies TARGET_PAGE_SIZE or more bytes beyond the start of the page that
 * holds ctx->base.pc_first.
 */
static bool pkt_crosses_page(CPUHexagonState *env, DisasContext *ctx)
{
    target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
    size_t scan_limit = PACKET_WORDS_MAX * sizeof(uint32_t);
    size_t offset = 0;
    bool found_end = false;

    while (!found_end && offset < scan_limit) {
        found_end = is_packet_end(cpu_ldl_code(env,
                                               ctx->base.pc_next + offset));
        offset += sizeof(uint32_t);
    }

    uint32_t next_ptr = ctx->base.pc_next + offset;
    return found_end && next_ptr - page_start >= TARGET_PAGE_SIZE;
}
|
|
|
|
|
|
|
|
/*
 * TranslatorOps.translate_insn hook: translate one packet and decide
 * whether the TB has to stop after it.
 *
 * While is_jmp is still DISAS_NEXT, the TB is ended (DISAS_TOO_MANY) when
 *   - pc_next has left the page that holds pc_first, or
 *   - pc_next is within one maximum-size packet of the end of that page
 *     and pkt_crosses_page() reports that the next packet crosses it, or
 *   - lldb_compat is set and CPU_LOG_TB_CPU logging is enabled.
 */
static void hexagon_tr_translate_packet(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    CPUHexagonState *env = cpu->env_ptr;

    decode_and_translate_packet(env, ctx);

    if (ctx->base.is_jmp != DISAS_NEXT) {
        return;
    }

    target_ulong page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
    target_ulong bytes_max = PACKET_WORDS_MAX * sizeof(target_ulong);
    target_ulong page_offset = ctx->base.pc_next - page_start;

    /* pkt_crosses_page reads code, so it is only consulted near the end */
    bool end_tb = page_offset >= TARGET_PAGE_SIZE ||
                  (page_offset >= TARGET_PAGE_SIZE - bytes_max &&
                   pkt_crosses_page(env, ctx));

    if (!end_tb) {
        /*
         * The CPU log is used to compare against LLDB single stepping,
         * so end the TB after every packet.
         */
        HexagonCPU *hex_cpu = env_archcpu(env);

        end_tb = hex_cpu->lldb_compat && qemu_loglevel_mask(CPU_LOG_TB_CPU);
    }

    if (end_tb) {
        ctx->base.is_jmp = DISAS_TOO_MANY;
    }
}
|
|
|
|
|
|
|
|
/*
 * TranslatorOps.tb_stop hook.
 *
 * DISAS_TOO_MANY: generate the exec counters, set PC to the next packet
 * and exit the TB.  DISAS_NORETURN: nothing is generated here.  Any other
 * is_jmp value is a programming error.
 */
static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    if (ctx->base.is_jmp == DISAS_TOO_MANY) {
        gen_exec_counters(ctx);
        tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->base.pc_next);
        tcg_gen_exit_tb(NULL, 0);
    } else if (ctx->base.is_jmp != DISAS_NORETURN) {
        g_assert_not_reached();
    }
}
|
|
|
|
|
2022-04-17 21:29:52 +03:00
|
|
|
static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
|
|
|
|
CPUState *cpu, FILE *logfile)
|
2021-02-08 08:46:19 +03:00
|
|
|
{
|
2022-04-17 21:29:52 +03:00
|
|
|
fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
|
|
|
|
target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
|
2021-02-08 08:46:19 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static const TranslatorOps hexagon_tr_ops = {
|
|
|
|
.init_disas_context = hexagon_tr_init_disas_context,
|
|
|
|
.tb_start = hexagon_tr_tb_start,
|
|
|
|
.insn_start = hexagon_tr_insn_start,
|
|
|
|
.translate_insn = hexagon_tr_translate_packet,
|
|
|
|
.tb_stop = hexagon_tr_tb_stop,
|
|
|
|
.disas_log = hexagon_tr_disas_log,
|
|
|
|
};
|
|
|
|
|
2023-01-29 04:19:22 +03:00
|
|
|
void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
|
2022-08-11 23:48:03 +03:00
|
|
|
target_ulong pc, void *host_pc)
|
2021-02-08 08:46:19 +03:00
|
|
|
{
|
|
|
|
DisasContext ctx;
|
|
|
|
|
2022-08-11 23:48:03 +03:00
|
|
|
translator_loop(cs, tb, max_insns, pc, host_pc,
|
|
|
|
&hexagon_tr_ops, &ctx.base);
|
2021-02-08 08:46:19 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
#define NAME_LEN 64
|
|
|
|
static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
|
|
|
|
static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
|
|
|
|
static char new_pred_value_names[NUM_PREGS][NAME_LEN];
|
|
|
|
static char store_addr_names[STORES_MAX][NAME_LEN];
|
|
|
|
static char store_width_names[STORES_MAX][NAME_LEN];
|
|
|
|
static char store_val32_names[STORES_MAX][NAME_LEN];
|
|
|
|
static char store_val64_names[STORES_MAX][NAME_LEN];
|
2021-09-30 22:29:00 +03:00
|
|
|
static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
|
|
|
|
static char vstore_size_names[VSTORES_MAX][NAME_LEN];
|
|
|
|
static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
|
2021-02-08 08:46:19 +03:00
|
|
|
|
|
|
|
void hexagon_translate_init(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
opcode_init();
|
|
|
|
|
|
|
|
for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
|
|
|
|
hex_gpr[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, gpr[i]),
|
|
|
|
hexagon_regnames[i]);
|
|
|
|
|
|
|
|
snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]);
|
|
|
|
hex_new_value[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, new_value[i]),
|
|
|
|
new_value_names[i]);
|
|
|
|
|
2021-04-09 04:07:44 +03:00
|
|
|
if (HEX_DEBUG) {
|
|
|
|
snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s",
|
|
|
|
hexagon_regnames[i]);
|
|
|
|
hex_reg_written[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, reg_written[i]),
|
|
|
|
reg_written_names[i]);
|
|
|
|
}
|
2021-02-08 08:46:19 +03:00
|
|
|
}
|
|
|
|
for (i = 0; i < NUM_PREGS; i++) {
|
|
|
|
hex_pred[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, pred[i]),
|
|
|
|
hexagon_prednames[i]);
|
|
|
|
|
|
|
|
snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s",
|
|
|
|
hexagon_prednames[i]);
|
|
|
|
hex_new_pred_value[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, new_pred_value[i]),
|
|
|
|
new_pred_value_names[i]);
|
|
|
|
}
|
|
|
|
hex_pred_written = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, pred_written), "pred_written");
|
|
|
|
hex_this_PC = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, this_PC), "this_PC");
|
|
|
|
hex_slot_cancelled = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled");
|
|
|
|
hex_branch_taken = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, branch_taken), "branch_taken");
|
|
|
|
hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1");
|
|
|
|
hex_dczero_addr = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, dczero_addr), "dczero_addr");
|
|
|
|
hex_llsc_addr = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, llsc_addr), "llsc_addr");
|
|
|
|
hex_llsc_val = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, llsc_val), "llsc_val");
|
|
|
|
hex_llsc_val_i64 = tcg_global_mem_new_i64(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64");
|
|
|
|
for (i = 0; i < STORES_MAX; i++) {
|
|
|
|
snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i);
|
|
|
|
hex_store_addr[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, mem_log_stores[i].va),
|
|
|
|
store_addr_names[i]);
|
|
|
|
|
|
|
|
snprintf(store_width_names[i], NAME_LEN, "store_width_%d", i);
|
|
|
|
hex_store_width[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, mem_log_stores[i].width),
|
|
|
|
store_width_names[i]);
|
|
|
|
|
|
|
|
snprintf(store_val32_names[i], NAME_LEN, "store_val32_%d", i);
|
|
|
|
hex_store_val32[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, mem_log_stores[i].data32),
|
|
|
|
store_val32_names[i]);
|
|
|
|
|
|
|
|
snprintf(store_val64_names[i], NAME_LEN, "store_val64_%d", i);
|
|
|
|
hex_store_val64[i] = tcg_global_mem_new_i64(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, mem_log_stores[i].data64),
|
|
|
|
store_val64_names[i]);
|
|
|
|
}
|
2021-09-30 22:29:00 +03:00
|
|
|
for (int i = 0; i < VSTORES_MAX; i++) {
|
|
|
|
snprintf(vstore_addr_names[i], NAME_LEN, "vstore_addr_%d", i);
|
|
|
|
hex_vstore_addr[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, vstore[i].va),
|
|
|
|
vstore_addr_names[i]);
|
|
|
|
|
|
|
|
snprintf(vstore_size_names[i], NAME_LEN, "vstore_size_%d", i);
|
|
|
|
hex_vstore_size[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, vstore[i].size),
|
|
|
|
vstore_size_names[i]);
|
|
|
|
|
|
|
|
snprintf(vstore_pending_names[i], NAME_LEN, "vstore_pending_%d", i);
|
|
|
|
hex_vstore_pending[i] = tcg_global_mem_new(cpu_env,
|
|
|
|
offsetof(CPUHexagonState, vstore_pending[i]),
|
|
|
|
vstore_pending_names[i]);
|
|
|
|
}
|
2021-02-08 08:46:19 +03:00
|
|
|
}
|