2021-02-08 08:46:19 +03:00
|
|
|
/*
|
2024-02-01 13:33:38 +03:00
|
|
|
* Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
2021-02-08 08:46:19 +03:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef HEXAGON_TRANSLATE_H
|
|
|
|
#define HEXAGON_TRANSLATE_H
|
|
|
|
|
|
|
|
#include "qemu/bitmap.h"
|
2022-02-07 11:27:56 +03:00
|
|
|
#include "qemu/log.h"
|
2021-02-08 08:46:19 +03:00
|
|
|
#include "cpu.h"
|
|
|
|
#include "exec/translator.h"
|
|
|
|
#include "tcg/tcg-op.h"
|
2022-11-08 19:28:56 +03:00
|
|
|
#include "insn.h"
|
2021-02-08 08:46:19 +03:00
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
typedef struct DisasContext {
|
|
|
|
DisasContextBase base;
|
2022-11-08 19:28:56 +03:00
|
|
|
Packet *pkt;
|
|
|
|
Insn *insn;
|
2022-11-08 19:29:01 +03:00
|
|
|
uint32_t next_PC;
|
2021-02-08 08:46:19 +03:00
|
|
|
uint32_t mem_idx;
|
|
|
|
uint32_t num_packets;
|
|
|
|
uint32_t num_insns;
|
2021-09-30 22:29:00 +03:00
|
|
|
uint32_t num_hvx_insns;
|
2021-02-08 08:46:19 +03:00
|
|
|
int reg_log[REG_WRITES_MAX];
|
|
|
|
int reg_log_idx;
|
|
|
|
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
|
2023-03-07 05:58:19 +03:00
|
|
|
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
|
2021-02-08 08:46:19 +03:00
|
|
|
int preg_log[PRED_WRITES_MAX];
|
|
|
|
int preg_log_idx;
|
2021-04-09 04:07:34 +03:00
|
|
|
DECLARE_BITMAP(pregs_written, NUM_PREGS);
|
2021-02-08 08:46:19 +03:00
|
|
|
uint8_t store_width[STORES_MAX];
|
2021-04-09 04:07:35 +03:00
|
|
|
bool s1_store_processed;
|
2021-09-30 22:29:00 +03:00
|
|
|
int future_vregs_idx;
|
|
|
|
int future_vregs_num[VECTOR_TEMPS_MAX];
|
|
|
|
int tmp_vregs_idx;
|
|
|
|
int tmp_vregs_num[VECTOR_TEMPS_MAX];
|
|
|
|
int vreg_log[NUM_VREGS];
|
|
|
|
int vreg_log_idx;
|
|
|
|
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
|
|
|
|
DECLARE_BITMAP(vregs_updated, NUM_VREGS);
|
|
|
|
DECLARE_BITMAP(vregs_select, NUM_VREGS);
|
2023-03-07 05:58:21 +03:00
|
|
|
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
|
|
|
|
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
|
2023-04-28 02:00:01 +03:00
|
|
|
DECLARE_BITMAP(vregs_read, NUM_VREGS);
|
2021-09-30 22:29:00 +03:00
|
|
|
int qreg_log[NUM_QREGS];
|
|
|
|
int qreg_log_idx;
|
2023-04-28 02:00:01 +03:00
|
|
|
DECLARE_BITMAP(qregs_read, NUM_QREGS);
|
2021-09-30 22:29:00 +03:00
|
|
|
bool pre_commit;
|
Hexagon (target/hexagon) Short-circuit packet register writes
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr. We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed. If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
We pass the ctx->need_commit to helpers when needed.
Finally, we can early-exit from gen_reg_writes during packet commit.
There are a few instructions whose semantics write to the result before
reading all the inputs. Therefore, the idef-parser generated code is
incompatible with short-circuit. We tell idef-parser to skip them.
For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.
Here's a simple example of the TCG generated for
0x004000b4: 0x7800c020 { R0 = #0x1 }
BEFORE:
---- 004000b4
movi_i32 new_r0,$0x1
mov_i32 r0,new_r0
AFTER:
---- 004000b4
movi_i32 r0,$0x1
This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
2023-04-28 02:00:02 +03:00
|
|
|
bool need_commit;
|
2022-11-08 19:29:05 +03:00
|
|
|
TCGCond branch_cond;
|
|
|
|
target_ulong branch_dest;
|
2022-11-10 20:49:35 +03:00
|
|
|
bool is_tight_loop;
|
Hexagon (target/hexagon) Short-circuit packet register writes
In certain cases, we can avoid the overhead of writing to hex_new_value
and write directly to hex_gpr. We add need_commit field to DisasContext
indicating if the end-of-packet commit is needed. If it is not needed,
get_result_gpr() and get_result_gpr_pair() can return hex_gpr.
We pass the ctx->need_commit to helpers when needed.
Finally, we can early-exit from gen_reg_writes during packet commit.
There are a few instructions whose semantics write to the result before
reading all the inputs. Therefore, the idef-parser generated code is
incompatible with short-circuit. We tell idef-parser to skip them.
For debugging purposes, we add a cpu property to turn off short-circuit.
When the short-circuit property is false, we skip the analysis and force
the end-of-packet commit.
Here's a simple example of the TCG generated for
0x004000b4: 0x7800c020 { R0 = #0x1 }
BEFORE:
---- 004000b4
movi_i32 new_r0,$0x1
mov_i32 r0,new_r0
AFTER:
---- 004000b4
movi_i32 r0,$0x1
This patch reintroduces a use of check_for_attrib, so we remove the
G_GNUC_UNUSED added earlier in this series.
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com>
2023-04-28 02:00:02 +03:00
|
|
|
bool short_circuit;
|
2023-04-28 02:00:05 +03:00
|
|
|
bool has_hvx_helper;
|
Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
Look for read-after-write instead of overlap of reads and writes
Here is an example with overalp but no read-after-write:
0x000200fc: 0x38103876 { R0 = add(R0,R1); R6 = add(R6,R7) }
BEFORE:
---- 00000000000200fc
mov_i32 loc2,$0x0
mov_i32 loc2,r0
add_i32 loc3,loc2,r1
mov_i32 loc2,loc3
mov_i32 loc4,$0x0
mov_i32 loc4,r6
add_i32 loc5,loc4,r7
mov_i32 loc4,loc5
mov_i32 r0,loc2
mov_i32 r6,loc4
AFTER:
---- 00000000000200fc
add_i32 loc2,r0,r1
mov_i32 r0,loc2
add_i32 loc3,r6,r7
mov_i32 r6,loc3
We can also short-circuit packets with .new values by reading from the
real destination instead of the temporary.
0x00020100: 0x78005ff3 { R19 = #0xff
0x00020104: 0x2002e204 if (cmp.eq(N19.new,R2)) jump:t PC+8 }
BEFORE:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 loc8,$0x0
mov_i32 loc8,$0xff
setcond_i32 loc10,loc8,r2,eq
mov_i32 loc6,loc10
mov_i32 r19,loc8
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc6,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7fbb54000040
set_label $L1
goto_tb $0x1
exit_tb $0x7fbb54000041
set_label $L0
exit_tb $0x7fbb54000043
AFTER:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 r19,$0xff
setcond_i32 loc7,r19,r2,eq
mov_i32 loc4,loc7
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc4,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7f9764000040
set_label $L1
goto_tb $0x1
exit_tb $0x7f9764000041
set_label $L0
exit_tb $0x7f9764000043
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
Reviewed-by: Brian Cain <bcain@quicinc.com>
Message-Id: <20240201103340.119081-3-ltaylorsimpson@gmail.com>
Signed-off-by: Brian Cain <bcain@quicinc.com>
2024-02-01 13:33:39 +03:00
|
|
|
bool read_after_write;
|
2023-04-28 02:00:08 +03:00
|
|
|
TCGv new_value[TOTAL_PER_THREAD_REGS];
|
2023-04-28 02:00:09 +03:00
|
|
|
TCGv new_pred_value[NUM_PREGS];
|
2023-04-28 02:00:10 +03:00
|
|
|
TCGv pred_written;
|
2023-04-28 02:00:12 +03:00
|
|
|
TCGv branch_taken;
|
|
|
|
TCGv dczero_addr;
|
2021-02-08 08:46:19 +03:00
|
|
|
} DisasContext;
|
|
|
|
|
2024-02-01 13:33:38 +03:00
|
|
|
/*
 * Defined outside this header.  ctx_log_vreg_read_new() uses the result to
 * exempt an instruction from its "register was written" assertion.
 */
bool is_gather_store_insn(DisasContext *ctx);
|
|
|
|
|
2023-03-07 05:58:19 +03:00
|
|
|
/*
 * Note that predicate register @pnum is written.
 *
 * Only the first write of a given predicate has an effect: it appends
 * @pnum to ctx->preg_log and marks it in ctx->pregs_written.
 */
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{
    if (test_bit(pnum, ctx->pregs_written)) {
        /* Already logged */
        return;
    }

    ctx->preg_log[ctx->preg_log_idx++] = pnum;
    set_bit(pnum, ctx->pregs_written);
}
|
|
|
|
|
2023-04-28 02:00:01 +03:00
|
|
|
/*
 * Note that predicate register @pnum is read.
 *
 * If the predicate has already been logged as written, flag the context
 * as containing a read-after-write; otherwise nothing is recorded.
 */
static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
{
    if (!test_bit(pnum, ctx->pregs_written)) {
        return;
    }

    ctx->read_after_write = true;
}
|
|
|
|
|
2024-02-01 13:33:38 +03:00
|
|
|
/*
 * Note a read of the new value of predicate register @pnum.
 *
 * Nothing is recorded; the function only asserts that @pnum has already
 * been logged as written.
 */
static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
{
    g_assert(test_bit(pnum, ctx->pregs_written));
}
|
|
|
|
|
2023-03-07 05:58:19 +03:00
|
|
|
/*
 * Note that scalar register @rnum is written.
 *
 * HEX_REG_P3_0_ALIASED is logged as a write of every predicate register
 * instead; @is_predicated is not used on that path.
 *
 * For any other register, the first write appends @rnum to ctx->reg_log
 * and marks it in ctx->regs_written.  When @is_predicated is true the
 * register is also marked in ctx->predicated_regs (on every call, not just
 * the first).
 */
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
                                     bool is_predicated)
{
    if (rnum == HEX_REG_P3_0_ALIASED) {
        for (int pnum = 0; pnum < NUM_PREGS; pnum++) {
            ctx_log_pred_write(ctx, pnum);
        }
        return;
    }

    if (!test_bit(rnum, ctx->regs_written)) {
        ctx->reg_log[ctx->reg_log_idx++] = rnum;
        set_bit(rnum, ctx->regs_written);
    }
    if (is_predicated) {
        set_bit(rnum, ctx->predicated_regs);
    }
}
|
|
|
|
|
2023-03-07 05:58:19 +03:00
|
|
|
/*
 * Note a write to the register pair starting at @rnum, i.e. log a write of
 * @rnum followed by a write of @rnum + 1, both with @is_predicated.
 */
static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
                                          bool is_predicated)
{
    for (int half = 0; half < 2; half++) {
        ctx_log_reg_write(ctx, rnum + half, is_predicated);
    }
}
|
|
|
|
|
2023-04-28 02:00:01 +03:00
|
|
|
/*
 * Note that scalar register @rnum is read.
 *
 * If the register has already been logged as written, flag the context as
 * containing a read-after-write; otherwise nothing is recorded.
 */
static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
{
    if (!test_bit(rnum, ctx->regs_written)) {
        return;
    }

    ctx->read_after_write = true;
}
|
|
|
|
|
2024-02-01 13:33:38 +03:00
|
|
|
/*
 * Note a read of the new value of scalar register @rnum.
 *
 * Nothing is recorded; the function only asserts that @rnum has already
 * been logged as written.
 */
static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
{
    g_assert(test_bit(rnum, ctx->regs_written));
}
|
|
|
|
|
2023-04-28 02:00:01 +03:00
|
|
|
/*
 * Note a read of the register pair starting at @rnum, i.e. log a read of
 * @rnum followed by a read of @rnum + 1.
 */
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
{
    for (int half = 0; half < 2; half++) {
        ctx_log_reg_read(ctx, rnum + half);
    }
}
|
|
|
|
|
2021-09-30 22:29:00 +03:00
|
|
|
/*
 * Defined outside this header.
 * NOTE(review): going by the names and the future_vregs_num / tmp_vregs_num
 * fields of DisasContext, these presumably return the offset of the
 * "future" / temporary copy of a vector register, allocating a slot when
 * alloc_ok is true - confirm against the definitions.
 */
intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
                             int num, bool alloc_ok);
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
                          int num, bool alloc_ok);
|
|
|
|
|
|
|
|
/*
 * Note that vector register @rnum is written with write type @type.
 *
 * EXT_TMP writes:
 *   - mark @rnum in ctx->vregs_updated_tmp
 *   - if @is_predicated, mark it in ctx->predicated_tmp_vregs
 *
 * All other write types:
 *   - on the first such write, append @rnum to ctx->vreg_log and mark it
 *     in ctx->vregs_updated
 *   - if @is_predicated, mark it in ctx->predicated_future_vregs
 *   - if @type is EXT_NEW, mark it in ctx->vregs_select
 */
static inline void ctx_log_vreg_write(DisasContext *ctx,
                                      int rnum, VRegWriteType type,
                                      bool is_predicated)
{
    if (type != EXT_TMP) {
        /*
         * The bit in vregs_updated is set inside this guard, so it is
         * guaranteed to be set afterwards; no second set_bit() is needed.
         */
        if (!test_bit(rnum, ctx->vregs_updated)) {
            ctx->vreg_log[ctx->vreg_log_idx] = rnum;
            ctx->vreg_log_idx++;
            set_bit(rnum, ctx->vregs_updated);
        }
        if (is_predicated) {
            set_bit(rnum, ctx->predicated_future_vregs);
        }
    }
    if (type == EXT_NEW) {
        set_bit(rnum, ctx->vregs_select);
    }
    if (type == EXT_TMP) {
        set_bit(rnum, ctx->vregs_updated_tmp);
        if (is_predicated) {
            set_bit(rnum, ctx->predicated_tmp_vregs);
        }
    }
}
|
|
|
|
|
|
|
|
/*
 * Note a write to a vector register pair.
 *
 * The two registers logged are @rnum itself and then @rnum ^ 1 (the other
 * member of the pair, whichever parity @rnum has), both with the same
 * @type and @is_predicated.
 */
static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
                                           int rnum, VRegWriteType type,
                                           bool is_predicated)
{
    for (int flip = 0; flip < 2; flip++) {
        ctx_log_vreg_write(ctx, rnum ^ flip, type, is_predicated);
    }
}
|
|
|
|
|
2023-04-28 02:00:01 +03:00
|
|
|
/* Note that vector register @rnum is read: mark it in ctx->vregs_read. */
static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
{
    set_bit(rnum, ctx->vregs_read);
}
|
|
|
|
|
2024-02-01 13:33:38 +03:00
|
|
|
/*
 * Note a read of the new value of vector register @rnum.
 *
 * Asserts that the register has been logged as written in one of
 * vregs_updated, vregs_select or vregs_updated_tmp, unless
 * is_gather_store_insn() reports true for the context.  The register is
 * then marked in ctx->vregs_read.
 */
static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
{
    g_assert(is_gather_store_insn(ctx) ||
             test_bit(rnum, ctx->vregs_updated) ||
             test_bit(rnum, ctx->vregs_select) ||
             test_bit(rnum, ctx->vregs_updated_tmp));

    set_bit(rnum, ctx->vregs_read);
}
|
|
|
|
|
2023-04-28 02:00:01 +03:00
|
|
|
/*
 * Note a read of a vector register pair: @rnum itself and then @rnum ^ 1
 * (the other member of the pair, whichever parity @rnum has).
 */
static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
{
    for (int flip = 0; flip < 2; flip++) {
        ctx_log_vreg_read(ctx, rnum ^ flip);
    }
}
|
|
|
|
|
2021-09-30 22:29:00 +03:00
|
|
|
/*
 * Note that Q register @rnum is written by appending it to ctx->qreg_log.
 *
 * Unlike the scalar, predicate and vector write loggers, every call adds
 * an entry; there is no "already written" check here.
 */
static inline void ctx_log_qreg_write(DisasContext *ctx,
                                      int rnum)
{
    ctx->qreg_log[ctx->qreg_log_idx++] = rnum;
}
|
|
|
|
|
2023-04-28 02:00:01 +03:00
|
|
|
/* Note that Q register @qnum is read: mark it in ctx->qregs_read. */
static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
{
    set_bit(qnum, ctx->qregs_read);
}
|
|
|
|
|
2021-02-08 08:46:19 +03:00
|
|
|
extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
|
|
|
|
extern TCGv hex_pred[NUM_PREGS];
|
|
|
|
extern TCGv hex_slot_cancelled;
|
2023-04-28 02:00:07 +03:00
|
|
|
extern TCGv hex_new_value_usr;
|
2021-02-08 08:46:19 +03:00
|
|
|
extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];
|
|
|
|
extern TCGv hex_store_addr[STORES_MAX];
|
|
|
|
extern TCGv hex_store_width[STORES_MAX];
|
|
|
|
extern TCGv hex_store_val32[STORES_MAX];
|
|
|
|
extern TCGv_i64 hex_store_val64[STORES_MAX];
|
|
|
|
extern TCGv hex_llsc_addr;
|
|
|
|
extern TCGv hex_llsc_val;
|
|
|
|
extern TCGv_i64 hex_llsc_val_i64;
|
2021-09-30 22:29:00 +03:00
|
|
|
extern TCGv hex_vstore_addr[VSTORES_MAX];
|
|
|
|
extern TCGv hex_vstore_size[VSTORES_MAX];
|
|
|
|
extern TCGv hex_vstore_pending[VSTORES_MAX];
|
2021-02-08 08:46:19 +03:00
|
|
|
|
2022-11-08 19:28:56 +03:00
|
|
|
/*
 * Defined outside this header.
 * NOTE(review): presumably handles the store recorded for slot @slot_num of
 * the current packet - confirm against the definition.
 */
void process_store(DisasContext *ctx, int slot_num);
|
Hexagon (target/hexagon) Reduce manipulation of slot_cancelled
We only need to track slot for predicated stores and predicated HVX
instructions.
Add arguments to the probe helper functions to indicate if the slot
is predicated.
Here is a simple example of the differences in the TCG code generated:
IN:
0x00400094: 0xf900c102 { if (P0) R2 = and(R0,R1) }
BEFORE
---- 00400094
mov_i32 slot_cancelled,$0x0
mov_i32 new_r2,r2
and_i32 tmp0,p0,$0x1
brcond_i32 tmp0,$0x0,eq,$L1
and_i32 tmp0,r0,r1
mov_i32 new_r2,tmp0
br $L2
set_label $L1
or_i32 slot_cancelled,slot_cancelled,$0x8
set_label $L2
mov_i32 r2,new_r2
AFTER
---- 00400094
mov_i32 new_r2,r2
and_i32 tmp0,p0,$0x1
brcond_i32 tmp0,$0x0,eq,$L1
and_i32 tmp0,r0,r1
mov_i32 new_r2,tmp0
br $L2
set_label $L1
set_label $L2
mov_i32 r2,new_r2
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Message-Id: <20230307025828.1612809-14-tsimpson@quicinc.com>
2023-03-07 05:58:27 +03:00
|
|
|
|
|
|
|
/*
 * Bit layouts of two packed arguments, given as FIELD(name, field, shift,
 * length).
 * NOTE(review): going by the names, these are the arguments handed to the
 * packet store-probe helpers - confirm against the helper definitions.
 *
 * PROBE_PKT_SCALAR_STORE_S0
 *   bits 1:0  MMU_IDX
 *   bit  2    IS_PREDICATED
 */
FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX,       0, 2)
FIELD(PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED, 2, 1)

/*
 * PROBE_PKT_SCALAR_HVX_STORES
 *   bit  0    HAS_ST0
 *   bit  1    HAS_ST1
 *   bit  2    HAS_HVX_STORES
 *   bit  3    S0_IS_PRED
 *   bit  4    S1_IS_PRED
 *   bits 6:5  MMU_IDX
 */
FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0,        0, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1,        1, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES, 2, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED,     3, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED,     4, 1)
FIELD(PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX,        5, 2)
|
Hexagon (target/hexagon) Reduce manipulation of slot_cancelled
We only need to track slot for predicated stores and predicated HVX
instructions.
Add arguments to the probe helper functions to indicate if the slot
is predicated.
Here is a simple example of the differences in the TCG code generated:
IN:
0x00400094: 0xf900c102 { if (P0) R2 = and(R0,R1) }
BEFORE
---- 00400094
mov_i32 slot_cancelled,$0x0
mov_i32 new_r2,r2
and_i32 tmp0,p0,$0x1
brcond_i32 tmp0,$0x0,eq,$L1
and_i32 tmp0,r0,r1
mov_i32 new_r2,tmp0
br $L2
set_label $L1
or_i32 slot_cancelled,slot_cancelled,$0x8
set_label $L2
mov_i32 r2,new_r2
AFTER
---- 00400094
mov_i32 new_r2,r2
and_i32 tmp0,p0,$0x1
brcond_i32 tmp0,$0x0,eq,$L1
and_i32 tmp0,r0,r1
mov_i32 new_r2,tmp0
br $L2
set_label $L1
set_label $L2
mov_i32 r2,new_r2
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Message-Id: <20230307025828.1612809-14-tsimpson@quicinc.com>
2023-03-07 05:58:27 +03:00
|
|
|
|
2021-02-08 08:46:19 +03:00
|
|
|
#endif
|