From fc2622f660fea5355565a6734c74f68e65953ac8 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:49 -0700 Subject: [PATCH 01/44] Hexagon (target/hexagon) Add support for v68/v69/v71/v73 Add support for the ELF flags Move target/hexagon/cpu.[ch] to be v73 Change the compiler flag used by "make check-tcg" The decbin instruction is removed in Hexagon v73, so check the version before trying to compile the instruction. Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-2-tsimpson@quicinc.com> --- configure | 2 +- linux-user/hexagon/target_elf.h | 13 +++++++++---- target/hexagon/README | 8 ++++---- target/hexagon/cpu.c | 14 ++++++++++---- target/hexagon/cpu.h | 4 ++++ tests/tcg/hexagon/Makefile.target | 3 +++ tests/tcg/hexagon/misc.c | 12 ++++++++++++ 7 files changed, 43 insertions(+), 13 deletions(-) diff --git a/configure b/configure index 243e2e0a0d..0c3f7ba62f 100755 --- a/configure +++ b/configure @@ -1858,7 +1858,7 @@ fi : ${cross_cc_armeb="$cross_cc_arm"} : ${cross_cc_cflags_armeb="-mbig-endian"} : ${cross_cc_hexagon="hexagon-unknown-linux-musl-clang"} -: ${cross_cc_cflags_hexagon="-mv67 -O2 -static"} +: ${cross_cc_cflags_hexagon="-mv73 -O2 -static"} : ${cross_cc_cflags_i386="-m32"} : ${cross_cc_cflags_ppc="-m32 -mbig-endian"} : ${cross_cc_cflags_ppc64="-m64 -mbig-endian"} diff --git a/linux-user/hexagon/target_elf.h b/linux-user/hexagon/target_elf.h index b4e9f40527..a0271a0a2a 100644 --- a/linux-user/hexagon/target_elf.h +++ b/linux-user/hexagon/target_elf.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,7 +20,7 @@ static inline const char *cpu_get_model(uint32_t eflags) { - /* For now, treat anything newer than v5 as a v67 */ + /* For now, treat anything newer than v5 as a v73 */ /* FIXME - Disable instructions that are newer than the specified arch */ if (eflags == 0x04 || /* v5 */ eflags == 0x05 || /* v55 */ @@ -30,9 +30,14 @@ static inline const char *cpu_get_model(uint32_t eflags) eflags == 0x65 || /* v65 */ eflags == 0x66 || /* v66 */ eflags == 0x67 || /* v67 */ - eflags == 0x8067 /* v67t */ + eflags == 0x8067 || /* v67t */ + eflags == 0x68 || /* v68 */ + eflags == 0x69 || /* v69 */ + eflags == 0x71 || /* v71 */ + eflags == 0x8071 || /* v71t */ + eflags == 0x73 /* v73 */ ) { - return "v67"; + return "v73"; } return "unknown"; } diff --git a/target/hexagon/README b/target/hexagon/README index ebafc78b1c..0f48da9328 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -4,10 +4,10 @@ is a wide vector coprocessor designed for high performance computer vision, image processing, machine learning, and other workloads. The following versions of the Hexagon core are supported - Scalar core: v67 - https://developer.qualcomm.com/downloads/qualcomm-hexagon-v67-programmer-s-reference-manual - HVX extension: v66 - https://developer.qualcomm.com/downloads/qualcomm-hexagon-v66-hvx-programmer-s-reference-manual + Scalar core: v73 + https://developer.qualcomm.com/downloads/qualcomm-hexagon-v73-programmers-reference-manual-rev-aa + HVX extension: v73 + https://developer.qualcomm.com/downloads/qualcomm-hexagon-v73-hvx-programmers-reference-manual-rev-aa We presented an overview of the project at the 2019 KVM Forum. 
https://kvmforum2019.sched.com/event/Tmwc/qemu-hexagon-automatic-translation-of-the-isa-manual-pseudcode-to-tiny-code-instructions-of-a-vliw-architecture-niccolo-izzo-revng-taylor-simpson-qualcomm-innovation-center diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index ab40cfc283..c78fe25c9f 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,9 +25,11 @@ #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" -static void hexagon_v67_cpu_init(Object *obj) -{ -} +static void hexagon_v67_cpu_init(Object *obj) { } +static void hexagon_v68_cpu_init(Object *obj) { } +static void hexagon_v69_cpu_init(Object *obj) { } +static void hexagon_v71_cpu_init(Object *obj) { } +static void hexagon_v73_cpu_init(Object *obj) { } static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) { @@ -382,6 +384,10 @@ static const TypeInfo hexagon_cpu_type_infos[] = { .class_init = hexagon_cpu_class_init, }, DEFINE_CPU(TYPE_HEXAGON_CPU_V67, hexagon_v67_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V68, hexagon_v68_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V69, hexagon_v69_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V71, hexagon_v71_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V73, hexagon_v73_cpu_init), }; DEFINE_TYPES(hexagon_cpu_type_infos) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 81b663ecfb..4d8981d862 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -43,6 +43,10 @@ #define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU #define TYPE_HEXAGON_CPU_V67 HEXAGON_CPU_TYPE_NAME("v67") +#define TYPE_HEXAGON_CPU_V68 HEXAGON_CPU_TYPE_NAME("v68") +#define TYPE_HEXAGON_CPU_V69 HEXAGON_CPU_TYPE_NAME("v69") +#define TYPE_HEXAGON_CPU_V71 
HEXAGON_CPU_TYPE_NAME("v71") +#define TYPE_HEXAGON_CPU_V73 HEXAGON_CPU_TYPE_NAME("v73") #define MMU_USER_IDX 0 diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 7c94db4bc4..59b1b074e9 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -82,6 +82,9 @@ TESTS += $(HEX_TESTS) usr: usr.c $(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS) +# Build this test with -mv71 to exercise the CABAC instruction +misc: misc.c + $(CC) $(CFLAGS) -mv71 -O2 $< -o $@ $(LDFLAGS) scatter_gather: CFLAGS += -mhvx vector_add_int: CFLAGS += -mhvx -fvectorize hvx_misc: hvx_misc.c hvx_misc.h diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c index e126751e3a..4fcbb22795 100644 --- a/tests/tcg/hexagon/misc.c +++ b/tests/tcg/hexagon/misc.c @@ -18,6 +18,8 @@ #include #include +#define CORE_HAS_CABAC (__HEXAGON_ARCH__ <= 71) + typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; @@ -245,6 +247,7 @@ static void check(int val, int expect) } } +#if CORE_HAS_CABAC static void check64(long long val, long long expect) { if (val != expect) { @@ -252,6 +255,7 @@ static void check64(long long val, long long expect) err++; } } +#endif uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; uint32_t array[10]; @@ -286,6 +290,7 @@ static long long creg_pair(int x, int y) return retval; } +#if CORE_HAS_CABAC static long long decbin(long long x, long long y, int *pred) { long long retval; @@ -295,6 +300,7 @@ static long long decbin(long long x, long long y, int *pred) : "r"(x), "r"(y)); return retval; } +#endif /* Check that predicates are auto-and'ed in a packet */ static int auto_and(void) @@ -388,8 +394,10 @@ void test_count_trailing_zeros_ones(void) int main() { int res; +#if CORE_HAS_CABAC long long res64; int pred; +#endif memcpy(array, init, sizeof(array)); S4_storerhnew_rr(array, 4, 0xffff); @@ -505,6 +513,7 @@ int main() res = 
test_clrtnew(2, 7); check(res, 7); +#if CORE_HAS_CABAC res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); check64(res64, 0x357980003700010cLL); check(pred, 0); @@ -512,6 +521,9 @@ int main() res64 = decbin(0xfLL, 0x1bLL, &pred); check64(res64, 0x78000100LL); check(pred, 1); +#else + puts("Skipping cabac tests"); +#endif res = auto_and(); check(res, 0); From 406c74f22d457969bdc0b604876d671211cbdaa9 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:50 -0700 Subject: [PATCH 02/44] Hexagon (target/hexagon) Add v68 scalar instructions The following instructions are added L2_loadw_aq L4_loadd_aq R6_release_at_vi R6_release_st_vi S2_storew_rl_at_vi S4_stored_rl_at_vi S2_storew_rl_st_vi S4_stored_rl_st_vi The release instructions are nop's in qemu. The others behave as loads/stores. The encodings for these instructions changed some "don't care" bits L2_loadw_locked L4_loadd_locked S2_storew_locked S4_stored_locked Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-3-tsimpson@quicinc.com> --- target/hexagon/attribs_def.h.inc | 7 +++++++ target/hexagon/gen_idef_parser_funcs.py | 2 ++ target/hexagon/gen_tcg.h | 18 ++++++++++++++++++ target/hexagon/imported/encode_pp.def | 19 ++++++++++++++----- target/hexagon/imported/ldst.idef | 20 +++++++++++++++++++- target/hexagon/translate.c | 3 +++ 6 files changed, 63 insertions(+), 6 deletions(-) diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 9874d1658f..0ddfb45bdf 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -52,6 +52,12 @@ DEF_ATTRIB(REGWRSIZE_4B, "Memory width is 4 bytes", "", "") DEF_ATTRIB(REGWRSIZE_8B, "Memory width is 8 bytes", "", "") DEF_ATTRIB(MEMLIKE, "Memory-like instruction", "", "") DEF_ATTRIB(MEMLIKE_PACKET_RULES, "follows Memory-like packet rules", "", "") +DEF_ATTRIB(RELEASE, "Releases a lock", "", "") +DEF_ATTRIB(ACQUIRE, "Acquires a lock", "", "") + 
+DEF_ATTRIB(RLS_INNER, "Store release inner visibility", "", "") +DEF_ATTRIB(RLS_ALL_THREAD, "Store release among all threads", "", "") +DEF_ATTRIB(RLS_SAME_THREAD, "Store release with the same thread", "", "") /* V6 Vector attributes */ DEF_ATTRIB(CVI, "Executes on the HVX extension", "", "") @@ -74,6 +80,7 @@ DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "") DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "") DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "") +DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "") /* Change-of-flow attributes */ DEF_ATTRIB(JUMP, "Jump-type instruction", "", "") diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index afe68bdb6f..dc9e396b52 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -109,6 +109,8 @@ def main(): continue if "A_COF" in hex_common.attribdict[tag]: continue + if ( tag.startswith('R6_release_') ): + continue regs = tagregs[tag] imms = tagimms[tag] diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 329e7a1024..598d80d3ce 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -1236,6 +1236,24 @@ uiV = uiV; \ } while (0) +#define fGEN_TCG_L2_loadw_aq(SHORTCODE) SHORTCODE +#define fGEN_TCG_L4_loadd_aq(SHORTCODE) SHORTCODE + +/* Nothing to do for these in qemu, need to suppress compiler warnings */ +#define fGEN_TCG_R6_release_at_vi(SHORTCODE) \ + do { \ + RsV = RsV; \ + } while (0) +#define fGEN_TCG_R6_release_st_vi(SHORTCODE) \ + do { \ + RsV = RsV; \ + } while (0) + +#define fGEN_TCG_S2_storew_rl_at_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S4_stored_rl_at_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S2_storew_rl_st_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S4_stored_rl_st_vi(SHORTCODE) SHORTCODE + #define fGEN_TCG_J2_trap0(SHORTCODE) \ do { \ uiV = uiV; \ diff --git 
a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index d71c04cd30..763f465bfd 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -382,14 +382,23 @@ DEF_ENC32(L4_return_fnew_pt, ICLASS_LD" 011 0 000 sssss PP1110vv ---ddddd") DEF_ENC32(L4_return_tnew_pnt, ICLASS_LD" 011 0 000 sssss PP0010vv ---ddddd") DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd") -DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP00---- -00ddddd") +DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") +DEF_ENC32(L2_loadw_aq, ICLASS_LD" 001 0 000 sssss PP001--- 000ddddd") +DEF_ENC32(L4_loadd_aq, ICLASS_LD" 001 0 000 sssss PP011--- 000ddddd") +DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd") +DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd") +DEF_ENC32(S2_storew_rl_at_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --0010dd") +DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd") -DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP01---- -00ddddd") +DEF_ENC32(S4_stored_rl_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0010dd") +DEF_ENC32(S4_stored_rl_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1010dd") + +DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii -01iiiii") DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii") @@ -479,8 +488,8 @@ STD_PST_ENC(rinew, "1 101","10ttt") /* x bus/cache */ /* x store/cache */ DEF_ENC32(S2_allocframe, ICLASS_ST" 000 01 00xxxxx PP000iii iiiiiiii") 
-DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ------dd") -DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ------dd") +DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd") +DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd") DEF_ENC32(Y2_dczeroa, ICLASS_ST" 000 01 10sssss PP0----- --------") diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 237634bdd9..53198176a9 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -128,6 +128,24 @@ Q6INSN(S2_allocframe,"allocframe(Rx32,#u11:3):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEM #define A_RETURN A_RESTRICT_COF_MAX1,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOSLOT1_STORE,A_RET_TYPE,A_DEALLOCRET +/**** Load Acquire Store Release Instructions****/ + + + +Q6INSN(L2_loadw_aq,"Rd32=memw_aq(Rs32)",ATTRIBS(A_REGWRSIZE_4B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_4B,A_LOAD),"Load Acquire Word", +{ fEA_REG(RsV); fLOAD(1,4,u,EA,RdV); }) +Q6INSN(L4_loadd_aq,"Rdd32=memd_aq(Rs32)",ATTRIBS(A_REGWRSIZE_8B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_8B,A_LOAD),"Load Acquire Double integer", +{ fEA_REG(RsV); fLOAD(1,8,u,EA,RddV); }) + +Q6INSN(R6_release_at_vi,"release(Rs32):at",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); }) +Q6INSN(R6_release_st_vi,"release(Rs32):st",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); }) + 
+Q6INSN(S2_storew_rl_at_vi,"memw_rl(Rs32):at=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", { fEA_REG(RsV); fSTORE(1,4,EA,RtV); }) +Q6INSN(S4_stored_rl_at_vi,"memd_rl(Rs32):at=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); }) + +Q6INSN(S2_storew_rl_st_vi,"memw_rl(Rs32):st=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", { fEA_REG(RsV); fSTORE(1,4,EA,RtV); }) +Q6INSN(S4_stored_rl_st_vi,"memd_rl(Rs32):st=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); }) + Q6INSN(L2_deallocframe,"Rdd32=deallocframe(Rs32):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_LOAD,A_DEALLOCFRAME), "Deallocate stack frame", { fHIDE(size8u_t tmp;) fEA_REG(RsV); fLOAD(1,8,u,EA,tmp); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index cddd7c5db4..01f448a325 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -481,6 +481,9 @@ static void mark_store_width(DisasContext *ctx) uint8_t width = 0; if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { + if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) { + return; + } if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { width |= 1; } From 860132e29543c6ab747745ceeed875593a355d1f Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:51 -0700 Subject: [PATCH 03/44] Hexagon (tests/tcg/hexagon) Add v68 scalar tests Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-4-tsimpson@quicinc.com> --- 
tests/tcg/hexagon/Makefile.target | 2 +
 tests/tcg/hexagon/v68_scalar.c | 201 ++++++++++++++++++++++++++++++
 2 files changed, 203 insertions(+)
 create mode 100644 tests/tcg/hexagon/v68_scalar.c

diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index 59b1b074e9..b7529e23bc 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -76,6 +76,8 @@ HEX_TESTS += test_vminh
 HEX_TESTS += test_vpmpyh
 HEX_TESTS += test_vspliceb
 
+HEX_TESTS += v68_scalar
+
 TESTS += $(HEX_TESTS)
 
 # This test has to be compiled for the -mv67t target
diff --git a/tests/tcg/hexagon/v68_scalar.c b/tests/tcg/hexagon/v68_scalar.c
new file mode 100644
index 0000000000..7a8adb1130
--- /dev/null
+++ b/tests/tcg/hexagon/v68_scalar.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/*
+ * Test the scalar core instructions that are new in v68:
+ *   memw_aq, memd_aq                  load-acquire
+ *   release(Rs):at, release(Rs):st    release (buffer must stay unchanged)
+ *   memw_rl, memd_rl (:at and :st)    store-release
+ *
+ * Each test_* function executes one instruction through an inline asm
+ * wrapper and checks the loaded value or the buffer contents afterwards.
+ * Mismatches are counted in err, which decides the exit status in main().
+ */
+
+int err;
+
+static int buffer32[] = { 1, 2, 3, 4 };
+static long long buffer64[] = { 5, 6, 7, 8 };
+
+/* Report a 32-bit mismatch, tagged with the source line of the check */
+static void __check32(int line, uint32_t result, uint32_t expect)
+{
+    if (result != expect) {
+        printf("ERROR at line %d: 0x%08x != 0x%08x\n",
+               line, result, expect);
+        err++;
+    }
+}
+
+#define check32(RES, EXP) __check32(__LINE__, RES, EXP)
+
+/* Report a 64-bit mismatch, tagged with the source line of the check */
+static void __check64(int line, uint64_t result, uint64_t expect)
+{
+    if (result != expect) {
+        printf("ERROR at line %d: 0x%016llx != 0x%016llx\n",
+               line, result, expect);
+        err++;
+    }
+}
+
+#define check64(RES, EXP) __check64(__LINE__, RES, EXP)
+
+/*
+ * The asm wrappers below access memory through a pointer held in a register,
+ * which the compiler cannot see.  The "memory" clobber keeps it from caching
+ * or reordering accesses to the test buffers around the instruction.
+ */
+static inline int loadw_aq(int *p)
+{
+    int res;
+    asm volatile("%0 = memw_aq(%1)\n\t"
+                 : "=r"(res) : "r"(p) : "memory");
+    return res;
+}
+
+static void test_loadw_aq(void)
+{
+    int res;
+
+    res = loadw_aq(&buffer32[0]);
+    check32(res, 1);
+    res = loadw_aq(&buffer32[1]);
+    check32(res, 2);
+}
+
+static inline long long loadd_aq(long long *p)
+{
+    long long res;
+    asm volatile("%0 = memd_aq(%1)\n\t"
+                 : "=r"(res) : "r"(p) : "memory");
+    return res;
+}
+
+static void test_loadd_aq(void)
+{
+    long long res;
+
+    res = loadd_aq(&buffer64[2]);
+    check64(res, 7);
+    res = loadd_aq(&buffer64[3]);
+    check64(res, 8);
+}
+
+static inline void release_at(int *p)
+{
+    asm volatile("release(%0):at\n\t"
+                 : : "r"(p) : "memory");
+}
+
+/* release must leave the addressed word unchanged */
+static void test_release_at(void)
+{
+    release_at(&buffer32[2]);
+    check32(buffer32[2], 3);
+    release_at(&buffer32[3]);
+    check32(buffer32[3], 4);
+}
+
+static inline void release_st(int *p)
+{
+    asm volatile("release(%0):st\n\t"
+                 : : "r"(p) : "memory");
+}
+
+static void test_release_st(void)
+{
+    release_st(&buffer32[2]);
+    check32(buffer32[2], 3);
+    release_st(&buffer32[3]);
+    check32(buffer32[3], 4);
+}
+
+static inline void storew_rl_at(int *p, int val)
+{
+    asm volatile("memw_rl(%0):at = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_storew_rl_at(void)
+{
+    storew_rl_at(&buffer32[2], 9);
+    check32(buffer32[2], 9);
+    storew_rl_at(&buffer32[3], 10);
+    check32(buffer32[3], 10);
+}
+
+static inline void stored_rl_at(long long *p, long long val)
+{
+    asm volatile("memd_rl(%0):at = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_stored_rl_at(void)
+{
+    stored_rl_at(&buffer64[2], 11);
+    check64(buffer64[2], 11);
+    stored_rl_at(&buffer64[3], 12);
+    check64(buffer64[3], 12);
+}
+
+static inline void storew_rl_st(int *p, int val)
+{
+    asm volatile("memw_rl(%0):st = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_storew_rl_st(void)
+{
+    storew_rl_st(&buffer32[0], 13);
+    check32(buffer32[0], 13);
+    storew_rl_st(&buffer32[1], 14);
+    check32(buffer32[1], 14);
+}
+
+static inline void stored_rl_st(long long *p, long long val)
+{
+    asm volatile("memd_rl(%0):st = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_stored_rl_st(void)
+{
+    stored_rl_st(&buffer64[0], 15);
+    check64(buffer64[0], 15);
+    stored_rl_st(&buffer64[1], 16);
+    check64(buffer64[1], 16);
+}
+
+int main(void)
+{
+    test_loadw_aq();
+    test_loadd_aq();
+    test_release_at();
+    test_release_st();
+    test_storew_rl_at();
+    test_stored_rl_at();
+    test_storew_rl_st();
+    test_stored_rl_st();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}

From f128c0fe10a64c69c78b7c41e8ba0991e1761ae1 Mon Sep 17 00:00:00 2001
From: Taylor Simpson
Date: Thu, 27 Apr 2023 15:40:52 -0700
Subject: [PATCH 04/44] Hexagon (target/hexagon) Add v68 HVX instructions

The following instructions are added
    V6_v6mpyvubs10_vxx
    V6_v6mpyhubs10_vxx
    V6_v6mpyvubs10
    V6_v6mpyhubs10

Signed-off-by: Taylor Simpson
Reviewed-by: Anton Johansson
Message-Id: <20230427224057.3766963-5-tsimpson@quicinc.com>
---
 target/hexagon/imported/mmvec/encode_ext.def | 8 +-
 target/hexagon/imported/mmvec/ext.idef | 281 ++++++++++++++++++-
 target/hexagon/mmvec/macros.h | 9 +-
 3 files changed, 295 insertions(+), 3 deletions(-)

diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def
index 6fbbe2c422..b9b62fef8d 100644
--- a/target/hexagon/imported/mmvec/encode_ext.def
+++ b/target/hexagon/imported/mmvec/encode_ext.def
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -730,6 +730,8 @@ DEF_ENC(V6_vmaxb, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 101 ddddd") // DEF_ENC(V6_vsatuwuh, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 110 ddddd") // DEF_ENC(V6_vdealb4w, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 111 ddddd") // +DEF_ENC(V6_v6mpyvubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 0ii xxxxx") +DEF_ENC(V6_v6mpyhubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 1ii xxxxx") DEF_ENC(V6_vmpyowh_rnd, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 000 ddddd") // DEF_ENC(V6_vshuffeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 001 ddddd") // @@ -740,6 +742,10 @@ DEF_ENC(V6_vshufoeh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 101 ddddd") // DEF_ENC(V6_vshufoeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 110 ddddd") // DEF_ENC(V6_vcombine, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 111 ddddd") // +DEF_ENC(V6_v6mpyvubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 0ii ddddd") +DEF_ENC(V6_v6mpyhubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 1ii ddddd") + + DEF_ENC(V6_vmpyieoh, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 000 ddddd") // DEF_ENC(V6_vadduwsat, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 001 ddddd") // DEF_ENC(V6_vsathub, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 010 ddddd") // diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef index 8ca5a606e1..c0d169fd4f 100644 --- a/target/hexagon/imported/mmvec/ext.idef +++ b/target/hexagon/imported/mmvec/ext.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -116,6 +116,10 @@ ITERATOR_INSN_MPY_SLOT_LATE(WIDTH,TAG, SYNTAX2,DESCR,CODE) EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \ DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) +#define ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(WIDTH,TAG,SYNTAX,DESCR,CODE) \ +EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \ +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) + #define ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX,SYNTAX2,DESCR,CODE) \ ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX2,DESCR,CODE) @@ -2507,6 +2511,281 @@ EXTINSN(V6_vscattermhw , "vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSIO }) +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyvubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "", + fHIDE(size2s_t c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(size2s_t c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(size2s_t c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + + fHIDE(size2s_t c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(size2s_t c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(size2s_t c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 1) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += 
fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + } else if (uiV == 2) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 3) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + } +) +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyhubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "", + fHIDE(size2s_t c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(size2s_t c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(size2s_t c02;) + 
fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(size2s_t c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(size2s_t c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(size2s_t c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 1) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + } else if (uiV == 2) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11); + 
VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 3) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + } +) + + +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyvubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "", + fHIDE(short c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(short c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(short c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(short c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(short c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(short c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + + + if (uiV == 0) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 1) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = 
fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + } else if (uiV == 2) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 3) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + } +) + +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyhubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "", + fHIDE(short c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(short c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(short c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(short c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(short c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + 
fHIDE(short c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 1) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + } else if (uiV == 2) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 3) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] 
+= fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + } +) + EXTINSN(V6_vscattermhwq, "if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SCATTER,A_CVI_VA_DV,A_CVI_VM,A_MEMLIKE), "Scatter halfwords conditional", { diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h index 1201d778d0..a655634fd1 100644 --- a/target/hexagon/mmvec/macros.h +++ b/target/hexagon/mmvec/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -346,4 +346,11 @@ #define fUARCH_NOTE_PUMP_2X() #define IV1DEAD() + +#define fGET10BIT(COE, VAL, POS) \ + do { \ + COE = (sextract32(VAL, 24 + 2 * POS, 2) << 8) | \ + extract32(VAL, POS * 8, 8); \ + } while (0); + #endif From d636fb70b26ba2f712008a6deafeef55a8fd0a9c Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:53 -0700 Subject: [PATCH 05/44] Hexagon (tests/tcg/hexagon) Add v68 HVX tests Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-6-tsimpson@quicinc.com> --- tests/tcg/hexagon/Makefile.target | 3 + tests/tcg/hexagon/v68_hvx.c | 90 +++++++++++++++++ tests/tcg/hexagon/v6mpy_ref.c.inc | 161 ++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+) create mode 100644 tests/tcg/hexagon/v68_hvx.c create mode 100644 tests/tcg/hexagon/v6mpy_ref.c.inc diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index b7529e23bc..2ee930cf1f 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -77,6 +77,7 @@ HEX_TESTS += test_vpmpyh HEX_TESTS += test_vspliceb HEX_TESTS += v68_scalar +HEX_TESTS += v68_hvx TESTS += $(HEX_TESTS) @@ -92,6 +93,8 @@ vector_add_int: CFLAGS += -mhvx -fvectorize hvx_misc: hvx_misc.c hvx_misc.h hvx_misc: CFLAGS += -mhvx hvx_histogram: CFLAGS += -mhvx -Wno-gnu-folding-constant +v68_hvx: v68_hvx.c hvx_misc.h v6mpy_ref.c.inc +v68_hvx: CFLAGS += -mhvx -Wno-unused-function hvx_histogram: hvx_histogram.c hvx_histogram_row.S $(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS) diff --git a/tests/tcg/hexagon/v68_hvx.c b/tests/tcg/hexagon/v68_hvx.c new file mode 100644 index 0000000000..02718722a3 --- /dev/null +++ b/tests/tcg/hexagon/v68_hvx.c @@ -0,0 +1,90 @@ +/* + * Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +int err; + +#include "hvx_misc.h" + +MMVector v6mpy_buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector v6mpy_buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); + +static void init_v6mpy_buffers(void) +{ + int counter0 = 0; + int counter1 = 17; + for (int i = 0; i < BUFSIZE; i++) { + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + v6mpy_buffer0[i].w[j] = counter0++; + v6mpy_buffer1[i].w[j] = counter1++; + } + } +} + +int v6mpy_ref[BUFSIZE][MAX_VEC_SIZE_BYTES / 4] = { +#include "v6mpy_ref.c.inc" +}; + +static void test_v6mpy(void) +{ + void *p00 = buffer0; + void *p01 = v6mpy_buffer0; + void *p10 = buffer1; + void *p11 = v6mpy_buffer1; + void *pout = output; + + memset(expect, 0xff, sizeof(expect)); + memset(output, 0xff, sizeof(output)); + + for (int i = 0; i < BUFSIZE; i++) { + asm("v2 = vmem(%0 + #0)\n\t" + "v3 = vmem(%1 + #0)\n\t" + "v4 = vmem(%2 + #0)\n\t" + "v5 = vmem(%3 + #0)\n\t" + "v5:4.w = v6mpy(v5:4.ub, v3:2.b, #1):v\n\t" + "vmem(%4 + #0) = v4\n\t" + : : "r"(p00), "r"(p01), "r"(p10), "r"(p11), "r"(pout) + : "v2", "v3", "v4", "v5", "memory"); + p00 += sizeof(MMVector); + p01 += sizeof(MMVector); + p10 += sizeof(MMVector); + p11 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { +
expect[i].w[j] = v6mpy_ref[i][j]; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +int main() +{ + init_buffers(); + init_v6mpy_buffers(); + + test_v6mpy(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/v6mpy_ref.c.inc b/tests/tcg/hexagon/v6mpy_ref.c.inc new file mode 100644 index 0000000000..8258cddcb1 --- /dev/null +++ b/tests/tcg/hexagon/v6mpy_ref.c.inc @@ -0,0 +1,161 @@ +/* + * Copyright(c) 2021-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +{ 0xffffee11, 0xfffffcca, 0xffffc1b3, 0xffffd0cc, + 0xffffe215, 0xfffff58e, 0xffffaf37, 0xffffc310, + 0xffffd919, 0xfffff152, 0xffff9fbb, 0xffffb854, + 0xffffd31d, 0xfffff016, 0xffff933f, 0xffffb098, + 0xffffd021, 0xfffff1da, 0xffff89c3, 0xffffabdc, + 0xffffd025, 0xfffff69e, 0xffff8347, 0xffffaa20, + 0xffffd329, 0xfffffe62, 0xffff7fcb, 0xffffab64, + 0xffffd92d, 0x00000926, 0xffff7f4f, 0xffffafa8, + }, +{ 0xffffe231, 0x000016ea, 0xffff81d3, 0xffffb6ec, + 0xffffee35, 0x000027ae, 0xffff8757, 0xffffc130, + 0xfffffd39, 0x00003b72, 0xffff8fdb, 0xffffce74, + 0x00000f3d, 0x00005236, 0xffff9b5f, 0xffffdeb8, + 0x00002441, 0x00006bfa, 0xffffa9e3, 0xfffff1fc, + 0x00003c45, 0x000088be, 0xffffbb67, 0x00000840, + 0x00005749, 0x0000a882, 0xffffcfeb, 0xffffe684, + 0x0000494d, 0x00009a46, 0xffffb16f, 0x000002c8, + }, +{ 0xfffff351, 0x0000440a, 0xffff4af3, 0xffff9c0c, + 0xffffef55, 0x000044ce, 0xffff4077, 0xffff9650, + 0xffffee59, 0x00004892, 0xffff38fb, 0xffff9394, + 0xfffff05d, 0x00004f56, 0xffff347f, 0xffff93d8, + 0xfffff561, 0x0000591a, 0xffff3303, 0xffff971c, + 0xfffffd65, 0x000065de, 0xffff3487, 0xffff9d60, + 0x00000869, 0x000075a2, 0xffff390b, 0xffffa6a4, + 0x0000166d, 0x00008866, 0xffff408f, 0xffffb2e8, + }, +{ 0x00002771, 0x00009e2a, 0xffff4b13, 0xffffc22c, + 0x00003b75, 0x0000b6ee, 0xffff5897, 0xffffd470, + 0x00005279, 0x0000d2b2, 0xffff691b, 0xffffe9b4, + 0x00006c7d, 0x0000f176, 0xffff7c9f, 0x000001f8, + 0x00008981, 0x0001133a, 0xffff9323, 0x00001d3c, + 0x0000a985, 0x000137fe, 0xffffaca7, 0x00003b80, + 0x0000cc89, 0x00015fc2, 0xffffc92b, 0xffffe1c4, + 0x0000868d, 0x00011986, 0xffff72af, 0x00000608, + }, +{ 0xfffff891, 0x00008b4a, 0xfffed433, 0xffff674c, + 0xfffffc95, 0x0000940e, 0xfffed1b7, 0xffff6990, + 0x00000399, 0x00009fd2, 0xfffed23b, 0xffff6ed4, + 0x00000d9d, 0x0000ae96, 0xfffed5bf, 0xffff7718, + 0x00001aa1, 0x0000c05a, 0xfffedc43, 0xffff825c, + 0x00002aa5, 0x0000d51e, 0xfffee5c7, 0xffff90a0, + 0x00003da9, 0x0000ece2, 0xfffef24b, 0xffffa1e4, + 0x000053ad, 
0x000107a6, 0xffff01cf, 0xffffb628, + }, +{ 0x00006cb1, 0x0001256a, 0xffff1453, 0xffffcd6c, + 0x000088b5, 0x0001462e, 0xffff29d7, 0xffffe7b0, + 0x0000a7b9, 0x000169f2, 0xffff425b, 0x000004f4, + 0x0000c9bd, 0x000190b6, 0xffff5ddf, 0x00002538, + 0x0000eec1, 0x0001ba7a, 0xffff7c63, 0x0000487c, + 0x000116c5, 0x0001e73e, 0xffff9de7, 0x00006ec0, + 0x000141c9, 0x00021702, 0xffffc26b, 0xffffdd04, + 0x0000c3cd, 0x000198c6, 0xffff33ef, 0x00000948, + }, +{ 0xfffffdd1, 0x0000d28a, 0xfffe5d73, 0xffff328c, + 0x000009d5, 0x0000e34e, 0xfffe62f7, 0xffff3cd0, + 0x000018d9, 0x0000f712, 0xfffe6b7b, 0xffff4a14, + 0x00002add, 0x00010dd6, 0xfffe76ff, 0xffff5a58, + 0x00003fe1, 0x0001279a, 0xfffe8583, 0xffff6d9c, + 0x000057e5, 0x0001445e, 0xfffe9707, 0xffff83e0, + 0x000072e9, 0x00016422, 0xfffeab8b, 0xffff9d24, + 0x000090ed, 0x000186e6, 0xfffec30f, 0xffffb968, + }, +{ 0x0000b1f1, 0x0001acaa, 0xfffedd93, 0xffffd8ac, + 0x0000d5f5, 0x0001d56e, 0xfffefb17, 0xfffffaf0, + 0x0000fcf9, 0x00020132, 0xffff1b9b, 0x00002034, + 0x000126fd, 0x00022ff6, 0xffff3f1f, 0x00008b36, + 0x000093c3, 0x00009d80, 0x00009d6d, 0x0000a78a, + 0x0000b4d7, 0x0000c354, 0x0000b801, 0x0000c6de, + 0x0000d4eb, 0x0000e828, 0x0000d195, 0xffffea32, + 0x00000fff, 0x000022fc, 0xfffffc29, 0x00000f86, + }, +{ 0xffffee13, 0xfffffcd0, 0xffffc1bd, 0xffffd0da, + 0xffffe327, 0xfffff6a4, 0xffffb051, 0xffffc42e, + 0xffffd73b, 0xffffef78, 0xffff9de5, 0xffffb682, + 0xffffd24f, 0xffffef4c, 0xffff9279, 0xffffafd6, + 0xffffd063, 0xfffff220, 0xffff8a0d, 0xffffac2a, + 0xffffd177, 0xfffff7f4, 0xffff84a1, 0xffffab7e, + 0xffffd18b, 0xfffffcc8, 0xffff7e35, 0xffffa9d2, + 0xffffd89f, 0x0000089c, 0xffff7ec9, 0xffffaf26, + }, +{ 0xffffe2b3, 0x00001770, 0xffff825d, 0xffffb77a, + 0xffffefc7, 0x00002944, 0xffff88f1, 0xffffc2ce, + 0xfffffbdb, 0x00003a18, 0xffff8e85, 0xffffcd22, + 0x00000eef, 0x000051ec, 0xffff9b19, 0xffffde76, + 0x00002503, 0x00006cc0, 0xffffaaad, 0xfffff2ca, + 0x00003e17, 0x00008a94, 0xffffbd41, 0x00000a1e, + 0x0000562b, 0x0000a768, 
0xffffced5, 0xffffe572, + 0x0000493f, 0x00009a3c, 0xffffb169, 0x000002c6, + }, +{ 0xfffff353, 0x00004410, 0xffff4afd, 0xffff9c1a, + 0xfffff067, 0x000045e4, 0xffff4191, 0xffff976e, + 0xffffec7b, 0x000046b8, 0xffff3725, 0xffff91c2, + 0xffffef8f, 0x00004e8c, 0xffff33b9, 0xffff9316, + 0xfffff5a3, 0x00005960, 0xffff334d, 0xffff976a, + 0xfffffeb7, 0x00006734, 0xffff35e1, 0xffff9ebe, + 0x000006cb, 0x00007408, 0xffff3775, 0xffffa512, + 0x000015df, 0x000087dc, 0xffff4009, 0xffffb266, + }, +{ 0x000027f3, 0x00009eb0, 0xffff4b9d, 0xffffc2ba, + 0x00003d07, 0x0000b884, 0xffff5a31, 0xffffd60e, + 0x0000511b, 0x0000d158, 0xffff67c5, 0xffffe862, + 0x00006c2f, 0x0000f12c, 0xffff7c59, 0x000001b6, + 0x00008a43, 0x00011400, 0xffff93ed, 0x00001e0a, + 0x0000ab57, 0x000139d4, 0xffffae81, 0x00003d5e, + 0x0000cb6b, 0x00015ea8, 0xffffc815, 0xffffe0b2, + 0x0000867f, 0x0001197c, 0xffff72a9, 0x00000606, + }, +{ 0xfffff893, 0x00008b50, 0xfffed43d, 0xffff675a, + 0xfffffda7, 0x00009524, 0xfffed2d1, 0xffff6aae, + 0x000001bb, 0x00009df8, 0xfffed065, 0xffff6d02, + 0x00000ccf, 0x0000adcc, 0xfffed4f9, 0xffff7656, + 0x00001ae3, 0x0000c0a0, 0xfffedc8d, 0xffff82aa, + 0x00002bf7, 0x0000d674, 0xfffee721, 0xffff91fe, + 0x00003c0b, 0x0000eb48, 0xfffef0b5, 0xffffa052, + 0x0000531f, 0x0001071c, 0xffff0149, 0xffffb5a6, + }, +{ 0x00006d33, 0x000125f0, 0xffff14dd, 0xffffcdfa, + 0x00008a47, 0x000147c4, 0xffff2b71, 0xffffe94e, + 0x0000a65b, 0x00016898, 0xffff4105, 0x000003a2, + 0x0000c96f, 0x0001906c, 0xffff5d99, 0x000024f6, + 0x0000ef83, 0x0001bb40, 0xffff7d2d, 0x0000494a, + 0x00011897, 0x0001e914, 0xffff9fc1, 0x0000709e, + 0x000140ab, 0x000215e8, 0xffffc155, 0xffffdbf2, + 0x0000c3bf, 0x000198bc, 0xffff33e9, 0x00000946, + }, +{ 0xfffffdd3, 0x0000d290, 0xfffe5d7d, 0xffff329a, + 0x00000ae7, 0x0000e464, 0xfffe6411, 0xffff3dee, + 0x000016fb, 0x0000f538, 0xfffe69a5, 0xffff4842, + 0x00002a0f, 0x00010d0c, 0xfffe7639, 0xffff5996, + 0x00004023, 0x000127e0, 0xfffe85cd, 0xffff6dea, + 0x00005937, 0x000145b4, 0xfffe9861, 
0xffff853e, + 0x0000714b, 0x00016288, 0xfffea9f5, 0xffff9b92, + 0x0000905f, 0x0001865c, 0xfffec289, 0xffffb8e6, + }, +{ 0x0000b273, 0x0001ad30, 0xfffede1d, 0xffffd93a, + 0x0000d787, 0x0001d704, 0xfffefcb1, 0xfffffc8e, + 0x0000fb9b, 0x0001ffd8, 0xffff1a45, 0x00001ee2, + 0x000126af, 0x00022fac, 0xffff3ed9, 0x00008af4, + 0x00009485, 0x00009e46, 0x00009e37, 0x0000a858, + 0x0000b6a9, 0x0000c52a, 0x0000b9db, 0x0000c8bc, + 0x0000d3cd, 0x0000e70e, 0x0000d07f, 0xffffe920, + 0x00000ff1, 0x000022f2, 0xfffffc23, 0x00000f84, + }, From b2f20c2c509df87a408b27f1117549177b31410e Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:54 -0700 Subject: [PATCH 06/44] Hexagon (target/hexagon) Add v69 HVX instructions The following instructions are added V6_vasrvuhubrndsat V6_vasrvuhubsat V6_vasrvwuhrndsat V6_vasrvwuhsat V6_vassign_tmp V6_vcombine_tmp V6_vmpyuhvs Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-7-tsimpson@quicinc.com> --- target/hexagon/attribs_def.h.inc | 8 ++++ target/hexagon/gen_tcg_hvx.h | 12 ++++++ target/hexagon/imported/mmvec/encode_ext.def | 8 ++++ target/hexagon/imported/mmvec/ext.idef | 40 ++++++++++++++++++++ 4 files changed, 68 insertions(+) diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 0ddfb45bdf..3bef60bef3 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -69,11 +69,13 @@ DEF_ATTRIB(CVI_VP_VS, "Double vector permute/shft insn executes on HVX", "", "") DEF_ATTRIB(CVI_VX, "Multiply instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VX_DV, "Double vector multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VS, "Shift instruction executes on HVX", "", "") +DEF_ATTRIB(CVI_VS_3SRC, "This shift needs to borrow a source register", "", "") DEF_ATTRIB(CVI_VS_VX, "Permute/shift and multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VA, "ALU instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VA_DV, "Double vector 
alu instruction executes on HVX", "", "") DEF_ATTRIB(CVI_4SLOT, "Consumes all the vector execution resources", "", "") DEF_ATTRIB(CVI_TMP, "Transient Memory Load not written to register", "", "") +DEF_ATTRIB(CVI_REMAP, "Register Renaming not written to register file", "", "") DEF_ATTRIB(CVI_GATHER, "CVI Gather operation", "", "") DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "") DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "") @@ -147,6 +149,8 @@ DEF_ATTRIB(L2FETCH, "Instruction is l2fetch type", "", "") DEF_ATTRIB(ICINVA, "icinva", "", "") DEF_ATTRIB(DCCLEANINVA, "dccleaninva", "", "") +DEF_ATTRIB(NO_INTRINSIC, "Don't generate an intrisic", "", "") + /* Documentation Notes */ DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "") DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "") @@ -155,7 +159,11 @@ DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "") DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "") DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "") DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "") +DEF_ATTRIB(NOTE_NOVP, "Cannot be paired with a HVX permute instruction", "", "") +DEF_ATTRIB(NOTE_VA_UNARY, "Combined with HVX ALU op (must be unary)", "", "") +/* V6 MMVector Notes for Documentation */ +DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "") /* Restrictions to make note of */ DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "") DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "") diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h index d4aefe8e3f..8dceead5e5 100644 --- a/target/hexagon/gen_tcg_hvx.h +++ b/target/hexagon/gen_tcg_hvx.h @@ -128,6 +128,18 @@ static inline void assert_vhist_tmp(DisasContext *ctx) tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ sizeof(MMVector), sizeof(MMVector)) +#define 
fGEN_TCG_V6_vassign_tmp(SHORTCODE) \ + tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ + sizeof(MMVector), sizeof(MMVector)) + +#define fGEN_TCG_V6_vcombine_tmp(SHORTCODE) \ + do { \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + } while (0) + /* Vector conditional move */ #define fGEN_TCG_VEC_CMOV(PRED) \ do { \ diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def index b9b62fef8d..402438f566 100644 --- a/target/hexagon/imported/mmvec/encode_ext.def +++ b/target/hexagon/imported/mmvec/encode_ext.def @@ -257,6 +257,11 @@ DEF_ENC(V6_vasruhubrndsat, ICLASS_CJ" 1 000 vvv vvttt PP 0 uuuuu 111 ddd DEF_ENC(V6_vasruwuhsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 100 ddddd") // DEF_ENC(V6_vasruhubsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 101 ddddd") // +DEF_ENC(V6_vasrvuhubrndsat,"00011101000vvvvvPP0uuuuu011ddddd") +DEF_ENC(V6_vasrvuhubsat,"00011101000vvvvvPP0uuuuu010ddddd") +DEF_ENC(V6_vasrvwuhrndsat,"00011101000vvvvvPP0uuuuu001ddddd") +DEF_ENC(V6_vasrvwuhsat,"00011101000vvvvvPP0uuuuu000ddddd") + /*************************************************************** * * Group #1, Uses Q6 Rt32 @@ -716,6 +721,7 @@ DEF_ENC(V6_vaddclbw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 001 ddddd") // DEF_ENC(V6_vavguw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 010 ddddd") // DEF_ENC(V6_vavguwrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 011 ddddd") // +DEF_ENC(V6_vassign_tmp,"00011110--0---01PP0uuuuu110ddddd") DEF_ENC(V6_vavgb, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 100 ddddd") // DEF_ENC(V6_vavgbrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 101 ddddd") // DEF_ENC(V6_vnavgb, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 110 ddddd") // @@ -741,6 +747,7 @@ DEF_ENC(V6_vshufoh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 100 ddddd") // DEF_ENC(V6_vshufoeh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 101 ddddd") // DEF_ENC(V6_vshufoeb, 
ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 110 ddddd") // DEF_ENC(V6_vcombine, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 111 ddddd") // +DEF_ENC(V6_vcombine_tmp,"00011110101vvvvvPP0uuuuu111ddddd") DEF_ENC(V6_v6mpyvubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 0ii ddddd") DEF_ENC(V6_v6mpyhubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 1ii ddddd") @@ -795,6 +802,7 @@ DEF_ENC(V6_vrounduhub, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 011 ddddd") // DEF_ENC(V6_vrounduwuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 100 ddddd") // DEF_ENC(V6_vmpyewuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd") DEF_ENC(V6_vmpyowh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 ddddd") +DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd") #endif /* NO MMVEC */ diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef index c0d169fd4f..ead32c243b 100644 --- a/target/hexagon/imported/mmvec/ext.idef +++ b/target/hexagon/imported/mmvec/ext.idef @@ -62,6 +62,9 @@ EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS), \ DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) +#define ITERATOR_INSN_SHIFT3_SLOT(WIDTH,TAG,SYNTAX,DESCR,CODE) \ +EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS,A_CVI_VS_3SRC,A_NOTE_SHIFT_RESOURCE,A_NOTE_NOVP,A_NOTE_VA_UNARY), \ +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) #define ITERATOR_INSN_SHIFT_SLOT_VV_LATE(WIDTH,TAG,SYNTAX,DESCR,CODE) \ EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS), \ @@ -980,6 +983,22 @@ NARROWING_SHIFT(16,vasrhubrndsat,fSETBYTE,ub,h,:rnd:sat,fVSATUB,fVROUND,0x7) NARROWING_SHIFT(16,vasrhbsat,fSETBYTE,b,h,:sat,fVSATB,fVNOROUND,0x7) NARROWING_SHIFT(16,vasrhbrndsat,fSETBYTE,b,h,:rnd:sat,fVSATB,fVROUND,0x7) +#define NARROWING_VECTOR_SHIFT(ITERSIZE,TAG,DSTM,DSTTYPE,SRCTYPE,SRCTYPE2,SYNOPTS,SATFUNC,RNDFUNC,SHAMTMASK) \ +ITERATOR_INSN_SHIFT3_SLOT(ITERSIZE,TAG, \ +"Vd32." #DSTTYPE "=vasr(Vuu32." #SRCTYPE ",Vv32." 
#SRCTYPE2 ")" #SYNOPTS, \ +"Vector shift by vector right and shuffle", \ + fHIDE(int )shamt = VvV.SRCTYPE2[2*i+0] & SHAMTMASK; \ + DSTM(0,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuuV.v[0].SRCTYPE[i],shamt) >> shamt)); \ + shamt = VvV.SRCTYPE2[2*i+1] & SHAMTMASK; \ + DSTM(1,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuuV.v[1].SRCTYPE[i],shamt) >> shamt))) + +/* WORD TO HALF*/ +NARROWING_VECTOR_SHIFT(32,vasrvwuhsat,fSETHALF,uh,w,uh,:sat,fVSATUH,fVNOROUND,0xF) +NARROWING_VECTOR_SHIFT(32,vasrvwuhrndsat,fSETHALF,uh,w,uh,:rnd:sat,fVSATUH,fVROUND,0xF) +/* HALF TO BYTE*/ +NARROWING_VECTOR_SHIFT(16,vasrvuhubsat,fSETBYTE,ub,uh,ub,:sat,fVSATUB,fVNOROUND,0x7) +NARROWING_VECTOR_SHIFT(16,vasrvuhubrndsat,fSETBYTE,ub,uh,ub,:rnd:sat,fVSATUB,fVROUND,0x7) + NARROWING_SHIFT_NOV1(16,vasruhubsat,fSETBYTE,ub,uh,:sat,fVSATUB,fVNOROUND,0x7) NARROWING_SHIFT_NOV1(16,vasruhubrndsat,fSETBYTE,ub,uh,:rnd:sat,fVSATUB,fVROUND,0x7) @@ -1364,6 +1383,9 @@ ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(16,vmpyhvsrs,"Vd32=vmpyh(Vu32,Vv32):<<1:rnd:s +ITERATOR_INSN_MPY_SLOT(16,vmpyuhvs, "Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16", +"Vector by Vector Unsigned Halfword Multiply with 16 bit rightshift", + VdV.uh[i] = fGETUHALF(1,fMPY16UU(VuV.uh[i],VvV.uh[i]))) ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus, "Vdd32=vmpyhus(Vu32,Vv32)","Vdd32.w=vmpy(Vu32.h,Vv32.uh)", @@ -2042,6 +2064,24 @@ ITERATOR_INSN_ANY_SLOT_DOUBLE_VEC(8,vcombine,"Vdd32=vcombine(Vu32,Vv32)", /////////////////////////////////////////////////////////////////////////// +EXTINSN(V6_vcombine_tmp, "Vdd32.tmp=vcombine(Vu32,Vv32)", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_REMAP,A_CVI_TMP,A_NO_INTRINSIC), +"Vector assign tmp, Any two to Vector Pair ", +{ + fHIDE(int i;) + fVFOREACH(8, i) { + VddV.v[0].ub[i] = VvV.ub[i]; + VddV.v[1].ub[i] = VuV.ub[i]; + } +}) + +EXTINSN(V6_vassign_tmp, "Vd32.tmp=Vu32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_REMAP,A_CVI_TMP,A_NO_INTRINSIC), +"Vector assign tmp, Any two to Vector Pair ", +{ + fHIDE(int i;) + fVFOREACH(32, i) { + VdV.w[i]=VuV.w[i]; + } +}) 
/********************************************************* * GENERAL PERMUTE NETWORKS From 6c61d4e138a3fb67ca443bb8017ac4049d5457a2 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:55 -0700 Subject: [PATCH 07/44] Hexagon (tests/tcg/hexagon) Add v69 HVX tests The following instructions are tested V6_vasrvuhubrndsat V6_vasrvuhubsat V6_vasrvwuhrndsat V6_vasrvwuhsat V6_vassign_tmp V6_vcombine_tmp V6_vmpyuhvs Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-8-tsimpson@quicinc.com> --- tests/tcg/hexagon/Makefile.target | 3 + tests/tcg/hexagon/v69_hvx.c | 318 ++++++++++++++++++++++++++++++ 2 files changed, 321 insertions(+) create mode 100644 tests/tcg/hexagon/v69_hvx.c diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 2ee930cf1f..558c056148 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -78,6 +78,7 @@ HEX_TESTS += test_vspliceb HEX_TESTS += v68_scalar HEX_TESTS += v68_hvx +HEX_TESTS += v69_hvx TESTS += $(HEX_TESTS) @@ -95,6 +96,8 @@ hvx_misc: CFLAGS += -mhvx hvx_histogram: CFLAGS += -mhvx -Wno-gnu-folding-constant v68_hvx: v68_hvx.c hvx_misc.h v6mpy_ref.c.inc v68_hvx: CFLAGS += -mhvx -Wno-unused-function +v69_hvx: v69_hvx.c hvx_misc.h +v69_hvx: CFLAGS += -mhvx -Wno-unused-function hvx_histogram: hvx_histogram.c hvx_histogram_row.S $(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS) diff --git a/tests/tcg/hexagon/v69_hvx.c b/tests/tcg/hexagon/v69_hvx.c new file mode 100644 index 0000000000..a0d567d142 --- /dev/null +++ b/tests/tcg/hexagon/v69_hvx.c @@ -0,0 +1,318 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +int err; + +#include "hvx_misc.h" + +#define fVROUND(VAL, SHAMT) \ + ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0)) + +#define fVSATUB(VAL) \ + ((((VAL) & 0xffLL) == (VAL)) ? \ + (VAL) : \ + ((((int32_t)(VAL)) < 0) ? 0 : 0xff)) + +#define fVSATUH(VAL) \ + ((((VAL) & 0xffffLL) == (VAL)) ? \ + (VAL) : \ + ((((int32_t)(VAL)) < 0) ? 0 : 0xffff)) + +static void test_vasrvuhubrndsat(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + memset(expect, 0xaa, sizeof(expect)); + memset(output, 0xbb, sizeof(output)); + + for (int i = 0; i < BUFSIZE / 2; i++) { + asm("v4 = vmem(%0 + #0)\n\t" + "v5 = vmem(%0 + #1)\n\t" + "v6 = vmem(%1 + #0)\n\t" + "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t" + "vmem(%2) = v5\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "v4", "v5", "v6", "memory"); + p0 += sizeof(MMVector) * 2; + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { + int shamt; + uint8_t byte0; + uint8_t byte1; + + shamt = buffer1[i].ub[2 * j + 0] & 0x7; + byte0 = fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt) >> shamt); + shamt = buffer1[i].ub[2 * j + 1] & 0x7; + byte1 = fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt) >> shamt); + expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff); + } + } + + check_output_h(__LINE__, BUFSIZE / 2); +} + +static void test_vasrvuhubsat(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + memset(expect, 0xaa, sizeof(expect)); + memset(output, 0xbb, sizeof(output)); + + for (int i = 0; i < BUFSIZE / 2; i++) { + 
asm("v4 = vmem(%0 + #0)\n\t" + "v5 = vmem(%0 + #1)\n\t" + "v6 = vmem(%1 + #0)\n\t" + "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t" + "vmem(%2) = v5\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "v4", "v5", "v6", "memory"); + p0 += sizeof(MMVector) * 2; + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { + int shamt; + uint8_t byte0; + uint8_t byte1; + + shamt = buffer1[i].ub[2 * j + 0] & 0x7; + byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt); + shamt = buffer1[i].ub[2 * j + 1] & 0x7; + byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt); + expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff); + } + } + + check_output_h(__LINE__, BUFSIZE / 2); +} + +static void test_vasrvwuhrndsat(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + memset(expect, 0xaa, sizeof(expect)); + memset(output, 0xbb, sizeof(output)); + + for (int i = 0; i < BUFSIZE / 2; i++) { + asm("v4 = vmem(%0 + #0)\n\t" + "v5 = vmem(%0 + #1)\n\t" + "v6 = vmem(%1 + #0)\n\t" + "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t" + "vmem(%2) = v5\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "v4", "v5", "v6", "memory"); + p0 += sizeof(MMVector) * 2; + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + int shamt; + uint16_t half0; + uint16_t half1; + + shamt = buffer1[i].uh[2 * j + 0] & 0xf; + half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt); + shamt = buffer1[i].uh[2 * j + 1] & 0xf; + half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt); + expect[i].w[j] = ((uint32_t)half1 << 16) | (half0 & 0xffff); + } + } + + check_output_w(__LINE__, BUFSIZE / 2); +} + +static void test_vasrvwuhsat(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + memset(expect, 0xaa, sizeof(expect)); + memset(output, 0xbb, sizeof(output)); + + for (int i = 0; i < BUFSIZE / 2; i++) { + asm("v4 = vmem(%0 + #0)\n\t" + "v5 = vmem(%0 + #1)\n\t" + "v6 = vmem(%1 + #0)\n\t" + "v5.uh
= vasr(v5:4.w, v6.uh):sat\n\t" + "vmem(%2) = v5\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "v4", "v5", "v6", "memory"); + p0 += sizeof(MMVector) * 2; + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + int shamt; + uint16_t half0; + uint16_t half1; + + shamt = buffer1[i].uh[2 * j + 0] & 0xf; + half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt); + shamt = buffer1[i].uh[2 * j + 1] & 0xf; + half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt); + expect[i].w[j] = ((uint32_t)half1 << 16) | (half0 & 0xffff); + } + } + + check_output_w(__LINE__, BUFSIZE / 2); +} + +static void test_vassign_tmp(void) +{ + void *p0 = buffer0; + void *pout = output; + + memset(expect, 0xaa, sizeof(expect)); + memset(output, 0xbb, sizeof(output)); + + for (int i = 0; i < BUFSIZE; i++) { + /* + * Assign into v12 as .tmp, then use it in the next packet + * Should get the new value within the same packet and + * the old value in the next packet + */ + asm("v3 = vmem(%0 + #0)\n\t" + "r1 = #1\n\t" + "v12 = vsplat(r1)\n\t" + "r1 = #2\n\t" + "v13 = vsplat(r1)\n\t" + "{\n\t" + " v12.tmp = v13\n\t" + " v4.w = vadd(v12.w, v3.w)\n\t" + "}\n\t" + "v4.w = vadd(v4.w, v12.w)\n\t" + "vmem(%1 + #0) = v4\n\t" + : : "r"(p0), "r"(pout) + : "r1", "v3", "v4", "v12", "v13", "memory"); + p0 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].w[j] = buffer0[i].w[j] + 3; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_vcombine_tmp(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + memset(expect, 0xaa, sizeof(expect)); + memset(output, 0xbb, sizeof(output)); + + for (int i = 0; i < BUFSIZE; i++) { + /* + * Combine into v13:12 as .tmp, then use it in the next packet + * Should get the new value within the same packet and + * the old value in the next packet + */ + asm("v3 = vmem(%0 + #0)\n\t" + "r1 = #1\n\t" + "v12 = vsplat(r1)\n\t" + "r1 = #2\n\t" + "v13 =
vsplat(r1)\n\t" + "r1 = #3\n\t" + "v14 = vsplat(r1)\n\t" + "r1 = #4\n\t" + "v15 = vsplat(r1)\n\t" + "{\n\t" + " v13:12.tmp = vcombine(v15, v14)\n\t" + " v4.w = vadd(v12.w, v3.w)\n\t" + " v16 = v13\n\t" + "}\n\t" + "v4.w = vadd(v4.w, v12.w)\n\t" + "v4.w = vadd(v4.w, v13.w)\n\t" + "v4.w = vadd(v4.w, v16.w)\n\t" + "vmem(%2 + #0) = v4\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory"); + p0 += sizeof(MMVector); + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].w[j] = buffer0[i].w[j] + 10; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_vmpyuhvs(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + memset(expect, 0xaa, sizeof(expect)); + memset(output, 0xbb, sizeof(output)); + + for (int i = 0; i < BUFSIZE; i++) { + asm("v4 = vmem(%0 + #0)\n\t" + "v5 = vmem(%1 + #0)\n\t" + "v4.uh = vmpy(V4.uh, v5.uh):>>16\n\t" + "vmem(%2) = v4\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "v4", "v5", "memory"); + p0 += sizeof(MMVector); + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) { + expect[i].uh[j] = (buffer0[i].uh[j] * buffer1[i].uh[j]) >> 16; + } + } + + check_output_h(__LINE__, BUFSIZE); +} + +int main() +{ + init_buffers(); + + test_vasrvuhubrndsat(); + test_vasrvuhubsat(); + test_vasrvwuhrndsat(); + test_vasrvwuhsat(); + + test_vassign_tmp(); + test_vcombine_tmp(); + + test_vmpyuhvs(); + + puts(err ? "FAIL" : "PASS"); + return err ? 
1 : 0; +} From 59958d8903fbf03209a3f62e36708de969d09a1a Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:56 -0700 Subject: [PATCH 08/44] Hexagon (target/hexagon) Add v73 scalar instructions The following instructions are added J2_callrh J2_junprh Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-9-tsimpson@quicinc.com> --- target/hexagon/attribs_def.h.inc | 1 + target/hexagon/gen_tcg.h | 4 ++++ target/hexagon/imported/branch.idef | 7 ++++++- target/hexagon/imported/encode_pp.def | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 3bef60bef3..69da9776f0 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -89,6 +89,7 @@ DEF_ATTRIB(JUMP, "Jump-type instruction", "", "") DEF_ATTRIB(INDIRECT, "Absolute register jump", "", "") DEF_ATTRIB(CALL, "Function call instruction", "", "") DEF_ATTRIB(COF, "Change-of-flow instruction", "", "") +DEF_ATTRIB(HINTED_COF, "This instruction is a hinted change-of-flow", "", "") DEF_ATTRIB(CONDEXEC, "May be cancelled by a predicate", "", "") DEF_ATTRIB(DOTNEWVALUE, "Uses a register value generated in this pkt", "", "") DEF_ATTRIB(NEWCMPJUMP, "Compound compare and jump", "", "") diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 598d80d3ce..6f12f665db 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -653,6 +653,8 @@ gen_call(ctx, riV) #define fGEN_TCG_J2_callr(SHORTCODE) \ gen_callr(ctx, RsV) +#define fGEN_TCG_J2_callrh(SHORTCODE) \ + gen_callr(ctx, RsV) #define fGEN_TCG_J2_callt(SHORTCODE) \ gen_cond_call(ctx, PuV, TCG_COND_EQ, riV) @@ -851,6 +853,8 @@ gen_jump(ctx, riV) #define fGEN_TCG_J2_jumpr(SHORTCODE) \ gen_jumpr(ctx, RsV) +#define fGEN_TCG_J2_jumprh(SHORTCODE) \ + gen_jumpr(ctx, RsV) #define fGEN_TCG_J4_jumpseti(SHORTCODE) \ do { \ tcg_gen_movi_tl(RdV, UiV); \ diff --git 
a/target/hexagon/imported/branch.idef b/target/hexagon/imported/branch.idef index 88f5f48cce..93e2e375a5 100644 --- a/target/hexagon/imported/branch.idef +++ b/target/hexagon/imported/branch.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,6 +34,9 @@ Q6INSN(J2_jump,"jump #r22:2",ATTRIBS(A_JDIR), "direct unconditional jump", Q6INSN(J2_jumpr,"jumpr Rs32",ATTRIBS(A_JINDIR), "indirect unconditional jump", {fJUMPR(RsN,RsV,COF_TYPE_JUMPR);}) +Q6INSN(J2_jumprh,"jumprh Rs32",ATTRIBS(A_JINDIR, A_HINTED_COF), "indirect unconditional jump", +{fJUMPR(RsN,RsV,COF_TYPE_JUMPR);}) + #define OLDCOND_JUMP(TAG,OPER,OPER2,ATTRIB,DESCR,SEMANTICS) \ Q6INSN(TAG##t,"if (Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLD(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLD(PuV)) { SEMANTICS; }}) \ Q6INSN(TAG##f,"if (!Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLDNOT(PuV)) { SEMANTICS; }}) \ @@ -196,6 +199,8 @@ Q6INSN(J2_callrt,"if (Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional Q6INSN(J2_callrf,"if (!Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional call if false", {fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0);if (fLSBOLDNOT(PuV)) { fCALLR(RsV); }}) +Q6INSN(J2_callrh,"callrh Rs32",ATTRIBS(CINDIR_STD, A_HINTED_COF), "hinted indirect unconditional call", +{ fCALLR(RsV); }) diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 763f465bfd..0cd30a5e85 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -524,6 +524,7 @@ DEF_FIELD32(ICLASS_J" 110- -------- PP-!---- --------",J_PT,"Predict-taken") 
DEF_FIELDROW_DESC32(ICLASS_J" 0000 -------- PP------ --------","[#0] PC=(Rs), R31=return") DEF_ENC32(J2_callr, ICLASS_J" 0000 101sssss PP------ --------") +DEF_ENC32(J2_callrh, ICLASS_J" 0000 110sssss PP------ --------") DEF_FIELDROW_DESC32(ICLASS_J" 0001 -------- PP------ --------","[#1] if (Pu) PC=(Rs), R31=return") DEF_ENC32(J2_callrt, ICLASS_J" 0001 000sssss PP----uu --------") @@ -531,6 +532,7 @@ DEF_ENC32(J2_callrf, ICLASS_J" 0001 001sssss PP----uu --------") DEF_FIELDROW_DESC32(ICLASS_J" 0010 -------- PP------ --------","[#2] PC=(Rs); ") DEF_ENC32(J2_jumpr, ICLASS_J" 0010 100sssss PP------ --------") +DEF_ENC32(J2_jumprh, ICLASS_J" 0010 110sssss PP------ --------") DEF_ENC32(J4_hintjumpr, ICLASS_J" 0010 101sssss PP------ --------") DEF_FIELDROW_DESC32(ICLASS_J" 0011 -------- PP------ --------","[#3] if (Pu) PC=(Rs) ") From 4dd311ed2ee695a86ea77281c20dbeb115403d21 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:40:57 -0700 Subject: [PATCH 09/44] Hexagon (tests/tcg/hexagon) Add v73 scalar tests Tests added for the following instructions J2_callrh J2_jumprh Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Message-Id: <20230427224057.3766963-10-tsimpson@quicinc.com> --- tests/tcg/hexagon/Makefile.target | 2 + tests/tcg/hexagon/v73_scalar.c | 96 +++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 tests/tcg/hexagon/v73_scalar.c diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 558c056148..3172f2e4db 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -79,6 +79,7 @@ HEX_TESTS += test_vspliceb HEX_TESTS += v68_scalar HEX_TESTS += v68_hvx HEX_TESTS += v69_hvx +HEX_TESTS += v73_scalar TESTS += $(HEX_TESTS) @@ -98,6 +99,7 @@ v68_hvx: v68_hvx.c hvx_misc.h v6mpy_ref.c.inc v68_hvx: CFLAGS += -mhvx -Wno-unused-function v69_hvx: v69_hvx.c hvx_misc.h v69_hvx: CFLAGS += -mhvx -Wno-unused-function +v73_scalar: CFLAGS += 
-Wno-unused-function hvx_histogram: hvx_histogram.c hvx_histogram_row.S $(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS) diff --git a/tests/tcg/hexagon/v73_scalar.c b/tests/tcg/hexagon/v73_scalar.c new file mode 100644 index 0000000000..fee67fc531 --- /dev/null +++ b/tests/tcg/hexagon/v73_scalar.c @@ -0,0 +1,96 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> + +/* + * Test the scalar core instructions that are new in v73 + */ + +int err; + +static void __check32(int line, uint32_t result, uint32_t expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%08x != 0x%08x\n", + line, result, expect); + err++; + } +} + +#define check32(RES, EXP) __check32(__LINE__, RES, EXP) + +static void __check64(int line, uint64_t result, uint64_t expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", + line, result, expect); + err++; + } +} + +#define check64(RES, EXP) __check64(__LINE__, RES, EXP) + +static bool my_func_called; + +static void my_func(void) +{ + my_func_called = true; +} + +static inline void callrh(void *func) +{ + asm volatile("callrh %0\n\t" + : : "r"(func) + /* Mark the caller-save registers as clobbered */ + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15", "r28", + "p0", "p1", "p2", "p3"); +} + +static void test_callrh(void) +{ + my_func_called = false; + callrh(&my_func); + check32(my_func_called, true); +} + +static void test_jumprh(void) +{ + uint32_t res; + asm ("%0 = #5\n\t" + "r0 = ##1f\n\t" + "jumprh r0\n\t" + "%0 = #3\n\t" + "jump 2f\n\t" + "1:\n\t" + "%0 = #1\n\t" + "2:\n\t" + : "=r"(res) : : "r0"); + check32(res, 1); +} + +int main() +{ + test_callrh(); + test_jumprh(); + + puts(err ? "FAIL" : "PASS"); + return err ? 
1 : 0; +} From 63efb6ab5601762a40a739f97328be69b6b89561 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:52 -0700 Subject: [PATCH 10/44] meson.build Add CONFIG_HEXAGON_IDEF_PARSER Enable conditional compilation depending on whether idef-parser is configured Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-2-tsimpson@quicinc.com> --- meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/meson.build b/meson.build index d3cf48960b..b36124fdc4 100644 --- a/meson.build +++ b/meson.build @@ -1866,6 +1866,7 @@ endif config_host_data.set('CONFIG_GTK', gtk.found()) config_host_data.set('CONFIG_VTE', vte.found()) config_host_data.set('CONFIG_GTK_CLIPBOARD', have_gtk_clipboard) +config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_parser')) config_host_data.set('CONFIG_LIBATTR', have_old_libattr) config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found()) config_host_data.set('CONFIG_EBPF', libbpf.found()) From 07540a28c7ecc5fb8cbb0522f3cdead5bcd6d191 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:53 -0700 Subject: [PATCH 11/44] Hexagon (target/hexagon) Add DisasContext arg to gen_log_reg_write Add DisasContext arg to gen_log_reg_write_pair also Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-3-tsimpson@quicinc.com> --- target/hexagon/README | 2 +- target/hexagon/gen_tcg.h | 2 +- target/hexagon/gen_tcg_funcs.py | 8 +++++--- target/hexagon/genptr.c | 10 +++++----- target/hexagon/genptr.h | 2 +- target/hexagon/idef-parser/parser-helpers.c | 2 +- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/target/hexagon/README b/target/hexagon/README index 0f48da9328..f86850ba73 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -87,7 +87,7 @@ tcg_funcs_generated.c.inc TCGv RsV = hex_gpr[insn->regno[1]]; TCGv RtV = hex_gpr[insn->regno[2]]; gen_helper_A2_add(RdV, cpu_env, RsV, RtV); 
- gen_log_reg_write(RdN, RdV); + gen_log_reg_write(ctx, RdN, RdV); } helper_funcs_generated.c.inc diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 6f12f665db..d4bd38810e 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -515,7 +515,7 @@ do { \ TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); \ gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \ - gen_log_reg_write_pair(HEX_REG_FP, RddV); \ + gen_log_reg_write_pair(ctx, HEX_REG_FP, RddV); \ } while (0) /* diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index fcb3384480..d9ccbe63f6 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -387,7 +387,8 @@ def gen_helper_call_imm(f, immlett): def genptr_dst_write_pair(f, tag, regtype, regid): - f.write(f" gen_log_reg_write_pair({regtype}{regid}N, " f"{regtype}{regid}V);\n") + f.write(f" gen_log_reg_write_pair(ctx, {regtype}{regid}N, " + f"{regtype}{regid}V);\n") def genptr_dst_write(f, tag, regtype, regid): @@ -396,7 +397,8 @@ def genptr_dst_write(f, tag, regtype, regid): genptr_dst_write_pair(f, tag, regtype, regid) elif regid in {"d", "e", "x", "y"}: f.write( - f" gen_log_reg_write({regtype}{regid}N, " f"{regtype}{regid}V);\n" + f" gen_log_reg_write(ctx, {regtype}{regid}N, " + f"{regtype}{regid}V);\n" ) else: print("Bad register parse: ", regtype, regid) @@ -481,7 +483,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag): ## TCGv RsV = hex_gpr[insn->regno[1]]; ## TCGv RtV = hex_gpr[insn->regno[2]]; ## <GEN> -## gen_log_reg_write(RdN, RdV); +## gen_log_reg_write(ctx, RdN, RdV); ## } ## ## where <GEN> depends on hex_common.skip_qemu_helper(tag) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 244063b1d2..dd707a9dc7 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -81,7 +81,7 @@ static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum) return result; } -void gen_log_reg_write(int rnum, TCGv val) +void 
gen_log_reg_write(DisasContext 
*ctx, int rnum, TCGv val) { const target_ulong reg_mask = reg_immut_masks[rnum]; @@ -93,7 +93,7 @@ void gen_log_reg_write(int rnum, TCGv val) } } -static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) +static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) { const target_ulong reg_mask_low = reg_immut_masks[rnum]; const target_ulong reg_mask_high = reg_immut_masks[rnum + 1]; @@ -231,7 +231,7 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, if (reg_num == HEX_REG_P3_0_ALIASED) { gen_write_p3_0(ctx, val); } else { - gen_log_reg_write(reg_num, val); + gen_log_reg_write(ctx, reg_num, val); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; } @@ -255,7 +255,7 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, tcg_gen_extrh_i64_i32(val32, val); tcg_gen_mov_tl(result, val32); } else { - gen_log_reg_write_pair(reg_num, val); + gen_log_reg_write_pair(ctx, reg_num, val); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; ctx->num_insns = 0; @@ -719,7 +719,7 @@ static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred) { TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond); - gen_log_reg_write_pair(HEX_REG_FP, RddV); + gen_log_reg_write_pair(ctx, HEX_REG_FP, RddV); } static void gen_endloop0(DisasContext *ctx) diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h index 76e497aa48..75d0fc262d 100644 --- a/target/hexagon/genptr.h +++ b/target/hexagon/genptr.h @@ -35,7 +35,7 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot); void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot); TCGv gen_read_reg(TCGv result, int num); TCGv gen_read_preg(TCGv pred, uint8_t num); -void gen_log_reg_write(int rnum, TCGv val); +void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val); void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val); void 
gen_set_usr_field(DisasContext *ctx, int field, TCGv val); void gen_set_usr_fieldi(DisasContext *ctx, int field, int x); diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index 8734218e51..09161e394d 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -1318,7 +1318,7 @@ void gen_write_reg(Context *c, YYLTYPE *locp, HexValue *reg, HexValue *value) value_m = rvalue_materialize(c, locp, &value_m); OUT(c, locp, - "gen_log_reg_write(", &reg->reg.id, ", ", + "gen_log_reg_write(ctx, ", &reg->reg.id, ", ", &value_m, ");\n"); } From 17fda3c2d4064c82fa29e83561e0fb6ae8673c16 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:54 -0700 Subject: [PATCH 12/44] Hexagon (target/hexagon) Add overrides for loop setup instructions These instructions have implicit writes to registers, so we don't want them to be helpers when idef-parser is off. Signed-off-by: Taylor Simpson Acked-by: Richard Henderson Message-Id: <20230427230012.3800327-4-tsimpson@quicinc.com> --- target/hexagon/gen_tcg.h | 21 +++++++++++++++++++ target/hexagon/genptr.c | 44 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index d4bd38810e..8d5e9826a0 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -665,6 +665,27 @@ #define fGEN_TCG_J2_callrf(SHORTCODE) \ gen_cond_callr(ctx, TCG_COND_NE, PuV, RsV) +#define fGEN_TCG_J2_loop0r(SHORTCODE) \ + gen_loop0r(ctx, RsV, riV) +#define fGEN_TCG_J2_loop1r(SHORTCODE) \ + gen_loop1r(ctx, RsV, riV) +#define fGEN_TCG_J2_loop0i(SHORTCODE) \ + gen_loop0i(ctx, UiV, riV) +#define fGEN_TCG_J2_loop1i(SHORTCODE) \ + gen_loop1i(ctx, UiV, riV) +#define fGEN_TCG_J2_ploop1sr(SHORTCODE) \ + gen_ploopNsr(ctx, 1, RsV, riV) +#define fGEN_TCG_J2_ploop1si(SHORTCODE) \ + gen_ploopNsi(ctx, 1, UiV, riV) +#define fGEN_TCG_J2_ploop2sr(SHORTCODE) \ + 
gen_ploopNsr(ctx, 
2, RsV, riV) +#define fGEN_TCG_J2_ploop2si(SHORTCODE) \ + gen_ploopNsi(ctx, 2, UiV, riV) +#define fGEN_TCG_J2_ploop3sr(SHORTCODE) \ + gen_ploopNsr(ctx, 3, RsV, riV) +#define fGEN_TCG_J2_ploop3si(SHORTCODE) \ + gen_ploopNsi(ctx, 3, UiV, riV) + #define fGEN_TCG_J2_endloop0(SHORTCODE) \ gen_endloop0(ctx) #define fGEN_TCG_J2_endloop1(SHORTCODE) \ diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index dd707a9dc7..6e5767ec5e 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -518,6 +518,50 @@ static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2) tcg_gen_movcond_tl(cond, res, arg1, arg2, one, zero); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +static inline void gen_loop0r(DisasContext *ctx, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC0, RsV); + gen_log_reg_write(ctx, HEX_REG_SA0, tcg_constant_tl(ctx->pkt->pc + riV)); + gen_set_usr_fieldi(ctx, USR_LPCFG, 0); +} + +static void gen_loop0i(DisasContext *ctx, int count, int riV) +{ + gen_loop0r(ctx, tcg_constant_tl(count), riV); +} + +static inline void gen_loop1r(DisasContext *ctx, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC1, RsV); + gen_log_reg_write(ctx, HEX_REG_SA1, tcg_constant_tl(ctx->pkt->pc + riV)); +} + +static void gen_loop1i(DisasContext *ctx, int count, int riV) +{ + gen_loop1r(ctx, tcg_constant_tl(count), riV); +} + +static void gen_ploopNsr(DisasContext *ctx, int N, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC0, RsV); + gen_log_reg_write(ctx, HEX_REG_SA0, tcg_constant_tl(ctx->pkt->pc + riV)); + gen_set_usr_fieldi(ctx, USR_LPCFG, N); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0)); +} + +static void gen_ploopNsi(DisasContext *ctx, int N, int count, int riV) +{ + gen_ploopNsr(ctx, N, tcg_constant_tl(count), riV); +} +#endif + static void gen_cond_jumpr(DisasContext *ctx, TCGv dst_pc, TCGCond cond, TCGv pred) { From 
085b6700f0063c4702706ac4bdcb80931b74e307 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:55 -0700 Subject: [PATCH 13/44] Hexagon (target/hexagon) Add overrides for allocframe/deallocframe These instructions have implicit writes to registers, so we don't want them to be helpers when idef-parser is off. Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-5-tsimpson@quicinc.com> --- target/hexagon/gen_tcg.h | 32 +++++++++++++++++++++++++++ target/hexagon/genptr.c | 47 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 8d5e9826a0..ef17f2f18c 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -500,6 +500,38 @@ #define fGEN_TCG_Y2_icinva(SHORTCODE) \ do { RsV = RsV; } while (0) +/* + * allocframe(#uiV) + * RxV == r29 + */ +#define fGEN_TCG_S2_allocframe(SHORTCODE) \ + gen_allocframe(ctx, RxV, uiV) + +/* sub-instruction version (no RxV, so handle it manually) */ +#define fGEN_TCG_SS2_allocframe(SHORTCODE) \ + do { \ + TCGv r29 = tcg_temp_new(); \ + tcg_gen_mov_tl(r29, hex_gpr[HEX_REG_SP]); \ + gen_allocframe(ctx, r29, uiV); \ + gen_log_reg_write(ctx, HEX_REG_SP, r29); \ + } while (0) + +/* + * Rdd32 = deallocframe(Rs32):raw + * RddV == r31:30 + * RsV == r30 + */ +#define fGEN_TCG_L2_deallocframe(SHORTCODE) \ + gen_deallocframe(ctx, RddV, RsV) + +/* sub-instruction version (no RddV/RsV, so handle it manually) */ +#define fGEN_TCG_SL2_deallocframe(SHORTCODE) \ + do { \ + TCGv_i64 r31_30 = tcg_temp_new_i64(); \ + gen_deallocframe(ctx, r31_30, hex_gpr[HEX_REG_FP]); \ + gen_log_reg_write_pair(ctx, HEX_REG_FP, r31_30); \ + } while (0) + /* * dealloc_return * Assembler mapped to diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 6e5767ec5e..fa7b1754bd 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -709,6 +709,18 @@ static void gen_cond_callr(DisasContext 
*ctx, gen_set_label(skip); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +/* frame = ((LR << 32) | FP) ^ (FRAMEKEY << 32)) */ +static TCGv_i64 gen_frame_scramble(void) +{ + TCGv_i64 frame = tcg_temp_new_i64(); + TCGv tmp = tcg_temp_new(); + tcg_gen_xor_tl(tmp, hex_gpr[HEX_REG_LR], hex_gpr[HEX_REG_FRAMEKEY]); + tcg_gen_concat_i32_i64(frame, hex_gpr[HEX_REG_FP], tmp); + return frame; +} +#endif + /* frame ^= (int64_t)FRAMEKEY << 32 */ static void gen_frame_unscramble(TCGv_i64 frame) { @@ -725,6 +737,41 @@ static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA) tcg_gen_qemu_ld_i64(frame, EA, ctx->mem_idx, MO_TEUQ); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +/* Stack overflow check */ +static void gen_framecheck(TCGv EA, int framesize) +{ + /* Not modelled in linux-user mode */ + /* Placeholder for system mode */ +#ifndef CONFIG_USER_ONLY + g_assert_not_reached(); +#endif +} + +static void gen_allocframe(DisasContext *ctx, TCGv r29, int framesize) +{ + TCGv r30 = tcg_temp_new(); + TCGv_i64 frame; + tcg_gen_addi_tl(r30, r29, -8); + frame = gen_frame_scramble(); + gen_store8(cpu_env, r30, frame, ctx->insn->slot); + gen_log_reg_write(ctx, HEX_REG_FP, r30); + gen_framecheck(r30, framesize); + tcg_gen_subi_tl(r29, r30, framesize); +} + +static void gen_deallocframe(DisasContext *ctx, TCGv_i64 r31_30, TCGv r30) +{ + TCGv r29 = tcg_temp_new(); + TCGv_i64 frame = tcg_temp_new_i64(); + gen_load_frame(ctx, frame, r30); + gen_frame_unscramble(frame); + tcg_gen_mov_i64(r31_30, frame); + tcg_gen_addi_tl(r29, r30, 8); + gen_log_reg_write(ctx, HEX_REG_SP, r29); +} +#endif + static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src) { /* From 5c4b11e827e3cd64a6790b6158c75ed876e6f6e9 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:56 -0700 Subject: [PATCH 14/44] Hexagon (target/hexagon) Add overrides for clr[tf]new These instructions have implicit reads from p0, so we don't want them in helpers when idef-parser is off. 
Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-6-tsimpson@quicinc.com> --- target/hexagon/gen_tcg.h | 16 ++++++++++++++++ target/hexagon/macros.h | 4 ---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index ef17f2f18c..a1d7eabae7 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -1101,6 +1101,22 @@ gen_jump(ctx, riV); \ } while (0) +/* if (p0.new) r0 = #0 */ +#define fGEN_TCG_SA1_clrtnew(SHORTCODE) \ + do { \ + tcg_gen_movcond_tl(TCG_COND_EQ, RdV, \ + hex_new_pred_value[0], tcg_constant_tl(0), \ + RdV, tcg_constant_tl(0)); \ + } while (0) + +/* if (!p0.new) r0 = #0 */ +#define fGEN_TCG_SA1_clrfnew(SHORTCODE) \ + do { \ + tcg_gen_movcond_tl(TCG_COND_NE, RdV, \ + hex_new_pred_value[0], tcg_constant_tl(0), \ + RdV, tcg_constant_tl(0)); \ + } while (0) + #define fGEN_TCG_J2_pause(SHORTCODE) \ do { \ uiV = uiV; \ diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 760630de8f..b1ff40c894 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -227,12 +227,8 @@ static inline void gen_cancel(uint32_t slot) #ifdef QEMU_GENERATE #define fLSBNEW(PVAL) tcg_gen_andi_tl(LSB, (PVAL), 1) -#define fLSBNEW0 tcg_gen_andi_tl(LSB, hex_new_pred_value[0], 1) -#define fLSBNEW1 tcg_gen_andi_tl(LSB, hex_new_pred_value[1], 1) #else #define fLSBNEW(PVAL) ((PVAL) & 1) -#define fLSBNEW0 (env->new_pred_value[0] & 1) -#define fLSBNEW1 (env->new_pred_value[1] & 1) #endif #ifdef QEMU_GENERATE From 9942f6a8b66a9fc320fadc52c0f72fcfaebf5b50 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:57 -0700 Subject: [PATCH 15/44] Hexagon (target/hexagon) Remove log_reg_write from op_helper.[ch] With the overrides added in prior commits, this function is not used Remove references in macros.h Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-7-tsimpson@quicinc.com> --- 
target/hexagon/macros.h | 14 -------------- target/hexagon/op_helper.c | 17 ----------------- target/hexagon/op_helper.h | 4 ---- 3 files changed, 35 deletions(-) diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index b1ff40c894..995ae0e384 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -343,10 +343,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fREAD_LR() (env->gpr[HEX_REG_LR]) -#define fWRITE_LR(A) log_reg_write(env, HEX_REG_LR, A) -#define fWRITE_FP(A) log_reg_write(env, HEX_REG_FP, A) -#define fWRITE_SP(A) log_reg_write(env, HEX_REG_SP, A) - #define fREAD_SP() (env->gpr[HEX_REG_SP]) #define fREAD_LC0 (env->gpr[HEX_REG_LC0]) #define fREAD_LC1 (env->gpr[HEX_REG_LC1]) @@ -371,16 +367,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fBRANCH(LOC, TYPE) fWRITE_NPC(LOC) #define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR) #define fHINTJR(TARGET) { /* Not modelled in qemu */} -#define fWRITE_LOOP_REGS0(START, COUNT) \ - do { \ - log_reg_write(env, HEX_REG_LC0, COUNT); \ - log_reg_write(env, HEX_REG_SA0, START); \ - } while (0) -#define fWRITE_LOOP_REGS1(START, COUNT) \ - do { \ - log_reg_write(env, HEX_REG_LC1, COUNT); \ - log_reg_write(env, HEX_REG_SA1, START);\ - } while (0) #define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1) #define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL)) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 3cc71b69d9..7e9e3f305e 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -52,23 +52,6 @@ G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp) do_raise_exception_err(env, excp, 0); } -void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val) -{ - HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")", - rnum, val, val); - if (val == env->gpr[rnum]) { - HEX_DEBUG_LOG(" NO CHANGE"); - } - HEX_DEBUG_LOG("\n"); - - 
env->new_value[rnum] = val; - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - env->reg_written[rnum] = 1; - } -} - static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val) { HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h index db22b54401..6bd4b07849 100644 --- a/target/hexagon/op_helper.h +++ b/target/hexagon/op_helper.h @@ -19,15 +19,11 @@ #define HEXAGON_OP_HELPER_H /* Misc functions */ -void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr); - uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val); void log_store64(CPUHexagonState *env, target_ulong addr, int64_t val, int width, int slot); void log_store32(CPUHexagonState *env, target_ulong addr, From d24f0b2b89d0f4f595a1b62c00c4c951d1d5fce9 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:58 -0700 Subject: [PATCH 16/44] Hexagon (target/hexagon) Eliminate uses of log_pred_write function These instructions have implicit writes to registers, so we don't want them to be helpers when idef-parser is off. 
The following instructions are overridden S2_cabacdecbin SA1_cmpeqi Remove the log_pred_write function from op_helper.c Remove references in macros.h Signed-off-by: Taylor Simpson Acked-by: Richard Henderson Message-Id: <20230427230012.3800327-8-tsimpson@quicinc.com> --- target/hexagon/gen_tcg.h | 16 +++++++ target/hexagon/genptr.c | 5 ++ target/hexagon/helper.h | 2 + target/hexagon/macros.h | 4 -- target/hexagon/op_helper.c | 96 ++++++++++++++++++++++++++++++++------ 5 files changed, 104 insertions(+), 19 deletions(-) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index a1d7eabae7..099a6cc47f 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -595,6 +595,14 @@ gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ } while (0) +#define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \ + do { \ + TCGv p0 = tcg_temp_new(); \ + gen_helper_cabacdecbin_pred(p0, RssV, RttV); \ + gen_helper_cabacdecbin_val(RddV, RssV, RttV); \ + gen_log_pred_write(ctx, 0, p0); \ + } while (0) + /* * Approximate reciprocal * r3,p1 = sfrecipa(r0, r1) @@ -902,6 +910,14 @@ #define fGEN_TCG_J4_tstbit0_fp1_jump_t(SHORTCODE) \ gen_cmpnd_tstbit0_jmp(ctx, 1, RsV, TCG_COND_NE, riV) +/* p0 = cmp.eq(r0, #7) */ +#define fGEN_TCG_SA1_cmpeqi(SHORTCODE) \ + do { \ + TCGv p0 = tcg_temp_new(); \ + gen_comparei(TCG_COND_EQ, p0, RsV, uiV); \ + gen_log_pred_write(ctx, 0, p0); \ + } while (0) + #define fGEN_TCG_J2_jump(SHORTCODE) \ gen_jump(ctx, riV) #define fGEN_TCG_J2_jumpr(SHORTCODE) \ diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index fa7b1754bd..dac62b90a6 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -560,6 +560,11 @@ static void gen_ploopNsi(DisasContext *ctx, int N, int count, int riV) { gen_ploopNsr(ctx, N, tcg_constant_tl(count), riV); } + +static inline void gen_comparei(TCGCond cond, TCGv res, TCGv arg1, int arg2) +{ + gen_compare(cond, res, arg1, tcg_constant_tl(arg2)); +} #endif static void gen_cond_jumpr(DisasContext 
*ctx, TCGv 
dst_pc, diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index ed7f9842f6..73849e3d49 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -31,6 +31,8 @@ DEF_HELPER_3(sfrecipa, i64, env, f32, f32) DEF_HELPER_2(sfinvsqrta, i64, env, f32) DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64) +DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64) /* Floating point */ DEF_HELPER_2(conv_sf2df, f64, env, f32) diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 995ae0e384..24c78fe80a 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -371,10 +371,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1) #define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL)) #define fGET_LPCFG (GET_USR_FIELD(USR_LPCFG)) -#define fWRITE_P0(VAL) log_pred_write(env, 0, VAL) -#define fWRITE_P1(VAL) log_pred_write(env, 1, VAL) -#define fWRITE_P2(VAL) log_pred_write(env, 2, VAL) -#define fWRITE_P3(VAL) log_pred_write(env, 3, VAL) #define fPART1(WORK) if (part1) { WORK; return; } #define fCAST4u(A) ((uint32_t)(A)) #define fCAST4s(A) ((int32_t)(A)) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 7e9e3f305e..46ccc59106 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -52,21 +52,6 @@ G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp) do_raise_exception_err(env, excp, 0); } -static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val) -{ - HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld - " (0x" TARGET_FMT_lx ")\n", - pnum, val, val); - - /* Multiple writes to the same preg are and'ed together */ - if (env->pred_written & (1 << pnum)) { - env->new_pred_value[pnum] &= val & 0xff; - } else { - 
env->new_pred_value[pnum] = val & 0xff; - env->pred_written |= 1 << pnum; - } -} - void log_store32(CPUHexagonState *env, target_ulong addr, target_ulong val, int width, int slot) { @@ -399,6 +384,87 @@ int32_t HELPER(vacsh_pred)(CPUHexagonState *env, return PeV; } +int64_t HELPER(cabacdecbin_val)(int64_t RssV, int64_t RttV) +{ + int64_t RddV = 0; + size4u_t state; + size4u_t valMPS; + size4u_t bitpos; + size4u_t range; + size4u_t offset; + size4u_t rLPS; + size4u_t rMPS; + + state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0); + valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8); + bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0); + range = fGETWORD(0, RssV); + offset = fGETWORD(1, RssV); + + /* calculate rLPS */ + range <<= bitpos; + offset <<= bitpos; + rLPS = rLPS_table_64x4[state][(range >> 29) & 3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS = (range & 0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + RddV = AC_next_state_MPS_64[state]; + fINSERT_RANGE(RddV, 8, 8, valMPS); + fINSERT_RANGE(RddV, 31, 23, (rMPS >> 23)); + fSETWORD(1, RddV, offset); + } + /* least probable region */ + else { + RddV = AC_next_state_LPS_64[state]; + fINSERT_RANGE(RddV, 8, 8, ((!state) ? 
(1 - valMPS) : (valMPS))); + fINSERT_RANGE(RddV, 31, 23, (rLPS >> 23)); + fSETWORD(1, RddV, (offset - rMPS)); + } + return RddV; +} + +int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV) +{ + int32_t p0 = 0; + size4u_t state; + size4u_t valMPS; + size4u_t bitpos; + size4u_t range; + size4u_t offset; + size4u_t rLPS; + size4u_t rMPS; + + state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0); + valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8); + bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0); + range = fGETWORD(0, RssV); + offset = fGETWORD(1, RssV); + + /* calculate rLPS */ + range <<= bitpos; + offset <<= bitpos; + rLPS = rLPS_table_64x4[state][(range >> 29) & 3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS = (range & 0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + p0 = valMPS; + + } + /* least probable region */ + else { + p0 = valMPS ^ 1; + } + return p0; +} + static void probe_store(CPUHexagonState *env, int slot, int mmu_idx, bool is_predicated) { From 25e1d87d1041a27c1f34480b53059faeb6600fef Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 15:59:59 -0700 Subject: [PATCH 17/44] Hexagon (target/hexagon) Clean up pred_written usage Only endloop instructions will conditionally write to a predicate. When there is an endloop instruction, we preload the values into new_pred_value. The only place pred_written is needed is when HEX_DEBUG is on. We remove the last use of check_for_attrib. However, new uses will be introduced later in this series, so we mark it with G_GNUC_UNUSED. 
Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-9-tsimpson@quicinc.com> --- target/hexagon/genptr.c | 16 +++++------- target/hexagon/translate.c | 53 ++++++++++++-------------------------- 2 files changed, 23 insertions(+), 46 deletions(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index dac62b90a6..9bbaca6300 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -137,7 +137,9 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) tcg_gen_and_tl(hex_new_pred_value[pnum], hex_new_pred_value[pnum], base_val); } - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); + if (HEX_DEBUG) { + tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); + } set_bit(pnum, ctx->pregs_written); } @@ -826,15 +828,13 @@ static void gen_endloop0(DisasContext *ctx) /* * if (lpcfg == 1) { - * hex_new_pred_value[3] = 0xff; - * hex_pred_written |= 1 << 3; + * p3 = 0xff; * } */ TCGLabel *label1 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); { - tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0xff)); } gen_set_label(label1); @@ -903,14 +903,12 @@ static void gen_endloop01(DisasContext *ctx) /* * if (lpcfg == 1) { - * hex_new_pred_value[3] = 0xff; - * hex_pred_written |= 1 << 3; + * p3 = 0xff; * } */ tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); { - tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0xff)); } gen_set_label(label1); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 01f448a325..0b021b301a 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -239,7 +239,7 @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, return nwords; } -static bool check_for_attrib(Packet *pkt, int 
attrib) +static G_GNUC_UNUSED bool check_for_attrib(Packet *pkt, int attrib) { for (int i = 0; i < pkt->num_insns; i++) { if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { @@ -262,11 +262,6 @@ static bool need_slot_cancelled(Packet *pkt) return false; } -static bool need_pred_written(Packet *pkt) -{ - return check_for_attrib(pkt, A_WRITES_PRED_REG); -} - static bool need_next_PC(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -414,7 +409,7 @@ static void gen_start_packet(DisasContext *ctx) tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); } } - if (need_pred_written(pkt)) { + if (HEX_DEBUG) { tcg_gen_movi_tl(hex_pred_written, 0); } @@ -428,6 +423,17 @@ static void gen_start_packet(DisasContext *ctx) } } + /* + * Preload the predicated pred registers into hex_new_pred_value[pred_num] + * Only endloop instructions conditionally write to pred registers + */ + if (pkt->pkt_has_endloop) { + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pred_num = ctx->preg_log[i]; + tcg_gen_mov_tl(hex_new_pred_value[pred_num], hex_pred[pred_num]); + } + } + /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { int i = find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); @@ -535,41 +541,14 @@ static void gen_reg_writes(DisasContext *ctx) static void gen_pred_writes(DisasContext *ctx) { - int i; - /* Early exit if the log is empty */ if (!ctx->preg_log_idx) { return; } - /* - * Only endloop instructions will conditionally - * write a predicate. If there are no endloop - * instructions, we can use the non-conditional - * write of the predicates. 
- */ - if (ctx->pkt->pkt_has_endloop) { - TCGv zero = tcg_constant_tl(0); - TCGv pred_written = tcg_temp_new(); - for (i = 0; i < ctx->preg_log_idx; i++) { - int pred_num = ctx->preg_log[i]; - - tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num); - tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num], - pred_written, zero, - hex_new_pred_value[pred_num], - hex_pred[pred_num]); - } - } else { - for (i = 0; i < ctx->preg_log_idx; i++) { - int pred_num = ctx->preg_log[i]; - tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, - 1 << pred_num); - } - } + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pred_num = ctx->preg_log[i]; + tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); } } From 71ed3697250e284e353b7abe05f667c5d6b9bc1b Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:00 -0700 Subject: [PATCH 18/44] Hexagon (target/hexagon) Don't overlap dest writes with source reads When generating TCG, make sure we have read all the operand registers before writing to the destination registers. This is a prerequisite for short-circuiting where the source and dest operands could be the same.
Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-10-tsimpson@quicinc.com> --- target/hexagon/genptr.c | 45 ++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 9bbaca6300..3c7e0dafaf 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -971,6 +971,7 @@ static void gen_cmpi_jumpnv(DisasContext *ctx, /* Shift left with saturation */ static void gen_shl_sat(DisasContext *ctx, TCGv dst, TCGv src, TCGv shift_amt) { + TCGv tmp = tcg_temp_new(); /* In case dst == src */ TCGv usr = get_result_gpr(ctx, HEX_REG_USR); TCGv sh32 = tcg_temp_new(); TCGv dst_sar = tcg_temp_new(); @@ -995,17 +996,17 @@ static void gen_shl_sat(DisasContext *ctx, TCGv dst, TCGv src, TCGv shift_amt) */ tcg_gen_andi_tl(sh32, shift_amt, 31); - tcg_gen_movcond_tl(TCG_COND_EQ, dst, sh32, shift_amt, + tcg_gen_movcond_tl(TCG_COND_EQ, tmp, sh32, shift_amt, src, tcg_constant_tl(0)); - tcg_gen_shl_tl(dst, dst, sh32); - tcg_gen_sar_tl(dst_sar, dst, sh32); + tcg_gen_shl_tl(tmp, tmp, sh32); + tcg_gen_sar_tl(dst_sar, tmp, sh32); tcg_gen_movcond_tl(TCG_COND_LT, satval, src, tcg_constant_tl(0), min, max); tcg_gen_setcond_tl(TCG_COND_NE, ovf, dst_sar, src); tcg_gen_shli_tl(ovf, ovf, reg_field_info[USR_OVF].offset); tcg_gen_or_tl(usr, usr, ovf); - tcg_gen_movcond_tl(TCG_COND_EQ, dst, dst_sar, src, dst, satval); + tcg_gen_movcond_tl(TCG_COND_EQ, dst, dst_sar, src, tmp, satval); } static void gen_sar(TCGv dst, TCGv src, TCGv shift_amt) @@ -1228,22 +1229,28 @@ void gen_sat_i32(TCGv dest, TCGv source, int width) void gen_sat_i32_ovfl(TCGv ovfl, TCGv dest, TCGv source, int width) { - gen_sat_i32(dest, source, width); - tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, dest); + TCGv tmp = tcg_temp_new(); /* In case dest == source */ + gen_sat_i32(tmp, source, width); + tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, tmp); + tcg_gen_mov_tl(dest, tmp); } void 
gen_satu_i32(TCGv dest, TCGv source, int width) { + TCGv tmp = tcg_temp_new(); /* In case dest == source */ TCGv max_val = tcg_constant_tl((1 << width) - 1); TCGv zero = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_GTU, dest, source, max_val, max_val, source); - tcg_gen_movcond_tl(TCG_COND_LT, dest, source, zero, zero, dest); + tcg_gen_movcond_tl(TCG_COND_GTU, tmp, source, max_val, max_val, source); + tcg_gen_movcond_tl(TCG_COND_LT, tmp, source, zero, zero, tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_satu_i32_ovfl(TCGv ovfl, TCGv dest, TCGv source, int width) { - gen_satu_i32(dest, source, width); - tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, dest); + TCGv tmp = tcg_temp_new(); /* In case dest == source */ + gen_satu_i32(tmp, source, width); + tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_sat_i64(TCGv_i64 dest, TCGv_i64 source, int width) @@ -1256,27 +1263,33 @@ void gen_sat_i64(TCGv_i64 dest, TCGv_i64 source, int width) void gen_sat_i64_ovfl(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 ovfl_64; - gen_sat_i64(dest, source, width); + gen_sat_i64(tmp, source, width); ovfl_64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, dest, source); + tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, tmp, source); + tcg_gen_mov_i64(dest, tmp); tcg_gen_trunc_i64_tl(ovfl, ovfl_64); } void gen_satu_i64(TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 max_val = tcg_constant_i64((1LL << width) - 1LL); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_movcond_i64(TCG_COND_GTU, dest, source, max_val, max_val, source); - tcg_gen_movcond_i64(TCG_COND_LT, dest, source, zero, zero, dest); + tcg_gen_movcond_i64(TCG_COND_GTU, tmp, source, max_val, max_val, source); + tcg_gen_movcond_i64(TCG_COND_LT, tmp, source, zero, zero, tmp); + tcg_gen_mov_i64(dest, tmp); } void 
gen_satu_i64_ovfl(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 ovfl_64; - gen_satu_i64(dest, source, width); + gen_satu_i64(tmp, source, width); ovfl_64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, dest, source); + tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, tmp, source); + tcg_gen_mov_i64(dest, tmp); tcg_gen_trunc_i64_tl(ovfl, ovfl_64); } From b9f0326bf7f584227dd10cbf5222e49130c7fc86 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:01 -0700 Subject: [PATCH 19/44] Hexagon (target/hexagon) Mark registers as read during packet analysis Have gen_analyze_funcs mark the registers that are read by the instruction. We also mark the implicit reads using instruction attributes. Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-11-tsimpson@quicinc.com> --- target/hexagon/attribs_def.h.inc | 6 +++- target/hexagon/gen_analyze_funcs.py | 44 ++++++++++++++++++++--------- target/hexagon/hex_common.py | 6 ++++ target/hexagon/translate.c | 20 +++++++++++++ target/hexagon/translate.h | 36 +++++++++++++++++++++++ 5 files changed, 97 insertions(+), 15 deletions(-) diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 69da9776f0..21d457fa4a 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -112,6 +112,10 @@ DEF_ATTRIB(IMPLICIT_WRITES_P1, "Writes Predicate 1", "", "UREG.P1") DEF_ATTRIB(IMPLICIT_WRITES_P2, "Writes Predicate 1", "", "UREG.P2") DEF_ATTRIB(IMPLICIT_WRITES_P3, "May write Predicate 3", "", "UREG.P3") DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the PC register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P0, "Reads the P0 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "") DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "") DEF_ATTRIB(WRITES_PRED_REG, "Writes a predicate register", "", "") DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "") diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index c74443da78..86aec5ac4b 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -35,12 +35,14 @@ def analyze_opn_old(f, tag, regtype, regid, regno): predicated = "true" if is_predicated(tag) else "false" if regtype == "R": if regid in {"ss", "tt"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n") elif regid in {"dd", "ee", "xx", "yy"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n") elif regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") elif regid in {"d", "e", "x", "y"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") @@ -48,7 +50,8 @@ def analyze_opn_old(f, tag, regtype, 
regid, regno): print("Bad register parse: ", regtype, regid) elif regtype == "P": if regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_pred_read(ctx, {regN});\n") elif regid in {"d", "e", "x"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_pred_write(ctx, {regN});\n") @@ -57,15 +60,19 @@ def analyze_opn_old(f, tag, regtype, regid, regno): elif regtype == "C": if regid == "ss": f.write( - f"// const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n" + f" const int {regN} = insn->regno[{regno}] " + "+ HEX_REG_SA0;\n" ) + f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n") elif regid == "dd": f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n") f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n") elif regid == "s": f.write( - f"// const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n" + f" const int {regN} = insn->regno[{regno}] " + "+ HEX_REG_SA0;\n" ) + f.write(f" ctx_log_reg_read(ctx, {regN});\n") elif regid == "d": f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") @@ -73,7 +80,8 @@ def analyze_opn_old(f, tag, regtype, regid, regno): print("Bad register parse: ", regtype, regid) elif regtype == "M": if regid == "u": - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: print("Bad register parse: ", regtype, regid) elif regtype == "V": @@ -88,9 +96,11 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f" ctx_log_vreg_write_pair(ctx, {regN}, {newv}, " f"{predicated});\n" ) elif regid in {"uu", "vv"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n") elif 
regid in {"s", "u", "v", "w"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read(ctx, {regN});\n") elif regid in {"d", "x", "y"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n") @@ -101,7 +111,8 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_qreg_write(ctx, {regN});\n") elif regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_qreg_read(ctx, {regN});\n") else: print("Bad register parse: ", regtype, regid) elif regtype == "G": @@ -134,17 +145,20 @@ def analyze_opn_new(f, tag, regtype, regid, regno): regN = f"{regtype}{regid}N" if regtype == "N": if regid in {"s", "t"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: print("Bad register parse: ", regtype, regid) elif regtype == "P": if regid in {"t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_pred_read(ctx, {regN});\n") else: print("Bad register parse: ", regtype, regid) elif regtype == "O": if regid == "s": - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read(ctx, {regN});\n") else: print("Bad register parse: ", regtype, regid) else: @@ -174,8 +188,10 @@ def analyze_opn(f, tag, regtype, regid, toss, numregs, i): ## Insn *insn G_GNUC_UNUSED = ctx->insn; ## const int RdN = insn->regno[0]; ## ctx_log_reg_write(ctx, RdN, false); -## // const int RsN = insn->regno[1]; -## // const int RtN = insn->regno[2]; +## const int RsN = 
insn->regno[1]; +## ctx_log_reg_read(ctx, RsN); +## const int RtN = insn->regno[2]; +## ctx_log_reg_read(ctx, RtN); ## } ## def gen_analyze_func(f, tag, regs, imms): diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 40f28ca933..232c6e2c20 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -97,6 +97,12 @@ def calculate_attribs(): add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR") add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD") add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE") + add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1') + add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1') + add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3') # Recurse down macros, find attributes from sub-macros macroValues = list(macros.values()) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 0b021b301a..e84bd34618 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -336,6 +336,21 @@ static void mark_implicit_pred_writes(DisasContext *ctx) mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); } +static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) +{ + if (GET_ATTRIB(ctx->insn->opcode, attrib)) { + ctx_log_pred_read(ctx, pnum); + } +} + +static void mark_implicit_pred_reads(DisasContext *ctx) +{ + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0); + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1); + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 2); + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3); +} + static void analyze_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -348,6 +363,7 @@ static void analyze_packet(DisasContext *ctx) } mark_implicit_reg_writes(ctx); mark_implicit_pred_writes(ctx); + mark_implicit_pred_reads(ctx); 
} } @@ -361,9 +377,11 @@ static void gen_start_packet(DisasContext *ctx) ctx->next_PC = next_PC; ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); + bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS); bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); + bitmap_zero(ctx->pregs_read, NUM_PREGS); ctx->future_vregs_idx = 0; ctx->tmp_vregs_idx = 0; ctx->vreg_log_idx = 0; @@ -372,6 +390,8 @@ static void gen_start_packet(DisasContext *ctx) bitmap_zero(ctx->vregs_select, NUM_VREGS); bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); + bitmap_zero(ctx->vregs_read, NUM_VREGS); + bitmap_zero(ctx->qregs_read, NUM_QREGS); ctx->qreg_log_idx = 0; for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 4b9f21c41d..f72228859f 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -38,10 +38,12 @@ typedef struct DisasContext { int reg_log[REG_WRITES_MAX]; int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); + DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS); DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS); int preg_log[PRED_WRITES_MAX]; int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); + DECLARE_BITMAP(pregs_read, NUM_PREGS); uint8_t store_width[STORES_MAX]; bool s1_store_processed; int future_vregs_idx; @@ -55,8 +57,10 @@ typedef struct DisasContext { DECLARE_BITMAP(vregs_select, NUM_VREGS); DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS); DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS); + DECLARE_BITMAP(vregs_read, NUM_VREGS); int qreg_log[NUM_QREGS]; int qreg_log_idx; + DECLARE_BITMAP(qregs_read, NUM_QREGS); bool pre_commit; TCGCond branch_cond; target_ulong branch_dest; @@ -73,6 +77,11 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) } } +static inline void 
ctx_log_pred_read(DisasContext *ctx, int pnum) +{ + set_bit(pnum, ctx->pregs_read); +} + static inline void ctx_log_reg_write(DisasContext *ctx, int rnum, bool is_predicated) { @@ -99,6 +108,17 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum, ctx_log_reg_write(ctx, rnum + 1, is_predicated); } +static inline void ctx_log_reg_read(DisasContext *ctx, int rnum) +{ + set_bit(rnum, ctx->regs_read); +} + +static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum) +{ + ctx_log_reg_read(ctx, rnum); + ctx_log_reg_read(ctx, rnum + 1); +} + intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, int num, bool alloc_ok); intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, @@ -139,6 +159,17 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx, ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated); } +static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum) +{ + set_bit(rnum, ctx->vregs_read); +} + +static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum) +{ + ctx_log_vreg_read(ctx, rnum ^ 0); + ctx_log_vreg_read(ctx, rnum ^ 1); +} + static inline void ctx_log_qreg_write(DisasContext *ctx, int rnum) { @@ -146,6 +177,11 @@ static inline void ctx_log_qreg_write(DisasContext *ctx, ctx->qreg_log_idx++; } +static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum) +{ + set_bit(qnum, ctx->qregs_read); +} + extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; extern TCGv hex_pred[NUM_PREGS]; extern TCGv hex_this_PC; From d54c56156f409344f8cf232f1e7ee68defa811b9 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:02 -0700 Subject: [PATCH 20/44] Hexagon (target/hexagon) Short-circuit packet register writes In certain cases, we can avoid the overhead of writing to hex_new_value and write directly to hex_gpr. We add need_commit field to DisasContext indicating if the end-of-packet commit is needed. If it is not needed, get_result_gpr() and get_result_gpr_pair() can return hex_gpr. 
We pass the ctx->need_commit to helpers when needed. Finally, we can early-exit from gen_reg_writes during packet commit. There are a few instructions whose semantics write to the result before reading all the inputs. Therefore, the idef-parser generated code is incompatible with short-circuit. We tell idef-parser to skip them. For debugging purposes, we add a cpu property to turn off short-circuit. When the short-circuit property is false, we skip the analysis and force the end-of-packet commit. Here's a simple example of the TCG generated for 0x004000b4: 0x7800c020 { R0 = #0x1 } BEFORE: ---- 004000b4 movi_i32 new_r0,$0x1 mov_i32 r0,new_r0 AFTER: ---- 004000b4 movi_i32 r0,$0x1 This patch reintroduces a use of check_for_attrib, so we remove the G_GNUC_UNUSED added earlier in this series. Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Reviewed-by: Brian Cain Message-Id: <20230427230012.3800327-12-tsimpson@quicinc.com> --- target/hexagon/arch.c | 3 +- target/hexagon/cpu.c | 3 ++ target/hexagon/cpu.h | 1 + target/hexagon/gen_helper_funcs.py | 2 + target/hexagon/gen_helper_protos.py | 10 +++- target/hexagon/gen_idef_parser_funcs.py | 7 +++ target/hexagon/gen_tcg.h | 3 +- target/hexagon/gen_tcg_funcs.py | 5 ++ target/hexagon/genptr.c | 30 ++++------- target/hexagon/genptr.h | 2 + target/hexagon/helper.h | 2 +- target/hexagon/hex_common.py | 3 ++ target/hexagon/macros.h | 13 ++++- target/hexagon/op_helper.c | 5 +- target/hexagon/translate.c | 67 ++++++++++++++++++++++++- target/hexagon/translate.h | 2 + 16 files changed, 128 insertions(+), 30 deletions(-) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index da79b41c4d..d053d68487 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -224,6 +224,7 @@ void arch_fpop_start(CPUHexagonState *env) void arch_fpop_end(CPUHexagonState *env) { + const bool pkt_need_commit = true; int flags = get_float_exception_flags(&env->fp_status); if (flags != 0) { SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index c78fe25c9f..d4dfc382ab 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -54,6 +54,8 @@ static Property hexagon_lldb_compat_property = static Property hexagon_lldb_stack_adjust_property = DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, qdev_prop_uint32, target_ulong); +static Property hexagon_short_circuit_property = + DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true); const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", @@ -330,6 +332,7 @@ static void hexagon_cpu_init(Object *obj) cpu_set_cpustate_pointers(cpu); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property); + qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property); } #include "hw/core/tcg-cpu-ops.h" diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 4d8981d862..631bfdbe9c 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -150,6 +150,7 @@ struct ArchCPU { bool lldb_compat; target_ulong lldb_stack_adjust; + bool short_circuit; }; #include "cpu_bits.h" diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index c73d792580..e259ea3d03 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -287,6 +287,8 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", uint32_t 
pkt_has_multi_cof") + if (hex_common.need_pkt_need_commit(tag)): + f.write(", uint32_t pkt_need_commit") if hex_common.need_PC(tag): if i > 0: diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index 187cd6e04e..c5ecb85294 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -86,6 +86,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1 if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -103,6 +105,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -156,10 +160,12 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): for immlett, bits, immshift in imms: f.write(", s32") - ## Add the arguments for the instruction pkt_has_multi_cof, slot and - ## part1 (if needed) + ## Add the arguments for the instruction pkt_has_multi_cof, + ## pkt_needs_commit, PC, next_PC, slot, and part1 (if needed) if hex_common.need_pkt_has_multi_cof(tag): f.write(", i32") + if hex_common.need_pkt_need_commit(tag): + f.write(', i32') if hex_common.need_PC(tag): f.write(", i32") if hex_common.helper_needs_next_PC(tag): diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index dc9e396b52..ad2e5c04d3 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -111,6 +111,13 @@ def main(): continue if ( tag.startswith('R6_release_') ): continue + ## Skip instructions that are incompatible with short-circuit + ## packet register 
writes + if ( tag == 'S2_insert' or + tag == 'S2_insert_rp' or + tag == 'S2_asr_r_svw_trun' or + tag == 'A2_swiz' ): + continue regs = tagregs[tag] imms = tagimms[tag] diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 099a6cc47f..7e070c35bd 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -592,7 +592,8 @@ #define fGEN_TCG_A5_ACS(SHORTCODE) \ do { \ gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \ - gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ + gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \ + tcg_constant_tl(ctx->need_commit)); \ } while (0) #define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \ diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index d9ccbe63f6..0e45d43685 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -550,6 +550,9 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(" TCGv pkt_has_multi_cof = ") f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n") + if hex_common.need_pkt_need_commit(tag): + f.write(" TCGv pkt_need_commit = ") + f.write("tcg_constant_tl(ctx->need_commit);\n") if hex_common.need_part1(tag): f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n") if hex_common.need_slot(tag): @@ -596,6 +599,8 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", pkt_has_multi_cof") + if hex_common.need_pkt_need_commit(tag): + f.write(", pkt_need_commit") if hex_common.need_PC(tag): f.write(", PC") if hex_common.helper_needs_next_PC(tag): diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 3c7e0dafaf..9858d7bc35 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -45,7 +45,7 @@ TCGv gen_read_preg(TCGv pred, uint8_t num) #define IMMUTABLE (~0) -static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { +const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { [HEX_REG_USR] = 0xc13000c0, 
[HEX_REG_PC] = IMMUTABLE, [HEX_REG_GP] = 0x3f, @@ -70,14 +70,18 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val, static TCGv get_result_gpr(DisasContext *ctx, int rnum) { - return hex_new_value[rnum]; + if (ctx->need_commit) { + return hex_new_value[rnum]; + } else { + return hex_gpr[rnum]; + } } static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum) { TCGv_i64 result = tcg_temp_new_i64(); - tcg_gen_concat_i32_i64(result, hex_new_value[rnum], - hex_new_value[rnum + 1]); + tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum), + get_result_gpr(ctx, rnum + 1)); return result; } @@ -86,7 +90,7 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val) const target_ulong reg_mask = reg_immut_masks[rnum]; gen_masked_reg_write(val, hex_gpr[rnum], reg_mask); - tcg_gen_mov_tl(hex_new_value[rnum], val); + tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val); if (HEX_DEBUG) { /* Do this so HELPER(debug_commit_end) will know */ tcg_gen_movi_tl(hex_reg_written[rnum], 1); @@ -95,27 +99,15 @@ void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val) static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) { - const target_ulong reg_mask_low = reg_immut_masks[rnum]; - const target_ulong reg_mask_high = reg_immut_masks[rnum + 1]; TCGv val32 = tcg_temp_new(); /* Low word */ tcg_gen_extrl_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low); - tcg_gen_mov_tl(hex_new_value[rnum], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); - } + gen_log_reg_write(ctx, rnum, val32); /* High word */ tcg_gen_extrh_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high); - tcg_gen_mov_tl(hex_new_value[rnum + 1], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); - } + gen_log_reg_write(ctx, rnum + 1, val32); } void 
gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h index 75d0fc262d..420867f934 100644 --- a/target/hexagon/genptr.h +++ b/target/hexagon/genptr.h @@ -58,4 +58,6 @@ void gen_set_half(int N, TCGv result, TCGv src); void gen_set_half_i64(int N, TCGv_i64 result, TCGv src); void probe_noshuf_load(TCGv va, int s, int mi); +extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS]; + #endif diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 73849e3d49..4b750d0351 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -29,7 +29,7 @@ DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) DEF_HELPER_2(sfinvsqrta, i64, env, f32) -DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) +DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32) DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, env, s64, s64, s64) DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 232c6e2c20..29c0508f66 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -276,6 +276,9 @@ def need_pkt_has_multi_cof(tag): return "A_COF" in attribdict[tag] +def need_pkt_need_commit(tag): + return 'A_IMPLICIT_WRITES_USR' in attribdict[tag] + def need_condexec_reg(tag, regs): if "A_CONDEXEC" in attribdict[tag]: for regtype, regid, toss, numregs in regs: diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 24c78fe80a..54562cccb0 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -44,8 +44,17 @@ reg_field_info[FIELD].offset) #define SET_USR_FIELD(FIELD, VAL) \ - fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \ - reg_field_info[FIELD].offset, 
(VAL)) + do { \ + if (pkt_need_commit) { \ + fINSERT_BITS(env->new_value[HEX_REG_USR], \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } else { \ + fINSERT_BITS(env->gpr[HEX_REG_USR], \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } \ + } while (0) #endif #ifdef QEMU_GENERATE diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 46ccc59106..fc5c30a141 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -220,7 +220,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) reg_printed = true; } HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n", - i, env->new_value[i], env->new_value[i]); + i, env->gpr[i], env->gpr[i]); } } @@ -352,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV) } int64_t HELPER(vacsh_val)(CPUHexagonState *env, - int64_t RxxV, int64_t RssV, int64_t RttV) + int64_t RxxV, int64_t RssV, int64_t RttV, + uint32_t pkt_need_commit) { for (int i = 0; i < 4; i++) { int xv = sextract64(RxxV, i * 16, 16); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index e84bd34618..6fa885cf16 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -27,6 +27,7 @@ #include "insn.h" #include "decode.h" #include "translate.h" +#include "genptr.h" #include "printinsn.h" #include "analyze_funcs_generated.c.inc" @@ -239,7 +240,7 @@ static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, return nwords; } -static G_GNUC_UNUSED bool check_for_attrib(Packet *pkt, int attrib) +static bool check_for_attrib(Packet *pkt, int attrib) { for (int i = 0; i < pkt->num_insns; i++) { if (GET_ATTRIB(pkt->insn[i].opcode, attrib)) { @@ -336,6 +337,58 @@ static void mark_implicit_pred_writes(DisasContext *ctx) mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); } +static bool pkt_raises_exception(Packet *pkt) +{ + if (check_for_attrib(pkt, A_LOAD) || + 
check_for_attrib(pkt, A_STORE)) { + return true; + } + return false; +} + +static bool need_commit(DisasContext *ctx) +{ + Packet *pkt = ctx->pkt; + + /* + * If the short-circuit property is set to false, we'll always do the commit + */ + if (!ctx->short_circuit) { + return true; + } + + if (pkt_raises_exception(pkt)) { + return true; + } + + /* Registers with immutability flags require new_value */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (reg_immut_masks[rnum]) { + return true; + } + } + + /* Floating point instructions are hard-coded to use new_value */ + if (check_for_attrib(pkt, A_FPOP)) { + return true; + } + + if (pkt->num_insns == 1) { + return false; + } + + /* Check for overlap between register reads and writes */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (test_bit(rnum, ctx->regs_read)) { + return true; + } + } + + return false; +} + static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) { if (GET_ATTRIB(ctx->insn->opcode, attrib)) { @@ -365,6 +418,8 @@ static void analyze_packet(DisasContext *ctx) mark_implicit_pred_writes(ctx); mark_implicit_pred_reads(ctx); } + + ctx->need_commit = need_commit(ctx); } static void gen_start_packet(DisasContext *ctx) @@ -434,7 +489,8 @@ static void gen_start_packet(DisasContext *ctx) } /* Preload the predicated registers into hex_new_value[i] */ - if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { + if (ctx->need_commit && + !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); while (i < TOTAL_PER_THREAD_REGS) { tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); @@ -544,6 +600,11 @@ static void gen_reg_writes(DisasContext *ctx) { int i; + /* Early exit if not needed */ + if (!ctx->need_commit) { + return; + } + for (i = 0; i < ctx->reg_log_idx; i++) { int reg_num = ctx->reg_log[i]; @@ -922,6 +983,7 @@ static void 
hexagon_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr); uint32_t hex_flags = dcbase->tb->flags; ctx->mem_idx = MMU_USER_IDX; @@ -930,6 +992,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); + ctx->short_circuit = hex_cpu->short_circuit; } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index f72228859f..3f6fd3452c 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -62,10 +62,12 @@ typedef struct DisasContext { int qreg_log_idx; DECLARE_BITMAP(qregs_read, NUM_QREGS); bool pre_commit; + bool need_commit; TCGCond branch_cond; target_ulong branch_dest; bool is_tight_loop; bool need_pkt_has_store_s1; + bool short_circuit; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) From 455e169d7cad4499ed9f4647215b9ec71aa706e4 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:03 -0700 Subject: [PATCH 21/44] Hexagon (target/hexagon) Short-circuit packet predicate writes In certain cases, we can avoid the overhead of writing to hex_new_pred_value and write directly to hex_pred. We consider predicate reads/writes when computing ctx->need_commit. The get_result_pred() function uses this field to decide between hex_new_pred_value and hex_pred. Then, we can early-exit from gen_pred_writes. 
Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-13-tsimpson@quicinc.com> --- target/hexagon/genptr.c | 15 ++++++++++++--- target/hexagon/genptr.h | 1 + target/hexagon/translate.c | 14 +++++++++++--- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 9858d7bc35..5025e172cf 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -110,8 +110,18 @@ static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) gen_log_reg_write(ctx, rnum + 1, val32); } +TCGv get_result_pred(DisasContext *ctx, int pnum) +{ + if (ctx->need_commit) { + return hex_new_pred_value[pnum]; + } else { + return hex_pred[pnum]; + } +} + void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) { + TCGv pred = get_result_pred(ctx, pnum); TCGv base_val = tcg_temp_new(); tcg_gen_andi_tl(base_val, val, 0xff); @@ -124,10 +134,9 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) * straight assignment. Otherwise, do an and. 
*/ if (!test_bit(pnum, ctx->pregs_written)) { - tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val); + tcg_gen_mov_tl(pred, base_val); } else { - tcg_gen_and_tl(hex_new_pred_value[pnum], - hex_new_pred_value[pnum], base_val); + tcg_gen_and_tl(pred, pred, base_val); } if (HEX_DEBUG) { tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h index 420867f934..e11ccc2358 100644 --- a/target/hexagon/genptr.h +++ b/target/hexagon/genptr.h @@ -35,6 +35,7 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot); void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot); TCGv gen_read_reg(TCGv result, int num); TCGv gen_read_preg(TCGv pred, uint8_t num); +TCGv get_result_pred(DisasContext *ctx, int pnum); void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val); void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val); void gen_set_usr_field(DisasContext *ctx, int field, TCGv val); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 6fa885cf16..bcf64f725a 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -386,6 +386,14 @@ static bool need_commit(DisasContext *ctx) } } + /* Check for overlap between predicate reads and writes */ + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pnum = ctx->preg_log[i]; + if (test_bit(pnum, ctx->pregs_read)) { + return true; + } + } + return false; } @@ -503,7 +511,7 @@ static void gen_start_packet(DisasContext *ctx) * Preload the predicated pred registers into hex_new_pred_value[pred_num] * Only endloop instructions conditionally write to pred registers */ - if (pkt->pkt_has_endloop) { + if (ctx->need_commit && pkt->pkt_has_endloop) { for (int i = 0; i < ctx->preg_log_idx; i++) { int pred_num = ctx->preg_log[i]; tcg_gen_mov_tl(hex_new_pred_value[pred_num], hex_pred[pred_num]); @@ -622,8 +630,8 @@ static void gen_reg_writes(DisasContext *ctx) static void 
gen_pred_writes(DisasContext *ctx) { - /* Early exit if the log is empty */ - if (!ctx->preg_log_idx) { + /* Early exit if not needed or the log is empty */ + if (!ctx->need_commit || !ctx->preg_log_idx) { return; } From b85529854ee1a67a1169683a4446f2500a9c14c9 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:04 -0700 Subject: [PATCH 22/44] Hexagon (target/hexagon) Short-circuit packet HVX writes In certain cases, we can avoid the overhead of writing to future_VRegs and write directly to VRegs. We consider HVX reads/writes when computing ctx->need_commit. Then, we can early-exit from gen_commit_hvx. Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-14-tsimpson@quicinc.com> --- target/hexagon/genptr.c | 6 ++++- target/hexagon/translate.c | 46 +++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 5025e172cf..82a3408eb4 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -1104,7 +1104,11 @@ static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num, static intptr_t get_result_qreg(DisasContext *ctx, int qnum) { - return offsetof(CPUHexagonState, future_QRegs[qnum]); + if (ctx->need_commit) { + return offsetof(CPUHexagonState, future_QRegs[qnum]); + } else { + return offsetof(CPUHexagonState, QRegs[qnum]); + } } static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src, diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index bcf64f725a..8e7a4377c8 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -70,6 +70,10 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, { intptr_t offset; + if (!ctx->need_commit) { + return offsetof(CPUHexagonState, VRegs[regnum]); + } + /* See if it is already allocated */ for (int i = 0; i < ctx->future_vregs_idx; i++) { if (ctx->future_vregs_num[i] == regnum) { 
@@ -374,7 +378,7 @@ static bool need_commit(DisasContext *ctx) return true; } - if (pkt->num_insns == 1) { + if (pkt->num_insns == 1 && !pkt->pkt_has_hvx) { return false; } @@ -394,6 +398,40 @@ static bool need_commit(DisasContext *ctx) } } + /* Check for overlap between HVX reads and writes */ + for (int i = 0; i < ctx->vreg_log_idx; i++) { + int vnum = ctx->vreg_log[i]; + if (test_bit(vnum, ctx->vregs_read)) { + return true; + } + } + if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) { + int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS); + while (i < NUM_VREGS) { + if (test_bit(i, ctx->vregs_read)) { + return true; + } + i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1); + } + } + if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) { + int i = find_first_bit(ctx->vregs_select, NUM_VREGS); + while (i < NUM_VREGS) { + if (test_bit(i, ctx->vregs_read)) { + return true; + } + i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1); + } + } + + /* Check for overlap between HVX predicate reads and writes */ + for (int i = 0; i < ctx->qreg_log_idx; i++) { + int qnum = ctx->qreg_log[i]; + if (test_bit(qnum, ctx->qregs_read)) { + return true; + } + } + return false; } @@ -790,6 +828,12 @@ static void gen_commit_hvx(DisasContext *ctx) { int i; + /* Early exit if not needed */ + if (!ctx->need_commit) { + g_assert(!pkt_has_hvx_store(ctx->pkt)); + return; + } + /* * for (i = 0; i < ctx->vreg_log_idx; i++) { * int rnum = ctx->vreg_log[i]; From d05d5eebc77f607f96e582527e43908a274b2abf Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:05 -0700 Subject: [PATCH 23/44] Hexagon (target/hexagon) Short-circuit more HVX single instruction packets The generated helpers for HVX use pass-by-reference, so they can't short-circuit when the reads/writes overlap. The instructions with overrides are OK because they use tcg_gen_gvec_*. 
We add a flag has_hvx_helper to DisasContext and extend gen_analyze_funcs to set the flag when the instruction is an HVX instruction with a generated helper. We add an override for V6_vcombine so that it can be short-circuited along with a test case in tests/tcg/hexagon/hvx_misc.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-15-tsimpson@quicinc.com> --- target/hexagon/gen_analyze_funcs.py | 5 +++++ target/hexagon/gen_tcg_hvx.h | 23 +++++++++++++++++++++++ target/hexagon/translate.c | 17 +++++++++++++++-- target/hexagon/translate.h | 1 + tests/tcg/hexagon/hvx_misc.c | 21 +++++++++++++++++++++ 5 files changed, 65 insertions(+), 2 deletions(-) diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index 86aec5ac4b..36da669450 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -212,6 +212,11 @@ def gen_analyze_func(f, tag, regs, imms): if has_generated_helper and "A_SCALAR_LOAD" in hex_common.attribdict[tag]: f.write(" ctx->need_pkt_has_store_s1 = true;\n") + ## Mark HVX instructions with generated helpers + if (has_generated_helper and + "A_CVI" in hex_common.attribdict[tag]): + f.write(" ctx->has_hvx_helper = true;\n") + f.write("}\n\n") diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h index 8dceead5e5..44bae53f8d 100644 --- a/target/hexagon/gen_tcg_hvx.h +++ b/target/hexagon/gen_tcg_hvx.h @@ -140,6 +140,29 @@ static inline void assert_vhist_tmp(DisasContext *ctx) sizeof(MMVector), sizeof(MMVector)); \ } while (0) +/* + * Vector combine + * + * Be careful that the source and dest don't overlap + */ +#define fGEN_TCG_V6_vcombine(SHORTCODE) \ + do { \ + if (VddV_off != VuV_off) { \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + } else { \ + intptr_t tmpoff = 
offsetof(CPUHexagonState, vtmp); \ + tcg_gen_gvec_mov(MO_64, tmpoff, VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), tmpoff, \ + sizeof(MMVector), sizeof(MMVector)); \ + } \ + } while (0) + /* Vector conditional move */ #define fGEN_TCG_VEC_CMOV(PRED) \ do { \ diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 8e7a4377c8..fe85edc1ec 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -378,8 +378,20 @@ static bool need_commit(DisasContext *ctx) return true; } - if (pkt->num_insns == 1 && !pkt->pkt_has_hvx) { - return false; + if (pkt->num_insns == 1) { + if (pkt->pkt_has_hvx) { + /* + * The HVX instructions with generated helpers use + * pass-by-reference, so they need the read/write overlap + * check below. + * The HVX instructions with overrides are OK. + */ + if (!ctx->has_hvx_helper) { + return false; + } + } else { + return false; + } } /* Check for overlap between register reads and writes */ @@ -454,6 +466,7 @@ static void analyze_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; ctx->need_pkt_has_store_s1 = false; + ctx->has_hvx_helper = false; for (int i = 0; i < pkt->num_insns; i++) { Insn *insn = &pkt->insn[i]; ctx->insn = insn; diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 3f6fd3452c..26bcae0395 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -68,6 +68,7 @@ typedef struct DisasContext { bool is_tight_loop; bool need_pkt_has_store_s1; bool short_circuit; + bool has_hvx_helper; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index d0e64e035f..c89fe0253d 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ b/tests/tcg/hexagon/hvx_misc.c @@ -454,6 +454,25 @@ static void test_load_cur_predicated(void) 
check_output_w(__LINE__, BUFSIZE); } +static void test_vcombine(void) +{ + for (int i = 0; i < BUFSIZE / 2; i++) { + asm volatile("v2 = vsplat(%0)\n\t" + "v3 = vsplat(%1)\n\t" + "v3:2 = vcombine(v2, v3)\n\t" + "vmem(%2+#0) = v2\n\t" + "vmem(%2+#1) = v3\n\t" + : + : "r"(2 * i), "r"(2 * i + 1), "r"(&output[2 * i]) + : "v2", "v3", "memory"); + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[2 * i].w[j] = 2 * i + 1; + expect[2 * i + 1].w[j] = 2 * i; + } + } + check_output_w(__LINE__, BUFSIZE); +} + int main() { init_buffers(); @@ -494,6 +513,8 @@ int main() test_load_tmp_predicated(); test_load_cur_predicated(); + test_vcombine(); + puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; } From 00e64fda061ba65668a19dd1ea79e2a2f72090da Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:06 -0700 Subject: [PATCH 24/44] Hexagon (target/hexagon) Add overrides for disabled idef-parser insns The following have overrides S2_insert S2_insert_rp S2_asr_r_svw_trun A2_swiz These instructions have semantics that write to the destination before all the operand reads have been completed. Therefore, the idef-parser versions were disabled with the short-circuit patch. 
Test cases added to tests/tcg/hexagon/read_write_overlap.c Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-16-tsimpson@quicinc.com> --- target/hexagon/gen_tcg.h | 18 ++++ target/hexagon/genptr.c | 99 ++++++++++++++++++ tests/tcg/hexagon/Makefile.target | 1 + tests/tcg/hexagon/read_write_overlap.c | 136 +++++++++++++++++++++++++ 4 files changed, 254 insertions(+) create mode 100644 tests/tcg/hexagon/read_write_overlap.c diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 7e070c35bd..ed2c1ccc46 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -1185,6 +1185,24 @@ tcg_gen_extrl_i64_i32(RdV, tmp); \ } while (0) +#define fGEN_TCG_S2_insert(SHORTCODE) \ + do { \ + int width = uiV; \ + int offset = UiV; \ + if (width != 0) { \ + if (offset + width > 32) { \ + width = 32 - offset; \ + } \ + tcg_gen_deposit_tl(RxV, RxV, RsV, offset, width); \ + } \ + } while (0) +#define fGEN_TCG_S2_insert_rp(SHORTCODE) \ + gen_insert_rp(ctx, RxV, RsV, RttV) +#define fGEN_TCG_S2_asr_r_svw_trun(SHORTCODE) \ + gen_asr_r_svw_trun(ctx, RdV, RssV, RtV) +#define fGEN_TCG_A2_swiz(SHORTCODE) \ + tcg_gen_bswap_tl(RdV, RsV) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 82a3408eb4..5eb0d58659 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -1065,6 +1065,105 @@ static void gen_asl_r_r_sat(DisasContext *ctx, TCGv RdV, TCGv RsV, TCGv RtV) gen_set_label(done); } +static void gen_insert_rp(DisasContext *ctx, TCGv RxV, TCGv RsV, TCGv_i64 RttV) +{ + /* + * int width = fZXTN(6, 32, (fGETWORD(1, RttV))); + * int offset = fSXTN(7, 32, (fGETWORD(0, RttV))); + * size8u_t mask = ((fCONSTLL(1) << width) - 1); + * if (offset < 0) { + * RxV = 0; + * } else { + * RxV &= ~(mask << offset); + * RxV |= ((RsV & mask) << offset); + * } + */ + + TCGv width = tcg_temp_new(); + TCGv 
offset = tcg_temp_new(); + TCGv_i64 mask = tcg_temp_new_i64(); + TCGv_i64 result = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 offset64 = tcg_temp_new_i64(); + TCGLabel *label = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_extrh_i64_i32(width, RttV); + tcg_gen_extract_tl(width, width, 0, 6); + tcg_gen_extrl_i64_i32(offset, RttV); + tcg_gen_sextract_tl(offset, offset, 0, 7); + /* Possible values for offset are -64 .. 63 */ + tcg_gen_brcondi_tl(TCG_COND_GE, offset, 0, label); + /* For negative offsets, zero out the result */ + tcg_gen_movi_tl(RxV, 0); + tcg_gen_br(done); + gen_set_label(label); + /* At this point, possible values of offset are 0 .. 63 */ + tcg_gen_ext_i32_i64(mask, width); + tcg_gen_shl_i64(mask, tcg_constant_i64(1), mask); + tcg_gen_subi_i64(mask, mask, 1); + tcg_gen_extu_i32_i64(result, RxV); + tcg_gen_ext_i32_i64(tmp, offset); + tcg_gen_shl_i64(tmp, mask, tmp); + tcg_gen_andc_i64(result, result, tmp); + tcg_gen_extu_i32_i64(tmp, RsV); + tcg_gen_and_i64(tmp, tmp, mask); + tcg_gen_extu_i32_i64(offset64, offset); + tcg_gen_shl_i64(tmp, tmp, offset64); + tcg_gen_or_i64(result, result, tmp); + tcg_gen_extrl_i64_i32(RxV, result); + gen_set_label(done); +} + +static void gen_asr_r_svw_trun(DisasContext *ctx, TCGv RdV, + TCGv_i64 RssV, TCGv RtV) +{ + /* + * for (int i = 0; i < 2; i++) { + * fSETHALF(i, RdV, fGETHALF(0, ((fSXTN(7, 32, RtV) > 0) ? + * (fCAST4_8s(fGETWORD(i, RssV)) >> fSXTN(7, 32, RtV)) : + * (fCAST4_8s(fGETWORD(i, RssV)) << -fSXTN(7, 32, RtV))))); + * } + */ + TCGv shift_amt32 = tcg_temp_new(); + TCGv_i64 shift_amt64 = tcg_temp_new_i64(); + TCGv_i64 tmp64 = tcg_temp_new_i64(); + TCGv tmp32 = tcg_temp_new(); + TCGLabel *label = gen_new_label(); + TCGLabel *zero = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_sextract_tl(shift_amt32, RtV, 0, 7); + /* Possible values of shift_amt32 are -64 .. 
63 */ + tcg_gen_brcondi_tl(TCG_COND_LE, shift_amt32, 0, label); + /* After branch, possible values of shift_amt32 are 1 .. 63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_sar_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(label); + tcg_gen_neg_tl(shift_amt32, shift_amt32); + /*At this point, possible values of shift_amt32 are 0 .. 64 */ + tcg_gen_brcondi_tl(TCG_COND_GT, shift_amt32, 63, zero); + /*At this point, possible values of shift_amt32 are 0 .. 63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_shl_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(zero); + /* When the shift_amt is 64, zero out the result */ + tcg_gen_movi_tl(RdV, 0); + gen_set_label(done); +} + static intptr_t vreg_src_off(DisasContext *ctx, int num) { intptr_t offset = offsetof(CPUHexagonState, VRegs[num]); diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 3172f2e4db..6109a7ed10 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -45,6 +45,7 @@ HEX_TESTS += fpstuff HEX_TESTS += overflow HEX_TESTS += signal_context HEX_TESTS += reg_mut +HEX_TESTS += read_write_overlap HEX_TESTS += vector_add_int HEX_TESTS += scatter_gather HEX_TESTS += hvx_misc diff --git a/tests/tcg/hexagon/read_write_overlap.c b/tests/tcg/hexagon/read_write_overlap.c new file mode 100644 index 0000000000..a75fc11dc4 --- /dev/null +++ b/tests/tcg/hexagon/read_write_overlap.c @@ -0,0 +1,136 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test instructions where the semantics write to the destination + * before all the operand reads have been completed. + * + * These instructions are problematic when we short-circuit the + * register writes because the destination and source operands could + * be the same TCGv. + * + * We test by forcing the read and write to be register r7. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +int err; + +static void __check(const char *filename, int line, int x, int expect) +{ + if (x != expect) { + printf("ERROR %s:%d - 0x%08x != 0x%08x\n", + filename, line, x, expect); + err++; + } +} + +#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect)) + +#define insert(RES, X, WIDTH, OFFSET) \ + asm("r7 = %1\n\t" \ + "r7 = insert(r7, #" #WIDTH ", #" #OFFSET ")\n\t" \ + "%0 = r7\n\t" \ + : "=r"(RES) : "r"(X) : "r7") + +static void test_insert(void) +{ + uint32_t res; + + insert(res, 0x12345678, 8, 1); + check(res, 0x123456f0); + insert(res, 0x12345678, 0, 1); + check(res, 0x12345678); + insert(res, 0x12345678, 20, 16); + check(res, 0x56785678); +} + +static inline uint32_t insert_rp(uint32_t x, uint32_t width, uint32_t offset) +{ + uint64_t width_offset = (uint64_t)width << 32 | offset; + uint32_t res; + asm("r7 = %1\n\t" + "r7 = insert(r7, %2)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x), "r"(width_offset) : "r7"); + return res; + +} +
+static void test_insert_rp(void) +{ + check(insert_rp(0x12345678, 8, 1), 0x123456f0); + check(insert_rp(0x12345678, 63, 8), 0x34567878); + check(insert_rp(0x12345678, 127, 8), 0x34567878); + check(insert_rp(0x12345678, 8, 24), 0x78345678); + check(insert_rp(0x12345678, 8, 63), 0x12345678); + check(insert_rp(0x12345678, 8, 64), 0x00000000); +} + +static inline uint32_t asr_r_svw_trun(uint64_t x, uint32_t y) +{ + uint32_t res; + asm("r7 = %2\n\t" + "r7 = vasrw(%1, r7)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x), "r"(y) : "r7"); + return res; +} + +static void test_asr_r_svw_trun(void) +{ + check(asr_r_svw_trun(0x1111111122222222ULL, 5), + 0x88881111); + check(asr_r_svw_trun(0x1111111122222222ULL, 63), + 0x00000000); + check(asr_r_svw_trun(0x1111111122222222ULL, 64), + 0x00000000); + check(asr_r_svw_trun(0x1111111122222222ULL, 127), + 0x22224444); + check(asr_r_svw_trun(0x1111111122222222ULL, 128), + 0x11112222); + check(asr_r_svw_trun(0xffffffff22222222ULL, 128), + 0xffff2222); +} + +static inline uint32_t swiz(uint32_t x) +{ + uint32_t res; + asm("r7 = %1\n\t" + "r7 = swiz(r7)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x) : "r7"); + return res; +} + +static void test_swiz(void) +{ + check(swiz(0x11223344), 0x44332211); +} + +int main() +{ + test_insert(); + test_insert_rp(); + test_asr_r_svw_trun(); + test_swiz(); + + puts(err ? "FAIL" : "PASS"); + return err ? 
EXIT_FAILURE : EXIT_SUCCESS; +} From 6aa4f1d15ca8f843d92cf8e431a03d5b1278054c Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:07 -0700 Subject: [PATCH 25/44] Hexagon (target/hexagon) Make special new_value for USR Precursor to moving new_value from the global state to DisasContext USR will need to stay in the global state because some helpers will set its value Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-17-tsimpson@quicinc.com> --- target/hexagon/README | 2 +- target/hexagon/cpu.h | 1 + target/hexagon/gen_tcg_funcs.py | 2 +- target/hexagon/genptr.c | 8 ++++++-- target/hexagon/genptr.h | 1 + target/hexagon/macros.h | 2 +- target/hexagon/translate.c | 22 +++++++++++++++------- target/hexagon/translate.h | 1 + 8 files changed, 27 insertions(+), 12 deletions(-) diff --git a/target/hexagon/README b/target/hexagon/README index f86850ba73..4186f8fe3f 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -186,7 +186,7 @@ We also generate an analyze_ function for each instruction. Currently, these functions record the writes to registers by calling ctx_log_*. During gen_start_packet, we invoke the analyze_ function for each instruction in the packet, and we mark the implicit writes. After the analysis is performed, -we initialize hex_new_value for each of the predicated assignments. +we initialize the result register for each of the predicated assignments. In addition to instruction semantics, we use a generator to create the decode tree. This generation is also a two step process.
The first step is to run diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 631bfdbe9c..f86c9f0131 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -90,6 +90,7 @@ typedef struct CPUArchState { uint8_t slot_cancelled; target_ulong new_value[TOTAL_PER_THREAD_REGS]; + target_ulong new_value_usr; /* * Only used when HEX_DEBUG is on, but unconditionally included diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index 0e45d43685..a36117d57f 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -190,7 +190,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno): if regid in {"s", "t"}: f.write( f" TCGv {regtype}{regid}N = " - f"hex_new_value[insn->regno[{regno}]];\n" + f"get_result_gpr(ctx, insn->regno[{regno}]);\n" ) else: print("Bad register parse: ", regtype, regid) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 5eb0d58659..bfcb962a3d 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -68,10 +68,14 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val, } } -static TCGv get_result_gpr(DisasContext *ctx, int rnum) +TCGv get_result_gpr(DisasContext *ctx, int rnum) { if (ctx->need_commit) { - return hex_new_value[rnum]; + if (rnum == HEX_REG_USR) { + return hex_new_value_usr; + } else { + return hex_new_value[rnum]; + } } else { return hex_gpr[rnum]; } diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h index e11ccc2358..a4b43c2910 100644 --- a/target/hexagon/genptr.h +++ b/target/hexagon/genptr.h @@ -35,6 +35,7 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot); void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot); TCGv gen_read_reg(TCGv result, int num); TCGv gen_read_preg(TCGv pred, uint8_t num); +TCGv get_result_gpr(DisasContext *ctx, int rnum); TCGv get_result_pred(DisasContext *ctx, int pnum); void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val); void 
gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 54562cccb0..828874f318 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -46,7 +46,7 @@ #define SET_USR_FIELD(FIELD, VAL) \ do { \ if (pkt_need_commit) { \ - fINSERT_BITS(env->new_value[HEX_REG_USR], \ + fINSERT_BITS(env->new_value_usr, \ reg_field_info[FIELD].width, \ reg_field_info[FIELD].offset, (VAL)); \ } else { \ diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index fe85edc1ec..e73c0066dd 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -45,6 +45,7 @@ TCGv hex_this_PC; TCGv hex_slot_cancelled; TCGv hex_branch_taken; TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; +TCGv hex_new_value_usr; TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; TCGv hex_new_pred_value[NUM_PREGS]; TCGv hex_pred_written; @@ -547,12 +548,12 @@ static void gen_start_packet(DisasContext *ctx) tcg_gen_movi_tl(hex_pred_written, 0); } - /* Preload the predicated registers into hex_new_value[i] */ + /* Preload the predicated registers into get_result_gpr(ctx, i) */ if (ctx->need_commit && !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); while (i < TOTAL_PER_THREAD_REGS) { - tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); + tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]); i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, i + 1); } @@ -667,7 +668,7 @@ static void gen_reg_writes(DisasContext *ctx) for (i = 0; i < ctx->reg_log_idx; i++) { int reg_num = ctx->reg_log[i]; - tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]); + tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num)); /* * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. 
@@ -1180,10 +1181,14 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, gpr[i]), hexagon_regnames[i]); - snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]); - hex_new_value[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, new_value[i]), - new_value_names[i]); + if (i == HEX_REG_USR) { + hex_new_value[i] = NULL; + } else { + snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]); + hex_new_value[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, new_value[i]), + new_value_names[i]); + } if (HEX_DEBUG) { snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", @@ -1193,6 +1198,9 @@ void hexagon_translate_init(void) reg_written_names[i]); } } + hex_new_value_usr = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, new_value_usr), "new_value_usr"); + for (i = 0; i < NUM_PREGS; i++) { hex_pred[i] = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, pred[i]), diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 26bcae0395..4c17433a6f 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -191,6 +191,7 @@ extern TCGv hex_this_PC; extern TCGv hex_slot_cancelled; extern TCGv hex_branch_taken; extern TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; +extern TCGv hex_new_value_usr; extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; extern TCGv hex_new_pred_value[NUM_PREGS]; extern TCGv hex_pred_written; From 4ff5676474cff9afd02c4755d97b0438e764f9d9 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:08 -0700 Subject: [PATCH 26/44] Hexagon (target/hexagon) Move new_value to DisasContext The new_value array in the CPUHexagonState is only used for bookkeeping within the translation of a packet. With recent changes that eliminate the need to free TCGv variables, these make more sense to be transient and kept in DisasContext. 
Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-18-tsimpson@quicinc.com> --- target/hexagon/cpu.h | 1 - target/hexagon/genptr.c | 6 +++++- target/hexagon/translate.c | 14 +++----------- target/hexagon/translate.h | 2 +- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index f86c9f0131..0ef6d717d0 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -89,7 +89,6 @@ typedef struct CPUArchState { target_ulong stack_start; uint8_t slot_cancelled; - target_ulong new_value[TOTAL_PER_THREAD_REGS]; target_ulong new_value_usr; /* diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index bfcb962a3d..37210e6f09 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -74,7 +74,11 @@ TCGv get_result_gpr(DisasContext *ctx, int rnum) if (rnum == HEX_REG_USR) { return hex_new_value_usr; } else { - return hex_new_value[rnum]; + if (ctx->new_value[rnum] == NULL) { + ctx->new_value[rnum] = tcg_temp_new(); + tcg_gen_movi_tl(ctx->new_value[rnum], 0); + } + return ctx->new_value[rnum]; } } else { return hex_gpr[rnum]; diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index e73c0066dd..bca42797c0 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -44,7 +44,6 @@ TCGv hex_pred[NUM_PREGS]; TCGv hex_this_PC; TCGv hex_slot_cancelled; TCGv hex_branch_taken; -TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; TCGv hex_new_value_usr; TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; TCGv hex_new_pred_value[NUM_PREGS]; @@ -513,6 +512,9 @@ static void gen_start_packet(DisasContext *ctx) } ctx->s1_store_processed = false; ctx->pre_commit = true; + for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { + ctx->new_value[i] = NULL; + } analyze_packet(ctx); @@ -1159,7 +1161,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, } #define NAME_LEN 64 -static char 
new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; static char new_pred_value_names[NUM_PREGS][NAME_LEN]; static char store_addr_names[STORES_MAX][NAME_LEN]; @@ -1181,15 +1182,6 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, gpr[i]), hexagon_regnames[i]); - if (i == HEX_REG_USR) { - hex_new_value[i] = NULL; - } else { - snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]); - hex_new_value[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, new_value[i]), - new_value_names[i]); - } - if (HEX_DEBUG) { snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", hexagon_regnames[i]); diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 4c17433a6f..6dde487566 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -69,6 +69,7 @@ typedef struct DisasContext { bool need_pkt_has_store_s1; bool short_circuit; bool has_hvx_helper; + TCGv new_value[TOTAL_PER_THREAD_REGS]; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) @@ -190,7 +191,6 @@ extern TCGv hex_pred[NUM_PREGS]; extern TCGv hex_this_PC; extern TCGv hex_slot_cancelled; extern TCGv hex_branch_taken; -extern TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; extern TCGv hex_new_value_usr; extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; extern TCGv hex_new_pred_value[NUM_PREGS]; From e22edc7c1df59607dea3cf3d8529097ae38e3ae0 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:09 -0700 Subject: [PATCH 27/44] Hexagon (target/hexagon) Move new_pred_value to DisasContext The new_pred_value array in the CPUHexagonState is only used for bookkeeping within the translation of a packet. With recent changes that eliminate the need to free TCGv variables, these make more sense to be transient and kept in DisasContext. 
Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-19-tsimpson@quicinc.com> --- target/hexagon/cpu.h | 1 - target/hexagon/gen_tcg.h | 12 ++++++------ target/hexagon/gen_tcg_funcs.py | 2 +- target/hexagon/genptr.c | 10 +++++++--- target/hexagon/idef-parser/parser-helpers.c | 2 +- target/hexagon/op_helper.c | 2 +- target/hexagon/translate.c | 16 ++++++---------- target/hexagon/translate.h | 2 +- 8 files changed, 23 insertions(+), 24 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 0ef6d717d0..2b4f77fb8e 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -98,7 +98,6 @@ typedef struct CPUArchState { target_ulong this_PC; target_ulong reg_written[TOTAL_PER_THREAD_REGS]; - target_ulong new_pred_value[NUM_PREGS]; target_ulong pred_written; MemLog mem_log_stores[STORES_MAX]; diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index ed2c1ccc46..d78d99d155 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -581,9 +581,9 @@ #define fGEN_TCG_SL2_return_f(SHORTCODE) \ gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0]) #define fGEN_TCG_SL2_return_tnew(SHORTCODE) \ - gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0]) + gen_cond_return_subinsn(ctx, TCG_COND_EQ, ctx->new_pred_value[0]) #define fGEN_TCG_SL2_return_fnew(SHORTCODE) \ - gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0]) + gen_cond_return_subinsn(ctx, TCG_COND_NE, ctx->new_pred_value[0]) /* * Mathematical operations with more than one definition require @@ -1122,7 +1122,7 @@ #define fGEN_TCG_SA1_clrtnew(SHORTCODE) \ do { \ tcg_gen_movcond_tl(TCG_COND_EQ, RdV, \ - hex_new_pred_value[0], tcg_constant_tl(0), \ + ctx->new_pred_value[0], tcg_constant_tl(0), \ RdV, tcg_constant_tl(0)); \ } while (0) @@ -1130,7 +1130,7 @@ #define fGEN_TCG_SA1_clrfnew(SHORTCODE) \ do { \ tcg_gen_movcond_tl(TCG_COND_NE, RdV, \ - hex_new_pred_value[0], 
tcg_constant_tl(0), \ + ctx->new_pred_value[0], tcg_constant_tl(0), \ RdV, tcg_constant_tl(0)); \ } while (0) @@ -1157,9 +1157,9 @@ gen_cond_jumpr31(ctx, TCG_COND_NE, hex_pred[0]) #define fGEN_TCG_SL2_jumpr31_tnew(SHORTCODE) \ - gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_new_pred_value[0]) + gen_cond_jumpr31(ctx, TCG_COND_EQ, ctx->new_pred_value[0]) #define fGEN_TCG_SL2_jumpr31_fnew(SHORTCODE) \ - gen_cond_jumpr31(ctx, TCG_COND_NE, hex_new_pred_value[0]) + gen_cond_jumpr31(ctx, TCG_COND_NE, ctx->new_pred_value[0]) /* Count trailing zeros/ones */ #define fGEN_TCG_S2_ct0(SHORTCODE) \ diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index a36117d57f..0403547387 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -198,7 +198,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno): if regid in {"t", "u", "v"}: f.write( f" TCGv {regtype}{regid}N = " - f"hex_new_pred_value[insn->regno[{regno}]];\n" + f"ctx->new_pred_value[insn->regno[{regno}]];\n" ) else: print("Bad register parse: ", regtype, regid) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 37210e6f09..1f69f4f922 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -121,7 +121,11 @@ static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) TCGv get_result_pred(DisasContext *ctx, int pnum) { if (ctx->need_commit) { - return hex_new_pred_value[pnum]; + if (ctx->new_pred_value[pnum] == NULL) { + ctx->new_pred_value[pnum] = tcg_temp_new(); + tcg_gen_movi_tl(ctx->new_pred_value[pnum], 0); + } + return ctx->new_pred_value[pnum]; } else { return hex_pred[pnum]; } @@ -607,7 +611,7 @@ static void gen_cmpnd_cmp_jmp(DisasContext *ctx, gen_log_pred_write(ctx, pnum, pred); } else { TCGv pred = tcg_temp_new(); - tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]); + tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]); gen_cond_jump(ctx, cond2, pred, pc_off); } } @@ -664,7 +668,7 @@ static void 
gen_cmpnd_tstbit0_jmp(DisasContext *ctx, gen_log_pred_write(ctx, pnum, pred); } else { TCGv pred = tcg_temp_new(); - tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]); + tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]); gen_cond_jump(ctx, cond, pred, pc_off); } } diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index 09161e394d..6626e006f6 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -1854,7 +1854,7 @@ HexValue gen_rvalue_pred(Context *c, YYLTYPE *locp, HexValue *pred) *pred = gen_tmp(c, locp, 32, UNSIGNED); if (is_dotnew) { OUT(c, locp, "tcg_gen_mov_i32(", pred, - ", hex_new_pred_value["); + ", ctx->new_pred_value["); OUT(c, locp, pred_str, "]);\n"); } else { OUT(c, locp, "gen_read_preg(", pred, ", ", pred_str, ");\n"); diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index fc5c30a141..26fba9f5d6 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -231,7 +231,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) pred_printed = true; } HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n", - i, env->new_pred_value[i]); + i, env->pred[i]); } } diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index bca42797c0..459aace921 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -46,7 +46,6 @@ TCGv hex_slot_cancelled; TCGv hex_branch_taken; TCGv hex_new_value_usr; TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -TCGv hex_new_pred_value[NUM_PREGS]; TCGv hex_pred_written; TCGv hex_store_addr[STORES_MAX]; TCGv hex_store_width[STORES_MAX]; @@ -515,6 +514,9 @@ static void gen_start_packet(DisasContext *ctx) for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { ctx->new_value[i] = NULL; } + for (i = 0; i < NUM_PREGS; i++) { + ctx->new_pred_value[i] = NULL; + } analyze_packet(ctx); @@ -568,7 +570,8 @@ static void gen_start_packet(DisasContext *ctx) if (ctx->need_commit && 
pkt->pkt_has_endloop) { for (int i = 0; i < ctx->preg_log_idx; i++) { int pred_num = ctx->preg_log[i]; - tcg_gen_mov_tl(hex_new_pred_value[pred_num], hex_pred[pred_num]); + ctx->new_pred_value[pred_num] = tcg_temp_new(); + tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]); } } @@ -691,7 +694,7 @@ static void gen_pred_writes(DisasContext *ctx) for (int i = 0; i < ctx->preg_log_idx; i++) { int pred_num = ctx->preg_log[i]; - tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); + tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]); } } @@ -1162,7 +1165,6 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, #define NAME_LEN 64 static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; -static char new_pred_value_names[NUM_PREGS][NAME_LEN]; static char store_addr_names[STORES_MAX][NAME_LEN]; static char store_width_names[STORES_MAX][NAME_LEN]; static char store_val32_names[STORES_MAX][NAME_LEN]; @@ -1197,12 +1199,6 @@ void hexagon_translate_init(void) hex_pred[i] = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, pred[i]), hexagon_prednames[i]); - - snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s", - hexagon_prednames[i]); - hex_new_pred_value[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, new_pred_value[i]), - new_pred_value_names[i]); } hex_pred_written = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, pred_written), "pred_written"); diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 6dde487566..fdfa1b6fe3 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -70,6 +70,7 @@ typedef struct DisasContext { bool short_circuit; bool has_hvx_helper; TCGv new_value[TOTAL_PER_THREAD_REGS]; + TCGv new_pred_value[NUM_PREGS]; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) @@ -193,7 +194,6 @@ extern TCGv hex_slot_cancelled; extern TCGv hex_branch_taken; extern TCGv hex_new_value_usr; extern 
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -extern TCGv hex_new_pred_value[NUM_PREGS]; extern TCGv hex_pred_written; extern TCGv hex_store_addr[STORES_MAX]; extern TCGv hex_store_width[STORES_MAX]; From 842b206f268a69d7def87b990f272a514c49837b Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:10 -0700 Subject: [PATCH 28/44] Hexagon (target/hexagon) Move pred_written to DisasContext The pred_written variable in the CPUHexagonState is only used for bookkeeping within the translation of a packet. With recent changes that eliminate the need to free TCGv variables, these make more sense to be transient and kept in DisasContext. Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-20-tsimpson@quicinc.com> --- target/hexagon/cpu.h | 2 -- target/hexagon/genptr.c | 2 +- target/hexagon/helper.h | 2 +- target/hexagon/op_helper.c | 5 +++-- target/hexagon/translate.c | 9 ++++----- target/hexagon/translate.h | 2 +- 6 files changed, 10 insertions(+), 12 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 2b4f77fb8e..7673f9f32d 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -98,8 +98,6 @@ typedef struct CPUArchState { target_ulong this_PC; target_ulong reg_written[TOTAL_PER_THREAD_REGS]; - target_ulong pred_written; - MemLog mem_log_stores[STORES_MAX]; target_ulong pkt_has_store_s1; target_ulong dczero_addr; diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 1f69f4f922..785778759e 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -151,7 +151,7 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) tcg_gen_and_tl(pred, pred, base_val); } if (HEX_DEBUG) { - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); + tcg_gen_ori_tl(ctx->pred_written, ctx->pred_written, 1 << pnum); } set_bit(pnum, ctx->pregs_written); } diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 
4b750d0351..f3b298beee 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -21,7 +21,7 @@ DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) DEF_HELPER_1(debug_start_packet, void, env) DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) -DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) +DEF_HELPER_FLAGS_4(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int, int) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_3(gather_store, void, env, i32, int) DEF_HELPER_1(commit_hvx_stores, void, env) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 26fba9f5d6..f9021efc7e 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -203,7 +203,8 @@ static void print_store(CPUHexagonState *env, int slot) } /* This function is a handy place to set a breakpoint */ -void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) +void HELPER(debug_commit_end)(CPUHexagonState *env, + int pred_written, int has_st0, int has_st1) { bool reg_printed = false; bool pred_printed = false; @@ -225,7 +226,7 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) } for (i = 0; i < NUM_PREGS; i++) { - if (env->pred_written & (1 << i)) { + if (pred_written & (1 << i)) { if (!pred_printed) { HEX_DEBUG_LOG("Predicates written\n"); pred_printed = true; diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 459aace921..a585cc8cfd 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -46,7 +46,6 @@ TCGv hex_slot_cancelled; TCGv hex_branch_taken; TCGv hex_new_value_usr; TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -TCGv hex_pred_written; TCGv hex_store_addr[STORES_MAX]; TCGv hex_store_width[STORES_MAX]; TCGv hex_store_val32[STORES_MAX]; @@ -549,7 +548,8 @@ static void gen_start_packet(DisasContext *ctx) } } if (HEX_DEBUG) { - tcg_gen_movi_tl(hex_pred_written, 0); + 
ctx->pred_written = tcg_temp_new(); + tcg_gen_movi_tl(ctx->pred_written, 0); } /* Preload the predicated registers into get_result_gpr(ctx, i) */ @@ -1007,7 +1007,8 @@ static void gen_commit_packet(DisasContext *ctx) tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); /* Handy place to set a breakpoint at the end of execution */ - gen_helper_debug_commit_end(cpu_env, has_st0, has_st1); + gen_helper_debug_commit_end(cpu_env, ctx->pred_written, + has_st0, has_st1); } if (pkt->vhist_insn != NULL) { @@ -1200,8 +1201,6 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, pred[i]), hexagon_prednames[i]); } - hex_pred_written = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, pred_written), "pred_written"); hex_this_PC = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, this_PC), "this_PC"); hex_slot_cancelled = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index fdfa1b6fe3..a9f1ccee24 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -71,6 +71,7 @@ typedef struct DisasContext { bool has_hvx_helper; TCGv new_value[TOTAL_PER_THREAD_REGS]; TCGv new_pred_value[NUM_PREGS]; + TCGv pred_written; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) @@ -194,7 +195,6 @@ extern TCGv hex_slot_cancelled; extern TCGv hex_branch_taken; extern TCGv hex_new_value_usr; extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -extern TCGv hex_pred_written; extern TCGv hex_store_addr[STORES_MAX]; extern TCGv hex_store_width[STORES_MAX]; extern TCGv hex_store_val32[STORES_MAX]; From e5d0d78db4adc76200d9d8c3f055176911b42c5d Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:11 -0700 Subject: [PATCH 29/44] Hexagon (target/hexagon) Move pkt_has_store_s1 to DisasContext The pkt_has_store_s1 field is only used for bookkeeping helpers with a load. 
With recent changes that eliminate the need to free TCGv variables, it makes more sense to make this transient. These helpers already take the instruction slot as an argument. We combine the slot and pkt_has_store_s1 into a single argument called slotval. Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-21-tsimpson@quicinc.com> --- target/hexagon/cpu.h | 1 - target/hexagon/gen_analyze_funcs.py | 2 -- target/hexagon/gen_helper_funcs.py | 7 ++++++- target/hexagon/gen_tcg_funcs.py | 4 ++-- target/hexagon/genptr.c | 8 ++++++++ target/hexagon/hex_common.py | 7 ++++--- target/hexagon/macros.h | 16 ++++++++-------- target/hexagon/op_helper.c | 26 +++++++++++++++----------- target/hexagon/op_helper.h | 12 ++++++++---- target/hexagon/translate.c | 7 ------- target/hexagon/translate.h | 1 - 11 files changed, 51 insertions(+), 40 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 7673f9f32d..87e457dda9 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -99,7 +99,6 @@ typedef struct CPUArchState { target_ulong reg_written[TOTAL_PER_THREAD_REGS]; MemLog mem_log_stores[STORES_MAX]; - target_ulong pkt_has_store_s1; target_ulong dczero_addr; float_status fp_status; diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index 36da669450..d040f67001 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -209,8 +209,6 @@ def gen_analyze_func(f, tag, regs, imms): has_generated_helper = not hex_common.skip_qemu_helper( tag ) and not hex_common.is_idef_parser_enabled(tag) - if has_generated_helper and "A_SCALAR_LOAD" in hex_common.attribdict[tag]: - f.write(" ctx->need_pkt_has_store_s1 = true;\n") ## Mark HVX instructions with generated helpers if (has_generated_helper and diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index e259ea3d03..39751a483c 100755 --- 
a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -303,7 +303,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_slot(tag): if i > 0: f.write(", ") - f.write("uint32_t slot") + f.write("uint32_t slotval") i += 1 if hex_common.need_part1(tag): if i > 0: @@ -331,6 +331,11 @@ def gen_helper_function(f, tag, tagregs, tagimms): else: print("Bad register parse: ", regtype, regid, toss, numregs) + if hex_common.need_slot(tag): + if "A_LOAD" in hex_common.attribdict[tag]: + f.write(" bool pkt_has_store_s1 = slotval & 0x1;\n") + f.write(" uint32_t slot = slotval >> 1;\n") + if "A_FPOP" in hex_common.attribdict[tag]: f.write(" arch_fpop_start(env);\n") diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index 0403547387..887b1cd369 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -556,7 +556,7 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_part1(tag): f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n") if hex_common.need_slot(tag): - f.write(" TCGv slot = tcg_constant_tl(insn->slot);\n") + f.write(" TCGv slotval = gen_slotval(ctx);\n") if hex_common.need_PC(tag): f.write(" TCGv PC = tcg_constant_tl(ctx->pkt->pc);\n") if hex_common.helper_needs_next_PC(tag): @@ -606,7 +606,7 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.helper_needs_next_PC(tag): f.write(", next_PC") if hex_common.need_slot(tag): - f.write(", slot") + f.write(", slotval") if hex_common.need_part1(tag): f.write(", part1") f.write(");\n") diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 785778759e..361cc789d7 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -398,6 +398,14 @@ static inline void gen_store_conditional8(DisasContext *ctx, tcg_gen_movi_tl(hex_llsc_addr, ~0); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +static TCGv gen_slotval(DisasContext *ctx) +{ + int slotval = (ctx->pkt->pkt_has_store_s1 & 1) | (ctx->insn->slot << 1); 
+ return tcg_constant_tl(slotval); +} +#endif + void gen_store32(TCGv vaddr, TCGv src, int width, uint32_t slot) { tcg_gen_mov_tl(hex_store_addr[slot], vaddr); diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 29c0508f66..011cce1a68 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -247,9 +247,10 @@ def is_new_val(regtype, regid, tag): def need_slot(tag): if ( - ("A_CONDEXEC" in attribdict[tag] and "A_JUMP" not in attribdict[tag]) - or "A_STORE" in attribdict[tag] - or "A_LOAD" in attribdict[tag] + "A_CVI_SCATTER" not in attribdict[tag] + and "A_CVI_GATHER" not in attribdict[tag] + and ("A_STORE" in attribdict[tag] + or "A_LOAD" in attribdict[tag]) ): return 1 else: diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 828874f318..5308c0848e 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -173,14 +173,14 @@ #define MEM_STORE8(VA, DATA, SLOT) \ MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT) #else -#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA)) -#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA)) -#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, slot, VA)) -#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, slot, VA)) -#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, slot, VA)) -#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, slot, VA)) -#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, slot, VA)) -#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, slot, VA)) +#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD8s(VA) 
((int64_t)mem_load8(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot, VA)) #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT) #define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index f9021efc7e..dfabce3123 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -567,41 +567,45 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask) * If the load is in slot 0 and there is a store in slot1 (that * wasn't cancelled), we have to do the store first. */ -static void check_noshuf(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr, int size) +static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr, int size) { - if (slot == 0 && env->pkt_has_store_s1 && + if (slot == 0 && pkt_has_store_s1 && ((env->slot_cancelled & (1 << 1)) == 0)) { HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX); HELPER(commit_store)(env, 1); } } -uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 1); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1); return cpu_ldub_data_ra(env, vaddr, ra); } -uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 2); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2); return cpu_lduw_data_ra(env, vaddr, ra); } -uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, 
slot, vaddr, 4); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4); return cpu_ldl_data_ra(env, vaddr, ra); } -uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 8); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8); return cpu_ldq_data_ra(env, vaddr, ra); } diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h index 6bd4b07849..8f3764d15e 100644 --- a/target/hexagon/op_helper.h +++ b/target/hexagon/op_helper.h @@ -19,10 +19,14 @@ #define HEXAGON_OP_HELPER_H /* Misc functions */ -uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); +uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); void log_store64(CPUHexagonState *env, target_ulong addr, int64_t val, int width, int slot); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index a585cc8cfd..ec5abd2fd0 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -463,7 +463,6 @@ static void mark_implicit_pred_reads(DisasContext *ctx) static void analyze_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; - ctx->need_pkt_has_store_s1 = false; ctx->has_hvx_helper = false; for (int i = 0; i < pkt->num_insns; i++) { Insn *insn = &pkt->insn[i]; @@ -519,10 +518,6 @@ static void 
gen_start_packet(DisasContext *ctx) analyze_packet(ctx); - if (ctx->need_pkt_has_store_s1) { - tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); - } - /* * pregs_written is used both in the analyze phase as well as the code * gen phase, so clear it again. @@ -1207,8 +1202,6 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); hex_branch_taken = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, branch_taken), "branch_taken"); - hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1"); hex_dczero_addr = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, dczero_addr), "dczero_addr"); hex_llsc_addr = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index a9f1ccee24..9697b4de0e 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -66,7 +66,6 @@ typedef struct DisasContext { TCGCond branch_cond; target_ulong branch_dest; bool is_tight_loop; - bool need_pkt_has_store_s1; bool short_circuit; bool has_hvx_helper; TCGv new_value[TOTAL_PER_THREAD_REGS]; From 0fc56c437566f15e3fe54b568951eecb3cd68bf3 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 27 Apr 2023 16:00:12 -0700 Subject: [PATCH 30/44] Hexagon (target/hexagon) Move items to DisasContext The following items in the CPUHexagonState are only used for bookkeeping within the translation of a packet. With recent changes that eliminate the need to free TCGv variables, these make more sense to be transient and kept in DisasContext. 
The following items are moved dczero_addr branch_taken this_PC Suggested-by: Richard Henderson Signed-off-by: Taylor Simpson Reviewed-by: Richard Henderson Message-Id: <20230427230012.3800327-22-tsimpson@quicinc.com> --- target/hexagon/README | 2 +- target/hexagon/cpu.h | 3 --- target/hexagon/genptr.c | 6 +++--- target/hexagon/helper.h | 2 +- target/hexagon/macros.h | 6 +++++- target/hexagon/op_helper.c | 5 ++--- target/hexagon/translate.c | 23 +++++++---------------- target/hexagon/translate.h | 5 ++--- 8 files changed, 21 insertions(+), 31 deletions(-) diff --git a/target/hexagon/README b/target/hexagon/README index 4186f8fe3f..43811178e9 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -304,4 +304,4 @@ Here are some handy places to set breakpoints At the start of execution of a packet for a given PC br helper_debug_start_packet if env->gpr[41] == 0xdeadbeef At the end of execution of a packet for a given PC - br helper_debug_commit_end if env->this_PC == 0xdeadbeef + br helper_debug_commit_end if this_PC == 0xdeadbeef diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 87e457dda9..d095dc6647 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -82,7 +82,6 @@ typedef struct { typedef struct CPUArchState { target_ulong gpr[TOTAL_PER_THREAD_REGS]; target_ulong pred[NUM_PREGS]; - target_ulong branch_taken; /* For comparing with LLDB on target - see adjust_stack_ptrs function */ target_ulong last_pc_dumped; @@ -95,11 +94,9 @@ typedef struct CPUArchState { * Only used when HEX_DEBUG is on, but unconditionally included * to reduce recompile time when turning HEX_DEBUG on/off. 
*/ - target_ulong this_PC; target_ulong reg_written[TOTAL_PER_THREAD_REGS]; MemLog mem_log_stores[STORES_MAX]; - target_ulong dczero_addr; float_status fp_status; diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 361cc789d7..cb2aa28a19 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -480,9 +480,9 @@ static void gen_write_new_pc_addr(DisasContext *ctx, TCGv addr, if (ctx->pkt->pkt_has_multi_cof) { /* If there are multiple branches in a packet, ignore the second one */ tcg_gen_movcond_tl(TCG_COND_NE, hex_gpr[HEX_REG_PC], - hex_branch_taken, tcg_constant_tl(0), + ctx->branch_taken, tcg_constant_tl(0), hex_gpr[HEX_REG_PC], addr); - tcg_gen_movi_tl(hex_branch_taken, 1); + tcg_gen_movi_tl(ctx->branch_taken, 1); } else { tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], addr); } @@ -503,7 +503,7 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off, ctx->branch_cond = TCG_COND_ALWAYS; if (pred != NULL) { ctx->branch_cond = cond; - tcg_gen_mov_tl(hex_branch_taken, pred); + tcg_gen_mov_tl(ctx->branch_taken, pred); } ctx->branch_dest = dest; } diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index f3b298beee..fa0ebaf7c8 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -21,7 +21,7 @@ DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) DEF_HELPER_1(debug_start_packet, void, env) DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) -DEF_HELPER_FLAGS_4(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int, int) +DEF_HELPER_FLAGS_5(debug_commit_end, TCG_CALL_NO_WG, void, env, i32, int, int, int) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_3(gather_store, void, env, i32, int) DEF_HELPER_1(commit_hvx_stores, void, env) diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 5308c0848e..5451b061ee 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -648,7 +648,11 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv 
val, int shift) reg_field_info[FIELD].offset) #ifdef QEMU_GENERATE -#define fDCZEROA(REG) tcg_gen_mov_tl(hex_dczero_addr, (REG)) +#define fDCZEROA(REG) \ + do { \ + ctx->dczero_addr = tcg_temp_new(); \ + tcg_gen_mov_tl(ctx->dczero_addr, (REG)); \ + } while (0) #endif #define fBRANCH_SPECULATE_STALL(DOTNEWVAL, JUMP_COND, SPEC_DIR, HINTBITNUM, \ diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index dfabce3123..12967ac21e 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -203,15 +203,14 @@ static void print_store(CPUHexagonState *env, int slot) } /* This function is a handy place to set a breakpoint */ -void HELPER(debug_commit_end)(CPUHexagonState *env, +void HELPER(debug_commit_end)(CPUHexagonState *env, uint32_t this_PC, int pred_written, int has_st0, int has_st1) { bool reg_printed = false; bool pred_printed = false; int i; - HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", - env->this_PC); + HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", this_PC); HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled); for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index ec5abd2fd0..b18f1a9051 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -41,17 +41,13 @@ static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; TCGv hex_pred[NUM_PREGS]; -TCGv hex_this_PC; TCGv hex_slot_cancelled; -TCGv hex_branch_taken; TCGv hex_new_value_usr; TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; TCGv hex_store_addr[STORES_MAX]; TCGv hex_store_width[STORES_MAX]; TCGv hex_store_val32[STORES_MAX]; TCGv_i64 hex_store_val64[STORES_MAX]; -TCGv hex_pkt_has_store_s1; -TCGv hex_dczero_addr; TCGv hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; @@ -157,7 +153,7 @@ static void gen_end_tb(DisasContext *ctx) if (ctx->branch_cond != TCG_COND_NEVER) { if (ctx->branch_cond != TCG_COND_ALWAYS) { 
TCGLabel *skip = gen_new_label(); - tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip); + tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip); gen_goto_tb(ctx, 0, ctx->branch_dest, true); gen_set_label(skip); gen_goto_tb(ctx, 1, ctx->next_PC, false); @@ -527,16 +523,17 @@ static void gen_start_packet(DisasContext *ctx) if (HEX_DEBUG) { /* Handy place to set a breakpoint before the packet executes */ gen_helper_debug_start_packet(cpu_env); - tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); } /* Initialize the runtime state for packet semantics */ if (need_slot_cancelled(pkt)) { tcg_gen_movi_tl(hex_slot_cancelled, 0); } + ctx->branch_taken = NULL; if (pkt->pkt_has_cof) { + ctx->branch_taken = tcg_temp_new(); if (pkt->pkt_has_multi_cof) { - tcg_gen_movi_tl(hex_branch_taken, 0); + tcg_gen_movi_tl(ctx->branch_taken, 0); } if (need_next_PC(ctx)) { tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); @@ -815,7 +812,7 @@ static void process_dczeroa(DisasContext *ctx) TCGv addr = tcg_temp_new(); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f); + tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f); tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); tcg_gen_addi_tl(addr, addr, 8); tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); @@ -1002,8 +999,8 @@ static void gen_commit_packet(DisasContext *ctx) tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); /* Handy place to set a breakpoint at the end of execution */ - gen_helper_debug_commit_end(cpu_env, ctx->pred_written, - has_st0, has_st1); + gen_helper_debug_commit_end(cpu_env, tcg_constant_tl(ctx->pkt->pc), + ctx->pred_written, has_st0, has_st1); } if (pkt->vhist_insn != NULL) { @@ -1196,14 +1193,8 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, pred[i]), hexagon_prednames[i]); } - hex_this_PC = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, this_PC), "this_PC"); hex_slot_cancelled = tcg_global_mem_new(cpu_env, 
offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); - hex_branch_taken = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, branch_taken), "branch_taken"); - hex_dczero_addr = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, dczero_addr), "dczero_addr"); hex_llsc_addr = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); hex_llsc_val = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 9697b4de0e..4dd59c6726 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -71,6 +71,8 @@ typedef struct DisasContext { TCGv new_value[TOTAL_PER_THREAD_REGS]; TCGv new_pred_value[NUM_PREGS]; TCGv pred_written; + TCGv branch_taken; + TCGv dczero_addr; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) @@ -189,16 +191,13 @@ static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum) extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; extern TCGv hex_pred[NUM_PREGS]; -extern TCGv hex_this_PC; extern TCGv hex_slot_cancelled; -extern TCGv hex_branch_taken; extern TCGv hex_new_value_usr; extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; extern TCGv hex_store_addr[STORES_MAX]; extern TCGv hex_store_width[STORES_MAX]; extern TCGv hex_store_val32[STORES_MAX]; extern TCGv_i64 hex_store_val64[STORES_MAX]; -extern TCGv hex_dczero_addr; extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; From 163e5fa38e47281f8e83946794f6c202749bff32 Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Wed, 26 Apr 2023 10:32:32 -0700 Subject: [PATCH 31/44] Hexagon (target/hexagon) Additional instructions handled by idef-parser **** Changes in v3 **** Fix bugs exposed by dpmpyss_rnd_s0 instruction Set correct size/signedness for constants Test cases added to tests/tcg/hexagon/misc.c **** Changes in v2 **** Fix bug in imm_print identified in clang build Currently, idef-parser skips all floating point instructions. 
However, there are some floating point instructions that can be handled. The following instructions are now parsed F2_sfimm_p F2_sfimm_n F2_dfimm_p F2_dfimm_n F2_dfmpyll F2_dfmpylh To make these instructions work, we fix some bugs in parser-helpers.c gen_rvalue_extend gen_cast_op imm_print lexer properly sets size/signedness of constants Test cases added to tests/tcg/hexagon/fpstuff.c Signed-off-by: Taylor Simpson Tested-by: Anton Johansson Reviewed-by: Anton Johansson Message-Id: <20230501203125.4025991-1-tsimpson@quicinc.com> --- target/hexagon/gen_idef_parser_funcs.py | 10 +++- target/hexagon/idef-parser/idef-parser.lex | 37 +++++++++++-- target/hexagon/idef-parser/idef-parser.y | 2 - target/hexagon/idef-parser/parser-helpers.c | 61 ++++++++++----------- target/hexagon/idef-parser/parser-helpers.h | 2 +- tests/tcg/hexagon/fpstuff.c | 54 ++++++++++++++++++ tests/tcg/hexagon/misc.c | 35 ++++++++++++ 7 files changed, 160 insertions(+), 41 deletions(-) diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index ad2e5c04d3..639458b462 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -103,7 +103,15 @@ def main(): continue if tag.startswith("V6_"): continue - if tag.startswith("F"): + if ( tag.startswith("F") and + tag not in { + "F2_sfimm_p", + "F2_sfimm_n", + "F2_dfimm_p", + "F2_dfimm_n", + "F2_dfmpyll", + "F2_dfmpylh" + }): continue if tag.endswith("_locked"): continue diff --git a/target/hexagon/idef-parser/idef-parser.lex b/target/hexagon/idef-parser/idef-parser.lex index 5eb8ac5a80..cd5958ec90 100644 --- a/target/hexagon/idef-parser/idef-parser.lex +++ b/target/hexagon/idef-parser/idef-parser.lex @@ -401,12 +401,39 @@ STRING_LIT \"(\\.|[^"\\])*\" } return SIGN; } -"0x"{HEX_DIGIT}+ | -{DIGIT}+ { yylval->rvalue.type = IMMEDIATE; - yylval->rvalue.bit_width = 32; - yylval->rvalue.signedness = SIGNED; +"0x"{HEX_DIGIT}+ { uint64_t value = strtoull(yytext, NULL, 0); + 
yylval->rvalue.type = IMMEDIATE; yylval->rvalue.imm.type = VALUE; - yylval->rvalue.imm.value = strtoull(yytext, NULL, 0); + yylval->rvalue.imm.value = value; + if (value <= INT_MAX) { + yylval->rvalue.bit_width = sizeof(int) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value <= UINT_MAX) { + yylval->rvalue.bit_width = sizeof(unsigned int) * 8; + yylval->rvalue.signedness = UNSIGNED; + } else if (value <= LONG_MAX) { + yylval->rvalue.bit_width = sizeof(long) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value <= ULONG_MAX) { + yylval->rvalue.bit_width = sizeof(unsigned long) * 8; + yylval->rvalue.signedness = UNSIGNED; + } else { + g_assert_not_reached(); + } + return IMM; } +{DIGIT}+ { int64_t value = strtoll(yytext, NULL, 0); + yylval->rvalue.type = IMMEDIATE; + yylval->rvalue.imm.type = VALUE; + yylval->rvalue.imm.value = value; + if (value >= INT_MIN && value <= INT_MAX) { + yylval->rvalue.bit_width = sizeof(int) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value >= LONG_MIN && value <= LONG_MAX) { + yylval->rvalue.bit_width = sizeof(long) * 8; + yylval->rvalue.signedness = SIGNED; + } else { + g_assert_not_reached(); + } return IMM; } "0x"{HEX_DIGIT}+"ULL" | {DIGIT}+"ULL" { yylval->rvalue.type = IMMEDIATE; diff --git a/target/hexagon/idef-parser/idef-parser.y b/target/hexagon/idef-parser/idef-parser.y index 5444fd4749..5f3907eb28 100644 --- a/target/hexagon/idef-parser/idef-parser.y +++ b/target/hexagon/idef-parser/idef-parser.y @@ -594,8 +594,6 @@ rvalue : FAIL | CAST rvalue { @1.last_column = @2.last_column; - /* Assign target signedness */ - $2.signedness = $1.signedness; $$ = gen_cast_op(c, &@1, &$2, $1.bit_width, $1.signedness); } | rvalue EQ rvalue diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index 6626e006f6..9550097269 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -167,8 +167,9 @@ void 
reg_print(Context *c, YYLTYPE *locp, HexReg *reg) EMIT(c, "hex_gpr[%u]", reg->id); } -void imm_print(Context *c, YYLTYPE *locp, HexImm *imm) +void imm_print(Context *c, YYLTYPE *locp, HexValue *rvalue) { + HexImm *imm = &rvalue->imm; switch (imm->type) { case I: EMIT(c, "i"); @@ -177,7 +178,21 @@ void imm_print(Context *c, YYLTYPE *locp, HexImm *imm) EMIT(c, "%ciV", imm->id); break; case VALUE: - EMIT(c, "((int64_t) %" PRIu64 "ULL)", (int64_t) imm->value); + if (rvalue->bit_width == 32) { + if (rvalue->signedness == UNSIGNED) { + EMIT(c, "((uint32_t) 0x%" PRIx32 ")", (uint32_t) imm->value); + } else { + EMIT(c, "((int32_t) 0x%" PRIx32 ")", (int32_t) imm->value); + } + } else if (rvalue->bit_width == 64) { + if (rvalue->signedness == UNSIGNED) { + EMIT(c, "((uint64_t) 0x%" PRIx64 "ULL)", (uint64_t) imm->value); + } else { + EMIT(c, "((int64_t) 0x%" PRIx64 "LL)", (int64_t) imm->value); + } + } else { + g_assert_not_reached(); + } break; case QEMU_TMP: EMIT(c, "qemu_tmp_%" PRIu64, imm->index); @@ -213,7 +228,7 @@ void rvalue_print(Context *c, YYLTYPE *locp, void *pointer) tmp_print(c, locp, &rvalue->tmp); break; case IMMEDIATE: - imm_print(c, locp, &rvalue->imm); + imm_print(c, locp, rvalue); break; case VARID: var_print(c, locp, &rvalue->var); @@ -386,13 +401,10 @@ HexValue gen_rvalue_extend(Context *c, YYLTYPE *locp, HexValue *rvalue) if (rvalue->type == IMMEDIATE) { HexValue res = gen_imm_qemu_tmp(c, locp, 64, rvalue->signedness); - bool is_unsigned = (rvalue->signedness == UNSIGNED); - const char *sign_suffix = is_unsigned ? 
"u" : ""; gen_c_int_type(c, locp, 64, rvalue->signedness); - OUT(c, locp, " ", &res, " = "); - OUT(c, locp, "(", sign_suffix, "int64_t) "); - OUT(c, locp, "(", sign_suffix, "int32_t) "); - OUT(c, locp, rvalue, ";\n"); + OUT(c, locp, " ", &res, " = ("); + gen_c_int_type(c, locp, 64, rvalue->signedness); + OUT(c, locp, ")", rvalue, ";\n"); return res; } else { HexValue res = gen_tmp(c, locp, 64, rvalue->signedness); @@ -959,33 +971,18 @@ HexValue gen_cast_op(Context *c, unsigned target_width, HexSignedness signedness) { + HexValue res; assert_signedness(c, locp, src->signedness); if (src->bit_width == target_width) { - return *src; - } else if (src->type == IMMEDIATE) { - HexValue res = *src; - res.bit_width = target_width; - res.signedness = signedness; - return res; + res = *src; + } else if (src->bit_width < target_width) { + res = gen_rvalue_extend(c, locp, src); } else { - HexValue res = gen_tmp(c, locp, target_width, signedness); - /* Truncate */ - if (src->bit_width > target_width) { - OUT(c, locp, "tcg_gen_trunc_i64_tl(", &res, ", ", src, ");\n"); - } else { - assert_signedness(c, locp, src->signedness); - if (src->signedness == UNSIGNED) { - /* Extend unsigned */ - OUT(c, locp, "tcg_gen_extu_i32_i64(", - &res, ", ", src, ");\n"); - } else { - /* Extend signed */ - OUT(c, locp, "tcg_gen_ext_i32_i64(", - &res, ", ", src, ");\n"); - } - } - return res; + /* src->bit_width > target_width */ + res = gen_rvalue_truncate(c, locp, src); } + res.signedness = signedness; + return res; } diff --git a/target/hexagon/idef-parser/parser-helpers.h b/target/hexagon/idef-parser/parser-helpers.h index 1239d23a6a..7c58087169 100644 --- a/target/hexagon/idef-parser/parser-helpers.h +++ b/target/hexagon/idef-parser/parser-helpers.h @@ -80,7 +80,7 @@ void reg_compose(Context *c, YYLTYPE *locp, HexReg *reg, char reg_id[5]); void reg_print(Context *c, YYLTYPE *locp, HexReg *reg); -void imm_print(Context *c, YYLTYPE *locp, HexImm *imm); +void imm_print(Context *c, YYLTYPE *locp, 
HexValue *rvalue); void var_print(Context *c, YYLTYPE *locp, HexVar *var); diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index 90ce9a6ef3..28f9397155 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -20,6 +20,7 @@ */ #include +#include const int FPINVF_BIT = 1; /* Invalid */ const int FPINVF = 1 << FPINVF_BIT; @@ -706,6 +707,57 @@ static void check_float2int_convs() check_fpstatus(usr, FPINVF); } +static void check_float_consts(void) +{ + int res32; + unsigned long long res64; + + asm("%0 = sfmake(#%1):neg\n\t" : "=r"(res32) : "i"(0xf)); + check32(res32, 0xbc9e0000); + + asm("%0 = sfmake(#%1):pos\n\t" : "=r"(res32) : "i"(0xf)); + check32(res32, 0x3c9e0000); + + asm("%0 = dfmake(#%1):neg\n\t" : "=r"(res64) : "i"(0xf)); + check64(res64, 0xbf93c00000000000ULL); + + asm("%0 = dfmake(#%1):pos\n\t" : "=r"(res64) : "i"(0xf)); + check64(res64, 0x3f93c00000000000ULL); +} + +static inline unsigned long long dfmpyll(double x, double y) +{ + unsigned long long res64; + asm("%0 = dfmpyll(%1, %2)" : "=r"(res64) : "r"(x), "r"(y)); + return res64; +} + +static inline unsigned long long dfmpylh(double acc, double x, double y) +{ + unsigned long long res64 = *(unsigned long long *)&acc; + asm("%0 += dfmpylh(%1, %2)" : "+r"(res64) : "r"(x), "r"(y)); + return res64; +} + +static void check_dfmpyxx(void) +{ + unsigned long long res64; + + res64 = dfmpyll(DBL_MIN, DBL_MIN); + check64(res64, 0ULL); + res64 = dfmpyll(-1.0, DBL_MIN); + check64(res64, 0ULL); + res64 = dfmpyll(DBL_MAX, DBL_MAX); + check64(res64, 0x1fffffffdULL); + + res64 = dfmpylh(DBL_MIN, DBL_MIN, DBL_MIN); + check64(res64, 0x10000000000000ULL); + res64 = dfmpylh(-1.0, DBL_MAX, DBL_MIN); + check64(res64, 0xc00fffffffe00000ULL); + res64 = dfmpylh(DBL_MAX, 0.0, -1.0); + check64(res64, 0x7fefffffffffffffULL); +} + int main() { check_compare_exception(); @@ -718,6 +770,8 @@ int main() check_sffixupd(); check_sffms(); check_float2int_convs(); + check_float_consts(); + 
check_dfmpyxx(); puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c index 4fcbb22795..cfdda3fd09 100644 --- a/tests/tcg/hexagon/misc.c +++ b/tests/tcg/hexagon/misc.c @@ -391,6 +391,39 @@ void test_count_trailing_zeros_ones(void) check(ct1p(0xffffff0fffffffffULL), 36); } +static inline int dpmpyss_rnd_s0(int x, int y) +{ + int res; + asm("%0 = mpy(%1, %2):rnd\n\t" : "=r"(res) : "r"(x), "r"(y)); + return res; +} + +void test_dpmpyss_rnd_s0(void) +{ + check(dpmpyss_rnd_s0(-1, 0x80000000), 1); + check(dpmpyss_rnd_s0(0, 0x80000000), 0); + check(dpmpyss_rnd_s0(1, 0x80000000), 0); + check(dpmpyss_rnd_s0(0x7fffffff, 0x80000000), 0xc0000001); + check(dpmpyss_rnd_s0(0x80000000, -1), 1); + check(dpmpyss_rnd_s0(-1, -1), 0); + check(dpmpyss_rnd_s0(0, -1), 0); + check(dpmpyss_rnd_s0(1, -1), 0); + check(dpmpyss_rnd_s0(0x7fffffff, -1), 0); + check(dpmpyss_rnd_s0(0x80000000, 0), 0); + check(dpmpyss_rnd_s0(-1, 0), 0); + check(dpmpyss_rnd_s0(0, 0), 0); + check(dpmpyss_rnd_s0(1, 0), 0); + check(dpmpyss_rnd_s0(-1, -1), 0); + check(dpmpyss_rnd_s0(0, -1), 0); + check(dpmpyss_rnd_s0(1, -1), 0); + check(dpmpyss_rnd_s0(0x7fffffff, 1), 0); + check(dpmpyss_rnd_s0(0x80000000, 0x7fffffff), 0xc0000001); + check(dpmpyss_rnd_s0(-1, 0x7fffffff), 0); + check(dpmpyss_rnd_s0(0, 0x7fffffff), 0); + check(dpmpyss_rnd_s0(1, 0x7fffffff), 0); + check(dpmpyss_rnd_s0(0x7fffffff, 0x7fffffff), 0x3fffffff); +} + int main() { int res; @@ -534,6 +567,8 @@ int main() test_count_trailing_zeros_ones(); + test_dpmpyss_rnd_s0(); + puts(err ? "FAIL" : "PASS"); return err; } From 4354f3dbae3acf6a2116be361e4c497896270518 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Apr 2023 14:56:32 +0200 Subject: [PATCH 32/44] target/hexagon: fix = vs. == mishap **** Changes in v2 **** Fix yyassert's for sign and zero extends Coverity reports a parameter that is "set but never used". This is caused by an assignment operator being used instead of equality. 
Co-authored-by: Taylor Simpson Signed-off-by: Paolo Bonzini Signed-off-by: Taylor Simpson Reviewed-by: Anton Johansson Tested-by: Anton Johansson Message-Id: <20230428204411.1400931-1-tsimpson@quicinc.com> --- target/hexagon/idef-parser/idef-parser.y | 4 ++-- target/hexagon/idef-parser/parser-helpers.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/target/hexagon/idef-parser/idef-parser.y b/target/hexagon/idef-parser/idef-parser.y index 5f3907eb28..5c983954ed 100644 --- a/target/hexagon/idef-parser/idef-parser.y +++ b/target/hexagon/idef-parser/idef-parser.y @@ -683,7 +683,7 @@ rvalue : FAIL yyassert(c, &@1, $5.type == IMMEDIATE && $5.imm.type == VALUE, "SXT expects immediate values\n"); - $$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, SIGNED); + $$ = gen_extend_op(c, &@1, &$3, 64, &$7, SIGNED); } | ZXT '(' rvalue ',' IMM ',' rvalue ')' { @@ -691,7 +691,7 @@ rvalue : FAIL yyassert(c, &@1, $5.type == IMMEDIATE && $5.imm.type == VALUE, "ZXT expects immediate values\n"); - $$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, UNSIGNED); + $$ = gen_extend_op(c, &@1, &$3, 64, &$7, UNSIGNED); } | '(' rvalue ')' { diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index 9550097269..7b5ebafec2 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -1120,7 +1120,7 @@ HexValue gen_extend_op(Context *c, HexValue *value, HexSignedness signedness) { - unsigned bit_width = (dst_width = 64) ? 64 : 32; + unsigned bit_width = (dst_width == 64) ? 
64 : 32; HexValue value_m = *value; HexValue src_width_m = *src_width; From c3199390c2357f5bf695fa0cbf2b1cfd48ebc5fc Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 4 May 2023 13:17:47 -0300 Subject: [PATCH 33/44] Hexagon (target/hexagon/*.py): raise exception on reg parsing error Currently, the python scripts used for the hexagon building will not abort the compilation when there is an error parsing a register. Let's make the compilation properly fail in such cases by raising an exception instead of just printing a warning message, which might get lost in the output. This patch was generated with: git grep -l "Bad register" *hexagon* | \ xargs sed -i "" -e 's/print("Bad register parse: "[, ]*\([^)]*\))/hex_common.bad_register(\1)/g' Plus the bad_register() helper added to hex_common.py. Signed-off-by: Matheus Tavares Bernardino Reviewed-by: Anton Johansson Tested-by: Taylor Simpson Reviewed-by: Taylor Simpson Signed-off-by: Taylor Simpson Message-Id: <1f5dbd92f68fdd89e2647e4ba527a2c32cf0f070.1683217043.git.quic_mathbern@quicinc.com> --- target/hexagon/gen_analyze_funcs.py | 30 +++++----- target/hexagon/gen_helper_funcs.py | 14 ++--- target/hexagon/gen_helper_protos.py | 2 +- target/hexagon/gen_idef_parser_funcs.py | 2 +- target/hexagon/gen_tcg_funcs.py | 78 ++++++++++++------------- target/hexagon/hex_common.py | 3 + 6 files changed, 66 insertions(+), 63 deletions(-) diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index d040f67001..00868cc6cb 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -47,7 +47,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"s", "t", "u", "v"}: f.write(f" const int {regN} = 
insn->regno[{regno}];\n") @@ -56,7 +56,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_pred_write(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write( @@ -77,13 +77,13 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid == "u": f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": newv = "EXT_DFL" if hex_common.is_new_result(tag): @@ -105,7 +105,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"d", "e", "x"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") @@ -114,7 +114,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_qreg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "G": if regid in {"dd"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") @@ -125,7 +125,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): elif regid in {"s"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "S": if 
regid in {"dd"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") @@ -136,9 +136,9 @@ def analyze_opn_old(f, tag, regtype, regid, regno): elif regid in {"s"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def analyze_opn_new(f, tag, regtype, regid, regno): @@ -148,21 +148,21 @@ def analyze_opn_new(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"t", "u", "v"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_pred_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid == "s": f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_vreg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def analyze_opn(f, tag, regtype, regid, toss, numregs, i): @@ -174,9 +174,9 @@ def analyze_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): analyze_opn_new(f, tag, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index 39751a483c..e80550f94e 100755 --- a/target/hexagon/gen_helper_funcs.py +++ 
b/target/hexagon/gen_helper_funcs.py @@ -87,9 +87,9 @@ def gen_helper_arg_opn(f, regtype, regid, i, tag): elif hex_common.is_new_val(regtype, regid, tag): gen_helper_arg_new(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_arg_imm(f, immlett): @@ -135,7 +135,7 @@ def gen_helper_dest_decl_opn(f, regtype, regid, i): else: gen_helper_dest_decl(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_src_var_ext(f, regtype, regid): @@ -185,7 +185,7 @@ def gen_helper_return_opn(f, regtype, regid, i): else: gen_helper_return(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -239,7 +239,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): else: gen_helper_return_type(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) i += 1 if numscalarresults == 0: @@ -262,7 +262,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): # This is the return value of the function continue else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) i += 1 ## For conditional instructions, we pass in the destination register @@ -329,7 +329,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.is_hvx_reg(regtype): gen_helper_src_var_ext(f, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) if hex_common.need_slot(tag): if "A_LOAD" in hex_common.attribdict[tag]: diff --git 
a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index c5ecb85294..3dedd76cb4 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -52,7 +52,7 @@ def gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_single(regid): f.write(f", {def_helper_types[regtype]}") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index 639458b462..29160fcb1d 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -147,7 +147,7 @@ def main(): elif is_single_new: arguments.append(f"{prefix}{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) for immlett, bits, immshift in imms: arguments.append(hex_common.imm_name(immlett)) diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index 887b1cd369..c73467b840 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -37,7 +37,7 @@ def genptr_decl_pair_writable(f, tag, regtype, regid, regno): elif regtype == "C": f.write(f" const int {regN} = insn->regno[{regno}] + HEX_REG_SA0;\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) f.write(f" TCGv_i64 {regtype}{regid}V = " f"get_result_gpr_pair(ctx, {regN});\n") @@ -53,7 +53,7 @@ def genptr_decl_writable(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" TCGv {regtype}{regid}V = tcg_temp_new();\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl(f, tag, regtype, regid, regno): @@ -71,7 +71,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid in {"d", "e", "x", "y"}: 
genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"s", "t", "u", "v"}: f.write( @@ -80,7 +80,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid in {"d", "e", "x"}: genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write(f" TCGv_i64 {regtype}{regid}V = " f"tcg_temp_new_i64();\n") @@ -96,7 +96,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid == "d": genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid == "u": f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -105,7 +105,7 @@ def genptr_decl(f, tag, regtype, regid, regno): "HEX_REG_M0];\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": if regid in {"dd"}: f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -159,7 +159,7 @@ def genptr_decl(f, tag, regtype, regid, regno): f"{regtype}{regid}V_off);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"d", "e", "x"}: f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -180,9 +180,9 @@ def genptr_decl(f, tag, regtype, regid, regno): if not hex_common.skip_qemu_helper(tag): f.write(f" TCGv_ptr {regtype}{regid}V = " "tcg_temp_new_ptr();\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl_new(f, tag, regtype, regid, regno): @@ -193,7 +193,7 @@ def genptr_decl_new(f, tag, regtype, 
regid, regno): f"get_result_gpr(ctx, insn->regno[{regno}]);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"t", "u", "v"}: f.write( @@ -201,7 +201,7 @@ def genptr_decl_new(f, tag, regtype, regid, regno): f"ctx->new_pred_value[insn->regno[{regno}]];\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid == "s": f.write( @@ -218,9 +218,9 @@ def genptr_decl_new(f, tag, regtype, regid, regno): f"tcg_constant_tl({regtype}{regid}N_num);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i): @@ -232,9 +232,9 @@ def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): genptr_decl_new(f, tag, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def genptr_decl_imm(f, immlett): @@ -266,7 +266,7 @@ def genptr_src_read(f, tag, regtype, regid): f"hex_gpr[{regtype}{regid}N]);\n" ) elif regid not in {"s", "t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid == "x": f.write( @@ -274,7 +274,7 @@ def genptr_src_read(f, tag, regtype, regid): f"hex_pred[{regtype}{regid}N]);\n" ) elif regid not in {"s", "t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write( @@ -287,10 +287,10 @@ def genptr_src_read(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register 
parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid != "u": - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": if regid in {"uu", "vv", "xx"}: f.write(f" tcg_gen_gvec_mov(MO_64, {regtype}{regid}V_off,\n") @@ -311,7 +311,7 @@ def genptr_src_read(f, tag, regtype, regid): f.write(f" vreg_src_off(ctx, {regtype}{regid}N),\n") f.write(" sizeof(MMVector), sizeof(MMVector));\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"s", "t", "u", "v"}: if not hex_common.skip_qemu_helper(tag): @@ -326,23 +326,23 @@ def genptr_src_read(f, tag, regtype, regid): ) f.write(" sizeof(MMQReg), sizeof(MMQReg));\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_src_read_new(f, regtype, regid): if regtype == "N": if regid not in {"s", "t"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid not in {"t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid != "s": - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_src_read_opn(f, regtype, regid, tag): @@ -354,9 +354,9 @@ def genptr_src_read_opn(f, regtype, regid, tag): elif hex_common.is_new_val(regtype, regid, tag): genptr_src_read_new(f, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def 
gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i): @@ -370,9 +370,9 @@ def gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): f.write(f"{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_decl_imm(f, immlett): @@ -401,7 +401,7 @@ def genptr_dst_write(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"d", "e", "x"}: f.write( @@ -409,7 +409,7 @@ def genptr_dst_write(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "dd": f.write( @@ -422,9 +422,9 @@ def genptr_dst_write(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): @@ -440,12 +440,12 @@ def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): f"{regtype}{regid}N, {newv});\n" ) elif regid not in {"dd", "d", "x"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid not in {"d", "e", "x"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_dst_write_opn(f, regtype, regid, tag): @@ -468,7 +468,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag): else: genptr_dst_write(f, tag, 
regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -532,7 +532,7 @@ def gen_tcg_func(f, tag, regs, imms): elif hex_common.is_new_val(regtype, regid, tag): declared.append(f"{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## Handle immediates for immlett, bits, immshift in imms: diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 011cce1a68..f3aac55db0 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -30,6 +30,9 @@ tags = [] # list of all tags overrides = {} # tags with helper overrides idef_parser_enabled = {} # tags enabled for idef-parser +def bad_register(*args): + args_str = ", ".join(map(str, args)) + raise Exception(f"Bad register parse: {args_str}") # We should do this as a hash for performance, # but to keep order let's keep it as a list. From f0e0c9844326d47694299e1d9ce3c0bcc1b3a457 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 4 May 2023 15:53:11 -0300 Subject: [PATCH 34/44] Hexagon: list available CPUs with `-cpu help` Currently, qemu-hexagon only models the v67 cpu. Nonetheless if we try to get this information with `-cpu help`, qemu just exits with an error code and no output. Let's correct that. The code is basically a copy from target/alpha/cpu.h, but we strip the "-hexagon-cpu" suffix before printing. 
This is to avoid confusing situations like the following: $ qemu-hexagon -cpu help Available CPUs: v67-hexagon-cpu $ qemu-hexagon -cpu v67-hexagon-cpu ./prog qemu-hexagon: unable to find CPU model 'v67-hexagon-cpu' Signed-off-by: Matheus Tavares Bernardino Signed-off-by: Taylor Simpson Tested-by: Taylor Simpson Reviewed-by: Taylor Simpson Message-Id: --- target/hexagon/cpu.c | 20 ++++++++++++++++++++ target/hexagon/cpu.h | 3 +++ 2 files changed, 23 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index d4dfc382ab..7e127059c7 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -31,6 +31,26 @@ static void hexagon_v69_cpu_init(Object *obj) { } static void hexagon_v71_cpu_init(Object *obj) { } static void hexagon_v73_cpu_init(Object *obj) { } +static void hexagon_cpu_list_entry(gpointer data, gpointer user_data) +{ + ObjectClass *oc = data; + char *name = g_strdup(object_class_get_name(oc)); + if (g_str_has_suffix(name, HEXAGON_CPU_TYPE_SUFFIX)) { + name[strlen(name) - strlen(HEXAGON_CPU_TYPE_SUFFIX)] = '\0'; + } + qemu_printf(" %s\n", name); + g_free(name); +} + +void hexagon_cpu_list(void) +{ + GSList *list; + list = object_class_get_list_sorted(TYPE_HEXAGON_CPU, false); + qemu_printf("Available CPUs:\n"); + g_slist_foreach(list, hexagon_cpu_list_entry, NULL); + g_slist_free(list); +} + static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) { ObjectClass *oc; diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index d095dc6647..bfcb1057dd 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -48,6 +48,9 @@ #define TYPE_HEXAGON_CPU_V71 HEXAGON_CPU_TYPE_NAME("v71") #define TYPE_HEXAGON_CPU_V73 HEXAGON_CPU_TYPE_NAME("v73") +void hexagon_cpu_list(void); +#define cpu_list hexagon_cpu_list + #define MMU_USER_IDX 0 typedef struct { From 3128588232333beb505505366133d18da671e2c8 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 4 May 2023 15:53:12 -0300 Subject: [PATCH 35/44] Hexagon: append 
eflags to unknown cpu model string Running qemu-hexagon with a binary that was compiled for an arch version unknown by qemu can produce a somewhat confusing message: qemu-hexagon: unable to find CPU model 'unknown' Let's give a bit more info by appending the eflags so that the message becomes: qemu-hexagon: unable to find CPU model 'unknown (0x69)' Signed-off-by: Matheus Tavares Bernardino Signed-off-by: Taylor Simpson Tested-by: Taylor Simpson Reviewed-by: Taylor Simpson Message-Id: <8a8d013cc619b94fd4fb577ae6a8df26cedb972b.1683225804.git.quic_mathbern@quicinc.com> --- linux-user/hexagon/target_elf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/linux-user/hexagon/target_elf.h b/linux-user/hexagon/target_elf.h index a0271a0a2a..36056fc9f0 100644 --- a/linux-user/hexagon/target_elf.h +++ b/linux-user/hexagon/target_elf.h @@ -20,6 +20,9 @@ static inline const char *cpu_get_model(uint32_t eflags) { + static char buf[32]; + int err; + /* For now, treat anything newer than v5 as a v73 */ /* FIXME - Disable instructions that are newer than the specified arch */ if (eflags == 0x04 || /* v5 */ @@ -39,7 +42,9 @@ static inline const char *cpu_get_model(uint32_t eflags) ) { return "v73"; } - return "unknown"; + + err = snprintf(buf, sizeof(buf), "unknown (0x%x)", eflags); + return err >= 0 && err < sizeof(buf) ? buf : "unknown"; } #endif From ed9b28fb009a7944fcda89c3ced3cb6ad4a92ea1 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 11 May 2023 09:36:01 -0700 Subject: [PATCH 36/44] Hexagon (iclass): update J4_hintjumpr slot constraints The Hexagon PRM says that "The assembler automatically encodes instructions in the packet in the proper order. In the binary encoding of a packet, the instructions must be ordered from Slot 3 down to Slot 0." Prior to the architecture version v73, the slot constraints from instruction "hintjr" only allowed it to be executed at slot 2. 
With that in mind, consider the packet: { hintjr(r0) nop nop if (!p0) memd(r1+#0) = r1:0 } To satisfy the ordering rule quoted from the PRM, the assembler would, thus, move one of the nops to the first position, so that it can be assigned to slot 3 and the subsequent hintjr to slot 2. However, since v73, hintjr can be executed at either slot 2 or 3. So there is no need to reorder that packet and the assembler will encode it as is. When QEMU tries to execute it, however, we end up hitting a "misaligned store" exception because both the store and the hintjr will be assigned to slot 0, and some functions like `slot_is_predicated()` expect the decode machinery to assign only one instruction per slot. In particular, the mentioned function will traverse the packet until it finds the first instruction at the desired slot which, for slot 0, will be hintjr. Since hintjr is not predicated, the result is that we try to execute the store regardless of the predicate. And because the predicate is false, we had not previously loaded hex_store_addr[0] or hex_store_width[0]. As a result, the store will decide the width based on trash memory, causing it to be misaligned. Update the slot constraints for hintjr so that QEMU can properly handle such encodings. Note: to avoid similar-but-not-identical issues in the future, we should look for multiple instructions at the same slot during decoding time and throw an invalid packet exception. That will be done in the subsequent commit. 
Signed-off-by: Matheus Tavares Bernardino Reviewed-by: Taylor Simpson Signed-off-by: Taylor Simpson Message-Id: <0fcd8293642c6324119fbbab44741164bcbd04fb.1673616964.git.quic_mathbern@quicinc.com> --- target/hexagon/iclass.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/hexagon/iclass.c b/target/hexagon/iclass.c index 6091286993..c3f8523b27 100644 --- a/target/hexagon/iclass.c +++ b/target/hexagon/iclass.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -51,8 +51,10 @@ SlotMask find_iclass_slots(Opcode opcode, int itype) return SLOTS_0; } else if ((opcode == J2_trap0) || (opcode == Y2_isync) || - (opcode == J2_pause) || (opcode == J4_hintjumpr)) { + (opcode == J2_pause)) { return SLOTS_2; + } else if (opcode == J4_hintjumpr) { + return SLOTS_23; } else if (GET_ATTRIB(opcode, A_CRSLOT23)) { return SLOTS_23; } else if (GET_ATTRIB(opcode, A_RESTRICT_PREFERSLOT0)) { From 14edcf11e297252c995d5909144f3751b7383efc Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Mon, 8 May 2023 10:37:23 -0300 Subject: [PATCH 37/44] Hexagon (decode): look for pkts with multiple insns at the same slot Each slot in a packet can be assigned to at most one instruction. Although the assembler generally ought to enforce this rule, we better be safe than sorry and also do some check to properly throw an "invalid packet" exception on wrong slot assignments. This should also make it easier to debug possible future errors caused by missing updates to `find_iclass_slots()` rules in target/hexagon/iclass.c. 
Co-authored-by: Taylor Simpson Signed-off-by: Taylor Simpson Signed-off-by: Matheus Tavares Bernardino Reviewed-by: Taylor Simpson Tested-by: Taylor Simpson Message-Id: --- target/hexagon/decode.c | 30 +++++++++++++++++++++++++++--- tests/tcg/hexagon/Makefile.target | 7 +++++++ tests/tcg/hexagon/invalid-slots.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 tests/tcg/hexagon/invalid-slots.c diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index 041c8de751..946c55cc71 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -797,7 +797,26 @@ static bool decode_parsebits_is_loopend(uint32_t encoding32) return bits == 0x2; } -static void +static bool has_valid_slot_assignment(Packet *pkt) +{ + int used_slots = 0; + for (int i = 0; i < pkt->num_insns; i++) { + int slot_mask; + Insn *insn = &pkt->insn[i]; + if (decode_opcode_ends_loop(insn->opcode)) { + /* We overload slot 0 for endloop. 
*/ + continue; + } + slot_mask = 1 << insn->slot; + if (used_slots & slot_mask) { + return false; + } + used_slots |= slot_mask; + } + return true; +} + +static bool decode_set_slot_number(Packet *pkt) { int slot; @@ -886,6 +905,8 @@ decode_set_slot_number(Packet *pkt) /* Then push it to slot0 */ pkt->insn[slot1_iidx].slot = 0; } + + return has_valid_slot_assignment(pkt); } /* @@ -961,8 +982,11 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, decode_apply_extenders(pkt); if (!disas_only) { decode_remove_extenders(pkt); + if (!decode_set_slot_number(pkt)) { + /* Invalid packet */ + return 0; + } } - decode_set_slot_number(pkt); decode_fill_newvalue_regno(pkt); if (pkt->pkt_has_hvx) { diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 6109a7ed10..890cceed5d 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -50,6 +50,13 @@ HEX_TESTS += vector_add_int HEX_TESTS += scatter_gather HEX_TESTS += hvx_misc HEX_TESTS += hvx_histogram +HEX_TESTS += invalid-slots + +run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \ + test $$? -eq 1 && grep -q "exception $(strip $1)" $2.stderr) + +run-invalid-slots: invalid-slots + $(call run-and-check-exception, 0x15, $@, $(QEMU) $(QEMU_OPTS) $<) HEX_TESTS += test_abs HEX_TESTS += test_bitcnt diff --git a/tests/tcg/hexagon/invalid-slots.c b/tests/tcg/hexagon/invalid-slots.c new file mode 100644 index 0000000000..366ce4f42f --- /dev/null +++ b/tests/tcg/hexagon/invalid-slots.c @@ -0,0 +1,29 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +char mem[8] __attribute__((aligned(8))); + +int main() +{ + asm volatile( + "r0 = #mem\n" + /* Invalid packet (2 instructions at slot 0): */ + ".word 0xa1804100\n" /* { memw(r0) = r1; */ + ".word 0x28032804\n" /* r3 = #0; r4 = #0 } */ + : : : "r0", "r3", "r4", "memory"); + return 0; +} From 9e6d4938d106ca775108ec2a1fecc6d789543088 Mon Sep 17 00:00:00 2001 From: Marco Liebel Date: Tue, 9 May 2023 11:42:31 -0700 Subject: [PATCH 38/44] Remove test_vshuff from hvx_misc tests test_vshuff checks that the vshuff instruction works correctly when both vector registers are the same. Using vshuff in this way is undefined and will be rejected by the compiler in a future version of the toolchain. 
Signed-off-by: Marco Liebel Reviewed-by: Brian Cain Reviewed-by: Taylor Simpson Tested-by: Taylor Simpson Signed-off-by: Taylor Simpson Message-Id: <20230509184231.2467626-1-quic_mliebel@quicinc.com> --- tests/tcg/hexagon/hvx_misc.c | 45 ------------------------------------ 1 file changed, 45 deletions(-) diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index c89fe0253d..09dec8d7a1 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ b/tests/tcg/hexagon/hvx_misc.c @@ -342,49 +342,6 @@ static void test_vsubuwsat_dv(void) check_output_w(__LINE__, 2); } -static void test_vshuff(void) -{ - /* Test that vshuff works when the two operands are the same register */ - const uint32_t splat = 0x089be55c; - const uint32_t shuff = 0x454fa926; - MMVector v0, v1; - - memset(expect, 0x12, sizeof(MMVector)); - memset(output, 0x34, sizeof(MMVector)); - - asm volatile("v25 = vsplat(%0)\n\t" - "vshuff(v25, v25, %1)\n\t" - "vmem(%2 + #0) = v25\n\t" - : /* no outputs */ - : "r"(splat), "r"(shuff), "r"(output) - : "v25", "memory"); - - /* - * The semantics of Hexagon are the operands are pass-by-value, so create - * two copies of the vsplat result. 
- */ - for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { - v0.uw[i] = splat; - v1.uw[i] = splat; - } - /* Do the vshuff operation */ - for (int offset = 1; offset < MAX_VEC_SIZE_BYTES; offset <<= 1) { - if (shuff & offset) { - for (int k = 0; k < MAX_VEC_SIZE_BYTES; k++) { - if (!(k & offset)) { - uint8_t tmp = v0.ub[k]; - v0.ub[k] = v1.ub[k + offset]; - v1.ub[k + offset] = tmp; - } - } - } - } - /* Put the result in the expect buffer for verification */ - expect[0] = v1; - - check_output_b(__LINE__, 1); -} - static void test_load_tmp_predicated(void) { void *p0 = buffer0; @@ -508,8 +465,6 @@ int main() test_vadduwsat(); test_vsubuwsat_dv(); - test_vshuff(); - test_load_tmp_predicated(); test_load_cur_predicated(); From 758370052fb602f9f23c3b8ae26a6133373c78e6 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 4 May 2023 12:37:31 -0300 Subject: [PATCH 39/44] gdbstub: only send stop-reply packets when allowed to MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GDB's remote serial protocol allows stop-reply messages to be sent by the stub either as a notification packet or as a reply to a GDB command (provided that the cmd accepts such a response). QEMU currently does not implement notification packets, so it should only send stop-replies synchronously and when requested. Nevertheless, it still issues unsolicited stop messages through gdb_vm_state_change(). Although this behavior doesn't seem to cause problems with GDB itself (the messages are just ignored), it can impact other debuggers that implement the GDB remote serial protocol, like hexagon-lldb. Let's change the gdbstub to send stop messages only as a response to a previous GDB command that accepts such a reply. 
Signed-off-by: Matheus Tavares Bernardino Acked-by: Alex Bennée Signed-off-by: Taylor Simpson Message-Id: --- gdbstub/gdbstub.c | 37 ++++++++++++++++++++++++++++--------- gdbstub/internals.h | 5 +++++ gdbstub/softmmu.c | 13 +++++++++++-- gdbstub/user.c | 24 ++++++++++++++++-------- 4 files changed, 60 insertions(+), 19 deletions(-) diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c index 0760d78685..be18568d0a 100644 --- a/gdbstub/gdbstub.c +++ b/gdbstub/gdbstub.c @@ -777,6 +777,10 @@ typedef void (*GdbCmdHandler)(GArray *params, void *user_ctx); /* * cmd_startswith -> cmd is compared using startswith * + * allow_stop_reply -> true iff the gdbstub can respond to this command with a + * "stop reply" packet. The list of commands that accept such response is + * defined at the GDB Remote Serial Protocol documentation. see: + * https://sourceware.org/gdb/onlinedocs/gdb/Stop-Reply-Packets.html#Stop-Reply-Packets. * * schema definitions: * Each schema parameter entry consists of 2 chars, @@ -802,6 +806,7 @@ typedef struct GdbCmdParseEntry { const char *cmd; bool cmd_startswith; const char *schema; + bool allow_stop_reply; } GdbCmdParseEntry; static inline int startswith(const char *string, const char *pattern) @@ -835,6 +840,7 @@ static int process_string_cmd(void *user_ctx, const char *data, } } + gdbserver_state.allow_stop_reply = cmd->allow_stop_reply; cmd->handler(params, user_ctx); return 0; } @@ -1283,11 +1289,14 @@ static void handle_v_attach(GArray *params, void *user_ctx) gdbserver_state.g_cpu = cpu; gdbserver_state.c_cpu = cpu; - g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); - gdb_append_thread_id(cpu, gdbserver_state.str_buf); - g_string_append_c(gdbserver_state.str_buf, ';'); + if (gdbserver_state.allow_stop_reply) { + g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); + gdb_append_thread_id(cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + gdbserver_state.allow_stop_reply 
= false; cleanup: - gdb_put_strbuf(); + gdb_put_strbuf(); + } } static void handle_v_kill(GArray *params, void *user_ctx) @@ -1310,12 +1319,14 @@ static const GdbCmdParseEntry gdb_v_commands_table[] = { .handler = handle_v_cont, .cmd = "Cont", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "s0" }, { .handler = handle_v_attach, .cmd = "Attach;", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "l0" }, { @@ -1698,10 +1709,13 @@ static void handle_gen_set(GArray *params, void *user_ctx) static void handle_target_halt(GArray *params, void *user_ctx) { - g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); - gdb_append_thread_id(gdbserver_state.c_cpu, gdbserver_state.str_buf); - g_string_append_c(gdbserver_state.str_buf, ';'); - gdb_put_strbuf(); + if (gdbserver_state.allow_stop_reply) { + g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); + gdb_append_thread_id(gdbserver_state.c_cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + gdb_put_strbuf(); + gdbserver_state.allow_stop_reply = false; + } /* * Remove all the breakpoints when this query is issued, * because gdb is doing an initial connect and the state @@ -1725,7 +1739,8 @@ static int gdb_handle_packet(const char *line_buf) static const GdbCmdParseEntry target_halted_cmd_desc = { .handler = handle_target_halt, .cmd = "?", - .cmd_startswith = 1 + .cmd_startswith = 1, + .allow_stop_reply = true, }; cmd_parser = &target_halted_cmd_desc; } @@ -1736,6 +1751,7 @@ static int gdb_handle_packet(const char *line_buf) .handler = handle_continue, .cmd = "c", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "L0" }; cmd_parser = &continue_cmd_desc; @@ -1747,6 +1763,7 @@ static int gdb_handle_packet(const char *line_buf) .handler = handle_cont_with_sig, .cmd = "C", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "l0" }; cmd_parser = &cont_with_sig_cmd_desc; @@ -1785,6 +1802,7 @@ static int 
gdb_handle_packet(const char *line_buf) .handler = handle_step, .cmd = "s", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "L0" }; cmd_parser = &step_cmd_desc; @@ -1976,6 +1994,7 @@ void gdb_read_byte(uint8_t ch) { uint8_t reply; + gdbserver_state.allow_stop_reply = false; #ifndef CONFIG_USER_ONLY if (gdbserver_state.last_packet->len) { /* Waiting for a response to the last packet. If we see the start diff --git a/gdbstub/internals.h b/gdbstub/internals.h index 94ddff4495..33d21d6488 100644 --- a/gdbstub/internals.h +++ b/gdbstub/internals.h @@ -65,6 +65,11 @@ typedef struct GDBState { GByteArray *mem_buf; int sstep_flags; int supported_sstep_flags; + /* + * Whether we are allowed to send a stop reply packet at this moment. + * Must be set off after sending the stop reply itself. + */ + bool allow_stop_reply; } GDBState; /* lives in main gdbstub.c */ diff --git a/gdbstub/softmmu.c b/gdbstub/softmmu.c index 22ecd09d04..99d994e6bf 100644 --- a/gdbstub/softmmu.c +++ b/gdbstub/softmmu.c @@ -43,6 +43,7 @@ static void reset_gdbserver_state(void) g_free(gdbserver_state.processes); gdbserver_state.processes = NULL; gdbserver_state.process_num = 0; + gdbserver_state.allow_stop_reply = false; } /* @@ -139,6 +140,10 @@ static void gdb_vm_state_change(void *opaque, bool running, RunState state) return; } + if (!gdbserver_state.allow_stop_reply) { + return; + } + gdb_append_thread_id(cpu, tid); switch (state) { @@ -205,6 +210,7 @@ static void gdb_vm_state_change(void *opaque, bool running, RunState state) send_packet: gdb_put_packet(buf->str); + gdbserver_state.allow_stop_reply = false; /* disable single step if it was enabled */ cpu_single_step(cpu, 0); @@ -422,8 +428,11 @@ void gdb_exit(int code) trace_gdbstub_op_exiting((uint8_t)code); - snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); - gdb_put_packet(buf); + if (gdbserver_state.allow_stop_reply) { + snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); + gdb_put_packet(buf); + gdbserver_state.allow_stop_reply 
= false; + } qemu_chr_fe_deinit(&gdbserver_system_state.chr, true); } diff --git a/gdbstub/user.c b/gdbstub/user.c index 80488b6bb9..5b375be1d9 100644 --- a/gdbstub/user.c +++ b/gdbstub/user.c @@ -108,8 +108,11 @@ void gdb_exit(int code) trace_gdbstub_op_exiting((uint8_t)code); - snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); - gdb_put_packet(buf); + if (gdbserver_state.allow_stop_reply) { + snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); + gdb_put_packet(buf); + gdbserver_state.allow_stop_reply = false; + } } int gdb_handlesig(CPUState *cpu, int sig) @@ -127,11 +130,14 @@ int gdb_handlesig(CPUState *cpu, int sig) if (sig != 0) { gdb_set_stop_cpu(cpu); - g_string_printf(gdbserver_state.str_buf, - "T%02xthread:", gdb_target_signal_to_gdb(sig)); - gdb_append_thread_id(cpu, gdbserver_state.str_buf); - g_string_append_c(gdbserver_state.str_buf, ';'); - gdb_put_strbuf(); + if (gdbserver_state.allow_stop_reply) { + g_string_printf(gdbserver_state.str_buf, + "T%02xthread:", gdb_target_signal_to_gdb(sig)); + gdb_append_thread_id(cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + gdb_put_strbuf(); + gdbserver_state.allow_stop_reply = false; + } } /* * gdb_put_packet() might have detected that the peer terminated the @@ -174,12 +180,14 @@ void gdb_signalled(CPUArchState *env, int sig) { char buf[4]; - if (!gdbserver_state.init || gdbserver_user_state.fd < 0) { + if (!gdbserver_state.init || gdbserver_user_state.fd < 0 || + !gdbserver_state.allow_stop_reply) { return; } snprintf(buf, sizeof(buf), "X%02x", gdb_target_signal_to_gdb(sig)); gdb_put_packet(buf); + gdbserver_state.allow_stop_reply = false; } static void gdb_accept_init(int fd) From dae66a3f669eaf0e33447ed42a36ad4ee2f178f2 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 4 May 2023 12:37:32 -0300 Subject: [PATCH 40/44] gdbstub: add test for untimely stop-reply packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit In the previous commit, we modified gdbstub.c to only send stop-reply packets as a response to GDB commands that accept it. Now, let's add a test for this intended behavior. Running this test before the fix from the previous commit fails as QEMU sends a stop-reply packet asynchronously, when GDB was in fact waiting for an ACK. Signed-off-by: Matheus Tavares Bernardino Acked-by: Alex Bennée Signed-off-by: Taylor Simpson Message-Id: --- tests/guest-debug/run-test.py | 16 ++++++++++++---- .../tcg/multiarch/system/Makefile.softmmu-target | 16 +++++++++++++++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/guest-debug/run-test.py b/tests/guest-debug/run-test.py index d865e46ecd..de6106a5e5 100755 --- a/tests/guest-debug/run-test.py +++ b/tests/guest-debug/run-test.py @@ -26,11 +26,12 @@ def get_args(): parser.add_argument("--qargs", help="Qemu arguments for test") parser.add_argument("--binary", help="Binary to debug", required=True) - parser.add_argument("--test", help="GDB test script", - required=True) + parser.add_argument("--test", help="GDB test script") parser.add_argument("--gdb", help="The gdb binary to use", default=None) + parser.add_argument("--gdb-args", help="Additional gdb arguments") parser.add_argument("--output", help="A file to redirect output to") + parser.add_argument("--stderr", help="A file to redirect stderr to") return parser.parse_args() @@ -58,6 +59,10 @@ if __name__ == '__main__': output = open(args.output, "w") else: output = None + if args.stderr: + stderr = open(args.stderr, "w") + else: + stderr = None socket_dir = TemporaryDirectory("qemu-gdbstub") socket_name = os.path.join(socket_dir.name, "gdbstub.socket") @@ -77,6 +82,8 @@ if __name__ == '__main__': # Now launch gdb with our test and collect the result gdb_cmd = "%s %s" % (args.gdb, args.binary) + if args.gdb_args: + gdb_cmd += " %s" % (args.gdb_args) # run quietly and ignore .gdbinit gdb_cmd += " -q -n -batch" # disable prompts in case of crash @@ -84,13
+91,14 @@ if __name__ == '__main__': # connect to remote gdb_cmd += " -ex 'target remote %s'" % (socket_name) # finally the test script itself - gdb_cmd += " -x %s" % (args.test) + if args.test: + gdb_cmd += " -x %s" % (args.test) sleep(1) log(output, "GDB CMD: %s" % (gdb_cmd)) - result = subprocess.call(gdb_cmd, shell=True, stdout=output) + result = subprocess.call(gdb_cmd, shell=True, stdout=output, stderr=stderr) # A result of greater than 128 indicates a fatal signal (likely a # crash due to gdb internal failure). That's a problem for GDB and diff --git a/tests/tcg/multiarch/system/Makefile.softmmu-target b/tests/tcg/multiarch/system/Makefile.softmmu-target index 5f432c95f3..fe40195d39 100644 --- a/tests/tcg/multiarch/system/Makefile.softmmu-target +++ b/tests/tcg/multiarch/system/Makefile.softmmu-target @@ -27,6 +27,20 @@ run-gdbstub-memory: memory "-monitor none -display none -chardev file$(COMMA)path=$<.out$(COMMA)id=output $(QEMU_OPTS)" \ --bin $< --test $(MULTIARCH_SRC)/gdbstub/memory.py, \ softmmu gdbstub support) + +run-gdbstub-untimely-packet: hello + $(call run-test, $@, $(GDB_SCRIPT) \ + --gdb $(HAVE_GDB_BIN) \ + --gdb-args "-ex 'set debug remote 1'" \ + --output untimely-packet.gdb.out \ + --stderr untimely-packet.gdb.err \ + --qemu $(QEMU) \ + --bin $< --qargs \ + "-monitor none -display none -chardev file$(COMMA)path=untimely-packet.out$(COMMA)id=output $(QEMU_OPTS)", \ + "softmmu gdbstub untimely packets") + $(call quiet-command, \ + (! 
grep -Fq 'Packet instead of Ack, ignoring it' untimely-packet.gdb.err), \ + "GREP", "file untimely-packet.gdb.err") else run-gdbstub-%: $(call skip-test, "gdbstub test $*", "no guest arch support") @@ -36,4 +50,4 @@ run-gdbstub-%: $(call skip-test, "gdbstub test $*", "need working gdb") endif -MULTIARCH_RUNS += run-gdbstub-memory +MULTIARCH_RUNS += run-gdbstub-memory run-gdbstub-untimely-packet From ab930e80097c8b3472717363435cee8363e55152 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 4 May 2023 12:37:33 -0300 Subject: [PATCH 41/44] Hexagon: add core gdbstub xml data for LLDB Signed-off-by: Matheus Tavares Bernardino Reviewed-by: Taylor Simpson Signed-off-by: Taylor Simpson Message-Id: --- MAINTAINERS | 1 + configs/targets/hexagon-linux-user.mak | 1 + gdb-xml/hexagon-core.xml | 84 ++++++++++++++++++++++++++ target/hexagon/cpu.c | 3 +- 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 gdb-xml/hexagon-core.xml diff --git a/MAINTAINERS b/MAINTAINERS index f757369373..2e18c3cad4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -225,6 +225,7 @@ F: tests/tcg/hexagon/ F: disas/hexagon.c F: configs/targets/hexagon-linux-user/default.mak F: docker/dockerfiles/debian-hexagon-cross.docker +F: gdb-xml/hexagon*.xml Hexagon idef-parser M: Alessandro Di Federico diff --git a/configs/targets/hexagon-linux-user.mak b/configs/targets/hexagon-linux-user.mak index 003ed0a408..fd5e222d4f 100644 --- a/configs/targets/hexagon-linux-user.mak +++ b/configs/targets/hexagon-linux-user.mak @@ -1 +1,2 @@ TARGET_ARCH=hexagon +TARGET_XML_FILES=gdb-xml/hexagon-core.xml diff --git a/gdb-xml/hexagon-core.xml b/gdb-xml/hexagon-core.xml new file mode 100644 index 0000000000..e181163cff --- /dev/null +++ b/gdb-xml/hexagon-core.xml @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 
7e127059c7..575bcc190d 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -383,8 +383,9 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->get_pc = hexagon_cpu_get_pc; cc->gdb_read_register = hexagon_gdb_read_register; cc->gdb_write_register = hexagon_gdb_write_register; - cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS; + cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS; cc->gdb_stop_before_watchpoint = true; + cc->gdb_core_xml_file = "hexagon-core.xml"; cc->disas_set_info = hexagon_cpu_disas_set_info; cc->tcg_ops = &hexagon_tcg_ops; } From b0bd9d8adff1b4e967e1cd4c42d82a9d46712cbd Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Thu, 4 May 2023 12:37:34 -0300 Subject: [PATCH 42/44] Hexagon (gdbstub): fix p3:0 read and write via stub Signed-off-by: Brian Cain Co-authored-by: Sid Manning Signed-off-by: Sid Manning Co-authored-by: Matheus Tavares Bernardino Signed-off-by: Matheus Tavares Bernardino Reviewed-by: Taylor Simpson Signed-off-by: Taylor Simpson Message-Id: <32e7de567cdae184a6781644454bbb19916c955b.1683214375.git.quic_mathbern@quicinc.com> --- target/hexagon/gdbstub.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c index 46083da620..a06fed9f18 100644 --- a/target/hexagon/gdbstub.c +++ b/target/hexagon/gdbstub.c @@ -25,6 +25,14 @@ int hexagon_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) HexagonCPU *cpu = HEXAGON_CPU(cs); CPUHexagonState *env = &cpu->env; + if (n == HEX_REG_P3_0_ALIASED) { + uint32_t p3_0 = 0; + for (int i = 0; i < NUM_PREGS; i++) { + p3_0 = deposit32(p3_0, i * 8, 8, env->pred[i]); + } + return gdb_get_regl(mem_buf, p3_0); + } + if (n < TOTAL_PER_THREAD_REGS) { return gdb_get_regl(mem_buf, env->gpr[n]); } @@ -37,6 +45,14 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) HexagonCPU *cpu = HEXAGON_CPU(cs); CPUHexagonState *env = &cpu->env; + if (n == HEX_REG_P3_0_ALIASED) { + uint32_t 
p3_0 = ldtul_p(mem_buf); + for (int i = 0; i < NUM_PREGS; i++) { + env->pred[i] = extract32(p3_0, i * 8, 8); + } + return sizeof(target_ulong); + } + if (n < TOTAL_PER_THREAD_REGS) { env->gpr[n] = ldtul_p(mem_buf); return sizeof(target_ulong); From b647652ee5f9bc16a431cc2dbd8525b371b3cb1b Mon Sep 17 00:00:00 2001 From: Taylor Simpson Date: Thu, 4 May 2023 12:37:35 -0300 Subject: [PATCH 43/44] Hexagon (gdbstub): add HVX support Signed-off-by: Taylor Simpson Co-authored-by: Brian Cain Signed-off-by: Brian Cain Co-authored-by: Matheus Tavares Bernardino Signed-off-by: Matheus Tavares Bernardino Reviewed-by: Brian Cain Message-Id: <17cb32f34d469f705c3cc066a3583935352ee048.1683214375.git.quic_mathbern@quicinc.com> --- configs/targets/hexagon-linux-user.mak | 2 +- gdb-xml/hexagon-hvx.xml | 96 ++++++++++++++++++++++++++ target/hexagon/cpu.c | 6 ++ target/hexagon/gdbstub.c | 68 ++++++++++++++++++ target/hexagon/internal.h | 2 + 5 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 gdb-xml/hexagon-hvx.xml diff --git a/configs/targets/hexagon-linux-user.mak b/configs/targets/hexagon-linux-user.mak index fd5e222d4f..2765a4c563 100644 --- a/configs/targets/hexagon-linux-user.mak +++ b/configs/targets/hexagon-linux-user.mak @@ -1,2 +1,2 @@ TARGET_ARCH=hexagon -TARGET_XML_FILES=gdb-xml/hexagon-core.xml +TARGET_XML_FILES=gdb-xml/hexagon-core.xml gdb-xml/hexagon-hvx.xml diff --git a/gdb-xml/hexagon-hvx.xml b/gdb-xml/hexagon-hvx.xml new file mode 100644 index 0000000000..5f2e220733 --- /dev/null +++ b/gdb-xml/hexagon-hvx.xml @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 575bcc190d..f155936289 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -24,6 +24,7 @@ #include "hw/qdev-properties.h" #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" +#include 
"exec/gdbstub.h" static void hexagon_v67_cpu_init(Object *obj) { } static void hexagon_v68_cpu_init(Object *obj) { } @@ -339,6 +340,11 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) return; } + gdb_register_coprocessor(cs, hexagon_hvx_gdb_read_register, + hexagon_hvx_gdb_write_register, + NUM_VREGS + NUM_QREGS, + "hexagon-hvx.xml", 0); + qemu_init_vcpu(cs); cpu_reset(cs); diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c index a06fed9f18..54d37e006e 100644 --- a/target/hexagon/gdbstub.c +++ b/target/hexagon/gdbstub.c @@ -60,3 +60,71 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) g_assert_not_reached(); } + +static int gdb_get_vreg(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + int total = 0; + int i; + for (i = 0; i < ARRAY_SIZE(env->VRegs[n].uw); i++) { + total += gdb_get_regl(mem_buf, env->VRegs[n].uw[i]); + } + return total; +} + +static int gdb_get_qreg(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + int total = 0; + int i; + for (i = 0; i < ARRAY_SIZE(env->QRegs[n].uw); i++) { + total += gdb_get_regl(mem_buf, env->QRegs[n].uw[i]); + } + return total; +} + +int hexagon_hvx_gdb_read_register(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + if (n < NUM_VREGS) { + return gdb_get_vreg(env, mem_buf, n); + } + n -= NUM_VREGS; + + if (n < NUM_QREGS) { + return gdb_get_qreg(env, mem_buf, n); + } + + g_assert_not_reached(); +} + +static int gdb_put_vreg(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + int i; + for (i = 0; i < ARRAY_SIZE(env->VRegs[n].uw); i++) { + env->VRegs[n].uw[i] = ldtul_p(mem_buf); + mem_buf += 4; + } + return MAX_VEC_SIZE_BYTES; +} + +static int gdb_put_qreg(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + int i; + for (i = 0; i < ARRAY_SIZE(env->QRegs[n].uw); i++) { + env->QRegs[n].uw[i] = ldtul_p(mem_buf); + mem_buf += 4; + } + return MAX_VEC_SIZE_BYTES / 8; +} + +int hexagon_hvx_gdb_write_register(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + if (n < 
NUM_VREGS) { + return gdb_put_vreg(env, mem_buf, n); + } + n -= NUM_VREGS; + + if (n < NUM_QREGS) { + return gdb_put_qreg(env, mem_buf, n); + } + + g_assert_not_reached(); +} diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index b1bfadc3f5..d732b6bb3c 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -33,6 +33,8 @@ int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +int hexagon_hvx_gdb_read_register(CPUHexagonState *env, GByteArray *mem_buf, int n); +int hexagon_hvx_gdb_write_register(CPUHexagonState *env, uint8_t *mem_buf, int n); void hexagon_debug_vreg(CPUHexagonState *env, int regnum); void hexagon_debug_qreg(CPUHexagonState *env, int regnum); From 9073bfd725440da0af44f1ee1e3bcf72e9de39b6 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Thu, 4 May 2023 12:37:36 -0300 Subject: [PATCH 44/44] Hexagon (linux-user/hexagon): handle breakpoints This enables LLDB to work with hexagon linux-user mode through the GDB remote protocol. Helped-by: Richard Henderson Signed-off-by: Matheus Tavares Bernardino Reviewed-by: Richard Henderson Signed-off-by: Taylor Simpson Message-Id: --- linux-user/hexagon/cpu_loop.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/linux-user/hexagon/cpu_loop.c b/linux-user/hexagon/cpu_loop.c index b84e25bf71..7f1499ed28 100644 --- a/linux-user/hexagon/cpu_loop.c +++ b/linux-user/hexagon/cpu_loop.c @@ -63,6 +63,9 @@ void cpu_loop(CPUHexagonState *env) case EXCP_ATOMIC: cpu_exec_step_atomic(cs); break; + case EXCP_DEBUG: + force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0); + break; default: EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n", trapnr);