diff --git a/MAINTAINERS b/MAINTAINERS index d0e604c725..1b6466496d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -225,6 +225,7 @@ F: tests/tcg/hexagon/ F: disas/hexagon.c F: configs/targets/hexagon-linux-user/default.mak F: docker/dockerfiles/debian-hexagon-cross.docker +F: gdb-xml/hexagon*.xml Hexagon idef-parser M: Alessandro Di Federico diff --git a/configs/targets/hexagon-linux-user.mak b/configs/targets/hexagon-linux-user.mak index 003ed0a408..2765a4c563 100644 --- a/configs/targets/hexagon-linux-user.mak +++ b/configs/targets/hexagon-linux-user.mak @@ -1 +1,2 @@ TARGET_ARCH=hexagon +TARGET_XML_FILES=gdb-xml/hexagon-core.xml gdb-xml/hexagon-hvx.xml diff --git a/configure b/configure index bcab685cfd..2a556d14c9 100755 --- a/configure +++ b/configure @@ -1269,7 +1269,7 @@ fi : ${cross_cc_armeb="$cross_cc_arm"} : ${cross_cc_cflags_armeb="-mbig-endian"} : ${cross_cc_hexagon="hexagon-unknown-linux-musl-clang"} -: ${cross_cc_cflags_hexagon="-mv67 -O2 -static"} +: ${cross_cc_cflags_hexagon="-mv73 -O2 -static"} : ${cross_cc_cflags_i386="-m32"} : ${cross_cc_cflags_ppc="-m32 -mbig-endian"} : ${cross_cc_cflags_ppc64="-m64 -mbig-endian"} diff --git a/gdb-xml/hexagon-core.xml b/gdb-xml/hexagon-core.xml new file mode 100644 index 0000000000..e181163cff --- /dev/null +++ b/gdb-xml/hexagon-core.xml @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gdb-xml/hexagon-hvx.xml b/gdb-xml/hexagon-hvx.xml new file mode 100644 index 0000000000..5f2e220733 --- /dev/null +++ b/gdb-xml/hexagon-hvx.xml @@ -0,0 +1,96 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c index 0760d78685..be18568d0a 100644 --- a/gdbstub/gdbstub.c +++ b/gdbstub/gdbstub.c @@ -777,6 +777,10 @@ typedef void 
(*GdbCmdHandler)(GArray *params, void *user_ctx); /* * cmd_startswith -> cmd is compared using startswith * + * allow_stop_reply -> true iff the gdbstub can respond to this command with a + * "stop reply" packet. The list of commands that accept such response is + * defined in the GDB Remote Serial Protocol documentation. See: + * https://sourceware.org/gdb/onlinedocs/gdb/Stop-Reply-Packets.html#Stop-Reply-Packets. * * schema definitions: * Each schema parameter entry consists of 2 chars, @@ -802,6 +806,7 @@ typedef struct GdbCmdParseEntry { const char *cmd; bool cmd_startswith; const char *schema; + bool allow_stop_reply; } GdbCmdParseEntry; static inline int startswith(const char *string, const char *pattern) @@ -835,6 +840,7 @@ static int process_string_cmd(void *user_ctx, const char *data, } } + gdbserver_state.allow_stop_reply = cmd->allow_stop_reply; cmd->handler(params, user_ctx); return 0; } @@ -1283,11 +1289,14 @@ static void handle_v_attach(GArray *params, void *user_ctx) gdbserver_state.g_cpu = cpu; gdbserver_state.c_cpu = cpu; - g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); - gdb_append_thread_id(cpu, gdbserver_state.str_buf); - g_string_append_c(gdbserver_state.str_buf, ';'); + if (gdbserver_state.allow_stop_reply) { + g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); + gdb_append_thread_id(cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + gdbserver_state.allow_stop_reply = false; cleanup: - gdb_put_strbuf(); + gdb_put_strbuf(); + } } static void handle_v_kill(GArray *params, void *user_ctx) @@ -1310,12 +1319,14 @@ static const GdbCmdParseEntry gdb_v_commands_table[] = { .handler = handle_v_cont, .cmd = "Cont", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "s0" }, { .handler = handle_v_attach, .cmd = "Attach;", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "l0" }, { @@ -1698,10 +1709,13 @@ static void handle_gen_set(GArray *params, void 
*user_ctx) static void handle_target_halt(GArray *params, void *user_ctx) { - g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); - gdb_append_thread_id(gdbserver_state.c_cpu, gdbserver_state.str_buf); - g_string_append_c(gdbserver_state.str_buf, ';'); - gdb_put_strbuf(); + if (gdbserver_state.allow_stop_reply) { + g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); + gdb_append_thread_id(gdbserver_state.c_cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + gdb_put_strbuf(); + gdbserver_state.allow_stop_reply = false; + } /* * Remove all the breakpoints when this query is issued, * because gdb is doing an initial connect and the state @@ -1725,7 +1739,8 @@ static int gdb_handle_packet(const char *line_buf) static const GdbCmdParseEntry target_halted_cmd_desc = { .handler = handle_target_halt, .cmd = "?", - .cmd_startswith = 1 + .cmd_startswith = 1, + .allow_stop_reply = true, }; cmd_parser = &target_halted_cmd_desc; } @@ -1736,6 +1751,7 @@ static int gdb_handle_packet(const char *line_buf) .handler = handle_continue, .cmd = "c", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "L0" }; cmd_parser = &continue_cmd_desc; @@ -1747,6 +1763,7 @@ static int gdb_handle_packet(const char *line_buf) .handler = handle_cont_with_sig, .cmd = "C", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "l0" }; cmd_parser = &cont_with_sig_cmd_desc; @@ -1785,6 +1802,7 @@ static int gdb_handle_packet(const char *line_buf) .handler = handle_step, .cmd = "s", .cmd_startswith = 1, + .allow_stop_reply = true, .schema = "L0" }; cmd_parser = &step_cmd_desc; @@ -1976,6 +1994,7 @@ void gdb_read_byte(uint8_t ch) { uint8_t reply; + gdbserver_state.allow_stop_reply = false; #ifndef CONFIG_USER_ONLY if (gdbserver_state.last_packet->len) { /* Waiting for a response to the last packet. 
If we see the start diff --git a/gdbstub/internals.h b/gdbstub/internals.h index 94ddff4495..33d21d6488 100644 --- a/gdbstub/internals.h +++ b/gdbstub/internals.h @@ -65,6 +65,11 @@ typedef struct GDBState { GByteArray *mem_buf; int sstep_flags; int supported_sstep_flags; + /* + * Whether we are allowed to send a stop reply packet at this moment. + * Must be cleared (set to false) after sending the stop reply itself. + */ + bool allow_stop_reply; } GDBState; /* lives in main gdbstub.c */ diff --git a/gdbstub/softmmu.c b/gdbstub/softmmu.c index 22ecd09d04..99d994e6bf 100644 --- a/gdbstub/softmmu.c +++ b/gdbstub/softmmu.c @@ -43,6 +43,7 @@ static void reset_gdbserver_state(void) g_free(gdbserver_state.processes); gdbserver_state.processes = NULL; gdbserver_state.process_num = 0; + gdbserver_state.allow_stop_reply = false; } /* @@ -139,6 +140,10 @@ static void gdb_vm_state_change(void *opaque, bool running, RunState state) return; } + if (!gdbserver_state.allow_stop_reply) { + return; + } + gdb_append_thread_id(cpu, tid); switch (state) { @@ -205,6 +210,7 @@ static void gdb_vm_state_change(void *opaque, bool running, RunState state) send_packet: gdb_put_packet(buf->str); + gdbserver_state.allow_stop_reply = false; /* disable single step if it was enabled */ cpu_single_step(cpu, 0); @@ -422,8 +428,11 @@ void gdb_exit(int code) trace_gdbstub_op_exiting((uint8_t)code); - snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); - gdb_put_packet(buf); + if (gdbserver_state.allow_stop_reply) { + snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); + gdb_put_packet(buf); + gdbserver_state.allow_stop_reply = false; + } qemu_chr_fe_deinit(&gdbserver_system_state.chr, true); } diff --git a/gdbstub/user.c b/gdbstub/user.c index 80488b6bb9..5b375be1d9 100644 --- a/gdbstub/user.c +++ b/gdbstub/user.c @@ -108,8 +108,11 @@ void gdb_exit(int code) trace_gdbstub_op_exiting((uint8_t)code); - snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); - gdb_put_packet(buf); + if (gdbserver_state.allow_stop_reply) { 
+ snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code); + gdb_put_packet(buf); + gdbserver_state.allow_stop_reply = false; + } } int gdb_handlesig(CPUState *cpu, int sig) @@ -127,11 +130,14 @@ int gdb_handlesig(CPUState *cpu, int sig) if (sig != 0) { gdb_set_stop_cpu(cpu); - g_string_printf(gdbserver_state.str_buf, - "T%02xthread:", gdb_target_signal_to_gdb(sig)); - gdb_append_thread_id(cpu, gdbserver_state.str_buf); - g_string_append_c(gdbserver_state.str_buf, ';'); - gdb_put_strbuf(); + if (gdbserver_state.allow_stop_reply) { + g_string_printf(gdbserver_state.str_buf, + "T%02xthread:", gdb_target_signal_to_gdb(sig)); + gdb_append_thread_id(cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + gdb_put_strbuf(); + gdbserver_state.allow_stop_reply = false; + } } /* * gdb_put_packet() might have detected that the peer terminated the @@ -174,12 +180,14 @@ void gdb_signalled(CPUArchState *env, int sig) { char buf[4]; - if (!gdbserver_state.init || gdbserver_user_state.fd < 0) { + if (!gdbserver_state.init || gdbserver_user_state.fd < 0 || + !gdbserver_state.allow_stop_reply) { return; } snprintf(buf, sizeof(buf), "X%02x", gdb_target_signal_to_gdb(sig)); gdb_put_packet(buf); + gdbserver_state.allow_stop_reply = false; } static void gdb_accept_init(int fd) diff --git a/linux-user/hexagon/cpu_loop.c b/linux-user/hexagon/cpu_loop.c index b84e25bf71..7f1499ed28 100644 --- a/linux-user/hexagon/cpu_loop.c +++ b/linux-user/hexagon/cpu_loop.c @@ -63,6 +63,9 @@ void cpu_loop(CPUHexagonState *env) case EXCP_ATOMIC: cpu_exec_step_atomic(cs); break; + case EXCP_DEBUG: + force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0); + break; default: EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n", trapnr); diff --git a/linux-user/hexagon/target_elf.h b/linux-user/hexagon/target_elf.h index b4e9f40527..36056fc9f0 100644 --- a/linux-user/hexagon/target_elf.h +++ b/linux-user/hexagon/target_elf.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 
Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,7 +20,10 @@ static inline const char *cpu_get_model(uint32_t eflags) { - /* For now, treat anything newer than v5 as a v67 */ + static char buf[32]; + int err; + + /* For now, treat anything newer than v5 as a v73 */ /* FIXME - Disable instructions that are newer than the specified arch */ if (eflags == 0x04 || /* v5 */ eflags == 0x05 || /* v55 */ @@ -30,11 +33,18 @@ static inline const char *cpu_get_model(uint32_t eflags) eflags == 0x65 || /* v65 */ eflags == 0x66 || /* v66 */ eflags == 0x67 || /* v67 */ - eflags == 0x8067 /* v67t */ + eflags == 0x8067 || /* v67t */ + eflags == 0x68 || /* v68 */ + eflags == 0x69 || /* v69 */ + eflags == 0x71 || /* v71 */ + eflags == 0x8071 || /* v71t */ + eflags == 0x73 /* v73 */ ) { - return "v67"; + return "v73"; } - return "unknown"; + + err = snprintf(buf, sizeof(buf), "unknown (0x%x)", eflags); + return err >= 0 && err < sizeof(buf) ? 
buf : "unknown"; } #endif diff --git a/meson.build b/meson.build index 41c87c441f..0a5cdefd4d 100644 --- a/meson.build +++ b/meson.build @@ -2105,6 +2105,7 @@ endif config_host_data.set('CONFIG_GTK', gtk.found()) config_host_data.set('CONFIG_VTE', vte.found()) config_host_data.set('CONFIG_GTK_CLIPBOARD', have_gtk_clipboard) +config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_parser')) config_host_data.set('CONFIG_LIBATTR', have_old_libattr) config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found()) config_host_data.set('CONFIG_EBPF', libbpf.found()) diff --git a/target/hexagon/README b/target/hexagon/README index ebafc78b1c..43811178e9 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -4,10 +4,10 @@ is a wide vector coprocessor designed for high performance computer vision, image processing, machine learning, and other workloads. The following versions of the Hexagon core are supported - Scalar core: v67 - https://developer.qualcomm.com/downloads/qualcomm-hexagon-v67-programmer-s-reference-manual - HVX extension: v66 - https://developer.qualcomm.com/downloads/qualcomm-hexagon-v66-hvx-programmer-s-reference-manual + Scalar core: v73 + https://developer.qualcomm.com/downloads/qualcomm-hexagon-v73-programmers-reference-manual-rev-aa + HVX extension: v73 + https://developer.qualcomm.com/downloads/qualcomm-hexagon-v73-hvx-programmers-reference-manual-rev-aa We presented an overview of the project at the 2019 KVM Forum. 
https://kvmforum2019.sched.com/event/Tmwc/qemu-hexagon-automatic-translation-of-the-isa-manual-pseudcode-to-tiny-code-instructions-of-a-vliw-architecture-niccolo-izzo-revng-taylor-simpson-qualcomm-innovation-center @@ -87,7 +87,7 @@ tcg_funcs_generated.c.inc TCGv RsV = hex_gpr[insn->regno[1]]; TCGv RtV = hex_gpr[insn->regno[2]]; gen_helper_A2_add(RdV, cpu_env, RsV, RtV); - gen_log_reg_write(RdN, RdV); + gen_log_reg_write(ctx, RdN, RdV); } helper_funcs_generated.c.inc @@ -186,7 +186,7 @@ We also generate an analyze_ function for each instruction. Currently, these functions record the writes to registers by calling ctx_log_*. During gen_start_packet, we invoke the analyze_ function for each instruction in the packet, and we mark the implicit writes. After the analysis is performed, -we initialize hex_new_value for each of the predicated assignments. +we initialize the result register for each of the predicated assignments. In addition to instruction semantics, we use a generator to create the decode tree. This generation is also a two step process. The first step is to run @@ -304,4 +304,4 @@ Here are some handy places to set breakpoints At the start of execution of a packet for a given PC br helper_debug_start_packet if env->gpr[41] == 0xdeadbeef At the end of execution of a packet for a given PC - br helper_debug_commit_end if env->this_PC == 0xdeadbeef + br helper_debug_commit_end if this_PC == 0xdeadbeef diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index da79b41c4d..d053d68487 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -224,6 +224,7 @@ void arch_fpop_start(CPUHexagonState *env) void arch_fpop_end(CPUHexagonState *env) { + const bool pkt_need_commit = true; int flags = get_float_exception_flags(&env->fp_status); if (flags != 0) { SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE); diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 9874d1658f..21d457fa4a 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,6 +52,12 @@ DEF_ATTRIB(REGWRSIZE_4B, "Memory width is 4 bytes", "", "") DEF_ATTRIB(REGWRSIZE_8B, "Memory width is 8 bytes", "", "") DEF_ATTRIB(MEMLIKE, "Memory-like instruction", "", "") DEF_ATTRIB(MEMLIKE_PACKET_RULES, "follows Memory-like packet rules", "", "") +DEF_ATTRIB(RELEASE, "Releases a lock", "", "") +DEF_ATTRIB(ACQUIRE, "Acquires a lock", "", "") + +DEF_ATTRIB(RLS_INNER, "Store release inner visibility", "", "") +DEF_ATTRIB(RLS_ALL_THREAD, "Store release among all threads", "", "") +DEF_ATTRIB(RLS_SAME_THREAD, "Store release with the same thread", "", "") /* V6 Vector attributes */ DEF_ATTRIB(CVI, "Executes on the HVX extension", "", "") @@ -63,23 +69,27 @@ DEF_ATTRIB(CVI_VP_VS, "Double vector permute/shft insn executes on HVX", "", "") DEF_ATTRIB(CVI_VX, "Multiply instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VX_DV, "Double vector multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VS, "Shift instruction executes on HVX", "", "") +DEF_ATTRIB(CVI_VS_3SRC, "This shift needs to borrow a source register", "", "") 
DEF_ATTRIB(CVI_VS_VX, "Permute/shift and multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VA, "ALU instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VA_DV, "Double vector alu instruction executes on HVX", "", "") DEF_ATTRIB(CVI_4SLOT, "Consumes all the vector execution resources", "", "") DEF_ATTRIB(CVI_TMP, "Transient Memory Load not written to register", "", "") +DEF_ATTRIB(CVI_REMAP, "Register Renaming not written to register file", "", "") DEF_ATTRIB(CVI_GATHER, "CVI Gather operation", "", "") DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "") DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "") DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "") DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "") +DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "") /* Change-of-flow attributes */ DEF_ATTRIB(JUMP, "Jump-type instruction", "", "") DEF_ATTRIB(INDIRECT, "Absolute register jump", "", "") DEF_ATTRIB(CALL, "Function call instruction", "", "") DEF_ATTRIB(COF, "Change-of-flow instruction", "", "") +DEF_ATTRIB(HINTED_COF, "This instruction is a hinted change-of-flow", "", "") DEF_ATTRIB(CONDEXEC, "May be cancelled by a predicate", "", "") DEF_ATTRIB(DOTNEWVALUE, "Uses a register value generated in this pkt", "", "") DEF_ATTRIB(NEWCMPJUMP, "Compound compare and jump", "", "") @@ -102,6 +112,10 @@ DEF_ATTRIB(IMPLICIT_WRITES_P1, "Writes Predicate 1", "", "UREG.P1") DEF_ATTRIB(IMPLICIT_WRITES_P2, "Writes Predicate 1", "", "UREG.P2") DEF_ATTRIB(IMPLICIT_WRITES_P3, "May write Predicate 3", "", "UREG.P3") DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the PC register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P0, "Reads the P0 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "") DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", 
"") DEF_ATTRIB(WRITES_PRED_REG, "Writes a predicate register", "", "") DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "") @@ -140,6 +154,8 @@ DEF_ATTRIB(L2FETCH, "Instruction is l2fetch type", "", "") DEF_ATTRIB(ICINVA, "icinva", "", "") DEF_ATTRIB(DCCLEANINVA, "dccleaninva", "", "") +DEF_ATTRIB(NO_INTRINSIC, "Don't generate an intrinsic", "", "") + /* Documentation Notes */ DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "") DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "") @@ -148,7 +164,11 @@ DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "") DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "") DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "") DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "") +DEF_ATTRIB(NOTE_NOVP, "Cannot be paired with a HVX permute instruction", "", "") +DEF_ATTRIB(NOTE_VA_UNARY, "Combined with HVX ALU op (must be unary)", "", "") +/* V6 MMVector Notes for Documentation */ +DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "") /* Restrictions to make note of */ DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "") DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "") diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index ab40cfc283..f155936289 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,9 +24,32 @@ #include "hw/qdev-properties.h" #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" +#include "exec/gdbstub.h" -static void hexagon_v67_cpu_init(Object *obj) +static void hexagon_v67_cpu_init(Object *obj) { } +static void hexagon_v68_cpu_init(Object *obj) { } +static void hexagon_v69_cpu_init(Object *obj) { } +static void hexagon_v71_cpu_init(Object *obj) { } +static void hexagon_v73_cpu_init(Object *obj) { } + +static void hexagon_cpu_list_entry(gpointer data, gpointer user_data) { + ObjectClass *oc = data; + char *name = g_strdup(object_class_get_name(oc)); + if (g_str_has_suffix(name, HEXAGON_CPU_TYPE_SUFFIX)) { + name[strlen(name) - strlen(HEXAGON_CPU_TYPE_SUFFIX)] = '\0'; + } + qemu_printf(" %s\n", name); + g_free(name); +} + +void hexagon_cpu_list(void) +{ + GSList *list; + list = object_class_get_list_sorted(TYPE_HEXAGON_CPU, false); + qemu_printf("Available CPUs:\n"); + g_slist_foreach(list, hexagon_cpu_list_entry, NULL); + g_slist_free(list); } static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) @@ -52,6 +75,8 @@ static Property hexagon_lldb_compat_property = static Property hexagon_lldb_stack_adjust_property = DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, qdev_prop_uint32, target_ulong); +static Property hexagon_short_circuit_property = + DEFINE_PROP_BOOL("short-circuit", HexagonCPU, short_circuit, true); const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", @@ -315,6 +340,11 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) return; } + gdb_register_coprocessor(cs, hexagon_hvx_gdb_read_register, + hexagon_hvx_gdb_write_register, + NUM_VREGS + NUM_QREGS, + "hexagon-hvx.xml", 0); + qemu_init_vcpu(cs); cpu_reset(cs); @@ -328,6 +358,7 @@ static void hexagon_cpu_init(Object 
*obj) cpu_set_cpustate_pointers(cpu); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_compat_property); qdev_property_add_static(DEVICE(obj), &hexagon_lldb_stack_adjust_property); + qdev_property_add_static(DEVICE(obj), &hexagon_short_circuit_property); } #include "hw/core/tcg-cpu-ops.h" @@ -358,8 +389,9 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->get_pc = hexagon_cpu_get_pc; cc->gdb_read_register = hexagon_gdb_read_register; cc->gdb_write_register = hexagon_gdb_write_register; - cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS; + cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS; cc->gdb_stop_before_watchpoint = true; + cc->gdb_core_xml_file = "hexagon-core.xml"; cc->disas_set_info = hexagon_cpu_disas_set_info; cc->tcg_ops = &hexagon_tcg_ops; } @@ -382,6 +414,10 @@ static const TypeInfo hexagon_cpu_type_infos[] = { .class_init = hexagon_cpu_class_init, }, DEFINE_CPU(TYPE_HEXAGON_CPU_V67, hexagon_v67_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V68, hexagon_v68_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V69, hexagon_v69_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V71, hexagon_v71_cpu_init), + DEFINE_CPU(TYPE_HEXAGON_CPU_V73, hexagon_v73_cpu_init), }; DEFINE_TYPES(hexagon_cpu_type_infos) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 81b663ecfb..bfcb1057dd 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -43,6 +43,13 @@ #define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU #define TYPE_HEXAGON_CPU_V67 HEXAGON_CPU_TYPE_NAME("v67") +#define TYPE_HEXAGON_CPU_V68 HEXAGON_CPU_TYPE_NAME("v68") +#define TYPE_HEXAGON_CPU_V69 HEXAGON_CPU_TYPE_NAME("v69") +#define TYPE_HEXAGON_CPU_V71 HEXAGON_CPU_TYPE_NAME("v71") +#define TYPE_HEXAGON_CPU_V73 HEXAGON_CPU_TYPE_NAME("v73") + +void hexagon_cpu_list(void); +#define cpu_list hexagon_cpu_list #define MMU_USER_IDX 0 @@ -78,28 +85,21 @@ typedef struct { typedef struct CPUArchState { target_ulong gpr[TOTAL_PER_THREAD_REGS]; target_ulong pred[NUM_PREGS]; - target_ulong 
branch_taken; /* For comparing with LLDB on target - see adjust_stack_ptrs function */ target_ulong last_pc_dumped; target_ulong stack_start; uint8_t slot_cancelled; - target_ulong new_value[TOTAL_PER_THREAD_REGS]; + target_ulong new_value_usr; /* * Only used when HEX_DEBUG is on, but unconditionally included * to reduce recompile time when turning HEX_DEBUG on/off. */ - target_ulong this_PC; target_ulong reg_written[TOTAL_PER_THREAD_REGS]; - target_ulong new_pred_value[NUM_PREGS]; - target_ulong pred_written; - MemLog mem_log_stores[STORES_MAX]; - target_ulong pkt_has_store_s1; - target_ulong dczero_addr; float_status fp_status; @@ -146,6 +146,7 @@ struct ArchCPU { bool lldb_compat; target_ulong lldb_stack_adjust; + bool short_circuit; }; #include "cpu_bits.h" diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index 041c8de751..946c55cc71 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -797,7 +797,26 @@ static bool decode_parsebits_is_loopend(uint32_t encoding32) return bits == 0x2; } -static void +static bool has_valid_slot_assignment(Packet *pkt) +{ + int used_slots = 0; + for (int i = 0; i < pkt->num_insns; i++) { + int slot_mask; + Insn *insn = &pkt->insn[i]; + if (decode_opcode_ends_loop(insn->opcode)) { + /* We overload slot 0 for endloop. 
*/ + continue; + } + slot_mask = 1 << insn->slot; + if (used_slots & slot_mask) { + return false; + } + used_slots |= slot_mask; + } + return true; +} + +static bool decode_set_slot_number(Packet *pkt) { int slot; @@ -886,6 +905,8 @@ decode_set_slot_number(Packet *pkt) /* Then push it to slot0 */ pkt->insn[slot1_iidx].slot = 0; } + + return has_valid_slot_assignment(pkt); } /* @@ -961,8 +982,11 @@ int decode_packet(int max_words, const uint32_t *words, Packet *pkt, decode_apply_extenders(pkt); if (!disas_only) { decode_remove_extenders(pkt); + if (!decode_set_slot_number(pkt)) { + /* Invalid packet */ + return 0; + } } - decode_set_slot_number(pkt); decode_fill_newvalue_regno(pkt); if (pkt->pkt_has_hvx) { diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c index 46083da620..54d37e006e 100644 --- a/target/hexagon/gdbstub.c +++ b/target/hexagon/gdbstub.c @@ -25,6 +25,14 @@ int hexagon_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) HexagonCPU *cpu = HEXAGON_CPU(cs); CPUHexagonState *env = &cpu->env; + if (n == HEX_REG_P3_0_ALIASED) { + uint32_t p3_0 = 0; + for (int i = 0; i < NUM_PREGS; i++) { + p3_0 = deposit32(p3_0, i * 8, 8, env->pred[i]); + } + return gdb_get_regl(mem_buf, p3_0); + } + if (n < TOTAL_PER_THREAD_REGS) { return gdb_get_regl(mem_buf, env->gpr[n]); } @@ -37,6 +45,14 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) HexagonCPU *cpu = HEXAGON_CPU(cs); CPUHexagonState *env = &cpu->env; + if (n == HEX_REG_P3_0_ALIASED) { + uint32_t p3_0 = ldtul_p(mem_buf); + for (int i = 0; i < NUM_PREGS; i++) { + env->pred[i] = extract32(p3_0, i * 8, 8); + } + return sizeof(target_ulong); + } + if (n < TOTAL_PER_THREAD_REGS) { env->gpr[n] = ldtul_p(mem_buf); return sizeof(target_ulong); @@ -44,3 +60,71 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) g_assert_not_reached(); } + +static int gdb_get_vreg(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + int total = 0; + int i; + for (i = 0; i < 
ARRAY_SIZE(env->VRegs[n].uw); i++) { + total += gdb_get_regl(mem_buf, env->VRegs[n].uw[i]); + } + return total; +} + +static int gdb_get_qreg(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + int total = 0; + int i; + for (i = 0; i < ARRAY_SIZE(env->QRegs[n].uw); i++) { + total += gdb_get_regl(mem_buf, env->QRegs[n].uw[i]); + } + return total; +} + +int hexagon_hvx_gdb_read_register(CPUHexagonState *env, GByteArray *mem_buf, int n) +{ + if (n < NUM_VREGS) { + return gdb_get_vreg(env, mem_buf, n); + } + n -= NUM_VREGS; + + if (n < NUM_QREGS) { + return gdb_get_qreg(env, mem_buf, n); + } + + g_assert_not_reached(); +} + +static int gdb_put_vreg(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + int i; + for (i = 0; i < ARRAY_SIZE(env->VRegs[n].uw); i++) { + env->VRegs[n].uw[i] = ldtul_p(mem_buf); + mem_buf += 4; + } + return MAX_VEC_SIZE_BYTES; +} + +static int gdb_put_qreg(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + int i; + for (i = 0; i < ARRAY_SIZE(env->QRegs[n].uw); i++) { + env->QRegs[n].uw[i] = ldtul_p(mem_buf); + mem_buf += 4; + } + return MAX_VEC_SIZE_BYTES / 8; +} + +int hexagon_hvx_gdb_write_register(CPUHexagonState *env, uint8_t *mem_buf, int n) +{ + if (n < NUM_VREGS) { + return gdb_put_vreg(env, mem_buf, n); + } + n -= NUM_VREGS; + + if (n < NUM_QREGS) { + return gdb_put_qreg(env, mem_buf, n); + } + + g_assert_not_reached(); +} diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index c74443da78..00868cc6cb 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -35,47 +35,55 @@ def analyze_opn_old(f, tag, regtype, regid, regno): predicated = "true" if is_predicated(tag) else "false" if regtype == "R": if regid in {"ss", "tt"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n") elif regid in {"dd", "ee", "xx", "yy"}: f.write(f" const int {regN} = 
insn->regno[{regno}];\n") f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n") elif regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") elif regid in {"d", "e", "x", "y"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_pred_read(ctx, {regN});\n") elif regid in {"d", "e", "x"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_pred_write(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write( - f"// const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n" + f" const int {regN} = insn->regno[{regno}] " + "+ HEX_REG_SA0;\n" ) + f.write(f" ctx_log_reg_read_pair(ctx, {regN});\n") elif regid == "dd": f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n") f.write(f" ctx_log_reg_write_pair(ctx, {regN}, {predicated});\n") elif regid == "s": f.write( - f"// const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n" + f" const int {regN} = insn->regno[{regno}] " + "+ HEX_REG_SA0;\n" ) + f.write(f" ctx_log_reg_read(ctx, {regN});\n") elif regid == "d": f.write(f" const int {regN} = insn->regno[{regno}] " "+ HEX_REG_SA0;\n") f.write(f" ctx_log_reg_write(ctx, {regN}, {predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid == "u": - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = 
insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": newv = "EXT_DFL" if hex_common.is_new_result(tag): @@ -88,22 +96,25 @@ def analyze_opn_old(f, tag, regtype, regid, regno): f" ctx_log_vreg_write_pair(ctx, {regN}, {newv}, " f"{predicated});\n" ) elif regid in {"uu", "vv"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read_pair(ctx, {regN});\n") elif regid in {"s", "u", "v", "w"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read(ctx, {regN});\n") elif regid in {"d", "x", "y"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_vreg_write(ctx, {regN}, {newv}, " f"{predicated});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"d", "e", "x"}: f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" ctx_log_qreg_write(ctx, {regN});\n") elif regid in {"s", "t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_qreg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "G": if regid in {"dd"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") @@ -114,7 +125,7 @@ def analyze_opn_old(f, tag, regtype, regid, regno): elif regid in {"s"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "S": if regid in {"dd"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") @@ -125,30 +136,33 @@ def analyze_opn_old(f, tag, 
regtype, regid, regno): elif regid in {"s"}: f.write(f"// const int {regN} = insn->regno[{regno}];\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def analyze_opn_new(f, tag, regtype, regid, regno): regN = f"{regtype}{regid}N" if regtype == "N": if regid in {"s", "t"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_reg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"t", "u", "v"}: - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_pred_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid == "s": - f.write(f"// const int {regN} = insn->regno[{regno}];\n") + f.write(f" const int {regN} = insn->regno[{regno}];\n") + f.write(f" ctx_log_vreg_read(ctx, {regN});\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def analyze_opn(f, tag, regtype, regid, toss, numregs, i): @@ -160,9 +174,9 @@ def analyze_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): analyze_opn_new(f, tag, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -174,8 +188,10 @@ def analyze_opn(f, tag, regtype, regid, toss, numregs, i): ## Insn *insn G_GNUC_UNUSED = ctx->insn; ## 
const int RdN = insn->regno[0]; ## ctx_log_reg_write(ctx, RdN, false); -## // const int RsN = insn->regno[1]; -## // const int RtN = insn->regno[2]; +## const int RsN = insn->regno[1]; +## ctx_log_reg_read(ctx, RsN); +## const int RtN = insn->regno[2]; +## ctx_log_reg_read(ctx, RtN); ## } ## def gen_analyze_func(f, tag, regs, imms): @@ -193,8 +209,11 @@ def gen_analyze_func(f, tag, regs, imms): has_generated_helper = not hex_common.skip_qemu_helper( tag ) and not hex_common.is_idef_parser_enabled(tag) - if has_generated_helper and "A_SCALAR_LOAD" in hex_common.attribdict[tag]: - f.write(" ctx->need_pkt_has_store_s1 = true;\n") + + ## Mark HVX instructions with generated helpers + if (has_generated_helper and + "A_CVI" in hex_common.attribdict[tag]): + f.write(" ctx->has_hvx_helper = true;\n") f.write("}\n\n") diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index c73d792580..e80550f94e 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -87,9 +87,9 @@ def gen_helper_arg_opn(f, regtype, regid, i, tag): elif hex_common.is_new_val(regtype, regid, tag): gen_helper_arg_new(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_arg_imm(f, immlett): @@ -135,7 +135,7 @@ def gen_helper_dest_decl_opn(f, regtype, regid, i): else: gen_helper_dest_decl(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_src_var_ext(f, regtype, regid): @@ -185,7 +185,7 @@ def gen_helper_return_opn(f, regtype, regid, i): else: gen_helper_return(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, 
numregs) ## @@ -239,7 +239,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): else: gen_helper_return_type(f, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) i += 1 if numscalarresults == 0: @@ -262,7 +262,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): # This is the return value of the function continue else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) i += 1 ## For conditional instructions, we pass in the destination register @@ -287,6 +287,8 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", uint32_t pkt_has_multi_cof") + if (hex_common.need_pkt_need_commit(tag)): + f.write(", uint32_t pkt_need_commit") if hex_common.need_PC(tag): if i > 0: @@ -301,7 +303,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_slot(tag): if i > 0: f.write(", ") - f.write("uint32_t slot") + f.write("uint32_t slotval") i += 1 if hex_common.need_part1(tag): if i > 0: @@ -327,7 +329,12 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.is_hvx_reg(regtype): gen_helper_src_var_ext(f, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) + + if hex_common.need_slot(tag): + if "A_LOAD" in hex_common.attribdict[tag]: + f.write(" bool pkt_has_store_s1 = slotval & 0x1;\n") + f.write(" uint32_t slot = slotval >> 1;\n") if "A_FPOP" in hex_common.attribdict[tag]: f.write(" arch_fpop_start(env);\n") diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index 187cd6e04e..3dedd76cb4 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -52,7 +52,7 @@ def gen_def_helper_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_single(regid): f.write(f", 
{def_helper_types[regtype]}") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -86,6 +86,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite + 1 if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -103,6 +105,8 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): def_helper_size = len(regs) + len(imms) + numscalarreadwrite if hex_common.need_pkt_has_multi_cof(tag): def_helper_size += 1 + if hex_common.need_pkt_need_commit(tag): + def_helper_size += 1 if hex_common.need_part1(tag): def_helper_size += 1 if hex_common.need_slot(tag): @@ -156,10 +160,12 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): for immlett, bits, immshift in imms: f.write(", s32") - ## Add the arguments for the instruction pkt_has_multi_cof, slot and - ## part1 (if needed) + ## Add the arguments for the instruction pkt_has_multi_cof, + ## pkt_need_commit, PC, next_PC, slot, and part1 (if needed) if hex_common.need_pkt_has_multi_cof(tag): f.write(", i32") + if hex_common.need_pkt_need_commit(tag): + f.write(', i32') if hex_common.need_PC(tag): f.write(", i32") if hex_common.helper_needs_next_PC(tag): diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index afe68bdb6f..29160fcb1d 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -103,12 +103,29 @@ def main(): continue if tag.startswith("V6_"): continue - if tag.startswith("F"): + if ( tag.startswith("F") and + tag not in { + "F2_sfimm_p", + "F2_sfimm_n", + "F2_dfimm_p", + "F2_dfimm_n", + "F2_dfmpyll", + "F2_dfmpylh" + }): continue if tag.endswith("_locked"): continue if "A_COF" in hex_common.attribdict[tag]: continue + if ( 
tag.startswith('R6_release_') ): + continue + ## Skip instructions that are incompatible with short-circuit + ## packet register writes + if ( tag == 'S2_insert' or + tag == 'S2_insert_rp' or + tag == 'S2_asr_r_svw_trun' or + tag == 'A2_swiz' ): + continue regs = tagregs[tag] imms = tagimms[tag] @@ -130,7 +147,7 @@ def main(): elif is_single_new: arguments.append(f"{prefix}{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) for immlett, bits, immshift in imms: arguments.append(hex_common.imm_name(immlett)) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 329e7a1024..d78d99d155 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -500,6 +500,38 @@ #define fGEN_TCG_Y2_icinva(SHORTCODE) \ do { RsV = RsV; } while (0) +/* + * allocframe(#uiV) + * RxV == r29 + */ +#define fGEN_TCG_S2_allocframe(SHORTCODE) \ + gen_allocframe(ctx, RxV, uiV) + +/* sub-instruction version (no RxV, so handle it manually) */ +#define fGEN_TCG_SS2_allocframe(SHORTCODE) \ + do { \ + TCGv r29 = tcg_temp_new(); \ + tcg_gen_mov_tl(r29, hex_gpr[HEX_REG_SP]); \ + gen_allocframe(ctx, r29, uiV); \ + gen_log_reg_write(ctx, HEX_REG_SP, r29); \ + } while (0) + +/* + * Rdd32 = deallocframe(Rs32):raw + * RddV == r31:30 + * RsV == r30 + */ +#define fGEN_TCG_L2_deallocframe(SHORTCODE) \ + gen_deallocframe(ctx, RddV, RsV) + +/* sub-instruction version (no RddV/RsV, so handle it manually) */ +#define fGEN_TCG_SL2_deallocframe(SHORTCODE) \ + do { \ + TCGv_i64 r31_30 = tcg_temp_new_i64(); \ + gen_deallocframe(ctx, r31_30, hex_gpr[HEX_REG_FP]); \ + gen_log_reg_write_pair(ctx, HEX_REG_FP, r31_30); \ + } while (0) + /* * dealloc_return * Assembler mapped to @@ -515,7 +547,7 @@ do { \ TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); \ gen_return(ctx, RddV, hex_gpr[HEX_REG_FP]); \ - gen_log_reg_write_pair(HEX_REG_FP, RddV); \ + gen_log_reg_write_pair(ctx, HEX_REG_FP, RddV); \ } 
while (0) /* @@ -549,9 +581,9 @@ #define fGEN_TCG_SL2_return_f(SHORTCODE) \ gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_pred[0]) #define fGEN_TCG_SL2_return_tnew(SHORTCODE) \ - gen_cond_return_subinsn(ctx, TCG_COND_EQ, hex_new_pred_value[0]) + gen_cond_return_subinsn(ctx, TCG_COND_EQ, ctx->new_pred_value[0]) #define fGEN_TCG_SL2_return_fnew(SHORTCODE) \ - gen_cond_return_subinsn(ctx, TCG_COND_NE, hex_new_pred_value[0]) + gen_cond_return_subinsn(ctx, TCG_COND_NE, ctx->new_pred_value[0]) /* * Mathematical operations with more than one definition require @@ -560,7 +592,16 @@ #define fGEN_TCG_A5_ACS(SHORTCODE) \ do { \ gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \ - gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \ + gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV, \ + tcg_constant_tl(ctx->need_commit)); \ + } while (0) + +#define fGEN_TCG_S2_cabacdecbin(SHORTCODE) \ + do { \ + TCGv p0 = tcg_temp_new(); \ + gen_helper_cabacdecbin_pred(p0, RssV, RttV); \ + gen_helper_cabacdecbin_val(RddV, RssV, RttV); \ + gen_log_pred_write(ctx, 0, p0); \ } while (0) /* @@ -653,6 +694,8 @@ gen_call(ctx, riV) #define fGEN_TCG_J2_callr(SHORTCODE) \ gen_callr(ctx, RsV) +#define fGEN_TCG_J2_callrh(SHORTCODE) \ + gen_callr(ctx, RsV) #define fGEN_TCG_J2_callt(SHORTCODE) \ gen_cond_call(ctx, PuV, TCG_COND_EQ, riV) @@ -663,6 +706,27 @@ #define fGEN_TCG_J2_callrf(SHORTCODE) \ gen_cond_callr(ctx, TCG_COND_NE, PuV, RsV) +#define fGEN_TCG_J2_loop0r(SHORTCODE) \ + gen_loop0r(ctx, RsV, riV) +#define fGEN_TCG_J2_loop1r(SHORTCODE) \ + gen_loop1r(ctx, RsV, riV) +#define fGEN_TCG_J2_loop0i(SHORTCODE) \ + gen_loop0i(ctx, UiV, riV) +#define fGEN_TCG_J2_loop1i(SHORTCODE) \ + gen_loop1i(ctx, UiV, riV) +#define fGEN_TCG_J2_ploop1sr(SHORTCODE) \ + gen_ploopNsr(ctx, 1, RsV, riV) +#define fGEN_TCG_J2_ploop1si(SHORTCODE) \ + gen_ploopNsi(ctx, 1, UiV, riV) +#define fGEN_TCG_J2_ploop2sr(SHORTCODE) \ + gen_ploopNsr(ctx, 2, RsV, riV) +#define fGEN_TCG_J2_ploop2si(SHORTCODE) \ + 
gen_ploopNsi(ctx, 2, UiV, riV) +#define fGEN_TCG_J2_ploop3sr(SHORTCODE) \ + gen_ploopNsr(ctx, 3, RsV, riV) +#define fGEN_TCG_J2_ploop3si(SHORTCODE) \ + gen_ploopNsi(ctx, 3, UiV, riV) + #define fGEN_TCG_J2_endloop0(SHORTCODE) \ gen_endloop0(ctx) #define fGEN_TCG_J2_endloop1(SHORTCODE) \ @@ -847,10 +911,20 @@ #define fGEN_TCG_J4_tstbit0_fp1_jump_t(SHORTCODE) \ gen_cmpnd_tstbit0_jmp(ctx, 1, RsV, TCG_COND_NE, riV) +/* p0 = cmp.eq(r0, #7) */ +#define fGEN_TCG_SA1_cmpeqi(SHORTCODE) \ + do { \ + TCGv p0 = tcg_temp_new(); \ + gen_comparei(TCG_COND_EQ, p0, RsV, uiV); \ + gen_log_pred_write(ctx, 0, p0); \ + } while (0) + #define fGEN_TCG_J2_jump(SHORTCODE) \ gen_jump(ctx, riV) #define fGEN_TCG_J2_jumpr(SHORTCODE) \ gen_jumpr(ctx, RsV) +#define fGEN_TCG_J2_jumprh(SHORTCODE) \ + gen_jumpr(ctx, RsV) #define fGEN_TCG_J4_jumpseti(SHORTCODE) \ do { \ tcg_gen_movi_tl(RdV, UiV); \ @@ -1044,6 +1118,22 @@ gen_jump(ctx, riV); \ } while (0) +/* if (p0.new) r0 = #0 */ +#define fGEN_TCG_SA1_clrtnew(SHORTCODE) \ + do { \ + tcg_gen_movcond_tl(TCG_COND_EQ, RdV, \ + ctx->new_pred_value[0], tcg_constant_tl(0), \ + RdV, tcg_constant_tl(0)); \ + } while (0) + +/* if (!p0.new) r0 = #0 */ +#define fGEN_TCG_SA1_clrfnew(SHORTCODE) \ + do { \ + tcg_gen_movcond_tl(TCG_COND_NE, RdV, \ + ctx->new_pred_value[0], tcg_constant_tl(0), \ + RdV, tcg_constant_tl(0)); \ + } while (0) + #define fGEN_TCG_J2_pause(SHORTCODE) \ do { \ uiV = uiV; \ @@ -1067,9 +1157,9 @@ gen_cond_jumpr31(ctx, TCG_COND_NE, hex_pred[0]) #define fGEN_TCG_SL2_jumpr31_tnew(SHORTCODE) \ - gen_cond_jumpr31(ctx, TCG_COND_EQ, hex_new_pred_value[0]) + gen_cond_jumpr31(ctx, TCG_COND_EQ, ctx->new_pred_value[0]) #define fGEN_TCG_SL2_jumpr31_fnew(SHORTCODE) \ - gen_cond_jumpr31(ctx, TCG_COND_NE, hex_new_pred_value[0]) + gen_cond_jumpr31(ctx, TCG_COND_NE, ctx->new_pred_value[0]) /* Count trailing zeros/ones */ #define fGEN_TCG_S2_ct0(SHORTCODE) \ @@ -1095,6 +1185,24 @@ tcg_gen_extrl_i64_i32(RdV, tmp); \ } while (0) +#define 
fGEN_TCG_S2_insert(SHORTCODE) \ + do { \ + int width = uiV; \ + int offset = UiV; \ + if (width != 0) { \ + if (offset + width > 32) { \ + width = 32 - offset; \ + } \ + tcg_gen_deposit_tl(RxV, RxV, RsV, offset, width); \ + } \ + } while (0) +#define fGEN_TCG_S2_insert_rp(SHORTCODE) \ + gen_insert_rp(ctx, RxV, RsV, RttV) +#define fGEN_TCG_S2_asr_r_svw_trun(SHORTCODE) \ + gen_asr_r_svw_trun(ctx, RdV, RssV, RtV) +#define fGEN_TCG_A2_swiz(SHORTCODE) \ + tcg_gen_bswap_tl(RdV, RsV) + /* Floating point */ #define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \ gen_helper_conv_sf2df(RddV, cpu_env, RsV) @@ -1236,6 +1344,24 @@ uiV = uiV; \ } while (0) +#define fGEN_TCG_L2_loadw_aq(SHORTCODE) SHORTCODE +#define fGEN_TCG_L4_loadd_aq(SHORTCODE) SHORTCODE + +/* Nothing to do for these in qemu, need to suppress compiler warnings */ +#define fGEN_TCG_R6_release_at_vi(SHORTCODE) \ + do { \ + RsV = RsV; \ + } while (0) +#define fGEN_TCG_R6_release_st_vi(SHORTCODE) \ + do { \ + RsV = RsV; \ + } while (0) + +#define fGEN_TCG_S2_storew_rl_at_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S4_stored_rl_at_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S2_storew_rl_st_vi(SHORTCODE) SHORTCODE +#define fGEN_TCG_S4_stored_rl_st_vi(SHORTCODE) SHORTCODE + #define fGEN_TCG_J2_trap0(SHORTCODE) \ do { \ uiV = uiV; \ diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index fcb3384480..c73467b840 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -37,7 +37,7 @@ def genptr_decl_pair_writable(f, tag, regtype, regid, regno): elif regtype == "C": f.write(f" const int {regN} = insn->regno[{regno}] + HEX_REG_SA0;\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) f.write(f" TCGv_i64 {regtype}{regid}V = " f"get_result_gpr_pair(ctx, {regN});\n") @@ -53,7 +53,7 @@ def genptr_decl_writable(f, tag, regtype, regid, regno): f.write(f" const int {regN} = insn->regno[{regno}];\n") f.write(f" TCGv {regtype}{regid}V = 
tcg_temp_new();\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl(f, tag, regtype, regid, regno): @@ -71,7 +71,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid in {"d", "e", "x", "y"}: genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"s", "t", "u", "v"}: f.write( @@ -80,7 +80,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid in {"d", "e", "x"}: genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write(f" TCGv_i64 {regtype}{regid}V = " f"tcg_temp_new_i64();\n") @@ -96,7 +96,7 @@ def genptr_decl(f, tag, regtype, regid, regno): elif regid == "d": genptr_decl_writable(f, tag, regtype, regid, regno) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid == "u": f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -105,7 +105,7 @@ def genptr_decl(f, tag, regtype, regid, regno): "HEX_REG_M0];\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": if regid in {"dd"}: f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -159,7 +159,7 @@ def genptr_decl(f, tag, regtype, regid, regno): f"{regtype}{regid}V_off);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"d", "e", "x"}: f.write(f" const int {regtype}{regid}N = " f"insn->regno[{regno}];\n") @@ -180,9 +180,9 @@ def genptr_decl(f, tag, regtype, regid, regno): if not hex_common.skip_qemu_helper(tag): f.write(f" TCGv_ptr {regtype}{regid}V = " "tcg_temp_new_ptr();\n") else: - print("Bad register parse: ", 
regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl_new(f, tag, regtype, regid, regno): @@ -190,18 +190,18 @@ def genptr_decl_new(f, tag, regtype, regid, regno): if regid in {"s", "t"}: f.write( f" TCGv {regtype}{regid}N = " - f"hex_new_value[insn->regno[{regno}]];\n" + f"get_result_gpr(ctx, insn->regno[{regno}]);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"t", "u", "v"}: f.write( f" TCGv {regtype}{regid}N = " - f"hex_new_pred_value[insn->regno[{regno}]];\n" + f"ctx->new_pred_value[insn->regno[{regno}]];\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid == "s": f.write( @@ -218,9 +218,9 @@ def genptr_decl_new(f, tag, regtype, regid, regno): f"tcg_constant_tl({regtype}{regid}N_num);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i): @@ -232,9 +232,9 @@ def genptr_decl_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): genptr_decl_new(f, tag, regtype, regid, i) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def genptr_decl_imm(f, immlett): @@ -266,7 +266,7 @@ def genptr_src_read(f, tag, regtype, regid): f"hex_gpr[{regtype}{regid}N]);\n" ) elif regid not in {"s", "t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid == "x": f.write( @@ -274,7 +274,7 @@ 
def genptr_src_read(f, tag, regtype, regid): f"hex_pred[{regtype}{regid}N]);\n" ) elif regid not in {"s", "t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "ss": f.write( @@ -287,10 +287,10 @@ def genptr_src_read(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "M": if regid != "u": - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "V": if regid in {"uu", "vv", "xx"}: f.write(f" tcg_gen_gvec_mov(MO_64, {regtype}{regid}V_off,\n") @@ -311,7 +311,7 @@ def genptr_src_read(f, tag, regtype, regid): f.write(f" vreg_src_off(ctx, {regtype}{regid}N),\n") f.write(" sizeof(MMVector), sizeof(MMVector));\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid in {"s", "t", "u", "v"}: if not hex_common.skip_qemu_helper(tag): @@ -326,23 +326,23 @@ def genptr_src_read(f, tag, regtype, regid): ) f.write(" sizeof(MMQReg), sizeof(MMQReg));\n") else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_src_read_new(f, regtype, regid): if regtype == "N": if regid not in {"s", "t"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid not in {"t", "u", "v"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "O": if regid != "s": - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_src_read_opn(f, regtype, regid, tag): @@ -354,9 +354,9 @@ def 
genptr_src_read_opn(f, regtype, regid, tag): elif hex_common.is_new_val(regtype, regid, tag): genptr_src_read_new(f, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i): @@ -370,9 +370,9 @@ def gen_helper_call_opn(f, tag, regtype, regid, toss, numregs, i): elif hex_common.is_new_val(regtype, regid, tag): f.write(f"{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) def gen_helper_decl_imm(f, immlett): @@ -387,7 +387,8 @@ def gen_helper_call_imm(f, immlett): def genptr_dst_write_pair(f, tag, regtype, regid): - f.write(f" gen_log_reg_write_pair({regtype}{regid}N, " f"{regtype}{regid}V);\n") + f.write(f" gen_log_reg_write_pair(ctx, {regtype}{regid}N, " + f"{regtype}{regid}V);\n") def genptr_dst_write(f, tag, regtype, regid): @@ -396,10 +397,11 @@ def genptr_dst_write(f, tag, regtype, regid): genptr_dst_write_pair(f, tag, regtype, regid) elif regid in {"d", "e", "x", "y"}: f.write( - f" gen_log_reg_write({regtype}{regid}N, " f"{regtype}{regid}V);\n" + f" gen_log_reg_write(ctx, {regtype}{regid}N, " + f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "P": if regid in {"d", "e", "x"}: f.write( @@ -407,7 +409,7 @@ def genptr_dst_write(f, tag, regtype, regid): f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "C": if regid == "dd": f.write( @@ -420,9 +422,9 @@ def genptr_dst_write(f, tag, regtype, regid): 
f"{regtype}{regid}V);\n" ) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): @@ -438,12 +440,12 @@ def genptr_dst_write_ext(f, tag, regtype, regid, newv="EXT_DFL"): f"{regtype}{regid}N, {newv});\n" ) elif regid not in {"dd", "d", "x"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) elif regtype == "Q": if regid not in {"d", "e", "x"}: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) else: - print("Bad register parse: ", regtype, regid) + hex_common.bad_register(regtype, regid) def genptr_dst_write_opn(f, regtype, regid, tag): @@ -466,7 +468,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag): else: genptr_dst_write(f, tag, regtype, regid) else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## @@ -481,7 +483,7 @@ def genptr_dst_write_opn(f, regtype, regid, tag): ## TCGv RsV = hex_gpr[insn->regno[1]]; ## TCGv RtV = hex_gpr[insn->regno[2]]; ## -## gen_log_reg_write(RdN, RdV); +## gen_log_reg_write(ctx, RdN, RdV); ## } ## ## where depends on hex_common.skip_qemu_helper(tag) @@ -530,7 +532,7 @@ def gen_tcg_func(f, tag, regs, imms): elif hex_common.is_new_val(regtype, regid, tag): declared.append(f"{regtype}{regid}N") else: - print("Bad register parse: ", regtype, regid, toss, numregs) + hex_common.bad_register(regtype, regid, toss, numregs) ## Handle immediates for immlett, bits, immshift in imms: @@ -548,10 +550,13 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(" TCGv pkt_has_multi_cof = ") f.write("tcg_constant_tl(ctx->pkt->pkt_has_multi_cof);\n") + if hex_common.need_pkt_need_commit(tag): + f.write(" TCGv pkt_need_commit = ") + 
f.write("tcg_constant_tl(ctx->need_commit);\n") if hex_common.need_part1(tag): f.write(" TCGv part1 = tcg_constant_tl(insn->part1);\n") if hex_common.need_slot(tag): - f.write(" TCGv slot = tcg_constant_tl(insn->slot);\n") + f.write(" TCGv slotval = gen_slotval(ctx);\n") if hex_common.need_PC(tag): f.write(" TCGv PC = tcg_constant_tl(ctx->pkt->pc);\n") if hex_common.helper_needs_next_PC(tag): @@ -594,12 +599,14 @@ def gen_tcg_func(f, tag, regs, imms): if hex_common.need_pkt_has_multi_cof(tag): f.write(", pkt_has_multi_cof") + if hex_common.need_pkt_need_commit(tag): + f.write(", pkt_need_commit") if hex_common.need_PC(tag): f.write(", PC") if hex_common.helper_needs_next_PC(tag): f.write(", next_PC") if hex_common.need_slot(tag): - f.write(", slot") + f.write(", slotval") if hex_common.need_part1(tag): f.write(", part1") f.write(");\n") diff --git a/target/hexagon/gen_tcg_hvx.h b/target/hexagon/gen_tcg_hvx.h index d4aefe8e3f..44bae53f8d 100644 --- a/target/hexagon/gen_tcg_hvx.h +++ b/target/hexagon/gen_tcg_hvx.h @@ -128,6 +128,41 @@ static inline void assert_vhist_tmp(DisasContext *ctx) tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ sizeof(MMVector), sizeof(MMVector)) +#define fGEN_TCG_V6_vassign_tmp(SHORTCODE) \ + tcg_gen_gvec_mov(MO_64, VdV_off, VuV_off, \ + sizeof(MMVector), sizeof(MMVector)) + +#define fGEN_TCG_V6_vcombine_tmp(SHORTCODE) \ + do { \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + } while (0) + +/* + * Vector combine + * + * Be careful that the source and dest don't overlap + */ +#define fGEN_TCG_V6_vcombine(SHORTCODE) \ + do { \ + if (VddV_off != VuV_off) { \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + } else { \ + intptr_t tmpoff = 
offsetof(CPUHexagonState, vtmp); \ + tcg_gen_gvec_mov(MO_64, tmpoff, VuV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off, VvV_off, \ + sizeof(MMVector), sizeof(MMVector)); \ + tcg_gen_gvec_mov(MO_64, VddV_off + sizeof(MMVector), tmpoff, \ + sizeof(MMVector), sizeof(MMVector)); \ + } \ + } while (0) + /* Vector conditional move */ #define fGEN_TCG_VEC_CMOV(PRED) \ do { \ diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 244063b1d2..cb2aa28a19 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -45,7 +45,7 @@ TCGv gen_read_preg(TCGv pred, uint8_t num) #define IMMUTABLE (~0) -static const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { +const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS] = { [HEX_REG_USR] = 0xc13000c0, [HEX_REG_PC] = IMMUTABLE, [HEX_REG_GP] = 0x3f, @@ -68,58 +68,72 @@ static inline void gen_masked_reg_write(TCGv new_val, TCGv cur_val, } } -static TCGv get_result_gpr(DisasContext *ctx, int rnum) +TCGv get_result_gpr(DisasContext *ctx, int rnum) { - return hex_new_value[rnum]; + if (ctx->need_commit) { + if (rnum == HEX_REG_USR) { + return hex_new_value_usr; + } else { + if (ctx->new_value[rnum] == NULL) { + ctx->new_value[rnum] = tcg_temp_new(); + tcg_gen_movi_tl(ctx->new_value[rnum], 0); + } + return ctx->new_value[rnum]; + } + } else { + return hex_gpr[rnum]; + } } static TCGv_i64 get_result_gpr_pair(DisasContext *ctx, int rnum) { TCGv_i64 result = tcg_temp_new_i64(); - tcg_gen_concat_i32_i64(result, hex_new_value[rnum], - hex_new_value[rnum + 1]); + tcg_gen_concat_i32_i64(result, get_result_gpr(ctx, rnum), + get_result_gpr(ctx, rnum + 1)); return result; } -void gen_log_reg_write(int rnum, TCGv val) +void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val) { const target_ulong reg_mask = reg_immut_masks[rnum]; gen_masked_reg_write(val, hex_gpr[rnum], reg_mask); - tcg_gen_mov_tl(hex_new_value[rnum], val); + tcg_gen_mov_tl(get_result_gpr(ctx, rnum), val); if 
(HEX_DEBUG) { /* Do this so HELPER(debug_commit_end) will know */ tcg_gen_movi_tl(hex_reg_written[rnum], 1); } } -static void gen_log_reg_write_pair(int rnum, TCGv_i64 val) +static void gen_log_reg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) { - const target_ulong reg_mask_low = reg_immut_masks[rnum]; - const target_ulong reg_mask_high = reg_immut_masks[rnum + 1]; TCGv val32 = tcg_temp_new(); /* Low word */ tcg_gen_extrl_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum], reg_mask_low); - tcg_gen_mov_tl(hex_new_value[rnum], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum], 1); - } + gen_log_reg_write(ctx, rnum, val32); /* High word */ tcg_gen_extrh_i64_i32(val32, val); - gen_masked_reg_write(val32, hex_gpr[rnum + 1], reg_mask_high); - tcg_gen_mov_tl(hex_new_value[rnum + 1], val32); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_movi_tl(hex_reg_written[rnum + 1], 1); + gen_log_reg_write(ctx, rnum + 1, val32); +} + +TCGv get_result_pred(DisasContext *ctx, int pnum) +{ + if (ctx->need_commit) { + if (ctx->new_pred_value[pnum] == NULL) { + ctx->new_pred_value[pnum] = tcg_temp_new(); + tcg_gen_movi_tl(ctx->new_pred_value[pnum], 0); + } + return ctx->new_pred_value[pnum]; + } else { + return hex_pred[pnum]; } } void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) { + TCGv pred = get_result_pred(ctx, pnum); TCGv base_val = tcg_temp_new(); tcg_gen_andi_tl(base_val, val, 0xff); @@ -132,12 +146,13 @@ void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) * straight assignment. Otherwise, do an and. 
*/ if (!test_bit(pnum, ctx->pregs_written)) { - tcg_gen_mov_tl(hex_new_pred_value[pnum], base_val); + tcg_gen_mov_tl(pred, base_val); } else { - tcg_gen_and_tl(hex_new_pred_value[pnum], - hex_new_pred_value[pnum], base_val); + tcg_gen_and_tl(pred, pred, base_val); + } + if (HEX_DEBUG) { + tcg_gen_ori_tl(ctx->pred_written, ctx->pred_written, 1 << pnum); } - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << pnum); set_bit(pnum, ctx->pregs_written); } @@ -231,7 +246,7 @@ static inline void gen_write_ctrl_reg(DisasContext *ctx, int reg_num, if (reg_num == HEX_REG_P3_0_ALIASED) { gen_write_p3_0(ctx, val); } else { - gen_log_reg_write(reg_num, val); + gen_log_reg_write(ctx, reg_num, val); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; } @@ -255,7 +270,7 @@ static inline void gen_write_ctrl_reg_pair(DisasContext *ctx, int reg_num, tcg_gen_extrh_i64_i32(val32, val); tcg_gen_mov_tl(result, val32); } else { - gen_log_reg_write_pair(reg_num, val); + gen_log_reg_write_pair(ctx, reg_num, val); if (reg_num == HEX_REG_QEMU_PKT_CNT) { ctx->num_packets = 0; ctx->num_insns = 0; @@ -383,6 +398,14 @@ static inline void gen_store_conditional8(DisasContext *ctx, tcg_gen_movi_tl(hex_llsc_addr, ~0); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +static TCGv gen_slotval(DisasContext *ctx) +{ + int slotval = (ctx->pkt->pkt_has_store_s1 & 1) | (ctx->insn->slot << 1); + return tcg_constant_tl(slotval); +} +#endif + void gen_store32(TCGv vaddr, TCGv src, int width, uint32_t slot) { tcg_gen_mov_tl(hex_store_addr[slot], vaddr); @@ -457,9 +480,9 @@ static void gen_write_new_pc_addr(DisasContext *ctx, TCGv addr, if (ctx->pkt->pkt_has_multi_cof) { /* If there are multiple branches in a packet, ignore the second one */ tcg_gen_movcond_tl(TCG_COND_NE, hex_gpr[HEX_REG_PC], - hex_branch_taken, tcg_constant_tl(0), + ctx->branch_taken, tcg_constant_tl(0), hex_gpr[HEX_REG_PC], addr); - tcg_gen_movi_tl(hex_branch_taken, 1); + tcg_gen_movi_tl(ctx->branch_taken, 1); } else { 
tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], addr); } @@ -480,7 +503,7 @@ static void gen_write_new_pc_pcrel(DisasContext *ctx, int pc_off, ctx->branch_cond = TCG_COND_ALWAYS; if (pred != NULL) { ctx->branch_cond = cond; - tcg_gen_mov_tl(hex_branch_taken, pred); + tcg_gen_mov_tl(ctx->branch_taken, pred); } ctx->branch_dest = dest; } @@ -518,6 +541,55 @@ static void gen_compare(TCGCond cond, TCGv res, TCGv arg1, TCGv arg2) tcg_gen_movcond_tl(cond, res, arg1, arg2, one, zero); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +static inline void gen_loop0r(DisasContext *ctx, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC0, RsV); + gen_log_reg_write(ctx, HEX_REG_SA0, tcg_constant_tl(ctx->pkt->pc + riV)); + gen_set_usr_fieldi(ctx, USR_LPCFG, 0); +} + +static void gen_loop0i(DisasContext *ctx, int count, int riV) +{ + gen_loop0r(ctx, tcg_constant_tl(count), riV); +} + +static inline void gen_loop1r(DisasContext *ctx, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC1, RsV); + gen_log_reg_write(ctx, HEX_REG_SA1, tcg_constant_tl(ctx->pkt->pc + riV)); +} + +static void gen_loop1i(DisasContext *ctx, int count, int riV) +{ + gen_loop1r(ctx, tcg_constant_tl(count), riV); +} + +static void gen_ploopNsr(DisasContext *ctx, int N, TCGv RsV, int riV) +{ + fIMMEXT(riV); + fPCALIGN(riV); + gen_log_reg_write(ctx, HEX_REG_LC0, RsV); + gen_log_reg_write(ctx, HEX_REG_SA0, tcg_constant_tl(ctx->pkt->pc + riV)); + gen_set_usr_fieldi(ctx, USR_LPCFG, N); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0)); +} + +static void gen_ploopNsi(DisasContext *ctx, int N, int count, int riV) +{ + gen_ploopNsr(ctx, N, tcg_constant_tl(count), riV); +} + +static inline void gen_comparei(TCGCond cond, TCGv res, TCGv arg1, int arg2) +{ + gen_compare(cond, res, arg1, tcg_constant_tl(arg2)); +} +#endif + static void gen_cond_jumpr(DisasContext *ctx, TCGv dst_pc, TCGCond cond, TCGv pred) { @@ -547,7 +619,7 @@ static void 
gen_cmpnd_cmp_jmp(DisasContext *ctx, gen_log_pred_write(ctx, pnum, pred); } else { TCGv pred = tcg_temp_new(); - tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]); + tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]); gen_cond_jump(ctx, cond2, pred, pc_off); } } @@ -604,7 +676,7 @@ static void gen_cmpnd_tstbit0_jmp(DisasContext *ctx, gen_log_pred_write(ctx, pnum, pred); } else { TCGv pred = tcg_temp_new(); - tcg_gen_mov_tl(pred, hex_new_pred_value[pnum]); + tcg_gen_mov_tl(pred, ctx->new_pred_value[pnum]); gen_cond_jump(ctx, cond, pred, pc_off); } } @@ -665,6 +737,18 @@ static void gen_cond_callr(DisasContext *ctx, gen_set_label(skip); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +/* frame = ((LR << 32) | FP) ^ (FRAMEKEY << 32)) */ +static TCGv_i64 gen_frame_scramble(void) +{ + TCGv_i64 frame = tcg_temp_new_i64(); + TCGv tmp = tcg_temp_new(); + tcg_gen_xor_tl(tmp, hex_gpr[HEX_REG_LR], hex_gpr[HEX_REG_FRAMEKEY]); + tcg_gen_concat_i32_i64(frame, hex_gpr[HEX_REG_FP], tmp); + return frame; +} +#endif + /* frame ^= (int64_t)FRAMEKEY << 32 */ static void gen_frame_unscramble(TCGv_i64 frame) { @@ -681,6 +765,41 @@ static void gen_load_frame(DisasContext *ctx, TCGv_i64 frame, TCGv EA) tcg_gen_qemu_ld_i64(frame, EA, ctx->mem_idx, MO_TEUQ); } +#ifndef CONFIG_HEXAGON_IDEF_PARSER +/* Stack overflow check */ +static void gen_framecheck(TCGv EA, int framesize) +{ + /* Not modelled in linux-user mode */ + /* Placeholder for system mode */ +#ifndef CONFIG_USER_ONLY + g_assert_not_reached(); +#endif +} + +static void gen_allocframe(DisasContext *ctx, TCGv r29, int framesize) +{ + TCGv r30 = tcg_temp_new(); + TCGv_i64 frame; + tcg_gen_addi_tl(r30, r29, -8); + frame = gen_frame_scramble(); + gen_store8(cpu_env, r30, frame, ctx->insn->slot); + gen_log_reg_write(ctx, HEX_REG_FP, r30); + gen_framecheck(r30, framesize); + tcg_gen_subi_tl(r29, r30, framesize); +} + +static void gen_deallocframe(DisasContext *ctx, TCGv_i64 r31_30, TCGv r30) +{ + TCGv r29 = tcg_temp_new(); + TCGv_i64 frame = 
tcg_temp_new_i64(); + gen_load_frame(ctx, frame, r30); + gen_frame_unscramble(frame); + tcg_gen_mov_i64(r31_30, frame); + tcg_gen_addi_tl(r29, r30, 8); + gen_log_reg_write(ctx, HEX_REG_SP, r29); +} +#endif + static void gen_return(DisasContext *ctx, TCGv_i64 dst, TCGv src) { /* @@ -719,7 +838,7 @@ static void gen_cond_return_subinsn(DisasContext *ctx, TCGCond cond, TCGv pred) { TCGv_i64 RddV = get_result_gpr_pair(ctx, HEX_REG_FP); gen_cond_return(ctx, RddV, hex_gpr[HEX_REG_FP], pred, cond); - gen_log_reg_write_pair(HEX_REG_FP, RddV); + gen_log_reg_write_pair(ctx, HEX_REG_FP, RddV); } static void gen_endloop0(DisasContext *ctx) @@ -730,15 +849,13 @@ static void gen_endloop0(DisasContext *ctx) /* * if (lpcfg == 1) { - * hex_new_pred_value[3] = 0xff; - * hex_pred_written |= 1 << 3; + * p3 = 0xff; * } */ TCGLabel *label1 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); { - tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0xff)); } gen_set_label(label1); @@ -807,14 +924,12 @@ static void gen_endloop01(DisasContext *ctx) /* * if (lpcfg == 1) { - * hex_new_pred_value[3] = 0xff; - * hex_pred_written |= 1 << 3; + * p3 = 0xff; * } */ tcg_gen_brcondi_tl(TCG_COND_NE, lpcfg, 1, label1); { - tcg_gen_movi_tl(hex_new_pred_value[3], 0xff); - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, 1 << 3); + gen_log_pred_write(ctx, 3, tcg_constant_tl(0xff)); } gen_set_label(label1); @@ -877,6 +992,7 @@ static void gen_cmpi_jumpnv(DisasContext *ctx, /* Shift left with saturation */ static void gen_shl_sat(DisasContext *ctx, TCGv dst, TCGv src, TCGv shift_amt) { + TCGv tmp = tcg_temp_new(); /* In case dst == src */ TCGv usr = get_result_gpr(ctx, HEX_REG_USR); TCGv sh32 = tcg_temp_new(); TCGv dst_sar = tcg_temp_new(); @@ -901,17 +1017,17 @@ static void gen_shl_sat(DisasContext *ctx, TCGv dst, TCGv src, TCGv shift_amt) */ tcg_gen_andi_tl(sh32, shift_amt, 31); - 
tcg_gen_movcond_tl(TCG_COND_EQ, dst, sh32, shift_amt, + tcg_gen_movcond_tl(TCG_COND_EQ, tmp, sh32, shift_amt, src, tcg_constant_tl(0)); - tcg_gen_shl_tl(dst, dst, sh32); - tcg_gen_sar_tl(dst_sar, dst, sh32); + tcg_gen_shl_tl(tmp, tmp, sh32); + tcg_gen_sar_tl(dst_sar, tmp, sh32); tcg_gen_movcond_tl(TCG_COND_LT, satval, src, tcg_constant_tl(0), min, max); tcg_gen_setcond_tl(TCG_COND_NE, ovf, dst_sar, src); tcg_gen_shli_tl(ovf, ovf, reg_field_info[USR_OVF].offset); tcg_gen_or_tl(usr, usr, ovf); - tcg_gen_movcond_tl(TCG_COND_EQ, dst, dst_sar, src, dst, satval); + tcg_gen_movcond_tl(TCG_COND_EQ, dst, dst_sar, src, tmp, satval); } static void gen_sar(TCGv dst, TCGv src, TCGv shift_amt) @@ -969,6 +1085,105 @@ static void gen_asl_r_r_sat(DisasContext *ctx, TCGv RdV, TCGv RsV, TCGv RtV) gen_set_label(done); } +static void gen_insert_rp(DisasContext *ctx, TCGv RxV, TCGv RsV, TCGv_i64 RttV) +{ + /* + * int width = fZXTN(6, 32, (fGETWORD(1, RttV))); + * int offset = fSXTN(7, 32, (fGETWORD(0, RttV))); + * size8u_t mask = ((fCONSTLL(1) << width) - 1); + * if (offset < 0) { + * RxV = 0; + * } else { + * RxV &= ~(mask << offset); + * RxV |= ((RsV & mask) << offset); + * } + */ + + TCGv width = tcg_temp_new(); + TCGv offset = tcg_temp_new(); + TCGv_i64 mask = tcg_temp_new_i64(); + TCGv_i64 result = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 offset64 = tcg_temp_new_i64(); + TCGLabel *label = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_extrh_i64_i32(width, RttV); + tcg_gen_extract_tl(width, width, 0, 6); + tcg_gen_extrl_i64_i32(offset, RttV); + tcg_gen_sextract_tl(offset, offset, 0, 7); + /* Possible values for offset are -64 .. 63 */ + tcg_gen_brcondi_tl(TCG_COND_GE, offset, 0, label); + /* For negative offsets, zero out the result */ + tcg_gen_movi_tl(RxV, 0); + tcg_gen_br(done); + gen_set_label(label); + /* At this point, possible values of offset are 0 .. 
63 */ + tcg_gen_ext_i32_i64(mask, width); + tcg_gen_shl_i64(mask, tcg_constant_i64(1), mask); + tcg_gen_subi_i64(mask, mask, 1); + tcg_gen_extu_i32_i64(result, RxV); + tcg_gen_ext_i32_i64(tmp, offset); + tcg_gen_shl_i64(tmp, mask, tmp); + tcg_gen_andc_i64(result, result, tmp); + tcg_gen_extu_i32_i64(tmp, RsV); + tcg_gen_and_i64(tmp, tmp, mask); + tcg_gen_extu_i32_i64(offset64, offset); + tcg_gen_shl_i64(tmp, tmp, offset64); + tcg_gen_or_i64(result, result, tmp); + tcg_gen_extrl_i64_i32(RxV, result); + gen_set_label(done); +} + +static void gen_asr_r_svw_trun(DisasContext *ctx, TCGv RdV, + TCGv_i64 RssV, TCGv RtV) +{ + /* + * for (int i = 0; i < 2; i++) { + * fSETHALF(i, RdV, fGETHALF(0, ((fSXTN(7, 32, RtV) > 0) ? + * (fCAST4_8s(fGETWORD(i, RssV)) >> fSXTN(7, 32, RtV)) : + * (fCAST4_8s(fGETWORD(i, RssV)) << -fSXTN(7, 32, RtV))))); + * } + */ + TCGv shift_amt32 = tcg_temp_new(); + TCGv_i64 shift_amt64 = tcg_temp_new_i64(); + TCGv_i64 tmp64 = tcg_temp_new_i64(); + TCGv tmp32 = tcg_temp_new(); + TCGLabel *label = gen_new_label(); + TCGLabel *zero = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_sextract_tl(shift_amt32, RtV, 0, 7); + /* Possible values of shift_amt32 are -64 .. 63 */ + tcg_gen_brcondi_tl(TCG_COND_LE, shift_amt32, 0, label); + /* After branch, possible values of shift_amt32 are 1 .. 63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_sar_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(label); + tcg_gen_neg_tl(shift_amt32, shift_amt32); + /*At this point, possible values of shift_amt32 are 0 .. 64 */ + tcg_gen_brcondi_tl(TCG_COND_GT, shift_amt32, 63, zero); + /*At this point, possible values of shift_amt32 are 0 .. 
63 */ + tcg_gen_ext_i32_i64(shift_amt64, shift_amt32); + for (int i = 0; i < 2; i++) { + tcg_gen_sextract_i64(tmp64, RssV, i * 32, 32); + tcg_gen_shl_i64(tmp64, tmp64, shift_amt64); + tcg_gen_extrl_i64_i32(tmp32, tmp64); + tcg_gen_deposit_tl(RdV, RdV, tmp32, i * 16, 16); + } + tcg_gen_br(done); + gen_set_label(zero); + /* When the shift_amt is 64, zero out the result */ + tcg_gen_movi_tl(RdV, 0); + gen_set_label(done); +} + static intptr_t vreg_src_off(DisasContext *ctx, int num) { intptr_t offset = offsetof(CPUHexagonState, VRegs[num]); @@ -1008,7 +1223,11 @@ static void gen_log_vreg_write_pair(DisasContext *ctx, intptr_t srcoff, int num, static intptr_t get_result_qreg(DisasContext *ctx, int qnum) { - return offsetof(CPUHexagonState, future_QRegs[qnum]); + if (ctx->need_commit) { + return offsetof(CPUHexagonState, future_QRegs[qnum]); + } else { + return offsetof(CPUHexagonState, QRegs[qnum]); + } } static void gen_vreg_load(DisasContext *ctx, intptr_t dstoff, TCGv src, @@ -1134,22 +1353,28 @@ void gen_sat_i32(TCGv dest, TCGv source, int width) void gen_sat_i32_ovfl(TCGv ovfl, TCGv dest, TCGv source, int width) { - gen_sat_i32(dest, source, width); - tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, dest); + TCGv tmp = tcg_temp_new(); /* In case dest == source */ + gen_sat_i32(tmp, source, width); + tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_satu_i32(TCGv dest, TCGv source, int width) { + TCGv tmp = tcg_temp_new(); /* In case dest == source */ TCGv max_val = tcg_constant_tl((1 << width) - 1); TCGv zero = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_GTU, dest, source, max_val, max_val, source); - tcg_gen_movcond_tl(TCG_COND_LT, dest, source, zero, zero, dest); + tcg_gen_movcond_tl(TCG_COND_GTU, tmp, source, max_val, max_val, source); + tcg_gen_movcond_tl(TCG_COND_LT, tmp, source, zero, zero, tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_satu_i32_ovfl(TCGv ovfl, TCGv dest, TCGv source, int width) { - 
gen_satu_i32(dest, source, width); - tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, dest); + TCGv tmp = tcg_temp_new(); /* In case dest == source */ + gen_satu_i32(tmp, source, width); + tcg_gen_setcond_tl(TCG_COND_NE, ovfl, source, tmp); + tcg_gen_mov_tl(dest, tmp); } void gen_sat_i64(TCGv_i64 dest, TCGv_i64 source, int width) @@ -1162,27 +1387,33 @@ void gen_sat_i64(TCGv_i64 dest, TCGv_i64 source, int width) void gen_sat_i64_ovfl(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 ovfl_64; - gen_sat_i64(dest, source, width); + gen_sat_i64(tmp, source, width); ovfl_64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, dest, source); + tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, tmp, source); + tcg_gen_mov_i64(dest, tmp); tcg_gen_trunc_i64_tl(ovfl, ovfl_64); } void gen_satu_i64(TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 max_val = tcg_constant_i64((1LL << width) - 1LL); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_movcond_i64(TCG_COND_GTU, dest, source, max_val, max_val, source); - tcg_gen_movcond_i64(TCG_COND_LT, dest, source, zero, zero, dest); + tcg_gen_movcond_i64(TCG_COND_GTU, tmp, source, max_val, max_val, source); + tcg_gen_movcond_i64(TCG_COND_LT, tmp, source, zero, zero, tmp); + tcg_gen_mov_i64(dest, tmp); } void gen_satu_i64_ovfl(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width) { + TCGv_i64 tmp = tcg_temp_new_i64(); /* In case dest == source */ TCGv_i64 ovfl_64; - gen_satu_i64(dest, source, width); + gen_satu_i64(tmp, source, width); ovfl_64 = tcg_temp_new_i64(); - tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, dest, source); + tcg_gen_setcond_i64(TCG_COND_NE, ovfl_64, tmp, source); + tcg_gen_mov_i64(dest, tmp); tcg_gen_trunc_i64_tl(ovfl, ovfl_64); } diff --git a/target/hexagon/genptr.h b/target/hexagon/genptr.h index 76e497aa48..a4b43c2910 100644 --- a/target/hexagon/genptr.h +++ 
b/target/hexagon/genptr.h @@ -35,7 +35,9 @@ void gen_store4i(TCGv_env cpu_env, TCGv vaddr, int32_t src, uint32_t slot); void gen_store8i(TCGv_env cpu_env, TCGv vaddr, int64_t src, uint32_t slot); TCGv gen_read_reg(TCGv result, int num); TCGv gen_read_preg(TCGv pred, uint8_t num); -void gen_log_reg_write(int rnum, TCGv val); +TCGv get_result_gpr(DisasContext *ctx, int rnum); +TCGv get_result_pred(DisasContext *ctx, int pnum); +void gen_log_reg_write(DisasContext *ctx, int rnum, TCGv val); void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val); void gen_set_usr_field(DisasContext *ctx, int field, TCGv val); void gen_set_usr_fieldi(DisasContext *ctx, int field, int x); @@ -58,4 +60,6 @@ void gen_set_half(int N, TCGv result, TCGv src); void gen_set_half_i64(int N, TCGv_i64 result, TCGv src); void probe_noshuf_load(TCGv va, int s, int mi); +extern const target_ulong reg_immut_masks[TOTAL_PER_THREAD_REGS]; + #endif diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index ed7f9842f6..fa0ebaf7c8 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -21,7 +21,7 @@ DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) DEF_HELPER_1(debug_start_packet, void, env) DEF_HELPER_FLAGS_3(debug_check_store_width, TCG_CALL_NO_WG, void, env, int, int) -DEF_HELPER_FLAGS_3(debug_commit_end, TCG_CALL_NO_WG, void, env, int, int) +DEF_HELPER_FLAGS_5(debug_commit_end, TCG_CALL_NO_WG, void, env, i32, int, int, int) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_3(gather_store, void, env, i32, int) DEF_HELPER_1(commit_hvx_stores, void, env) @@ -29,8 +29,10 @@ DEF_HELPER_FLAGS_4(fcircadd, TCG_CALL_NO_RWG_SE, s32, s32, s32, s32, s32) DEF_HELPER_FLAGS_1(fbrev, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_3(sfrecipa, i64, env, f32, f32) DEF_HELPER_2(sfinvsqrta, i64, env, f32) -DEF_HELPER_4(vacsh_val, s64, env, s64, s64, s64) +DEF_HELPER_5(vacsh_val, s64, env, s64, s64, s64, i32) DEF_HELPER_FLAGS_4(vacsh_pred, TCG_CALL_NO_RWG_SE, s32, 
env, s64, s64, s64) +DEF_HELPER_FLAGS_2(cabacdecbin_val, TCG_CALL_NO_RWG_SE, s64, s64, s64) +DEF_HELPER_FLAGS_2(cabacdecbin_pred, TCG_CALL_NO_RWG_SE, s32, s64, s64) /* Floating point */ DEF_HELPER_2(conv_sf2df, f64, env, f32) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 40f28ca933..f3aac55db0 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -30,6 +30,9 @@ tags = [] # list of all tags overrides = {} # tags with helper overrides idef_parser_enabled = {} # tags enabled for idef-parser +def bad_register(*args): + args_str = ", ".join(map(str, args)) + raise Exception(f"Bad register parse: {args_str}") # We should do this as a hash for performance, # but to keep order let's keep it as a list. @@ -97,6 +100,12 @@ def calculate_attribs(): add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR") add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD") add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE") + add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0') + add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1') + add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1') + add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3') # Recurse down macros, find attributes from sub-macros macroValues = list(macros.values()) @@ -241,9 +250,10 @@ def is_new_val(regtype, regid, tag): def need_slot(tag): if ( - ("A_CONDEXEC" in attribdict[tag] and "A_JUMP" not in attribdict[tag]) - or "A_STORE" in attribdict[tag] - or "A_LOAD" in attribdict[tag] + "A_CVI_SCATTER" not in attribdict[tag] + and "A_CVI_GATHER" not in attribdict[tag] + and ("A_STORE" in attribdict[tag] + or "A_LOAD" in attribdict[tag]) ): return 1 else: @@ -270,6 +280,9 @@ def need_pkt_has_multi_cof(tag): return "A_COF" in attribdict[tag] +def need_pkt_need_commit(tag): + return 'A_IMPLICIT_WRITES_USR' in attribdict[tag] + def 
need_condexec_reg(tag, regs): if "A_CONDEXEC" in attribdict[tag]: for regtype, regid, toss, numregs in regs: diff --git a/target/hexagon/iclass.c b/target/hexagon/iclass.c index 6091286993..c3f8523b27 100644 --- a/target/hexagon/iclass.c +++ b/target/hexagon/iclass.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -51,8 +51,10 @@ SlotMask find_iclass_slots(Opcode opcode, int itype) return SLOTS_0; } else if ((opcode == J2_trap0) || (opcode == Y2_isync) || - (opcode == J2_pause) || (opcode == J4_hintjumpr)) { + (opcode == J2_pause)) { return SLOTS_2; + } else if (opcode == J4_hintjumpr) { + return SLOTS_23; } else if (GET_ATTRIB(opcode, A_CRSLOT23)) { return SLOTS_23; } else if (GET_ATTRIB(opcode, A_RESTRICT_PREFERSLOT0)) { diff --git a/target/hexagon/idef-parser/idef-parser.lex b/target/hexagon/idef-parser/idef-parser.lex index 5eb8ac5a80..cd5958ec90 100644 --- a/target/hexagon/idef-parser/idef-parser.lex +++ b/target/hexagon/idef-parser/idef-parser.lex @@ -401,12 +401,39 @@ STRING_LIT \"(\\.|[^"\\])*\" } return SIGN; } -"0x"{HEX_DIGIT}+ | -{DIGIT}+ { yylval->rvalue.type = IMMEDIATE; - yylval->rvalue.bit_width = 32; - yylval->rvalue.signedness = SIGNED; +"0x"{HEX_DIGIT}+ { uint64_t value = strtoull(yytext, NULL, 0); + yylval->rvalue.type = IMMEDIATE; yylval->rvalue.imm.type = VALUE; - yylval->rvalue.imm.value = strtoull(yytext, NULL, 0); + yylval->rvalue.imm.value = value; + if (value <= INT_MAX) { + yylval->rvalue.bit_width = sizeof(int) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value <= UINT_MAX) { + yylval->rvalue.bit_width = sizeof(unsigned int) * 8; + yylval->rvalue.signedness = UNSIGNED; + } else if (value <= LONG_MAX) { + yylval->rvalue.bit_width = sizeof(long) * 8; + 
yylval->rvalue.signedness = SIGNED; + } else if (value <= ULONG_MAX) { + yylval->rvalue.bit_width = sizeof(unsigned long) * 8; + yylval->rvalue.signedness = UNSIGNED; + } else { + g_assert_not_reached(); + } + return IMM; } +{DIGIT}+ { int64_t value = strtoll(yytext, NULL, 0); + yylval->rvalue.type = IMMEDIATE; + yylval->rvalue.imm.type = VALUE; + yylval->rvalue.imm.value = value; + if (value >= INT_MIN && value <= INT_MAX) { + yylval->rvalue.bit_width = sizeof(int) * 8; + yylval->rvalue.signedness = SIGNED; + } else if (value >= LONG_MIN && value <= LONG_MAX) { + yylval->rvalue.bit_width = sizeof(long) * 8; + yylval->rvalue.signedness = SIGNED; + } else { + g_assert_not_reached(); + } return IMM; } "0x"{HEX_DIGIT}+"ULL" | {DIGIT}+"ULL" { yylval->rvalue.type = IMMEDIATE; diff --git a/target/hexagon/idef-parser/idef-parser.y b/target/hexagon/idef-parser/idef-parser.y index 5444fd4749..5c983954ed 100644 --- a/target/hexagon/idef-parser/idef-parser.y +++ b/target/hexagon/idef-parser/idef-parser.y @@ -594,8 +594,6 @@ rvalue : FAIL | CAST rvalue { @1.last_column = @2.last_column; - /* Assign target signedness */ - $2.signedness = $1.signedness; $$ = gen_cast_op(c, &@1, &$2, $1.bit_width, $1.signedness); } | rvalue EQ rvalue @@ -685,7 +683,7 @@ rvalue : FAIL yyassert(c, &@1, $5.type == IMMEDIATE && $5.imm.type == VALUE, "SXT expects immediate values\n"); - $$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, SIGNED); + $$ = gen_extend_op(c, &@1, &$3, 64, &$7, SIGNED); } | ZXT '(' rvalue ',' IMM ',' rvalue ')' { @@ -693,7 +691,7 @@ rvalue : FAIL yyassert(c, &@1, $5.type == IMMEDIATE && $5.imm.type == VALUE, "ZXT expects immediate values\n"); - $$ = gen_extend_op(c, &@1, &$3, $5.imm.value, &$7, UNSIGNED); + $$ = gen_extend_op(c, &@1, &$3, 64, &$7, UNSIGNED); } | '(' rvalue ')' { diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index 8734218e51..7b5ebafec2 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ 
b/target/hexagon/idef-parser/parser-helpers.c @@ -167,8 +167,9 @@ void reg_print(Context *c, YYLTYPE *locp, HexReg *reg) EMIT(c, "hex_gpr[%u]", reg->id); } -void imm_print(Context *c, YYLTYPE *locp, HexImm *imm) +void imm_print(Context *c, YYLTYPE *locp, HexValue *rvalue) { + HexImm *imm = &rvalue->imm; switch (imm->type) { case I: EMIT(c, "i"); @@ -177,7 +178,21 @@ void imm_print(Context *c, YYLTYPE *locp, HexImm *imm) EMIT(c, "%ciV", imm->id); break; case VALUE: - EMIT(c, "((int64_t) %" PRIu64 "ULL)", (int64_t) imm->value); + if (rvalue->bit_width == 32) { + if (rvalue->signedness == UNSIGNED) { + EMIT(c, "((uint32_t) 0x%" PRIx32 ")", (uint32_t) imm->value); + } else { + EMIT(c, "((int32_t) 0x%" PRIx32 ")", (int32_t) imm->value); + } + } else if (rvalue->bit_width == 64) { + if (rvalue->signedness == UNSIGNED) { + EMIT(c, "((uint64_t) 0x%" PRIx64 "ULL)", (uint64_t) imm->value); + } else { + EMIT(c, "((int64_t) 0x%" PRIx64 "LL)", (int64_t) imm->value); + } + } else { + g_assert_not_reached(); + } break; case QEMU_TMP: EMIT(c, "qemu_tmp_%" PRIu64, imm->index); @@ -213,7 +228,7 @@ void rvalue_print(Context *c, YYLTYPE *locp, void *pointer) tmp_print(c, locp, &rvalue->tmp); break; case IMMEDIATE: - imm_print(c, locp, &rvalue->imm); + imm_print(c, locp, rvalue); break; case VARID: var_print(c, locp, &rvalue->var); @@ -386,13 +401,10 @@ HexValue gen_rvalue_extend(Context *c, YYLTYPE *locp, HexValue *rvalue) if (rvalue->type == IMMEDIATE) { HexValue res = gen_imm_qemu_tmp(c, locp, 64, rvalue->signedness); - bool is_unsigned = (rvalue->signedness == UNSIGNED); - const char *sign_suffix = is_unsigned ? 
"u" : ""; gen_c_int_type(c, locp, 64, rvalue->signedness); - OUT(c, locp, " ", &res, " = "); - OUT(c, locp, "(", sign_suffix, "int64_t) "); - OUT(c, locp, "(", sign_suffix, "int32_t) "); - OUT(c, locp, rvalue, ";\n"); + OUT(c, locp, " ", &res, " = ("); + gen_c_int_type(c, locp, 64, rvalue->signedness); + OUT(c, locp, ")", rvalue, ";\n"); return res; } else { HexValue res = gen_tmp(c, locp, 64, rvalue->signedness); @@ -959,33 +971,18 @@ HexValue gen_cast_op(Context *c, unsigned target_width, HexSignedness signedness) { + HexValue res; assert_signedness(c, locp, src->signedness); if (src->bit_width == target_width) { - return *src; - } else if (src->type == IMMEDIATE) { - HexValue res = *src; - res.bit_width = target_width; - res.signedness = signedness; - return res; + res = *src; + } else if (src->bit_width < target_width) { + res = gen_rvalue_extend(c, locp, src); } else { - HexValue res = gen_tmp(c, locp, target_width, signedness); - /* Truncate */ - if (src->bit_width > target_width) { - OUT(c, locp, "tcg_gen_trunc_i64_tl(", &res, ", ", src, ");\n"); - } else { - assert_signedness(c, locp, src->signedness); - if (src->signedness == UNSIGNED) { - /* Extend unsigned */ - OUT(c, locp, "tcg_gen_extu_i32_i64(", - &res, ", ", src, ");\n"); - } else { - /* Extend signed */ - OUT(c, locp, "tcg_gen_ext_i32_i64(", - &res, ", ", src, ");\n"); - } - } - return res; + /* src->bit_width > target_width */ + res = gen_rvalue_truncate(c, locp, src); } + res.signedness = signedness; + return res; } @@ -1123,7 +1120,7 @@ HexValue gen_extend_op(Context *c, HexValue *value, HexSignedness signedness) { - unsigned bit_width = (dst_width = 64) ? 64 : 32; + unsigned bit_width = (dst_width == 64) ? 
64 : 32; HexValue value_m = *value; HexValue src_width_m = *src_width; @@ -1318,7 +1315,7 @@ void gen_write_reg(Context *c, YYLTYPE *locp, HexValue *reg, HexValue *value) value_m = rvalue_materialize(c, locp, &value_m); OUT(c, locp, - "gen_log_reg_write(", &reg->reg.id, ", ", + "gen_log_reg_write(ctx, ", &reg->reg.id, ", ", &value_m, ");\n"); } @@ -1854,7 +1851,7 @@ HexValue gen_rvalue_pred(Context *c, YYLTYPE *locp, HexValue *pred) *pred = gen_tmp(c, locp, 32, UNSIGNED); if (is_dotnew) { OUT(c, locp, "tcg_gen_mov_i32(", pred, - ", hex_new_pred_value["); + ", ctx->new_pred_value["); OUT(c, locp, pred_str, "]);\n"); } else { OUT(c, locp, "gen_read_preg(", pred, ", ", pred_str, ");\n"); diff --git a/target/hexagon/idef-parser/parser-helpers.h b/target/hexagon/idef-parser/parser-helpers.h index 1239d23a6a..7c58087169 100644 --- a/target/hexagon/idef-parser/parser-helpers.h +++ b/target/hexagon/idef-parser/parser-helpers.h @@ -80,7 +80,7 @@ void reg_compose(Context *c, YYLTYPE *locp, HexReg *reg, char reg_id[5]); void reg_print(Context *c, YYLTYPE *locp, HexReg *reg); -void imm_print(Context *c, YYLTYPE *locp, HexImm *imm); +void imm_print(Context *c, YYLTYPE *locp, HexValue *rvalue); void var_print(Context *c, YYLTYPE *locp, HexVar *var); diff --git a/target/hexagon/imported/branch.idef b/target/hexagon/imported/branch.idef index 88f5f48cce..93e2e375a5 100644 --- a/target/hexagon/imported/branch.idef +++ b/target/hexagon/imported/branch.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -34,6 +34,9 @@ Q6INSN(J2_jump,"jump #r22:2",ATTRIBS(A_JDIR), "direct unconditional jump", Q6INSN(J2_jumpr,"jumpr Rs32",ATTRIBS(A_JINDIR), "indirect unconditional jump", {fJUMPR(RsN,RsV,COF_TYPE_JUMPR);}) +Q6INSN(J2_jumprh,"jumprh Rs32",ATTRIBS(A_JINDIR, A_HINTED_COF), "indirect unconditional jump", +{fJUMPR(RsN,RsV,COF_TYPE_JUMPR);}) + #define OLDCOND_JUMP(TAG,OPER,OPER2,ATTRIB,DESCR,SEMANTICS) \ Q6INSN(TAG##t,"if (Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLD(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLD(PuV)) { SEMANTICS; }}) \ Q6INSN(TAG##f,"if (!Pu4) "OPER":nt "OPER2,ATTRIB,DESCR,{fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0); if (fLSBOLDNOT(PuV)) { SEMANTICS; }}) \ @@ -196,6 +199,8 @@ Q6INSN(J2_callrt,"if (Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional Q6INSN(J2_callrf,"if (!Pu4) callr Rs32",ATTRIBS(CINDIR_STD),"indirect conditional call if false", {fBRANCH_SPECULATE_STALL(fLSBOLDNOT(PuV),,SPECULATE_NOT_TAKEN,12,0);if (fLSBOLDNOT(PuV)) { fCALLR(RsV); }}) +Q6INSN(J2_callrh,"callrh Rs32",ATTRIBS(CINDIR_STD, A_HINTED_COF), "hinted indirect unconditional call", +{ fCALLR(RsV); }) diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index d71c04cd30..0cd30a5e85 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -382,14 +382,23 @@ DEF_ENC32(L4_return_fnew_pt, ICLASS_LD" 011 0 000 sssss PP1110vv ---ddddd") DEF_ENC32(L4_return_tnew_pnt, ICLASS_LD" 011 0 000 sssss PP0010vv ---ddddd") DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd") -DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP00---- -00ddddd") +DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") +DEF_ENC32(L2_loadw_aq, ICLASS_LD" 001 0 000 sssss PP001--- 000ddddd") +DEF_ENC32(L4_loadd_aq, ICLASS_LD" 001 0 000 sssss PP011--- 000ddddd") +DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd") +DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd") +DEF_ENC32(S2_storew_rl_at_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --0010dd") +DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd") -DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP01---- -00ddddd") +DEF_ENC32(S4_stored_rl_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0010dd") +DEF_ENC32(S4_stored_rl_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1010dd") + +DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii -01iiiii") DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii") @@ -479,8 +488,8 @@ STD_PST_ENC(rinew, "1 101","10ttt") /* x bus/cache */ /* x store/cache */ DEF_ENC32(S2_allocframe, ICLASS_ST" 000 01 00xxxxx PP000iii iiiiiiii") -DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ------dd") -DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ------dd") +DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd") +DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd") DEF_ENC32(Y2_dczeroa, ICLASS_ST" 000 01 10sssss PP0----- --------") @@ -515,6 +524,7 @@ 
DEF_FIELD32(ICLASS_J" 110- -------- PP-!---- --------",J_PT,"Predict-taken") DEF_FIELDROW_DESC32(ICLASS_J" 0000 -------- PP------ --------","[#0] PC=(Rs), R31=return") DEF_ENC32(J2_callr, ICLASS_J" 0000 101sssss PP------ --------") +DEF_ENC32(J2_callrh, ICLASS_J" 0000 110sssss PP------ --------") DEF_FIELDROW_DESC32(ICLASS_J" 0001 -------- PP------ --------","[#1] if (Pu) PC=(Rs), R31=return") DEF_ENC32(J2_callrt, ICLASS_J" 0001 000sssss PP----uu --------") @@ -522,6 +532,7 @@ DEF_ENC32(J2_callrf, ICLASS_J" 0001 001sssss PP----uu --------") DEF_FIELDROW_DESC32(ICLASS_J" 0010 -------- PP------ --------","[#2] PC=(Rs); ") DEF_ENC32(J2_jumpr, ICLASS_J" 0010 100sssss PP------ --------") +DEF_ENC32(J2_jumprh, ICLASS_J" 0010 110sssss PP------ --------") DEF_ENC32(J4_hintjumpr, ICLASS_J" 0010 101sssss PP------ --------") DEF_FIELDROW_DESC32(ICLASS_J" 0011 -------- PP------ --------","[#3] if (Pu) PC=(Rs) ") diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 237634bdd9..53198176a9 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -128,6 +128,24 @@ Q6INSN(S2_allocframe,"allocframe(Rx32,#u11:3):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEM #define A_RETURN A_RESTRICT_COF_MAX1,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOSLOT1_STORE,A_RET_TYPE,A_DEALLOCRET +/**** Load Acquire Store Release Instructions****/ + + + +Q6INSN(L2_loadw_aq,"Rd32=memw_aq(Rs32)",ATTRIBS(A_REGWRSIZE_4B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_4B,A_LOAD),"Load Acquire Word", +{ fEA_REG(RsV); fLOAD(1,4,u,EA,RdV); }) +Q6INSN(L4_loadd_aq,"Rdd32=memd_aq(Rs32)",ATTRIBS(A_REGWRSIZE_8B,A_ACQUIRE,A_RESTRICT_SLOT0ONLY,A_MEMSIZE_8B,A_LOAD),"Load Acquire Double integer", +{ fEA_REG(RsV); fLOAD(1,8,u,EA,RddV); }) + +Q6INSN(R6_release_at_vi,"release(Rs32):at",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); }) +Q6INSN(R6_release_st_vi,"release(Rs32):st",ATTRIBS(A_MEMSIZE_0B,A_RELEASE,A_STORE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_RESTRICT_SLOT0ONLY), "Release lock", {fEA_REG(RsV); fSTORE(1,0,EA,RsV); }) + +Q6INSN(S2_storew_rl_at_vi,"memw_rl(Rs32):at=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", { fEA_REG(RsV); fSTORE(1,4,EA,RtV); }) +Q6INSN(S4_stored_rl_at_vi,"memd_rl(Rs32):at=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_ALL_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); }) + +Q6INSN(S2_storew_rl_st_vi,"memw_rl(Rs32):st=Rt32",ATTRIBS(A_REGWRSIZE_4B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_4B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Word", 
{ fEA_REG(RsV); fSTORE(1,4,EA,RtV); }) +Q6INSN(S4_stored_rl_st_vi,"memd_rl(Rs32):st=Rtt32",ATTRIBS(A_REGWRSIZE_8B,A_RELEASE,A_VTCM_ALLBANK_ACCESS,A_RLS_INNER,A_RLS_SAME_THREAD,A_RESTRICT_NOPACKET,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY),"Store Release Double integer", { fEA_REG(RsV); fSTORE(1,8,EA,RttV); }) + Q6INSN(L2_deallocframe,"Rdd32=deallocframe(Rs32):raw", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_LOAD,A_DEALLOCFRAME), "Deallocate stack frame", { fHIDE(size8u_t tmp;) fEA_REG(RsV); fLOAD(1,8,u,EA,tmp); diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def index 6fbbe2c422..402438f566 100644 --- a/target/hexagon/imported/mmvec/encode_ext.def +++ b/target/hexagon/imported/mmvec/encode_ext.def @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -257,6 +257,11 @@ DEF_ENC(V6_vasruhubrndsat, ICLASS_CJ" 1 000 vvv vvttt PP 0 uuuuu 111 ddd DEF_ENC(V6_vasruwuhsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 100 ddddd") // DEF_ENC(V6_vasruhubsat, ICLASS_CJ" 1 000 vvv vvttt PP 1 uuuuu 101 ddddd") // +DEF_ENC(V6_vasrvuhubrndsat,"00011101000vvvvvPP0uuuuu011ddddd") +DEF_ENC(V6_vasrvuhubsat,"00011101000vvvvvPP0uuuuu010ddddd") +DEF_ENC(V6_vasrvwuhrndsat,"00011101000vvvvvPP0uuuuu001ddddd") +DEF_ENC(V6_vasrvwuhsat,"00011101000vvvvvPP0uuuuu000ddddd") + /*************************************************************** * * Group #1, Uses Q6 Rt32 @@ -716,6 +721,7 @@ DEF_ENC(V6_vaddclbw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 001 ddddd") // DEF_ENC(V6_vavguw, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 010 ddddd") // DEF_ENC(V6_vavguwrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 011 ddddd") // +DEF_ENC(V6_vassign_tmp,"00011110--0---01PP0uuuuu110ddddd") DEF_ENC(V6_vavgb, ICLASS_CJ" 
1 111 000 vvvvv PP 1 uuuuu 100 ddddd") // DEF_ENC(V6_vavgbrnd, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 101 ddddd") // DEF_ENC(V6_vnavgb, ICLASS_CJ" 1 111 000 vvvvv PP 1 uuuuu 110 ddddd") // @@ -730,6 +736,8 @@ DEF_ENC(V6_vmaxb, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 101 ddddd") // DEF_ENC(V6_vsatuwuh, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 110 ddddd") // DEF_ENC(V6_vdealb4w, ICLASS_CJ" 1 111 001 vvvvv PP 0 uuuuu 111 ddddd") // +DEF_ENC(V6_v6mpyvubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 0ii xxxxx") +DEF_ENC(V6_v6mpyhubs10_vxx, ICLASS_CJ" 1 111 001 vvvvv PP 1 uuuuu 1ii xxxxx") DEF_ENC(V6_vmpyowh_rnd, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 000 ddddd") // DEF_ENC(V6_vshuffeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 001 ddddd") // @@ -739,6 +747,11 @@ DEF_ENC(V6_vshufoh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 100 ddddd") // DEF_ENC(V6_vshufoeh, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 101 ddddd") // DEF_ENC(V6_vshufoeb, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 110 ddddd") // DEF_ENC(V6_vcombine, ICLASS_CJ" 1 111 010 vvvvv PP 0 uuuuu 111 ddddd") // +DEF_ENC(V6_vcombine_tmp,"00011110101vvvvvPP0uuuuu111ddddd") + +DEF_ENC(V6_v6mpyvubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 0ii ddddd") +DEF_ENC(V6_v6mpyhubs10, ICLASS_CJ" 1 111 010 vvvvv PP 1 uuuuu 1ii ddddd") + DEF_ENC(V6_vmpyieoh, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 000 ddddd") // DEF_ENC(V6_vadduwsat, ICLASS_CJ" 1 111 011 vvvvv PP 0 uuuuu 001 ddddd") // @@ -789,6 +802,7 @@ DEF_ENC(V6_vrounduhub, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 011 ddddd") // DEF_ENC(V6_vrounduwuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 100 ddddd") // DEF_ENC(V6_vmpyewuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd") DEF_ENC(V6_vmpyowh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 ddddd") +DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd") #endif /* NO MMVEC */ diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef index 8ca5a606e1..ead32c243b 100644 --- a/target/hexagon/imported/mmvec/ext.idef +++ 
b/target/hexagon/imported/mmvec/ext.idef @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -62,6 +62,9 @@ EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS), \ DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) +#define ITERATOR_INSN_SHIFT3_SLOT(WIDTH,TAG,SYNTAX,DESCR,CODE) \ +EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS,A_CVI_VS_3SRC,A_NOTE_SHIFT_RESOURCE,A_NOTE_NOVP,A_NOTE_VA_UNARY), \ +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) #define ITERATOR_INSN_SHIFT_SLOT_VV_LATE(WIDTH,TAG,SYNTAX,DESCR,CODE) \ EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VS), \ @@ -116,6 +119,10 @@ ITERATOR_INSN_MPY_SLOT_LATE(WIDTH,TAG, SYNTAX2,DESCR,CODE) EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \ DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) +#define ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(WIDTH,TAG,SYNTAX,DESCR,CODE) \ +EXTINSN(V6_##TAG, SYNTAX, ATTRIBS(A_EXTENSION,A_CVI,A_CVI_VX_DV), \ +DESCR, DO_FOR_EACH_CODE(WIDTH, CODE)) + #define ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX,SYNTAX2,DESCR,CODE) \ ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(WIDTH,TAG,SYNTAX2,DESCR,CODE) @@ -976,6 +983,22 @@ NARROWING_SHIFT(16,vasrhubrndsat,fSETBYTE,ub,h,:rnd:sat,fVSATUB,fVROUND,0x7) NARROWING_SHIFT(16,vasrhbsat,fSETBYTE,b,h,:sat,fVSATB,fVNOROUND,0x7) NARROWING_SHIFT(16,vasrhbrndsat,fSETBYTE,b,h,:rnd:sat,fVSATB,fVROUND,0x7) +#define NARROWING_VECTOR_SHIFT(ITERSIZE,TAG,DSTM,DSTTYPE,SRCTYPE,SRCTYPE2,SYNOPTS,SATFUNC,RNDFUNC,SHAMTMASK) \ +ITERATOR_INSN_SHIFT3_SLOT(ITERSIZE,TAG, \ +"Vd32." #DSTTYPE "=vasr(Vuu32." #SRCTYPE ",Vv32." 
#SRCTYPE2 ")" #SYNOPTS, \ +"Vector shift by vector right and shuffle", \ + fHIDE(int )shamt = VvV.SRCTYPE2[2*i+0] & SHAMTMASK; \ + DSTM(0,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuuV.v[0].SRCTYPE[i],shamt) >> shamt)); \ + shamt = VvV.SRCTYPE2[2*i+1] & SHAMTMASK; \ + DSTM(1,VdV.SRCTYPE[i],SATFUNC(RNDFUNC(VuuV.v[1].SRCTYPE[i],shamt) >> shamt))) + +/* WORD TO HALF*/ +NARROWING_VECTOR_SHIFT(32,vasrvwuhsat,fSETHALF,uh,w,uh,:sat,fVSATUH,fVNOROUND,0xF) +NARROWING_VECTOR_SHIFT(32,vasrvwuhrndsat,fSETHALF,uh,w,uh,:rnd:sat,fVSATUH,fVROUND,0xF) +/* HALF TO BYTE*/ +NARROWING_VECTOR_SHIFT(16,vasrvuhubsat,fSETBYTE,ub,uh,ub,:sat,fVSATUB,fVNOROUND,0x7) +NARROWING_VECTOR_SHIFT(16,vasrvuhubrndsat,fSETBYTE,ub,uh,ub,:rnd:sat,fVSATUB,fVROUND,0x7) + NARROWING_SHIFT_NOV1(16,vasruhubsat,fSETBYTE,ub,uh,:sat,fVSATUB,fVNOROUND,0x7) NARROWING_SHIFT_NOV1(16,vasruhubrndsat,fSETBYTE,ub,uh,:rnd:sat,fVSATUB,fVROUND,0x7) @@ -1360,6 +1383,9 @@ ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(16,vmpyhvsrs,"Vd32=vmpyh(Vu32,Vv32):<<1:rnd:s +ITERATOR_INSN_MPY_SLOT(16,vmpyuhvs, "Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16", +"Vector by Vector Unsigned Halfword Multiply with 16 bit rightshift", + VdV.uh[i] = fGETUHALF(1,fMPY16UU(VuV.uh[i],VvV.uh[i]))) ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus, "Vdd32=vmpyhus(Vu32,Vv32)","Vdd32.w=vmpy(Vu32.h,Vv32.uh)", @@ -2038,6 +2064,24 @@ ITERATOR_INSN_ANY_SLOT_DOUBLE_VEC(8,vcombine,"Vdd32=vcombine(Vu32,Vv32)", /////////////////////////////////////////////////////////////////////////// +EXTINSN(V6_vcombine_tmp, "Vdd32.tmp=vcombine(Vu32,Vv32)", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_REMAP,A_CVI_TMP,A_NO_INTRINSIC), +"Vector assign tmp, Any two to Vector Pair ", +{ + fHIDE(int i;) + fVFOREACH(8, i) { + VddV.v[0].ub[i] = VvV.ub[i]; + VddV.v[1].ub[i] = VuV.ub[i]; + } +}) + +EXTINSN(V6_vassign_tmp, "Vd32.tmp=Vu32", ATTRIBS(A_EXTENSION,A_CVI,A_CVI_REMAP,A_CVI_TMP,A_NO_INTRINSIC), +"Vector assign tmp, Any two to Vector Pair ", +{ + fHIDE(int i;) + fVFOREACH(32, i) { + VdV.w[i]=VuV.w[i]; + } +}) 
/********************************************************* * GENERAL PERMUTE NETWORKS @@ -2507,6 +2551,281 @@ EXTINSN(V6_vscattermhw , "vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", ATTRIBS(A_EXTENSIO }) +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyvubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "", + fHIDE(size2s_t c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(size2s_t c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(size2s_t c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + + fHIDE(size2s_t c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(size2s_t c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(size2s_t c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 1) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + } else if (uiV == 2) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VxxV.v[1].w[i] 
+= fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 3) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + } +) +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC_VX_FWD(32, v6mpyhubs10_vxx, "Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "", + fHIDE(size2s_t c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(size2s_t c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(size2s_t c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(size2s_t c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(size2s_t c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(size2s_t c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += 
fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 1) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + } else if (uiV == 2) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 3) { + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VxxV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VxxV.v[0].w[i] += 
fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VxxV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + } +) + + +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyvubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v", "", + fHIDE(short c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(short c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(short c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(short c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(short c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(short c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + + + if (uiV == 0) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 1) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c02); + + } else if (uiV == 2) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), 
c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c12); + + } else if (uiV == 3) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c02); + } +) + +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32, v6mpyhubs10, "Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h", "", + fHIDE(short c00;) + fGET10BIT(c00, VvvV.v[0].uw[i], 0) + fHIDE(short c01;) + fGET10BIT(c01, VvvV.v[0].uw[i], 1) + fHIDE(short c02;) + fGET10BIT(c02, VvvV.v[0].uw[i], 2) + fHIDE(short c10;) + fGET10BIT(c10, VvvV.v[1].uw[i], 0) + fHIDE(short c11;) + fGET10BIT(c11, VvvV.v[1].uw[i], 1) + fHIDE(short c12;) + fGET10BIT(c12, VvvV.v[1].uw[i], 2) + + if (uiV == 0) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), 
c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 1) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(3,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[1].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c02); + + } else if (uiV == 2) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c12); + + } else if (uiV == 3) { + VddV.v[1].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c00); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c01); + VddV.v[1].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c02); + + VddV.v[0].w[i] = fMPY16US(fGETUBYTE(1,VuuV.v[1].uw[i]), c10); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(3,VuuV.v[0].uw[i]), c11); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(1,VuuV.v[0].uw[i]), c12); + + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[1].uw[i]), c00); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(2,VuuV.v[0].uw[i]), c01); + VddV.v[0].w[i] += fMPY16US(fGETUBYTE(0,VuuV.v[0].uw[i]), c02); + } +) + EXTINSN(V6_vscattermhwq, "if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32", 
ATTRIBS(A_EXTENSION,A_CVI,A_CVI_SCATTER,A_CVI_VA_DV,A_CVI_VM,A_MEMLIKE), "Scatter halfwords conditional", { diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index b1bfadc3f5..d732b6bb3c 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -33,6 +33,8 @@ int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +int hexagon_hvx_gdb_read_register(CPUHexagonState *env, GByteArray *mem_buf, int n); +int hexagon_hvx_gdb_write_register(CPUHexagonState *env, uint8_t *mem_buf, int n); void hexagon_debug_vreg(CPUHexagonState *env, int regnum); void hexagon_debug_qreg(CPUHexagonState *env, int regnum); diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 760630de8f..5451b061ee 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -44,8 +44,17 @@ reg_field_info[FIELD].offset) #define SET_USR_FIELD(FIELD, VAL) \ - fINSERT_BITS(env->new_value[HEX_REG_USR], reg_field_info[FIELD].width, \ - reg_field_info[FIELD].offset, (VAL)) + do { \ + if (pkt_need_commit) { \ + fINSERT_BITS(env->new_value_usr, \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } else { \ + fINSERT_BITS(env->gpr[HEX_REG_USR], \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + } \ + } while (0) #endif #ifdef QEMU_GENERATE @@ -164,14 +173,14 @@ #define MEM_STORE8(VA, DATA, SLOT) \ MEM_STORE8_FUNC(DATA)(cpu_env, VA, DATA, SLOT) #else -#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, slot, VA)) -#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, slot, VA)) -#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, slot, VA)) -#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, slot, VA)) -#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, slot, VA)) -#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, slot, VA)) -#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, slot, VA)) -#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, slot, VA)) 
+#define MEM_LOAD1s(VA) ((int8_t)mem_load1(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD1u(VA) ((uint8_t)mem_load1(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD2s(VA) ((int16_t)mem_load2(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD2u(VA) ((uint16_t)mem_load2(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD4s(VA) ((int32_t)mem_load4(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD4u(VA) ((uint32_t)mem_load4(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD8s(VA) ((int64_t)mem_load8(env, pkt_has_store_s1, slot, VA)) +#define MEM_LOAD8u(VA) ((uint64_t)mem_load8(env, pkt_has_store_s1, slot, VA)) #define MEM_STORE1(VA, DATA, SLOT) log_store32(env, VA, DATA, 1, SLOT) #define MEM_STORE2(VA, DATA, SLOT) log_store32(env, VA, DATA, 2, SLOT) @@ -227,12 +236,8 @@ static inline void gen_cancel(uint32_t slot) #ifdef QEMU_GENERATE #define fLSBNEW(PVAL) tcg_gen_andi_tl(LSB, (PVAL), 1) -#define fLSBNEW0 tcg_gen_andi_tl(LSB, hex_new_pred_value[0], 1) -#define fLSBNEW1 tcg_gen_andi_tl(LSB, hex_new_pred_value[1], 1) #else #define fLSBNEW(PVAL) ((PVAL) & 1) -#define fLSBNEW0 (env->new_pred_value[0] & 1) -#define fLSBNEW1 (env->new_pred_value[1] & 1) #endif #ifdef QEMU_GENERATE @@ -347,10 +352,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fREAD_LR() (env->gpr[HEX_REG_LR]) -#define fWRITE_LR(A) log_reg_write(env, HEX_REG_LR, A) -#define fWRITE_FP(A) log_reg_write(env, HEX_REG_FP, A) -#define fWRITE_SP(A) log_reg_write(env, HEX_REG_SP, A) - #define fREAD_SP() (env->gpr[HEX_REG_SP]) #define fREAD_LC0 (env->gpr[HEX_REG_LC0]) #define fREAD_LC1 (env->gpr[HEX_REG_LC1]) @@ -375,24 +376,10 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fBRANCH(LOC, TYPE) fWRITE_NPC(LOC) #define fJUMPR(REGNO, TARGET, TYPE) fBRANCH(TARGET, COF_TYPE_JUMPR) #define fHINTJR(TARGET) { /* Not modelled in qemu */} -#define fWRITE_LOOP_REGS0(START, COUNT) \ - do { \ - log_reg_write(env, HEX_REG_LC0, COUNT); \ - log_reg_write(env, 
HEX_REG_SA0, START); \ - } while (0) -#define fWRITE_LOOP_REGS1(START, COUNT) \ - do { \ - log_reg_write(env, HEX_REG_LC1, COUNT); \ - log_reg_write(env, HEX_REG_SA1, START);\ - } while (0) #define fSET_OVERFLOW() SET_USR_FIELD(USR_OVF, 1) #define fSET_LPCFG(VAL) SET_USR_FIELD(USR_LPCFG, (VAL)) #define fGET_LPCFG (GET_USR_FIELD(USR_LPCFG)) -#define fWRITE_P0(VAL) log_pred_write(env, 0, VAL) -#define fWRITE_P1(VAL) log_pred_write(env, 1, VAL) -#define fWRITE_P2(VAL) log_pred_write(env, 2, VAL) -#define fWRITE_P3(VAL) log_pred_write(env, 3, VAL) #define fPART1(WORK) if (part1) { WORK; return; } #define fCAST4u(A) ((uint32_t)(A)) #define fCAST4s(A) ((int32_t)(A)) @@ -661,7 +648,11 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) reg_field_info[FIELD].offset) #ifdef QEMU_GENERATE -#define fDCZEROA(REG) tcg_gen_mov_tl(hex_dczero_addr, (REG)) +#define fDCZEROA(REG) \ + do { \ + ctx->dczero_addr = tcg_temp_new(); \ + tcg_gen_mov_tl(ctx->dczero_addr, (REG)); \ + } while (0) #endif #define fBRANCH_SPECULATE_STALL(DOTNEWVAL, JUMP_COND, SPEC_DIR, HINTBITNUM, \ diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h index 1201d778d0..a655634fd1 100644 --- a/target/hexagon/mmvec/macros.h +++ b/target/hexagon/mmvec/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -346,4 +346,11 @@ #define fUARCH_NOTE_PUMP_2X() #define IV1DEAD() + +#define fGET10BIT(COE, VAL, POS) \ + do { \ + COE = (sextract32(VAL, 24 + 2 * POS, 2) << 8) | \ + extract32(VAL, POS * 8, 8); \ + } while (0); + #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 3cc71b69d9..12967ac21e 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -52,38 +52,6 @@ G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp) do_raise_exception_err(env, excp, 0); } -void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val) -{ - HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")", - rnum, val, val); - if (val == env->gpr[rnum]) { - HEX_DEBUG_LOG(" NO CHANGE"); - } - HEX_DEBUG_LOG("\n"); - - env->new_value[rnum] = val; - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - env->reg_written[rnum] = 1; - } -} - -static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val) -{ - HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld - " (0x" TARGET_FMT_lx ")\n", - pnum, val, val); - - /* Multiple writes to the same preg are and'ed together */ - if (env->pred_written & (1 << pnum)) { - env->new_pred_value[pnum] &= val & 0xff; - } else { - env->new_pred_value[pnum] = val & 0xff; - env->pred_written |= 1 << pnum; - } -} - void log_store32(CPUHexagonState *env, target_ulong addr, target_ulong val, int width, int slot) { @@ -235,14 +203,14 @@ static void print_store(CPUHexagonState *env, int slot) } /* This function is a handy place to set a breakpoint */ -void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) +void HELPER(debug_commit_end)(CPUHexagonState *env, uint32_t this_PC, + int pred_written, int has_st0, int has_st1) { bool reg_printed = false; bool pred_printed = false; int 
i; - HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", - env->this_PC); + HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", this_PC); HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled); for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { @@ -252,18 +220,18 @@ void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1) reg_printed = true; } HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n", - i, env->new_value[i], env->new_value[i]); + i, env->gpr[i], env->gpr[i]); } } for (i = 0; i < NUM_PREGS; i++) { - if (env->pred_written & (1 << i)) { + if (pred_written & (1 << i)) { if (!pred_printed) { HEX_DEBUG_LOG("Predicates written\n"); pred_printed = true; } HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n", - i, env->new_pred_value[i]); + i, env->pred[i]); } } @@ -384,7 +352,8 @@ uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV) } int64_t HELPER(vacsh_val)(CPUHexagonState *env, - int64_t RxxV, int64_t RssV, int64_t RttV) + int64_t RxxV, int64_t RssV, int64_t RttV, + uint32_t pkt_need_commit) { for (int i = 0; i < 4; i++) { int xv = sextract64(RxxV, i * 16, 16); @@ -416,6 +385,87 @@ int32_t HELPER(vacsh_pred)(CPUHexagonState *env, return PeV; } +int64_t HELPER(cabacdecbin_val)(int64_t RssV, int64_t RttV) +{ + int64_t RddV = 0; + size4u_t state; + size4u_t valMPS; + size4u_t bitpos; + size4u_t range; + size4u_t offset; + size4u_t rLPS; + size4u_t rMPS; + + state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0); + valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8); + bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0); + range = fGETWORD(0, RssV); + offset = fGETWORD(1, RssV); + + /* calculate rLPS */ + range <<= bitpos; + offset <<= bitpos; + rLPS = rLPS_table_64x4[state][(range >> 29) & 3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS = (range & 0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + RddV = AC_next_state_MPS_64[state]; + fINSERT_RANGE(RddV, 8, 8, 
valMPS); + fINSERT_RANGE(RddV, 31, 23, (rMPS >> 23)); + fSETWORD(1, RddV, offset); + } + /* least probable region */ + else { + RddV = AC_next_state_LPS_64[state]; + fINSERT_RANGE(RddV, 8, 8, ((!state) ? (1 - valMPS) : (valMPS))); + fINSERT_RANGE(RddV, 31, 23, (rLPS >> 23)); + fSETWORD(1, RddV, (offset - rMPS)); + } + return RddV; +} + +int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV) +{ + int32_t p0 = 0; + size4u_t state; + size4u_t valMPS; + size4u_t bitpos; + size4u_t range; + size4u_t offset; + size4u_t rLPS; + size4u_t rMPS; + + state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0); + valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8); + bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0); + range = fGETWORD(0, RssV); + offset = fGETWORD(1, RssV); + + /* calculate rLPS */ + range <<= bitpos; + offset <<= bitpos; + rLPS = rLPS_table_64x4[state][(range >> 29) & 3]; + rLPS = rLPS << 23; /* left aligned */ + + /* calculate rMPS */ + rMPS = (range & 0xff800000) - rLPS; + + /* most probable region */ + if (offset < rMPS) { + p0 = valMPS; + + } + /* least probable region */ + else { + p0 = valMPS ^ 1; + } + return p0; +} + static void probe_store(CPUHexagonState *env, int slot, int mmu_idx, bool is_predicated) { @@ -516,41 +566,45 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask) * If the load is in slot 0 and there is a store in slot1 (that * wasn't cancelled), we have to do the store first. 
*/ -static void check_noshuf(CPUHexagonState *env, uint32_t slot, - target_ulong vaddr, int size) +static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr, int size) { - if (slot == 0 && env->pkt_has_store_s1 && + if (slot == 0 && pkt_has_store_s1 && ((env->slot_cancelled & (1 << 1)) == 0)) { HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX); HELPER(commit_store)(env, 1); } } -uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 1); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1); return cpu_ldub_data_ra(env, vaddr, ra); } -uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 2); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2); return cpu_lduw_data_ra(env, vaddr, ra); } -uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 4); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4); return cpu_ldl_data_ra(env, vaddr, ra); } -uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr) +uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr) { uintptr_t ra = GETPC(); - check_noshuf(env, slot, vaddr, 8); + check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8); return cpu_ldq_data_ra(env, vaddr, ra); } diff --git a/target/hexagon/op_helper.h b/target/hexagon/op_helper.h index db22b54401..8f3764d15e 100644 --- a/target/hexagon/op_helper.h +++ b/target/hexagon/op_helper.h @@ -19,15 +19,15 @@ #define 
HEXAGON_OP_HELPER_H /* Misc functions */ -void write_new_pc(CPUHexagonState *env, bool pkt_has_multi_cof, target_ulong addr); +uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); +uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1, + uint32_t slot, target_ulong vaddr); -uint8_t mem_load1(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint16_t mem_load2(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint32_t mem_load4(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); -uint64_t mem_load8(CPUHexagonState *env, uint32_t slot, target_ulong vaddr); - -void log_reg_write(CPUHexagonState *env, int rnum, - target_ulong val); void log_store64(CPUHexagonState *env, target_ulong addr, int64_t val, int width, int slot); void log_store32(CPUHexagonState *env, target_ulong addr, diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index cddd7c5db4..b18f1a9051 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -27,6 +27,7 @@ #include "insn.h" #include "decode.h" #include "translate.h" +#include "genptr.h" #include "printinsn.h" #include "analyze_funcs_generated.c.inc" @@ -40,19 +41,13 @@ static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; TCGv hex_pred[NUM_PREGS]; -TCGv hex_this_PC; TCGv hex_slot_cancelled; -TCGv hex_branch_taken; -TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; +TCGv hex_new_value_usr; TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -TCGv hex_new_pred_value[NUM_PREGS]; -TCGv hex_pred_written; TCGv hex_store_addr[STORES_MAX]; TCGv hex_store_width[STORES_MAX]; TCGv hex_store_val32[STORES_MAX]; TCGv_i64 hex_store_val64[STORES_MAX]; -TCGv hex_pkt_has_store_s1; -TCGv hex_dczero_addr; TCGv 
hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; @@ -69,6 +64,10 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, { intptr_t offset; + if (!ctx->need_commit) { + return offsetof(CPUHexagonState, VRegs[regnum]); + } + /* See if it is already allocated */ for (int i = 0; i < ctx->future_vregs_idx; i++) { if (ctx->future_vregs_num[i] == regnum) { @@ -154,7 +153,7 @@ static void gen_end_tb(DisasContext *ctx) if (ctx->branch_cond != TCG_COND_NEVER) { if (ctx->branch_cond != TCG_COND_ALWAYS) { TCGLabel *skip = gen_new_label(); - tcg_gen_brcondi_tl(ctx->branch_cond, hex_branch_taken, 0, skip); + tcg_gen_brcondi_tl(ctx->branch_cond, ctx->branch_taken, 0, skip); gen_goto_tb(ctx, 0, ctx->branch_dest, true); gen_set_label(skip); gen_goto_tb(ctx, 1, ctx->next_PC, false); @@ -262,11 +261,6 @@ static bool need_slot_cancelled(Packet *pkt) return false; } -static bool need_pred_written(Packet *pkt) -{ - return check_for_attrib(pkt, A_WRITES_PRED_REG); -} - static bool need_next_PC(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -341,10 +335,131 @@ static void mark_implicit_pred_writes(DisasContext *ctx) mark_implicit_pred_write(ctx, A_IMPLICIT_WRITES_P3, 3); } +static bool pkt_raises_exception(Packet *pkt) +{ + if (check_for_attrib(pkt, A_LOAD) || + check_for_attrib(pkt, A_STORE)) { + return true; + } + return false; +} + +static bool need_commit(DisasContext *ctx) +{ + Packet *pkt = ctx->pkt; + + /* + * If the short-circuit property is set to false, we'll always do the commit + */ + if (!ctx->short_circuit) { + return true; + } + + if (pkt_raises_exception(pkt)) { + return true; + } + + /* Registers with immutability flags require new_value */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (reg_immut_masks[rnum]) { + return true; + } + } + + /* Floating point instructions are hard-coded to use new_value */ + if (check_for_attrib(pkt, A_FPOP)) { + return true; + } + + if (pkt->num_insns == 1) { + if (pkt->pkt_has_hvx) { 
+ /* + * The HVX instructions with generated helpers use + * pass-by-reference, so they need the read/write overlap + * check below. + * The HVX instructions with overrides are OK. + */ + if (!ctx->has_hvx_helper) { + return false; + } + } else { + return false; + } + } + + /* Check for overlap between register reads and writes */ + for (int i = 0; i < ctx->reg_log_idx; i++) { + int rnum = ctx->reg_log[i]; + if (test_bit(rnum, ctx->regs_read)) { + return true; + } + } + + /* Check for overlap between predicate reads and writes */ + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pnum = ctx->preg_log[i]; + if (test_bit(pnum, ctx->pregs_read)) { + return true; + } + } + + /* Check for overlap between HVX reads and writes */ + for (int i = 0; i < ctx->vreg_log_idx; i++) { + int vnum = ctx->vreg_log[i]; + if (test_bit(vnum, ctx->vregs_read)) { + return true; + } + } + if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) { + int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS); + while (i < NUM_VREGS) { + if (test_bit(i, ctx->vregs_read)) { + return true; + } + i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1); + } + } + if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) { + int i = find_first_bit(ctx->vregs_select, NUM_VREGS); + while (i < NUM_VREGS) { + if (test_bit(i, ctx->vregs_read)) { + return true; + } + i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1); + } + } + + /* Check for overlap between HVX predicate reads and writes */ + for (int i = 0; i < ctx->qreg_log_idx; i++) { + int qnum = ctx->qreg_log[i]; + if (test_bit(qnum, ctx->qregs_read)) { + return true; + } + } + + return false; +} + +static void mark_implicit_pred_read(DisasContext *ctx, int attrib, int pnum) +{ + if (GET_ATTRIB(ctx->insn->opcode, attrib)) { + ctx_log_pred_read(ctx, pnum); + } +} + +static void mark_implicit_pred_reads(DisasContext *ctx) +{ + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P0, 0); + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P1, 1); + 
mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P2, 2); + mark_implicit_pred_read(ctx, A_IMPLICIT_READS_P3, 3); +} + static void analyze_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; - ctx->need_pkt_has_store_s1 = false; + ctx->has_hvx_helper = false; for (int i = 0; i < pkt->num_insns; i++) { Insn *insn = &pkt->insn[i]; ctx->insn = insn; @@ -353,7 +468,10 @@ static void analyze_packet(DisasContext *ctx) } mark_implicit_reg_writes(ctx); mark_implicit_pred_writes(ctx); + mark_implicit_pred_reads(ctx); } + + ctx->need_commit = need_commit(ctx); } static void gen_start_packet(DisasContext *ctx) @@ -366,9 +484,11 @@ static void gen_start_packet(DisasContext *ctx) ctx->next_PC = next_PC; ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); + bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS); bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); + bitmap_zero(ctx->pregs_read, NUM_PREGS); ctx->future_vregs_idx = 0; ctx->tmp_vregs_idx = 0; ctx->vreg_log_idx = 0; @@ -377,19 +497,23 @@ static void gen_start_packet(DisasContext *ctx) bitmap_zero(ctx->vregs_select, NUM_VREGS); bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); + bitmap_zero(ctx->vregs_read, NUM_VREGS); + bitmap_zero(ctx->qregs_read, NUM_QREGS); ctx->qreg_log_idx = 0; for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; } ctx->s1_store_processed = false; ctx->pre_commit = true; + for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { + ctx->new_value[i] = NULL; + } + for (i = 0; i < NUM_PREGS; i++) { + ctx->new_pred_value[i] = NULL; + } analyze_packet(ctx); - if (ctx->need_pkt_has_store_s1) { - tcg_gen_movi_tl(hex_pkt_has_store_s1, pkt->pkt_has_store_s1); - } - /* * pregs_written is used both in the analyze phase as well as the code * gen phase, so clear it again.
@@ -399,35 +523,50 @@ static void gen_start_packet(DisasContext *ctx) if (HEX_DEBUG) { /* Handy place to set a breakpoint before the packet executes */ gen_helper_debug_start_packet(cpu_env); - tcg_gen_movi_tl(hex_this_PC, ctx->base.pc_next); } /* Initialize the runtime state for packet semantics */ if (need_slot_cancelled(pkt)) { tcg_gen_movi_tl(hex_slot_cancelled, 0); } + ctx->branch_taken = NULL; if (pkt->pkt_has_cof) { + ctx->branch_taken = tcg_temp_new(); if (pkt->pkt_has_multi_cof) { - tcg_gen_movi_tl(hex_branch_taken, 0); + tcg_gen_movi_tl(ctx->branch_taken, 0); } if (need_next_PC(ctx)) { tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); } } - if (need_pred_written(pkt)) { - tcg_gen_movi_tl(hex_pred_written, 0); + if (HEX_DEBUG) { + ctx->pred_written = tcg_temp_new(); + tcg_gen_movi_tl(ctx->pred_written, 0); } - /* Preload the predicated registers into hex_new_value[i] */ - if (!bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { + /* Preload the predicated registers into get_result_gpr(ctx, i) */ + if (ctx->need_commit && + !bitmap_empty(ctx->predicated_regs, TOTAL_PER_THREAD_REGS)) { int i = find_first_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); while (i < TOTAL_PER_THREAD_REGS) { - tcg_gen_mov_tl(hex_new_value[i], hex_gpr[i]); + tcg_gen_mov_tl(get_result_gpr(ctx, i), hex_gpr[i]); i = find_next_bit(ctx->predicated_regs, TOTAL_PER_THREAD_REGS, i + 1); } } + /* + * Preload the predicated pred registers into hex_new_pred_value[pred_num] + * Only endloop instructions conditionally write to pred registers + */ + if (ctx->need_commit && pkt->pkt_has_endloop) { + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pred_num = ctx->preg_log[i]; + ctx->new_pred_value[pred_num] = tcg_temp_new(); + tcg_gen_mov_tl(ctx->new_pred_value[pred_num], hex_pred[pred_num]); + } + } + /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */ if (!bitmap_empty(ctx->predicated_future_vregs, NUM_VREGS)) { int i = 
find_first_bit(ctx->predicated_future_vregs, NUM_VREGS); @@ -481,6 +620,9 @@ static void mark_store_width(DisasContext *ctx) uint8_t width = 0; if (GET_ATTRIB(opcode, A_SCALAR_STORE)) { + if (GET_ATTRIB(opcode, A_MEMSIZE_0B)) { + return; + } if (GET_ATTRIB(opcode, A_MEMSIZE_1B)) { width |= 1; } @@ -515,10 +657,15 @@ static void gen_reg_writes(DisasContext *ctx) { int i; + /* Early exit if not needed */ + if (!ctx->need_commit) { + return; + } + for (i = 0; i < ctx->reg_log_idx; i++) { int reg_num = ctx->reg_log[i]; - tcg_gen_mov_tl(hex_gpr[reg_num], hex_new_value[reg_num]); + tcg_gen_mov_tl(hex_gpr[reg_num], get_result_gpr(ctx, reg_num)); /* * ctx->is_tight_loop is set when SA0 points to the beginning of the TB. @@ -532,41 +679,14 @@ static void gen_reg_writes(DisasContext *ctx) static void gen_pred_writes(DisasContext *ctx) { - int i; - - /* Early exit if the log is empty */ - if (!ctx->preg_log_idx) { + /* Early exit if not needed or the log is empty */ + if (!ctx->need_commit || !ctx->preg_log_idx) { return; } - /* - * Only endloop instructions will conditionally - * write a predicate. If there are no endloop - * instructions, we can use the non-conditional - * write of the predicates. 
- */ - if (ctx->pkt->pkt_has_endloop) { - TCGv zero = tcg_constant_tl(0); - TCGv pred_written = tcg_temp_new(); - for (i = 0; i < ctx->preg_log_idx; i++) { - int pred_num = ctx->preg_log[i]; - - tcg_gen_andi_tl(pred_written, hex_pred_written, 1 << pred_num); - tcg_gen_movcond_tl(TCG_COND_NE, hex_pred[pred_num], - pred_written, zero, - hex_new_pred_value[pred_num], - hex_pred[pred_num]); - } - } else { - for (i = 0; i < ctx->preg_log_idx; i++) { - int pred_num = ctx->preg_log[i]; - tcg_gen_mov_tl(hex_pred[pred_num], hex_new_pred_value[pred_num]); - if (HEX_DEBUG) { - /* Do this so HELPER(debug_commit_end) will know */ - tcg_gen_ori_tl(hex_pred_written, hex_pred_written, - 1 << pred_num); - } - } + for (int i = 0; i < ctx->preg_log_idx; i++) { + int pred_num = ctx->preg_log[i]; + tcg_gen_mov_tl(hex_pred[pred_num], ctx->new_pred_value[pred_num]); } } @@ -692,7 +812,7 @@ static void process_dczeroa(DisasContext *ctx) TCGv addr = tcg_temp_new(); TCGv_i64 zero = tcg_constant_i64(0); - tcg_gen_andi_tl(addr, hex_dczero_addr, ~0x1f); + tcg_gen_andi_tl(addr, ctx->dczero_addr, ~0x1f); tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); tcg_gen_addi_tl(addr, addr, 8); tcg_gen_qemu_st_i64(zero, addr, ctx->mem_idx, MO_UQ); @@ -719,6 +839,12 @@ static void gen_commit_hvx(DisasContext *ctx) { int i; + /* Early exit if not needed */ + if (!ctx->need_commit) { + g_assert(!pkt_has_hvx_store(ctx->pkt)); + return; + } + /* * for (i = 0; i < ctx->vreg_log_idx; i++) { * int rnum = ctx->vreg_log[i]; @@ -873,7 +999,8 @@ static void gen_commit_packet(DisasContext *ctx) tcg_constant_tl(pkt->pkt_has_store_s1 && !pkt->pkt_has_dczeroa); /* Handy place to set a breakpoint at the end of execution */ - gen_helper_debug_commit_end(cpu_env, has_st0, has_st1); + gen_helper_debug_commit_end(cpu_env, tcg_constant_tl(ctx->pkt->pc), + ctx->pred_written, has_st0, has_st1); } if (pkt->vhist_insn != NULL) { @@ -920,6 +1047,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, CPUState 
*cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + HexagonCPU *hex_cpu = env_archcpu(cs->env_ptr); uint32_t hex_flags = dcbase->tb->flags; ctx->mem_idx = MMU_USER_IDX; @@ -928,6 +1056,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); + ctx->short_circuit = hex_cpu->short_circuit; } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -1028,9 +1157,7 @@ void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, } #define NAME_LEN 64 -static char new_value_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN]; -static char new_pred_value_names[NUM_PREGS][NAME_LEN]; static char store_addr_names[STORES_MAX][NAME_LEN]; static char store_width_names[STORES_MAX][NAME_LEN]; static char store_val32_names[STORES_MAX][NAME_LEN]; @@ -1050,11 +1177,6 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, gpr[i]), hexagon_regnames[i]); - snprintf(new_value_names[i], NAME_LEN, "new_%s", hexagon_regnames[i]); - hex_new_value[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, new_value[i]), - new_value_names[i]); - if (HEX_DEBUG) { snprintf(reg_written_names[i], NAME_LEN, "reg_written_%s", hexagon_regnames[i]); @@ -1063,29 +1185,16 @@ void hexagon_translate_init(void) reg_written_names[i]); } } + hex_new_value_usr = tcg_global_mem_new(cpu_env, + offsetof(CPUHexagonState, new_value_usr), "new_value_usr"); + for (i = 0; i < NUM_PREGS; i++) { hex_pred[i] = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, pred[i]), hexagon_prednames[i]); - - snprintf(new_pred_value_names[i], NAME_LEN, "new_pred_%s", - hexagon_prednames[i]); - hex_new_pred_value[i] = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, new_pred_value[i]), - new_pred_value_names[i]); } - hex_pred_written = tcg_global_mem_new(cpu_env, - 
offsetof(CPUHexagonState, pred_written), "pred_written"); - hex_this_PC = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, this_PC), "this_PC"); hex_slot_cancelled = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, slot_cancelled), "slot_cancelled"); - hex_branch_taken = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, branch_taken), "branch_taken"); - hex_pkt_has_store_s1 = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, pkt_has_store_s1), "pkt_has_store_s1"); - hex_dczero_addr = tcg_global_mem_new(cpu_env, - offsetof(CPUHexagonState, dczero_addr), "dczero_addr"); hex_llsc_addr = tcg_global_mem_new(cpu_env, offsetof(CPUHexagonState, llsc_addr), "llsc_addr"); hex_llsc_val = tcg_global_mem_new(cpu_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 4b9f21c41d..4dd59c6726 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -38,10 +38,12 @@ typedef struct DisasContext { int reg_log[REG_WRITES_MAX]; int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); + DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS); DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS); int preg_log[PRED_WRITES_MAX]; int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); + DECLARE_BITMAP(pregs_read, NUM_PREGS); uint8_t store_width[STORES_MAX]; bool s1_store_processed; int future_vregs_idx; @@ -55,13 +57,22 @@ typedef struct DisasContext { DECLARE_BITMAP(vregs_select, NUM_VREGS); DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS); DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS); + DECLARE_BITMAP(vregs_read, NUM_VREGS); int qreg_log[NUM_QREGS]; int qreg_log_idx; + DECLARE_BITMAP(qregs_read, NUM_QREGS); bool pre_commit; + bool need_commit; TCGCond branch_cond; target_ulong branch_dest; bool is_tight_loop; - bool need_pkt_has_store_s1; + bool short_circuit; + bool has_hvx_helper; + TCGv new_value[TOTAL_PER_THREAD_REGS]; + TCGv new_pred_value[NUM_PREGS]; + TCGv pred_written; + TCGv branch_taken; + TCGv 
dczero_addr; } DisasContext; static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) @@ -73,6 +84,11 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) } } +static inline void ctx_log_pred_read(DisasContext *ctx, int pnum) +{ + set_bit(pnum, ctx->pregs_read); +} + static inline void ctx_log_reg_write(DisasContext *ctx, int rnum, bool is_predicated) { @@ -99,6 +115,17 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum, ctx_log_reg_write(ctx, rnum + 1, is_predicated); } +static inline void ctx_log_reg_read(DisasContext *ctx, int rnum) +{ + set_bit(rnum, ctx->regs_read); +} + +static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum) +{ + ctx_log_reg_read(ctx, rnum); + ctx_log_reg_read(ctx, rnum + 1); +} + intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, int num, bool alloc_ok); intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, @@ -139,6 +166,17 @@ static inline void ctx_log_vreg_write_pair(DisasContext *ctx, ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated); } +static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum) +{ + set_bit(rnum, ctx->vregs_read); +} + +static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum) +{ + ctx_log_vreg_read(ctx, rnum ^ 0); + ctx_log_vreg_read(ctx, rnum ^ 1); +} + static inline void ctx_log_qreg_write(DisasContext *ctx, int rnum) { @@ -146,20 +184,20 @@ static inline void ctx_log_qreg_write(DisasContext *ctx, ctx->qreg_log_idx++; } +static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum) +{ + set_bit(qnum, ctx->qregs_read); +} + extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; extern TCGv hex_pred[NUM_PREGS]; -extern TCGv hex_this_PC; extern TCGv hex_slot_cancelled; -extern TCGv hex_branch_taken; -extern TCGv hex_new_value[TOTAL_PER_THREAD_REGS]; +extern TCGv hex_new_value_usr; extern TCGv hex_reg_written[TOTAL_PER_THREAD_REGS]; -extern TCGv hex_new_pred_value[NUM_PREGS]; -extern TCGv hex_pred_written; extern TCGv 
hex_store_addr[STORES_MAX]; extern TCGv hex_store_width[STORES_MAX]; extern TCGv hex_store_val32[STORES_MAX]; extern TCGv_i64 hex_store_val64[STORES_MAX]; -extern TCGv hex_dczero_addr; extern TCGv hex_llsc_addr; extern TCGv hex_llsc_val; extern TCGv_i64 hex_llsc_val_i64; diff --git a/tests/guest-debug/run-test.py b/tests/guest-debug/run-test.py index d865e46ecd..de6106a5e5 100755 --- a/tests/guest-debug/run-test.py +++ b/tests/guest-debug/run-test.py @@ -26,11 +26,12 @@ def get_args(): parser.add_argument("--qargs", help="Qemu arguments for test") parser.add_argument("--binary", help="Binary to debug", required=True) - parser.add_argument("--test", help="GDB test script", - required=True) + parser.add_argument("--test", help="GDB test script") parser.add_argument("--gdb", help="The gdb binary to use", default=None) + parser.add_argument("--gdb-args", help="Additional gdb arguments") parser.add_argument("--output", help="A file to redirect output to") + parser.add_argument("--stderr", help="A file to redirect stderr to") return parser.parse_args() @@ -58,6 +59,10 @@ if __name__ == '__main__': output = open(args.output, "w") else: output = None + if args.stderr: + stderr = open(args.stderr, "w") + else: + stderr = None socket_dir = TemporaryDirectory("qemu-gdbstub") socket_name = os.path.join(socket_dir.name, "gdbstub.socket") @@ -77,6 +82,8 @@ if __name__ == '__main__': # Now launch gdb with our test and collect the result gdb_cmd = "%s %s" % (args.gdb, args.binary) + if args.gdb_args: + gdb_cmd += " %s" % (args.gdb_args) # run quietly and ignore .gdbinit gdb_cmd += " -q -n -batch" # disable prompts in case of crash @@ -84,13 +91,14 @@ if __name__ == '__main__': # connect to remote gdb_cmd += " -ex 'target remote %s'" % (socket_name) # finally the test script itself - gdb_cmd += " -x %s" % (args.test) + if args.test: + gdb_cmd += " -x %s" % (args.test) sleep(1) log(output, "GDB CMD: %s" % (gdb_cmd)) - result = subprocess.call(gdb_cmd, shell=True, stdout=output) + 
result = subprocess.call(gdb_cmd, shell=True, stdout=output, stderr=stderr) # A result of greater than 128 indicates a fatal signal (likely a # crash due to gdb internal failure). That's a problem for GDB and diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index 7c94db4bc4..890cceed5d 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -45,10 +45,18 @@ HEX_TESTS += fpstuff HEX_TESTS += overflow HEX_TESTS += signal_context HEX_TESTS += reg_mut +HEX_TESTS += read_write_overlap HEX_TESTS += vector_add_int HEX_TESTS += scatter_gather HEX_TESTS += hvx_misc HEX_TESTS += hvx_histogram +HEX_TESTS += invalid-slots + +run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \ + test $$? -eq 1 && grep -q "exception $(strip $1)" $2.stderr) + +run-invalid-slots: invalid-slots + $(call run-and-check-exception, 0x15, $@, $(QEMU) $(QEMU_OPTS) $<) HEX_TESTS += test_abs HEX_TESTS += test_bitcnt @@ -76,17 +84,30 @@ HEX_TESTS += test_vminh HEX_TESTS += test_vpmpyh HEX_TESTS += test_vspliceb +HEX_TESTS += v68_scalar +HEX_TESTS += v68_hvx +HEX_TESTS += v69_hvx +HEX_TESTS += v73_scalar + TESTS += $(HEX_TESTS) # This test has to be compiled for the -mv67t target usr: usr.c $(CC) $(CFLAGS) -mv67t -O2 -Wno-inline-asm -Wno-expansion-to-defined $< -o $@ $(LDFLAGS) +# Build this test with -mv71 to exercise the CABAC instruction +misc: misc.c + $(CC) $(CFLAGS) -mv71 -O2 $< -o $@ $(LDFLAGS) scatter_gather: CFLAGS += -mhvx vector_add_int: CFLAGS += -mhvx -fvectorize hvx_misc: hvx_misc.c hvx_misc.h hvx_misc: CFLAGS += -mhvx hvx_histogram: CFLAGS += -mhvx -Wno-gnu-folding-constant +v68_hvx: v68_hvx.c hvx_misc.h v6mpy_ref.c.inc +v68_hvx: CFLAGS += -mhvx -Wno-unused-function +v69_hvx: v69_hvx.c hvx_misc.h +v69_hvx: CFLAGS += -mhvx -Wno-unused-function +v73_scalar: CFLAGS += -Wno-unused-function hvx_histogram: hvx_histogram.c hvx_histogram_row.S $(CC) $(CFLAGS) $(CROSS_CC_GUEST_CFLAGS) $^ -o $@ $(LDFLAGS) diff --git 
a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c index 90ce9a6ef3..28f9397155 100644 --- a/tests/tcg/hexagon/fpstuff.c +++ b/tests/tcg/hexagon/fpstuff.c @@ -20,6 +20,7 @@ */ #include +#include const int FPINVF_BIT = 1; /* Invalid */ const int FPINVF = 1 << FPINVF_BIT; @@ -706,6 +707,57 @@ static void check_float2int_convs() check_fpstatus(usr, FPINVF); } +static void check_float_consts(void) +{ + int res32; + unsigned long long res64; + + asm("%0 = sfmake(#%1):neg\n\t" : "=r"(res32) : "i"(0xf)); + check32(res32, 0xbc9e0000); + + asm("%0 = sfmake(#%1):pos\n\t" : "=r"(res32) : "i"(0xf)); + check32(res32, 0x3c9e0000); + + asm("%0 = dfmake(#%1):neg\n\t" : "=r"(res64) : "i"(0xf)); + check64(res64, 0xbf93c00000000000ULL); + + asm("%0 = dfmake(#%1):pos\n\t" : "=r"(res64) : "i"(0xf)); + check64(res64, 0x3f93c00000000000ULL); +} + +static inline unsigned long long dfmpyll(double x, double y) +{ + unsigned long long res64; + asm("%0 = dfmpyll(%1, %2)" : "=r"(res64) : "r"(x), "r"(y)); + return res64; +} + +static inline unsigned long long dfmpylh(double acc, double x, double y) +{ + unsigned long long res64 = *(unsigned long long *)&acc; + asm("%0 += dfmpylh(%1, %2)" : "+r"(res64) : "r"(x), "r"(y)); + return res64; +} + +static void check_dfmpyxx(void) +{ + unsigned long long res64; + + res64 = dfmpyll(DBL_MIN, DBL_MIN); + check64(res64, 0ULL); + res64 = dfmpyll(-1.0, DBL_MIN); + check64(res64, 0ULL); + res64 = dfmpyll(DBL_MAX, DBL_MAX); + check64(res64, 0x1fffffffdULL); + + res64 = dfmpylh(DBL_MIN, DBL_MIN, DBL_MIN); + check64(res64, 0x10000000000000ULL); + res64 = dfmpylh(-1.0, DBL_MAX, DBL_MIN); + check64(res64, 0xc00fffffffe00000ULL); + res64 = dfmpylh(DBL_MAX, 0.0, -1.0); + check64(res64, 0x7fefffffffffffffULL); +} + int main() { check_compare_exception(); @@ -718,6 +770,8 @@ int main() check_sffixupd(); check_sffms(); check_float2int_convs(); + check_float_consts(); + check_dfmpyxx(); puts(err ? "FAIL" : "PASS"); return err ? 
1 : 0; diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index d0e64e035f..09dec8d7a1 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ b/tests/tcg/hexagon/hvx_misc.c @@ -342,49 +342,6 @@ static void test_vsubuwsat_dv(void) check_output_w(__LINE__, 2); } -static void test_vshuff(void) -{ - /* Test that vshuff works when the two operands are the same register */ - const uint32_t splat = 0x089be55c; - const uint32_t shuff = 0x454fa926; - MMVector v0, v1; - - memset(expect, 0x12, sizeof(MMVector)); - memset(output, 0x34, sizeof(MMVector)); - - asm volatile("v25 = vsplat(%0)\n\t" - "vshuff(v25, v25, %1)\n\t" - "vmem(%2 + #0) = v25\n\t" - : /* no outputs */ - : "r"(splat), "r"(shuff), "r"(output) - : "v25", "memory"); - - /* - * The semantics of Hexagon are the operands are pass-by-value, so create - * two copies of the vsplat result. - */ - for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { - v0.uw[i] = splat; - v1.uw[i] = splat; - } - /* Do the vshuff operation */ - for (int offset = 1; offset < MAX_VEC_SIZE_BYTES; offset <<= 1) { - if (shuff & offset) { - for (int k = 0; k < MAX_VEC_SIZE_BYTES; k++) { - if (!(k & offset)) { - uint8_t tmp = v0.ub[k]; - v0.ub[k] = v1.ub[k + offset]; - v1.ub[k + offset] = tmp; - } - } - } - } - /* Put the result in the expect buffer for verification */ - expect[0] = v1; - - check_output_b(__LINE__, 1); -} - static void test_load_tmp_predicated(void) { void *p0 = buffer0; @@ -454,6 +411,25 @@ static void test_load_cur_predicated(void) check_output_w(__LINE__, BUFSIZE); } +static void test_vcombine(void) +{ + for (int i = 0; i < BUFSIZE / 2; i++) { + asm volatile("v2 = vsplat(%0)\n\t" + "v3 = vsplat(%1)\n\t" + "v3:2 = vcombine(v2, v3)\n\t" + "vmem(%2+#0) = v2\n\t" + "vmem(%2+#1) = v3\n\t" + : + : "r"(2 * i), "r"(2 * i + 1), "r"(&output[2 * i]) + : "v2", "v3", "memory"); + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[2 * i].w[j] = 2 * i + 1; + expect[2 * i + 1].w[j] = 2 * i; + } + } + 
check_output_w(__LINE__, BUFSIZE); +} + int main() { init_buffers(); @@ -489,11 +465,11 @@ int main() test_vadduwsat(); test_vsubuwsat_dv(); - test_vshuff(); - test_load_tmp_predicated(); test_load_cur_predicated(); + test_vcombine(); + puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; } diff --git a/tests/tcg/hexagon/invalid-slots.c b/tests/tcg/hexagon/invalid-slots.c new file mode 100644 index 0000000000..366ce4f42f --- /dev/null +++ b/tests/tcg/hexagon/invalid-slots.c @@ -0,0 +1,29 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +char mem[8] __attribute__((aligned(8))); + +int main() +{ + asm volatile( + "r0 = #mem\n" + /* Invalid packet (2 instructions at slot 0): */ + ".word 0xa1804100\n" /* { memw(r0) = r1; */ + ".word 0x28032804\n" /* r3 = #0; r4 = #0 } */ + : : : "r0", "r3", "r4", "memory"); + return 0; +} diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c index e126751e3a..cfdda3fd09 100644 --- a/tests/tcg/hexagon/misc.c +++ b/tests/tcg/hexagon/misc.c @@ -18,6 +18,8 @@ #include #include +#define CORE_HAS_CABAC (__HEXAGON_ARCH__ <= 71) + typedef unsigned char uint8_t; typedef unsigned short uint16_t; typedef unsigned int uint32_t; @@ -245,6 +247,7 @@ static void check(int val, int expect) } } +#if CORE_HAS_CABAC static void check64(long long val, long long expect) { if (val != expect) { @@ -252,6 +255,7 @@ static void check64(long long val, long long expect) err++; } } +#endif uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; uint32_t array[10]; @@ -286,6 +290,7 @@ static long long creg_pair(int x, int y) return retval; } +#if CORE_HAS_CABAC static long long decbin(long long x, long long y, int *pred) { long long retval; @@ -295,6 +300,7 @@ static long long decbin(long long x, long long y, int *pred) : "r"(x), "r"(y)); return retval; } +#endif /* Check that predicates are auto-and'ed in a packet */ static int auto_and(void) @@ -385,11 +391,46 @@ void test_count_trailing_zeros_ones(void) check(ct1p(0xffffff0fffffffffULL), 36); } +static inline int dpmpyss_rnd_s0(int x, int y) +{ + int res; + asm("%0 = mpy(%1, %2):rnd\n\t" : "=r"(res) : "r"(x), "r"(y)); + return res; +} + +void test_dpmpyss_rnd_s0(void) +{ + check(dpmpyss_rnd_s0(-1, 0x80000000), 1); + check(dpmpyss_rnd_s0(0, 0x80000000), 0); + check(dpmpyss_rnd_s0(1, 0x80000000), 0); + check(dpmpyss_rnd_s0(0x7fffffff, 0x80000000), 0xc0000001); + check(dpmpyss_rnd_s0(0x80000000, -1), 1); + check(dpmpyss_rnd_s0(-1, -1), 0); + check(dpmpyss_rnd_s0(0, -1), 0); + check(dpmpyss_rnd_s0(1, -1), 0); + 
check(dpmpyss_rnd_s0(0x7fffffff, -1), 0); + check(dpmpyss_rnd_s0(0x80000000, 0), 0); + check(dpmpyss_rnd_s0(-1, 0), 0); + check(dpmpyss_rnd_s0(0, 0), 0); + check(dpmpyss_rnd_s0(1, 0), 0); + check(dpmpyss_rnd_s0(-1, -1), 0); + check(dpmpyss_rnd_s0(0, -1), 0); + check(dpmpyss_rnd_s0(1, -1), 0); + check(dpmpyss_rnd_s0(0x7fffffff, 1), 0); + check(dpmpyss_rnd_s0(0x80000000, 0x7fffffff), 0xc0000001); + check(dpmpyss_rnd_s0(-1, 0x7fffffff), 0); + check(dpmpyss_rnd_s0(0, 0x7fffffff), 0); + check(dpmpyss_rnd_s0(1, 0x7fffffff), 0); + check(dpmpyss_rnd_s0(0x7fffffff, 0x7fffffff), 0x3fffffff); +} + int main() { int res; +#if CORE_HAS_CABAC long long res64; int pred; +#endif memcpy(array, init, sizeof(array)); S4_storerhnew_rr(array, 4, 0xffff); @@ -505,6 +546,7 @@ int main() res = test_clrtnew(2, 7); check(res, 7); +#if CORE_HAS_CABAC res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); check64(res64, 0x357980003700010cLL); check(pred, 0); @@ -512,6 +554,9 @@ int main() res64 = decbin(0xfLL, 0x1bLL, &pred); check64(res64, 0x78000100LL); check(pred, 1); +#else + puts("Skipping cabac tests"); +#endif res = auto_and(); check(res, 0); @@ -522,6 +567,8 @@ int main() test_count_trailing_zeros_ones(); + test_dpmpyss_rnd_s0(); + puts(err ? "FAIL" : "PASS"); return err; } diff --git a/tests/tcg/hexagon/read_write_overlap.c b/tests/tcg/hexagon/read_write_overlap.c new file mode 100644 index 0000000000..a75fc11dc4 --- /dev/null +++ b/tests/tcg/hexagon/read_write_overlap.c @@ -0,0 +1,136 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +/* + * Test instructions where the semantics write to the destination + * before all the operand reads have been completed. + * + * These instructions are problematic when we short-circuit the + * register writes because the destination and source operands could + * be the same TCGv. + * + * We test by forcing the read and write to be register r7. + */ + +#include +#include +#include + +int err; + +static void __check(const char *filename, int line, int x, int expect) +{ + if (x != expect) { + printf("ERROR %s:%d - 0x%08x != 0x%08x\n", + filename, line, x, expect); + err++; + } +} + +#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect)) + +#define insert(RES, X, WIDTH, OFFSET) \ + asm("r7 = %1\n\t" \ + "r7 = insert(r7, #" #WIDTH ", #" #OFFSET ")\n\t" \ + "%0 = r7\n\t" \ + : "=r"(RES) : "r"(X) : "r7") + +static void test_insert(void) +{ + uint32_t res; + + insert(res, 0x12345678, 8, 1); + check(res, 0x123456f0); + insert(res, 0x12345678, 0, 1); + check(res, 0x12345678); + insert(res, 0x12345678, 20, 16); + check(res, 0x56785678); +} + +static inline uint32_t insert_rp(uint32_t x, uint32_t width, uint32_t offset) +{ + uint64_t width_offset = (uint64_t)width << 32 | offset; + uint32_t res; + asm("r7 = %1\n\t" + "r7 = insert(r7, %2)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x), "r"(width_offset) : "r7"); + return res; + +} + +static void test_insert_rp(void) +{ + check(insert_rp(0x12345678, 8, 1), 0x123456f0); + check(insert_rp(0x12345678, 63, 8), 0x34567878); + check(insert_rp(0x12345678, 127, 8), 0x34567878); + check(insert_rp(0x12345678, 8, 24), 0x78345678); + 
check(insert_rp(0x12345678, 8, 63), 0x12345678); + check(insert_rp(0x12345678, 8, 64), 0x00000000); +} + +static inline uint32_t asr_r_svw_trun(uint64_t x, uint32_t y) +{ + uint32_t res; + asm("r7 = %2\n\t" + "r7 = vasrw(%1, r7)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x), "r"(y) : "r7"); + return res; +} + +static void test_asr_r_svw_trun(void) +{ + check(asr_r_svw_trun(0x1111111122222222ULL, 5), + 0x88881111); + check(asr_r_svw_trun(0x1111111122222222ULL, 63), + 0x00000000); + check(asr_r_svw_trun(0x1111111122222222ULL, 64), + 0x00000000); + check(asr_r_svw_trun(0x1111111122222222ULL, 127), + 0x22224444); + check(asr_r_svw_trun(0x1111111122222222ULL, 128), + 0x11112222); + check(asr_r_svw_trun(0xffffffff22222222ULL, 128), + 0xffff2222); +} + +static inline uint32_t swiz(uint32_t x) +{ + uint32_t res; + asm("r7 = %1\n\t" + "r7 = swiz(r7)\n\t" + "%0 = r7\n\t" + : "=r"(res) : "r"(x) : "r7"); + return res; +} + +static void test_swiz(void) +{ + check(swiz(0x11223344), 0x44332211); +} + +int main() +{ + test_insert(); + test_insert_rp(); + test_asr_r_svw_trun(); + test_swiz(); + + puts(err ? "FAIL" : "PASS"); + return err ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/tests/tcg/hexagon/v68_hvx.c b/tests/tcg/hexagon/v68_hvx.c new file mode 100644 index 0000000000..02718722a3 --- /dev/null +++ b/tests/tcg/hexagon/v68_hvx.c @@ -0,0 +1,90 @@ +/* + * Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+
+int err;
+
+#include "hvx_misc.h"
+
+MMVector v6mpy_buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
+MMVector v6mpy_buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
+/* Fill the two extra input buffers with consecutive word values. */
+static void init_v6mpy_buffers(void)
+{
+    int counter0 = 0;
+    int counter1 = 17;
+    for (int i = 0; i < BUFSIZE; i++) {
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            v6mpy_buffer0[i].w[j] = counter0++;
+            v6mpy_buffer1[i].w[j] = counter1++;
+        }
+    }
+}
+
+int v6mpy_ref[BUFSIZE][MAX_VEC_SIZE_BYTES / 4] = {
+#include "v6mpy_ref.c.inc"
+};
+/* Run v6mpy on every vector and compare with the precomputed reference. */
+static void test_v6mpy(void)
+{
+    void *p00 = buffer0;
+    void *p01 = v6mpy_buffer0;
+    void *p10 = buffer1;
+    void *p11 = v6mpy_buffer1;
+    void *pout = output;
+
+    memset(expect, 0xff, sizeof(expect));
+    memset(output, 0xff, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE; i++) {
+        asm("v2 = vmem(%0 + #0)\n\t"
+            "v3 = vmem(%1 + #0)\n\t"
+            "v4 = vmem(%2 + #0)\n\t"
+            "v5 = vmem(%3 + #0)\n\t"
+            "v5:4.w = v6mpy(v5:4.ub, v3:2.b, #1):v\n\t"
+            "vmem(%4 + #0) = v4\n\t"
+            : : "r"(p00), "r"(p01), "r"(p10), "r"(p11), "r"(pout)
+            : "v2", "v3", "v4", "v5", "memory");
+        p00 += sizeof(MMVector);
+        p01 += sizeof(MMVector);
+        p10 += sizeof(MMVector);
+        p11 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            expect[i].w[j] = v6mpy_ref[i][j];
+        }
+    }
+
+    check_output_w(__LINE__, BUFSIZE);
+}
+
+int main(void)
+{
+    init_buffers();
+    init_v6mpy_buffers();
+
+    test_v6mpy();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/v68_scalar.c b/tests/tcg/hexagon/v68_scalar.c
new file mode 100644
index 0000000000..7a8adb1130
--- /dev/null
+++ b/tests/tcg/hexagon/v68_scalar.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+/*
+ * Test the scalar core instructions that are new in v68
+ */
+
+int err;
+
+static int buffer32[] = { 1, 2, 3, 4 };
+static long long buffer64[] = { 5, 6, 7, 8 };
+
+static void __check32(int line, uint32_t result, uint32_t expect)
+{
+    if (result != expect) {
+        printf("ERROR at line %d: 0x%08x != 0x%08x\n",
+               line, result, expect);
+        err++;
+    }
+}
+
+#define check32(RES, EXP) __check32(__LINE__, RES, EXP)
+
+static void __check64(int line, uint64_t result, uint64_t expect)
+{
+    if (result != expect) {
+        printf("ERROR at line %d: 0x%016llx != 0x%016llx\n",
+               line, result, expect);
+        err++;
+    }
+}
+
+#define check64(RES, EXP) __check64(__LINE__, RES, EXP)
+/* The asm reads *p, which is not an operand, hence the "memory" clobber. */
+static inline int loadw_aq(int *p)
+{
+    int res;
+    asm volatile("%0 = memw_aq(%1)\n\t"
+                 : "=r"(res) : "r"(p) : "memory");
+    return res;
+}
+
+static void test_loadw_aq(void)
+{
+    int res;
+
+    res = loadw_aq(&buffer32[0]);
+    check32(res, 1);
+    res = loadw_aq(&buffer32[1]);
+    check32(res, 2);
+}
+/* 64-bit counterpart of loadw_aq(). */
+static inline long long loadd_aq(long long *p)
+{
+    long long res;
+    asm volatile("%0 = memd_aq(%1)\n\t"
+                 : "=r"(res) : "r"(p) : "memory");
+    return res;
+}
+
+static void test_loadd_aq(void)
+{
+    long long res;
+
+    res = loadd_aq(&buffer64[2]);
+    check64(res, 7);
+    res = loadd_aq(&buffer64[3]);
+    check64(res, 8);
+}
+
+static inline void release_at(int *p)
+{
+    asm volatile("release(%0):at\n\t"
+                 : : "r"(p) : "memory");
+}
+/* The addressed words must still hold their initial values afterwards. */
+static void test_release_at(void)
+{
+    release_at(&buffer32[2]);
+    check32(buffer32[2], 3);
+    release_at(&buffer32[3]);
+    check32(buffer32[3], 4);
+}
+
+static inline void release_st(int *p)
+{
+    asm volatile("release(%0):st\n\t"
+                 : : "r"(p) : "memory");
+}
+/* The addressed words must still hold their initial values afterwards. */
+static void test_release_st(void)
+{
+    release_st(&buffer32[2]);
+    check32(buffer32[2], 3);
+    release_st(&buffer32[3]);
+    check32(buffer32[3], 4);
+}
+
+static inline void storew_rl_at(int *p, int val)
+{
+    asm volatile("memw_rl(%0):at = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_storew_rl_at(void)
+{
+    storew_rl_at(&buffer32[2], 9);
+    check32(buffer32[2], 9);
+    storew_rl_at(&buffer32[3], 10);
+    check32(buffer32[3], 10);
+}
+
+static inline void stored_rl_at(long long *p, long long val)
+{
+    asm volatile("memd_rl(%0):at = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_stored_rl_at(void)
+{
+    stored_rl_at(&buffer64[2], 11);
+    check64(buffer64[2], 11);
+    stored_rl_at(&buffer64[3], 12);
+    check64(buffer64[3], 12);
+}
+
+static inline void storew_rl_st(int *p, int val)
+{
+    asm volatile("memw_rl(%0):st = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_storew_rl_st(void)
+{
+    storew_rl_st(&buffer32[0], 13);
+    check32(buffer32[0], 13);
+    storew_rl_st(&buffer32[1], 14);
+    check32(buffer32[1], 14);
+}
+
+static inline void stored_rl_st(long long *p, long long val)
+{
+    asm volatile("memd_rl(%0):st = %1\n\t"
+                 : : "r"(p), "r"(val) : "memory");
+}
+
+static void test_stored_rl_st(void)
+{
+    stored_rl_st(&buffer64[0], 15);
+    check64(buffer64[0], 15);
+    stored_rl_st(&buffer64[1], 15);
+    check64(buffer64[1], 15);
+}
+
+int main(void)
+{
+    test_loadw_aq();
+    test_loadd_aq();
+    test_release_at();
+    test_release_st();
+    test_storew_rl_at();
+    test_stored_rl_at();
+    test_storew_rl_st();
+    test_stored_rl_st();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/v69_hvx.c b/tests/tcg/hexagon/v69_hvx.c
new file mode 100644
index 0000000000..a0d567d142
--- /dev/null
+++ b/tests/tcg/hexagon/v69_hvx.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+
+int err;
+
+#include "hvx_misc.h"
+/* Reference helpers: add the rounding constant, then saturate the result. */
+#define fVROUND(VAL, SHAMT) \
+    ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0))
+
+#define fVSATUB(VAL) \
+    ((((VAL) & 0xffLL) == (VAL)) ? \
+     (VAL) : \
+     ((((int32_t)(VAL)) < 0) ? 0 : 0xff))
+
+#define fVSATUH(VAL) \
+    ((((VAL) & 0xffffLL) == (VAL)) ? \
+     (VAL) : \
+     ((((int32_t)(VAL)) < 0) ? 0 : 0xffff))
+
+static void test_vasrvuhubrndsat(void)
+{
+    void *p0 = buffer0;
+    void *p1 = buffer1;
+    void *pout = output;
+
+    memset(expect, 0xaa, sizeof(expect));
+    memset(output, 0xbb, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE / 2; i++) {
+        asm("v4 = vmem(%0 + #0)\n\t"
+            "v5 = vmem(%0 + #1)\n\t"
+            "v6 = vmem(%1 + #0)\n\t"
+            "v5.ub = vasr(v5:4.uh, v6.ub):rnd:sat\n\t"
+            "vmem(%2) = v5\n\t"
+            : : "r"(p0), "r"(p1), "r"(pout)
+            : "v4", "v5", "v6", "memory");
+        p0 += sizeof(MMVector) * 2;
+        p1 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
+            int shamt;
+            uint8_t byte0;
+            uint8_t byte1;
+
+            shamt = buffer1[i].ub[2 * j + 0] & 0x7;
+            byte0 = fVSATUB(fVROUND(buffer0[2 * i + 0].uh[j], shamt) >> shamt);
+            shamt = buffer1[i].ub[2 * j + 1] & 0x7;
+            byte1 = fVSATUB(fVROUND(buffer0[2 * i + 1].uh[j], shamt) >> shamt);
+            expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
+        }
+    }
+
+    check_output_h(__LINE__, BUFSIZE / 2);
+}
+
+static void test_vasrvuhubsat(void)
+{
+    void *p0 = buffer0;
+    void *p1 = buffer1;
+    void *pout = output;
+
+    memset(expect, 0xaa, sizeof(expect));
+    memset(output, 0xbb, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE / 2; i++) {
+        asm("v4 = vmem(%0 + #0)\n\t"
+            "v5 = vmem(%0 + #1)\n\t"
+            "v6 = vmem(%1 + #0)\n\t"
+            "v5.ub = vasr(v5:4.uh, v6.ub):sat\n\t"
+            "vmem(%2) = v5\n\t"
+            : : "r"(p0), "r"(p1), "r"(pout)
+            : "v4", "v5", "v6", "memory");
+        p0 += sizeof(MMVector) * 2;
+        p1 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
+            int shamt;
+            uint8_t byte0;
+            uint8_t byte1;
+
+            shamt = buffer1[i].ub[2 * j + 0] & 0x7;
+            byte0 = fVSATUB(buffer0[2 * i + 0].uh[j] >> shamt);
+            shamt = buffer1[i].ub[2 * j + 1] & 0x7;
+            byte1 = fVSATUB(buffer0[2 * i + 1].uh[j] >> shamt);
+            expect[i].uh[j] = (byte1 << 8) | (byte0 & 0xff);
+        }
+    }
+
+    check_output_h(__LINE__, BUFSIZE / 2);
+}
+
+static void test_vasrvwuhrndsat(void)
+{
+    void *p0 = buffer0;
+    void *p1 = buffer1;
+    void *pout = output;
+
+    memset(expect, 0xaa, sizeof(expect));
+    memset(output, 0xbb, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE / 2; i++) {
+        asm("v4 = vmem(%0 + #0)\n\t"
+            "v5 = vmem(%0 + #1)\n\t"
+            "v6 = vmem(%1 + #0)\n\t"
+            "v5.uh = vasr(v5:4.w, v6.uh):rnd:sat\n\t"
+            "vmem(%2) = v5\n\t"
+            : : "r"(p0), "r"(p1), "r"(pout)
+            : "v4", "v5", "v6", "memory");
+        p0 += sizeof(MMVector) * 2;
+        p1 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            int shamt;
+            uint16_t half0;
+            uint16_t half1;
+            /* uint32_t shift: half1 would otherwise promote to signed int. */
+            shamt = buffer1[i].uh[2 * j + 0] & 0xf;
+            half0 = fVSATUH(fVROUND(buffer0[2 * i + 0].w[j], shamt) >> shamt);
+            shamt = buffer1[i].uh[2 * j + 1] & 0xf;
+            half1 = fVSATUH(fVROUND(buffer0[2 * i + 1].w[j], shamt) >> shamt);
+            expect[i].w[j] = ((uint32_t)half1 << 16) | (half0 & 0xffff);
+        }
+    }
+
+    check_output_w(__LINE__, BUFSIZE / 2);
+}
+
+static void test_vasrvwuhsat(void)
+{
+    void *p0 = buffer0;
+    void *p1 = buffer1;
+    void *pout = output;
+
+    memset(expect, 0xaa, sizeof(expect));
+    memset(output, 0xbb, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE / 2; i++) {
+        asm("v4 = vmem(%0 + #0)\n\t"
+            "v5 = vmem(%0 + #1)\n\t"
+            "v6 = vmem(%1 + #0)\n\t"
+            "v5.uh = vasr(v5:4.w, v6.uh):sat\n\t"
+            "vmem(%2) = v5\n\t"
+            : : "r"(p0), "r"(p1), "r"(pout)
+            : "v4", "v5", "v6", "memory");
+        p0 += sizeof(MMVector) * 2;
+        p1 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            int shamt;
+            uint16_t half0;
+            uint16_t half1;
+            /* uint32_t shift: half1 would otherwise promote to signed int. */
+            shamt = buffer1[i].uh[2 * j + 0] & 0xf;
+            half0 = fVSATUH(buffer0[2 * i + 0].w[j] >> shamt);
+            shamt = buffer1[i].uh[2 * j + 1] & 0xf;
+            half1 = fVSATUH(buffer0[2 * i + 1].w[j] >> shamt);
+            expect[i].w[j] = ((uint32_t)half1 << 16) | (half0 & 0xffff);
+        }
+    }
+
+    check_output_w(__LINE__, BUFSIZE / 2);
+}
+
+static void test_vassign_tmp(void)
+{
+    void *p0 = buffer0;
+    void *pout = output;
+
+    memset(expect, 0xaa, sizeof(expect));
+    memset(output, 0xbb, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE; i++) {
+        /*
+         * Assign into v12 as .tmp, then use it in the next packet
+         * Should get the new value within the same packet and
+         * the old value in the next packet
+         */
+        asm("v3 = vmem(%0 + #0)\n\t"
+            "r1 = #1\n\t"
+            "v12 = vsplat(r1)\n\t"
+            "r1 = #2\n\t"
+            "v13 = vsplat(r1)\n\t"
+            "{\n\t"
+            " v12.tmp = v13\n\t"
+            " v4.w = vadd(v12.w, v3.w)\n\t"
+            "}\n\t"
+            "v4.w = vadd(v4.w, v12.w)\n\t"
+            "vmem(%1 + #0) = v4\n\t"
+            : : "r"(p0), "r"(pout)
+            : "r1", "v3", "v4", "v12", "v13", "memory");
+        p0 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            expect[i].w[j] = buffer0[i].w[j] + 3;
+        }
+    }
+
+    check_output_w(__LINE__, BUFSIZE);
+}
+
+static void test_vcombine_tmp(void)
+{
+    void *p0 = buffer0;
+    void *p1 = buffer1;
+    void *pout = output;
+
+    memset(expect, 0xaa, sizeof(expect));
+    memset(output, 0xbb, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE; i++) {
+        /*
+         * Combine into v13:12 as .tmp, then use it in the next packet
+         * Should get the new value within the same packet and
+         * the old value in the next packet
+         */
+        asm("v3 = vmem(%0 + #0)\n\t"
+            "r1 = #1\n\t"
+            "v12 = vsplat(r1)\n\t"
+            "r1 = #2\n\t"
+            "v13 = vsplat(r1)\n\t"
+            "r1 = #3\n\t"
+            "v14 = vsplat(r1)\n\t"
+            "r1 = #4\n\t"
+            "v15 = vsplat(r1)\n\t"
+            "{\n\t"
+            " v13:12.tmp = vcombine(v15, v14)\n\t"
+            " v4.w = vadd(v12.w, v3.w)\n\t"
+            " v16 = v13\n\t"
+            "}\n\t"
+            "v4.w = vadd(v4.w, v12.w)\n\t"
+            "v4.w = vadd(v4.w, v13.w)\n\t"
+            "v4.w = vadd(v4.w, v16.w)\n\t"
+            "vmem(%2 + #0) = v4\n\t"
+            : : "r"(p0), "r"(p1), "r"(pout)
+            : "r1", "v3", "v4", "v12", "v13", "v14", "v15", "v16", "memory");
+        p0 += sizeof(MMVector);
+        p1 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            expect[i].w[j] = buffer0[i].w[j] + 10;
+        }
+    }
+
+    check_output_w(__LINE__, BUFSIZE);
+}
+/* Multiply as uint32_t: narrower unsigned operands would promote to int. */
+static void test_vmpyuhvs(void)
+{
+    void *p0 = buffer0;
+    void *p1 = buffer1;
+    void *pout = output;
+
+    memset(expect, 0xaa, sizeof(expect));
+    memset(output, 0xbb, sizeof(output));
+
+    for (int i = 0; i < BUFSIZE; i++) {
+        asm("v4 = vmem(%0 + #0)\n\t"
+            "v5 = vmem(%1 + #0)\n\t"
+            "v4.uh = vmpy(v4.uh, v5.uh):>>16\n\t"
+            "vmem(%2) = v4\n\t"
+            : : "r"(p0), "r"(p1), "r"(pout)
+            : "v4", "v5", "memory");
+        p0 += sizeof(MMVector);
+        p1 += sizeof(MMVector);
+        pout += sizeof(MMVector);
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 2; j++) {
+            expect[i].uh[j] =
+                ((uint32_t)buffer0[i].uh[j] * buffer1[i].uh[j]) >> 16;
+        }
+    }
+
+    check_output_h(__LINE__, BUFSIZE);
+}
+
+int main(void)
+{
+    init_buffers();
+
+    test_vasrvuhubrndsat();
+    test_vasrvuhubsat();
+    test_vasrvwuhrndsat();
+    test_vasrvwuhsat();
+
+    test_vassign_tmp();
+    test_vcombine_tmp();
+
+    test_vmpyuhvs();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
diff --git a/tests/tcg/hexagon/v6mpy_ref.c.inc b/tests/tcg/hexagon/v6mpy_ref.c.inc
new file mode 100644
index 0000000000..8258cddcb1
--- /dev/null
+++ b/tests/tcg/hexagon/v6mpy_ref.c.inc
@@ -0,0 +1,161 @@
+/*
+ * Copyright(c) 2021-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +{ 0xffffee11, 0xfffffcca, 0xffffc1b3, 0xffffd0cc, + 0xffffe215, 0xfffff58e, 0xffffaf37, 0xffffc310, + 0xffffd919, 0xfffff152, 0xffff9fbb, 0xffffb854, + 0xffffd31d, 0xfffff016, 0xffff933f, 0xffffb098, + 0xffffd021, 0xfffff1da, 0xffff89c3, 0xffffabdc, + 0xffffd025, 0xfffff69e, 0xffff8347, 0xffffaa20, + 0xffffd329, 0xfffffe62, 0xffff7fcb, 0xffffab64, + 0xffffd92d, 0x00000926, 0xffff7f4f, 0xffffafa8, + }, +{ 0xffffe231, 0x000016ea, 0xffff81d3, 0xffffb6ec, + 0xffffee35, 0x000027ae, 0xffff8757, 0xffffc130, + 0xfffffd39, 0x00003b72, 0xffff8fdb, 0xffffce74, + 0x00000f3d, 0x00005236, 0xffff9b5f, 0xffffdeb8, + 0x00002441, 0x00006bfa, 0xffffa9e3, 0xfffff1fc, + 0x00003c45, 0x000088be, 0xffffbb67, 0x00000840, + 0x00005749, 0x0000a882, 0xffffcfeb, 0xffffe684, + 0x0000494d, 0x00009a46, 0xffffb16f, 0x000002c8, + }, +{ 0xfffff351, 0x0000440a, 0xffff4af3, 0xffff9c0c, + 0xffffef55, 0x000044ce, 0xffff4077, 0xffff9650, + 0xffffee59, 0x00004892, 0xffff38fb, 0xffff9394, + 0xfffff05d, 0x00004f56, 0xffff347f, 0xffff93d8, + 0xfffff561, 0x0000591a, 0xffff3303, 0xffff971c, + 0xfffffd65, 0x000065de, 0xffff3487, 0xffff9d60, + 0x00000869, 0x000075a2, 0xffff390b, 0xffffa6a4, + 0x0000166d, 0x00008866, 0xffff408f, 0xffffb2e8, + }, +{ 0x00002771, 0x00009e2a, 0xffff4b13, 0xffffc22c, + 0x00003b75, 0x0000b6ee, 0xffff5897, 0xffffd470, + 0x00005279, 0x0000d2b2, 0xffff691b, 0xffffe9b4, + 0x00006c7d, 0x0000f176, 0xffff7c9f, 0x000001f8, + 0x00008981, 0x0001133a, 0xffff9323, 0x00001d3c, + 0x0000a985, 0x000137fe, 0xffffaca7, 0x00003b80, + 0x0000cc89, 0x00015fc2, 0xffffc92b, 0xffffe1c4, + 0x0000868d, 0x00011986, 0xffff72af, 0x00000608, + }, +{ 0xfffff891, 0x00008b4a, 0xfffed433, 0xffff674c, + 0xfffffc95, 0x0000940e, 0xfffed1b7, 0xffff6990, + 0x00000399, 0x00009fd2, 0xfffed23b, 0xffff6ed4, + 0x00000d9d, 0x0000ae96, 0xfffed5bf, 0xffff7718, + 0x00001aa1, 0x0000c05a, 0xfffedc43, 0xffff825c, + 0x00002aa5, 0x0000d51e, 0xfffee5c7, 0xffff90a0, + 0x00003da9, 0x0000ece2, 0xfffef24b, 0xffffa1e4, + 0x000053ad, 
0x000107a6, 0xffff01cf, 0xffffb628, + }, +{ 0x00006cb1, 0x0001256a, 0xffff1453, 0xffffcd6c, + 0x000088b5, 0x0001462e, 0xffff29d7, 0xffffe7b0, + 0x0000a7b9, 0x000169f2, 0xffff425b, 0x000004f4, + 0x0000c9bd, 0x000190b6, 0xffff5ddf, 0x00002538, + 0x0000eec1, 0x0001ba7a, 0xffff7c63, 0x0000487c, + 0x000116c5, 0x0001e73e, 0xffff9de7, 0x00006ec0, + 0x000141c9, 0x00021702, 0xffffc26b, 0xffffdd04, + 0x0000c3cd, 0x000198c6, 0xffff33ef, 0x00000948, + }, +{ 0xfffffdd1, 0x0000d28a, 0xfffe5d73, 0xffff328c, + 0x000009d5, 0x0000e34e, 0xfffe62f7, 0xffff3cd0, + 0x000018d9, 0x0000f712, 0xfffe6b7b, 0xffff4a14, + 0x00002add, 0x00010dd6, 0xfffe76ff, 0xffff5a58, + 0x00003fe1, 0x0001279a, 0xfffe8583, 0xffff6d9c, + 0x000057e5, 0x0001445e, 0xfffe9707, 0xffff83e0, + 0x000072e9, 0x00016422, 0xfffeab8b, 0xffff9d24, + 0x000090ed, 0x000186e6, 0xfffec30f, 0xffffb968, + }, +{ 0x0000b1f1, 0x0001acaa, 0xfffedd93, 0xffffd8ac, + 0x0000d5f5, 0x0001d56e, 0xfffefb17, 0xfffffaf0, + 0x0000fcf9, 0x00020132, 0xffff1b9b, 0x00002034, + 0x000126fd, 0x00022ff6, 0xffff3f1f, 0x00008b36, + 0x000093c3, 0x00009d80, 0x00009d6d, 0x0000a78a, + 0x0000b4d7, 0x0000c354, 0x0000b801, 0x0000c6de, + 0x0000d4eb, 0x0000e828, 0x0000d195, 0xffffea32, + 0x00000fff, 0x000022fc, 0xfffffc29, 0x00000f86, + }, +{ 0xffffee13, 0xfffffcd0, 0xffffc1bd, 0xffffd0da, + 0xffffe327, 0xfffff6a4, 0xffffb051, 0xffffc42e, + 0xffffd73b, 0xffffef78, 0xffff9de5, 0xffffb682, + 0xffffd24f, 0xffffef4c, 0xffff9279, 0xffffafd6, + 0xffffd063, 0xfffff220, 0xffff8a0d, 0xffffac2a, + 0xffffd177, 0xfffff7f4, 0xffff84a1, 0xffffab7e, + 0xffffd18b, 0xfffffcc8, 0xffff7e35, 0xffffa9d2, + 0xffffd89f, 0x0000089c, 0xffff7ec9, 0xffffaf26, + }, +{ 0xffffe2b3, 0x00001770, 0xffff825d, 0xffffb77a, + 0xffffefc7, 0x00002944, 0xffff88f1, 0xffffc2ce, + 0xfffffbdb, 0x00003a18, 0xffff8e85, 0xffffcd22, + 0x00000eef, 0x000051ec, 0xffff9b19, 0xffffde76, + 0x00002503, 0x00006cc0, 0xffffaaad, 0xfffff2ca, + 0x00003e17, 0x00008a94, 0xffffbd41, 0x00000a1e, + 0x0000562b, 0x0000a768, 
0xffffced5, 0xffffe572, + 0x0000493f, 0x00009a3c, 0xffffb169, 0x000002c6, + }, +{ 0xfffff353, 0x00004410, 0xffff4afd, 0xffff9c1a, + 0xfffff067, 0x000045e4, 0xffff4191, 0xffff976e, + 0xffffec7b, 0x000046b8, 0xffff3725, 0xffff91c2, + 0xffffef8f, 0x00004e8c, 0xffff33b9, 0xffff9316, + 0xfffff5a3, 0x00005960, 0xffff334d, 0xffff976a, + 0xfffffeb7, 0x00006734, 0xffff35e1, 0xffff9ebe, + 0x000006cb, 0x00007408, 0xffff3775, 0xffffa512, + 0x000015df, 0x000087dc, 0xffff4009, 0xffffb266, + }, +{ 0x000027f3, 0x00009eb0, 0xffff4b9d, 0xffffc2ba, + 0x00003d07, 0x0000b884, 0xffff5a31, 0xffffd60e, + 0x0000511b, 0x0000d158, 0xffff67c5, 0xffffe862, + 0x00006c2f, 0x0000f12c, 0xffff7c59, 0x000001b6, + 0x00008a43, 0x00011400, 0xffff93ed, 0x00001e0a, + 0x0000ab57, 0x000139d4, 0xffffae81, 0x00003d5e, + 0x0000cb6b, 0x00015ea8, 0xffffc815, 0xffffe0b2, + 0x0000867f, 0x0001197c, 0xffff72a9, 0x00000606, + }, +{ 0xfffff893, 0x00008b50, 0xfffed43d, 0xffff675a, + 0xfffffda7, 0x00009524, 0xfffed2d1, 0xffff6aae, + 0x000001bb, 0x00009df8, 0xfffed065, 0xffff6d02, + 0x00000ccf, 0x0000adcc, 0xfffed4f9, 0xffff7656, + 0x00001ae3, 0x0000c0a0, 0xfffedc8d, 0xffff82aa, + 0x00002bf7, 0x0000d674, 0xfffee721, 0xffff91fe, + 0x00003c0b, 0x0000eb48, 0xfffef0b5, 0xffffa052, + 0x0000531f, 0x0001071c, 0xffff0149, 0xffffb5a6, + }, +{ 0x00006d33, 0x000125f0, 0xffff14dd, 0xffffcdfa, + 0x00008a47, 0x000147c4, 0xffff2b71, 0xffffe94e, + 0x0000a65b, 0x00016898, 0xffff4105, 0x000003a2, + 0x0000c96f, 0x0001906c, 0xffff5d99, 0x000024f6, + 0x0000ef83, 0x0001bb40, 0xffff7d2d, 0x0000494a, + 0x00011897, 0x0001e914, 0xffff9fc1, 0x0000709e, + 0x000140ab, 0x000215e8, 0xffffc155, 0xffffdbf2, + 0x0000c3bf, 0x000198bc, 0xffff33e9, 0x00000946, + }, +{ 0xfffffdd3, 0x0000d290, 0xfffe5d7d, 0xffff329a, + 0x00000ae7, 0x0000e464, 0xfffe6411, 0xffff3dee, + 0x000016fb, 0x0000f538, 0xfffe69a5, 0xffff4842, + 0x00002a0f, 0x00010d0c, 0xfffe7639, 0xffff5996, + 0x00004023, 0x000127e0, 0xfffe85cd, 0xffff6dea, + 0x00005937, 0x000145b4, 0xfffe9861, 
0xffff853e, + 0x0000714b, 0x00016288, 0xfffea9f5, 0xffff9b92, + 0x0000905f, 0x0001865c, 0xfffec289, 0xffffb8e6, + }, +{ 0x0000b273, 0x0001ad30, 0xfffede1d, 0xffffd93a, + 0x0000d787, 0x0001d704, 0xfffefcb1, 0xfffffc8e, + 0x0000fb9b, 0x0001ffd8, 0xffff1a45, 0x00001ee2, + 0x000126af, 0x00022fac, 0xffff3ed9, 0x00008af4, + 0x00009485, 0x00009e46, 0x00009e37, 0x0000a858, + 0x0000b6a9, 0x0000c52a, 0x0000b9db, 0x0000c8bc, + 0x0000d3cd, 0x0000e70e, 0x0000d07f, 0xffffe920, + 0x00000ff1, 0x000022f2, 0xfffffc23, 0x00000f84, + }, diff --git a/tests/tcg/hexagon/v73_scalar.c b/tests/tcg/hexagon/v73_scalar.c new file mode 100644 index 0000000000..fee67fc531 --- /dev/null +++ b/tests/tcg/hexagon/v73_scalar.c @@ -0,0 +1,96 @@ +/* + * Copyright(c) 2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+/*
+ * Test the scalar core instructions that are new in v73
+ */
+
+int err;
+
+static void __check32(int line, uint32_t result, uint32_t expect)
+{
+    if (result != expect) {
+        printf("ERROR at line %d: 0x%08x != 0x%08x\n",
+               line, result, expect);
+        err++;
+    }
+}
+
+#define check32(RES, EXP) __check32(__LINE__, RES, EXP)
+
+static void __check64(int line, uint64_t result, uint64_t expect)
+{
+    if (result != expect) {
+        printf("ERROR at line %d: 0x%016llx != 0x%016llx\n",
+               line, result, expect);
+        err++;
+    }
+}
+
+#define check64(RES, EXP) __check64(__LINE__, RES, EXP)
+/* Set by my_func() so that test_callrh() can tell the call happened. */
+static bool my_func_called;
+
+static void my_func(void)
+{
+    my_func_called = true;
+}
+/* Call func through a register using the callrh instruction. */
+static inline void callrh(void *func)
+{
+    asm volatile("callrh %0\n\t"
+                 : : "r"(func)
+                 /* Caller-save registers, LR and memory may all be changed */
+                 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
+                   "r10", "r11", "r12", "r13", "r14", "r15", "r28", "r31",
+                   "p0", "p1", "p2", "p3", "memory");
+}
+
+static void test_callrh(void)
+{
+    my_func_called = false;
+    callrh(&my_func);
+    check32(my_func_called, true);
+}
+/* Falling through the jump leaves 3 in res; taking it leaves 1. */
+static void test_jumprh(void)
+{
+    uint32_t res;
+    asm ("%0 = #5\n\t"
+         "r0 = ##1f\n\t"
+         "jumprh r0\n\t"
+         "%0 = #3\n\t"
+         "jump 2f\n\t"
+         "1:\n\t"
+         "%0 = #1\n\t"
+         "2:\n\t"
+         : "=r"(res) : : "r0");
+    check32(res, 1);
+}
+
+int main()
+{
+    test_callrh();
+    test_jumprh();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ?
1 : 0; +} diff --git a/tests/tcg/multiarch/system/Makefile.softmmu-target b/tests/tcg/multiarch/system/Makefile.softmmu-target index 5f432c95f3..fe40195d39 100644 --- a/tests/tcg/multiarch/system/Makefile.softmmu-target +++ b/tests/tcg/multiarch/system/Makefile.softmmu-target @@ -27,6 +27,20 @@ run-gdbstub-memory: memory "-monitor none -display none -chardev file$(COMMA)path=$<.out$(COMMA)id=output $(QEMU_OPTS)" \ --bin $< --test $(MULTIARCH_SRC)/gdbstub/memory.py, \ softmmu gdbstub support) + +run-gdbstub-untimely-packet: hello + $(call run-test, $@, $(GDB_SCRIPT) \ + --gdb $(HAVE_GDB_BIN) \ + --gdb-args "-ex 'set debug remote 1'" \ + --output untimely-packet.gdb.out \ + --stderr untimely-packet.gdb.err \ + --qemu $(QEMU) \ + --bin $< --qargs \ + "-monitor none -display none -chardev file$(COMMA)path=untimely-packet.out$(COMMA)id=output $(QEMU_OPTS)", \ + "softmmu gdbstub untimely packets") + $(call quiet-command, \ + (! grep -Fq 'Packet instead of Ack, ignoring it' untimely-packet.gdb.err), \ + "GREP", "file untimely-packet.gdb.err") else run-gdbstub-%: $(call skip-test, "gdbstub test $*", "no guest arch support") @@ -36,4 +50,4 @@ run-gdbstub-%: $(call skip-test, "gdbstub test $*", "need working gdb") endif -MULTIARCH_RUNS += run-gdbstub-memory +MULTIARCH_RUNS += run-gdbstub-memory run-gdbstub-untimely-packet