From 99ab4d500af638ba3ebb20e8aa89d72201b70860 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 3 Feb 2023 18:15:10 +0100 Subject: [PATCH 01/40] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration"), it looks the CPUJumpCache pointer can be NULL. This causes a SIGSEV when running debug-wp-migration kvm unit test. At the first place it should be clarified why this TCG code is called with KVM acceleration. This may hide another bug. Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration") Signed-off-by: Eric Auger Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com> Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 4e040a1cb9..04e270742e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) { - int i, i0 = tb_jmp_cache_hash_page(page_addr); CPUJumpCache *jc = cpu->tb_jmp_cache; + int i, i0; + if (unlikely(!jc)) { + return; + } + + i0 = tb_jmp_cache_hash_page(page_addr); for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { qatomic_set(&jc->array[i0 + i].tb, NULL); } From e1e646524437072e466313b04a2f8326dd7b8e77 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 3 Feb 2023 12:58:12 -1000 Subject: [PATCH 02/40] tcg: Init temp_subindex in liveness_pass_2 Correctly handle large types while lowering. Fixes: fac87bd2a49b ("tcg: Add temp_subindex to TCGTemp") Signed-off-by: Richard Henderson --- tcg/tcg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tcg/tcg.c b/tcg/tcg.c index fd557d55d3..bc60fd0fe8 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3063,6 +3063,7 @@ static bool liveness_pass_2(TCGContext *s) TCGTemp *dts = tcg_temp_alloc(s); dts->type = its->type; dts->base_type = its->base_type; + dts->temp_subindex = its->temp_subindex; dts->kind = TEMP_EBB; its->state_ptr = dts; } else { From ecbea3ec1ce5f4499ef6acbc696ec5d6a1c69165 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 11 Nov 2022 10:49:52 +1000 Subject: [PATCH 03/40] tcg: Define TCG_TYPE_I128 and related helper macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Begin staging in support for TCGv_i128 with Int128. Define the type enumerator, the typedef, and the helper-head.h macros. This cannot yet be used, because you can't allocate temporaries of this new type. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/exec/helper-head.h | 7 +++++++ include/tcg/tcg.h | 17 ++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index bc6698b19f..b8d1140dc7 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -26,6 +26,7 @@ #define dh_alias_int i32 #define dh_alias_i64 i64 #define dh_alias_s64 i64 +#define dh_alias_i128 i128 #define dh_alias_f16 i32 #define dh_alias_f32 i32 #define dh_alias_f64 i64 @@ -40,6 +41,7 @@ #define dh_ctype_int int #define dh_ctype_i64 uint64_t #define dh_ctype_s64 int64_t +#define dh_ctype_i128 Int128 #define dh_ctype_f16 uint32_t #define dh_ctype_f32 float32 #define dh_ctype_f64 float64 @@ -71,6 +73,7 @@ #define dh_retvar_decl0_noreturn void #define dh_retvar_decl0_i32 TCGv_i32 retval #define dh_retvar_decl0_i64 TCGv_i64 retval +#define dh_retval_decl0_i128 TCGv_i128 retval #define dh_retvar_decl0_ptr TCGv_ptr retval #define dh_retvar_decl0(t) glue(dh_retvar_decl0_, dh_alias(t)) @@ -78,6 +81,7 @@ #define dh_retvar_decl_noreturn #define dh_retvar_decl_i32 TCGv_i32 retval, #define dh_retvar_decl_i64 TCGv_i64 retval, +#define dh_retvar_decl_i128 TCGv_i128 retval, #define dh_retvar_decl_ptr TCGv_ptr retval, #define dh_retvar_decl(t) glue(dh_retvar_decl_, dh_alias(t)) @@ -85,6 +89,7 @@ #define dh_retvar_noreturn NULL #define dh_retvar_i32 tcgv_i32_temp(retval) #define dh_retvar_i64 tcgv_i64_temp(retval) +#define dh_retvar_i128 tcgv_i128_temp(retval) #define dh_retvar_ptr tcgv_ptr_temp(retval) #define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) @@ -95,6 +100,7 @@ #define dh_typecode_i64 4 #define dh_typecode_s64 5 #define dh_typecode_ptr 6 +#define dh_typecode_i128 7 #define dh_typecode_int dh_typecode_s32 #define dh_typecode_f16 dh_typecode_i32 #define dh_typecode_f32 dh_typecode_i32 @@ -104,6 +110,7 @@ #define dh_callflag_i32 0 #define dh_callflag_i64 0 +#define dh_callflag_i128 0 #define dh_callflag_ptr 0 #define dh_callflag_void 0 #define dh_callflag_noreturn TCG_CALL_NO_RETURN diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index c5112da0ef..4d7e4107a9 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -270,6 +270,7 @@ typedef struct TCGPool { typedef enum TCGType { TCG_TYPE_I32, TCG_TYPE_I64, + TCG_TYPE_I128, TCG_TYPE_V64, TCG_TYPE_V128, @@ -351,13 +352,14 @@ typedef tcg_target_ulong TCGArg; in tcg/README. Target CPU front-end code uses these types to deal with TCG variables as it emits TCG code via the tcg_gen_* functions. They come in several flavours: - * TCGv_i32 : 32 bit integer type - * TCGv_i64 : 64 bit integer type - * TCGv_ptr : a host pointer type - * TCGv_vec : a host vector type; the exact size is not exposed - to the CPU front-end code. - * TCGv : an integer type the same size as target_ulong - (an alias for either TCGv_i32 or TCGv_i64) + * TCGv_i32 : 32 bit integer type + * TCGv_i64 : 64 bit integer type + * TCGv_i128 : 128 bit integer type + * TCGv_ptr : a host pointer type + * TCGv_vec : a host vector type; the exact size is not exposed + to the CPU front-end code. + * TCGv : an integer type the same size as target_ulong + (an alias for either TCGv_i32 or TCGv_i64) The compiler's type checking will complain if you mix them up and pass the wrong sized TCGv to a function. @@ -377,6 +379,7 @@ typedef tcg_target_ulong TCGArg; typedef struct TCGv_i32_d *TCGv_i32; typedef struct TCGv_i64_d *TCGv_i64; +typedef struct TCGv_i128_d *TCGv_i128; typedef struct TCGv_ptr_d *TCGv_ptr; typedef struct TCGv_vec_d *TCGv_vec; typedef TCGv_ptr TCGv_env; From 466d37596010845eb61fbb8b5cd7daa407286342 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 11 Nov 2022 11:01:13 +1000 Subject: [PATCH 04/40] tcg: Handle dh_typecode_i128 with TCG_CALL_{RET,ARG}_NORMAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many hosts pass and return 128-bit quantities like sequential 64-bit quantities. Treat this just like we currently break down 64-bit quantities for a 32-bit host. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/tcg.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index bc60fd0fe8..bc7198e5d0 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -707,11 +707,22 @@ static void init_call_layout(TCGHelperInfo *info) case dh_typecode_s64: info->nr_out = 64 / TCG_TARGET_REG_BITS; info->out_kind = TCG_CALL_RET_NORMAL; + assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); + break; + case dh_typecode_i128: + info->nr_out = 128 / TCG_TARGET_REG_BITS; + info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */ + switch (/* TODO */ TCG_CALL_RET_NORMAL) { + case TCG_CALL_RET_NORMAL: + assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); + break; + default: + qemu_build_not_reached(); + } break; default: g_assert_not_reached(); } - assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); /* * Parse and place function arguments. @@ -733,6 +744,9 @@ static void init_call_layout(TCGHelperInfo *info) case dh_typecode_ptr: type = TCG_TYPE_PTR; break; + case dh_typecode_i128: + type = TCG_TYPE_I128; + break; default: g_assert_not_reached(); } @@ -772,6 +786,19 @@ static void init_call_layout(TCGHelperInfo *info) } break; + case TCG_TYPE_I128: + switch (/* TODO */ TCG_CALL_ARG_NORMAL) { + case TCG_CALL_ARG_EVEN: + layout_arg_even(&cum); + /* fall through */ + case TCG_CALL_ARG_NORMAL: + layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); + break; + default: + qemu_build_not_reached(); + } + break; + default: g_assert_not_reached(); } @@ -1692,11 +1719,13 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) op->args[pi++] = temp_arg(ret); break; case 2: + case 4: tcg_debug_assert(ret != NULL); - tcg_debug_assert(ret->base_type == ret->type + 1); + tcg_debug_assert(ret->base_type == ret->type + ctz32(n)); tcg_debug_assert(ret->temp_subindex == 0); - op->args[pi++] = temp_arg(ret); - op->args[pi++] = temp_arg(ret + 1); + for (i = 0; i < n; ++i) { + op->args[pi++] = temp_arg(ret + i); + } break; default: g_assert_not_reached(); From 273eb50c0fed6696d4600d9cf26f1b2dfcccab0c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 19 Oct 2022 12:03:40 +1000 Subject: [PATCH 05/40] tcg: Allocate objects contiguously in temp_allocate_frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When allocating a temp to the stack frame, consider the base type and allocate all parts at once. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/tcg.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index bc7198e5d0..cdfc50b164 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3267,11 +3267,12 @@ static bool liveness_pass_2(TCGContext *s) static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) { - int size = tcg_type_size(ts->type); - int align; intptr_t off; + int size, align; - switch (ts->type) { + /* When allocating an object, look at the full type. */ + size = tcg_type_size(ts->base_type); + switch (ts->base_type) { case TCG_TYPE_I32: align = 4; break; @@ -3302,13 +3303,30 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) tcg_raise_tb_overflow(s); } s->current_frame_offset = off + size; - - ts->mem_offset = off; #if defined(__sparc__) - ts->mem_offset += TCG_TARGET_STACK_BIAS; + off += TCG_TARGET_STACK_BIAS; #endif - ts->mem_base = s->frame_temp; - ts->mem_allocated = 1; + + /* If the object was subdivided, assign memory to all the parts. */ + if (ts->base_type != ts->type) { + int part_size = tcg_type_size(ts->type); + int part_count = size / part_size; + + /* + * Each part is allocated sequentially in tcg_temp_new_internal. + * Jump back to the first part by subtracting the current index. + */ + ts -= ts->temp_subindex; + for (int i = 0; i < part_count; ++i) { + ts[i].mem_offset = off + i * part_size; + ts[i].mem_base = s->frame_temp; + ts[i].mem_allocated = 1; + } + } else { + ts->mem_offset = off; + ts->mem_base = s->frame_temp; + ts->mem_allocated = 1; + } } /* Assign @reg to @ts, and update reg_to_temp[]. */ From 6a6d772e30d62e209587ef341df243e9789f5a9f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Oct 2022 21:28:04 +1000 Subject: [PATCH 06/40] tcg: Introduce tcg_out_addi_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the function for arm, i386, and s390x, which will use it. Add stubs for all other backends. Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 7 +++++++ tcg/arm/tcg-target.c.inc | 20 ++++++++++++++++++++ tcg/i386/tcg-target.c.inc | 8 ++++++++ tcg/loongarch64/tcg-target.c.inc | 7 +++++++ tcg/mips/tcg-target.c.inc | 7 +++++++ tcg/ppc/tcg-target.c.inc | 7 +++++++ tcg/riscv/tcg-target.c.inc | 7 +++++++ tcg/s390x/tcg-target.c.inc | 7 +++++++ tcg/sparc64/tcg-target.c.inc | 7 +++++++ tcg/tcg.c | 2 ++ tcg/tci/tcg-target.c.inc | 7 +++++++ 11 files changed, 86 insertions(+) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 330d26b395..bd6da72678 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1102,6 +1102,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, tcg_out_insn(s, 3305, LDR, 0, rd); } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + /* Define something more legible for general use. */ #define tcg_out_ldst_r tcg_out_insn_3310 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 0f5f9f4925..6e9e9b9b3f 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2581,6 +2581,26 @@ static void tcg_out_movi(TCGContext *s, TCGType type, tcg_out_movi32(s, COND_AL, ret, arg); } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + int enc, opc = ARITH_ADD; + + /* All of the easiest immediates to encode are positive. */ + if (imm < 0) { + imm = -imm; + opc = ARITH_SUB; + } + enc = encode_imm(imm); + if (enc >= 0) { + tcg_out_dat_imm(s, COND_AL, opc, rd, rs, enc); + } else { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, imm); + tcg_out_dat_reg(s, COND_AL, opc, rd, rs, + TCG_REG_TMP, SHIFT_IMM_LSL(0)); + } +} + /* Type is always V128, with I64 elements. */ static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh) { diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index c71c3e664d..7b573bd287 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -1069,6 +1069,14 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + tcg_debug_assert(TCG_TARGET_REG_BITS == 32); + tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm); +} + static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) { if (val == (int8_t)val) { diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index ce4a153887..b6e2ff6213 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -417,6 +417,13 @@ static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd, } } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) { tcg_out_opc_andi(s, ret, arg, 0xff); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 6e000d8e69..d419c4c1fc 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -550,6 +550,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags) { /* ret and arg can't be register tmp0 */ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 8d6899cf40..85f84fe59e 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1125,6 +1125,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, } } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + static bool mask_operand(uint32_t c, int *mb, int *me) { uint32_t lsb, test; diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 01cb67ef7b..383331025a 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -559,6 +559,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, tcg_out_opc_imm(s, OPC_LD, rd, rd, 0); } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) { tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 218318feb2..d8fd755ef0 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1073,6 +1073,13 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm); +} + static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src, int msb, int lsb, int ofs, int z) { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index dd406bc065..4b834f3f1e 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -496,6 +496,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2); } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1, TCGReg a2, int op) { diff --git a/tcg/tcg.c b/tcg/tcg.c index cdfc50b164..8923b52044 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -104,6 +104,8 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); +static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long) + __attribute__((unused)); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); static void tcg_out_op(TCGContext *s, TCGOpcode opc, diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index bc452007c6..33551b43dc 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -557,6 +557,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, } } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, const TCGHelperInfo *info) { From 313bdea84d2912fdbb139e746bd9346b3d85ebdc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 31 Oct 2022 09:22:59 +1100 Subject: [PATCH 07/40] tcg: Add TCG_CALL_{RET,ARG}_BY_REF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These will be used by some hosts, both 32 and 64-bit, to pass and return i128. Not yet used, because allocation is not yet enabled. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/tcg-internal.h | 3 + tcg/tcg.c | 135 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 3 deletions(-) diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index 6e50aeba3a..2ec1ea01df 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -36,6 +36,7 @@ */ typedef enum { TCG_CALL_RET_NORMAL, /* by registers */ + TCG_CALL_RET_BY_REF, /* for i128, by reference */ } TCGCallReturnKind; typedef enum { @@ -44,6 +45,8 @@ typedef enum { TCG_CALL_ARG_EXTEND, /* for i32, as a sign/zero-extended i64 */ TCG_CALL_ARG_EXTEND_U, /* ... as a zero-extended i64 */ TCG_CALL_ARG_EXTEND_S, /* ... as a sign-extended i64 */ + TCG_CALL_ARG_BY_REF, /* for i128, by reference, first */ + TCG_CALL_ARG_BY_REF_N, /* ... by reference, subsequent */ } TCGCallArgumentKind; typedef struct TCGCallArgumentLoc { diff --git a/tcg/tcg.c b/tcg/tcg.c index 8923b52044..123cde7000 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -104,8 +104,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); -static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long) - __attribute__((unused)); +static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_goto_tb(TCGContext *s, int which); static void tcg_out_op(TCGContext *s, TCGOpcode opc, @@ -683,6 +682,38 @@ static void layout_arg_normal_n(TCGCumulativeArgs *cum, cum->arg_slot += n; } +static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info) +{ + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; + int n = 128 / TCG_TARGET_REG_BITS; + + /* The first subindex carries the pointer. */ + layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF); + + /* + * The callee is allowed to clobber memory associated with + * structure pass by-reference. Therefore we must make copies. + * Allocate space from "ref_slot", which will be adjusted to + * follow the parameters on the stack. + */ + loc[0].ref_slot = cum->ref_slot; + + /* + * Subsequent words also go into the reference slot, but + * do not accumulate into the regular arguments. + */ + for (int i = 1; i < n; ++i) { + loc[i] = (TCGCallArgumentLoc){ + .kind = TCG_CALL_ARG_BY_REF_N, + .arg_idx = cum->arg_idx, + .tmp_subindex = i, + .ref_slot = cum->ref_slot + i, + }; + } + cum->info_in_idx += n; + cum->ref_slot += n; +} + static void init_call_layout(TCGHelperInfo *info) { int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); @@ -718,6 +749,14 @@ static void init_call_layout(TCGHelperInfo *info) case TCG_CALL_RET_NORMAL: assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); break; + case TCG_CALL_RET_BY_REF: + /* + * Allocate the first argument to the output. + * We don't need to store this anywhere, just make it + * unavailable for use in the input loop below. + */ + cum.arg_slot = 1; + break; default: qemu_build_not_reached(); } @@ -796,6 +835,9 @@ static void init_call_layout(TCGHelperInfo *info) case TCG_CALL_ARG_NORMAL: layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS); break; + case TCG_CALL_ARG_BY_REF: + layout_arg_by_ref(&cum, info); + break; default: qemu_build_not_reached(); } @@ -811,7 +853,39 @@ static void init_call_layout(TCGHelperInfo *info) assert(cum.info_in_idx <= ARRAY_SIZE(info->in)); /* Validate the backend has enough argument space. */ assert(cum.arg_slot <= max_reg_slots + max_stk_slots); - assert(cum.ref_slot <= max_stk_slots); + + /* + * Relocate the "ref_slot" area to the end of the parameters. + * Minimizing this stack offset helps code size for x86, + * which has a signed 8-bit offset encoding. + */ + if (cum.ref_slot != 0) { + int ref_base = 0; + + if (cum.arg_slot > max_reg_slots) { + int align = __alignof(Int128) / sizeof(tcg_target_long); + + ref_base = cum.arg_slot - max_reg_slots; + if (align > 1) { + ref_base = ROUND_UP(ref_base, align); + } + } + assert(ref_base + cum.ref_slot <= max_stk_slots); + + if (ref_base != 0) { + for (int i = cum.info_in_idx - 1; i >= 0; --i) { + TCGCallArgumentLoc *loc = &info->in[i]; + switch (loc->kind) { + case TCG_CALL_ARG_BY_REF: + case TCG_CALL_ARG_BY_REF_N: + loc->ref_slot += ref_base; + break; + default: + break; + } + } + } + } } static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)]; @@ -1740,6 +1814,8 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) switch (loc->kind) { case TCG_CALL_ARG_NORMAL: + case TCG_CALL_ARG_BY_REF: + case TCG_CALL_ARG_BY_REF_N: op->args[pi++] = temp_arg(ts); break; @@ -4411,6 +4487,27 @@ static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l, } } +static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base, + intptr_t ref_off, TCGRegSet *allocated_regs) +{ + TCGReg reg; + int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs); + + if (stk_slot < 0) { + reg = tcg_target_call_iarg_regs[arg_slot]; + tcg_reg_free(s, reg, *allocated_regs); + tcg_out_addi_ptr(s, reg, ref_base, ref_off); + tcg_regset_set_reg(*allocated_regs, reg); + } else { + reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR], + *allocated_regs, 0, false); + tcg_out_addi_ptr(s, reg, ref_base, ref_off); + tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK, + TCG_TARGET_CALL_STACK_OFFSET + + stk_slot * sizeof(tcg_target_long)); + } +} + static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) { const int nb_oargs = TCGOP_CALLO(op); @@ -4434,6 +4531,16 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) case TCG_CALL_ARG_EXTEND_S: load_arg_normal(s, loc, ts, &allocated_regs); break; + case TCG_CALL_ARG_BY_REF: + load_arg_stk(s, loc->ref_slot, ts, allocated_regs); + load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK, + TCG_TARGET_CALL_STACK_OFFSET + + loc->ref_slot * sizeof(tcg_target_long), + &allocated_regs); + break; + case TCG_CALL_ARG_BY_REF_N: + load_arg_stk(s, loc->ref_slot, ts, allocated_regs); + break; default: g_assert_not_reached(); } @@ -4465,6 +4572,19 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) save_globals(s, allocated_regs); } + /* + * If the ABI passes a pointer to the returned struct as the first + * argument, load that now. Pass a pointer to the output home slot. + */ + if (info->out_kind == TCG_CALL_RET_BY_REF) { + TCGTemp *ts = arg_temp(op->args[0]); + + if (!ts->mem_allocated) { + temp_allocate_frame(s, ts); + } + load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs); + } + tcg_out_call(s, tcg_call_func(op), info); /* Assign output registers and emit moves if needed. */ @@ -4481,6 +4601,15 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) ts->mem_coherent = 0; } break; + + case TCG_CALL_RET_BY_REF: + /* The callee has performed a write through the reference. */ + for (i = 0; i < nb_oargs; i++) { + TCGTemp *ts = arg_temp(op->args[i]); + ts->val_type = TEMP_VAL_MEM; + } + break; + default: g_assert_not_reached(); } From 5e3d0c199f4edf4ecdf8100464da441c60ce36e3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 00:55:36 +1000 Subject: [PATCH 08/40] tcg: Introduce tcg_target_call_oarg_reg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the flat array tcg_target_call_oarg_regs[] with a function call including the TCGCallReturnKind. Extend the set of registers for ARM to r0-r3 to match the ABI: https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#result-return Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 10 +++++++--- tcg/arm/tcg-target.c.inc | 10 +++++++--- tcg/i386/tcg-target.c.inc | 16 ++++++++++------ tcg/loongarch64/tcg-target.c.inc | 10 ++++++---- tcg/mips/tcg-target.c.inc | 10 ++++++---- tcg/ppc/tcg-target.c.inc | 10 ++++++---- tcg/riscv/tcg-target.c.inc | 10 ++++++---- tcg/s390x/tcg-target.c.inc | 9 ++++++--- tcg/sparc64/tcg-target.c.inc | 12 ++++++------ tcg/tcg.c | 9 ++++++--- tcg/tci/tcg-target.c.inc | 12 ++++++------ 11 files changed, 72 insertions(+), 46 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index bd6da72678..fde3b30ad1 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -63,9 +63,13 @@ static const int tcg_target_call_iarg_regs[8] = { TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7 }; -static const int tcg_target_call_oarg_regs[1] = { - TCG_REG_X0 -}; + +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot <= 1); + return TCG_REG_X0 + slot; +} #define TCG_REG_TMP TCG_REG_X30 #define TCG_VEC_TMP TCG_REG_V31 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 6e9e9b9b3f..d06ac60c15 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -79,9 +79,13 @@ static const int tcg_target_reg_alloc_order[] = { static const int tcg_target_call_iarg_regs[4] = { TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3 }; -static const int tcg_target_call_oarg_regs[2] = { - TCG_REG_R0, TCG_REG_R1 -}; + +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot <= 3); + return TCG_REG_R0 + slot; +} #define TCG_REG_TMP TCG_REG_R12 #define TCG_VEC_TMP TCG_REG_Q15 diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 7b573bd287..2f0a9521bf 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -109,12 +109,16 @@ static const int tcg_target_call_iarg_regs[] = { #endif }; -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_EAX, -#if TCG_TARGET_REG_BITS == 32 - TCG_REG_EDX -#endif -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + switch (kind) { + case TCG_CALL_RET_NORMAL: + tcg_debug_assert(slot >= 0 && slot <= 1); + return slot ? TCG_REG_EDX : TCG_REG_EAX; + default: + g_assert_not_reached(); + } +} /* Constants we accept. */ #define TCG_CT_CONST_S32 0x100 diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index b6e2ff6213..c5f55afd68 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -114,10 +114,12 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_A7, }; -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_A0, - TCG_REG_A1, -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot <= 1); + return TCG_REG_A0 + slot; +} #ifndef CONFIG_SOFTMMU #define USE_GUEST_BASE (guest_base != 0) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index d419c4c1fc..80748d892e 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -136,10 +136,12 @@ static const TCGReg tcg_target_call_iarg_regs[] = { #endif }; -static const TCGReg tcg_target_call_oarg_regs[2] = { - TCG_REG_V0, - TCG_REG_V1 -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot <= 1); + return TCG_REG_V0 + slot; +} static const tcg_insn_unit *tb_ret_addr; static const tcg_insn_unit *bswap32_addr; diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 85f84fe59e..f3fec14118 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -186,10 +186,12 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_R10 }; -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R3, - TCG_REG_R4 -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot <= 1); + return TCG_REG_R3 + slot; +} static const int tcg_target_callee_save_regs[] = { #ifdef _CALL_DARWIN diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 383331025a..558de127ef 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -113,10 +113,12 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_A7, }; -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_A0, - TCG_REG_A1, -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot <= 1); + return TCG_REG_A0 + slot; +} #define TCG_CT_CONST_ZERO 0x100 #define TCG_CT_CONST_S12 0x200 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index d8fd755ef0..844532156b 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -402,9 +402,12 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_R6, }; -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R2, -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot == 0); + return TCG_REG_R2; +} #define S390_CC_EQ 8 #define S390_CC_LT 4 diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 4b834f3f1e..ccc4144f7c 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -132,12 +132,12 @@ static const int tcg_target_call_iarg_regs[6] = { TCG_REG_O5, }; -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_O0, - TCG_REG_O1, - TCG_REG_O2, - TCG_REG_O3, -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot <= 3); + return TCG_REG_O0 + slot; +} #define INSN_OP(x) ((x) << 30) #define INSN_OP2(x) ((x) << 22) diff --git a/tcg/tcg.c b/tcg/tcg.c index 123cde7000..a77483eee8 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -151,6 +151,7 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, TCGReg base, intptr_t ofs); static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, const TCGHelperInfo *info); +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); static bool tcg_target_const_match(int64_t val, TCGType type, int ct); #ifdef TCG_TARGET_NEED_LDST_LABELS static int tcg_out_ldst_finalize(TCGContext *s); @@ -740,14 +741,16 @@ static void init_call_layout(TCGHelperInfo *info) case dh_typecode_s64: info->nr_out = 64 / TCG_TARGET_REG_BITS; info->out_kind = TCG_CALL_RET_NORMAL; - assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); + /* Query the last register now to trigger any assert early. */ + tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); break; case dh_typecode_i128: info->nr_out = 128 / TCG_TARGET_REG_BITS; info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */ switch (/* TODO */ TCG_CALL_RET_NORMAL) { case TCG_CALL_RET_NORMAL: - assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); + /* Query the last register now to trigger any assert early. */ + tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); break; case TCG_CALL_RET_BY_REF: /* @@ -4592,7 +4595,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) case TCG_CALL_RET_NORMAL: for (i = 0; i < nb_oargs; i++) { TCGTemp *ts = arg_temp(op->args[i]); - TCGReg reg = tcg_target_call_oarg_regs[i]; + TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i); /* ENV should not be modified. */ tcg_debug_assert(!temp_readonly(ts)); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 33551b43dc..e3b0ff303f 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -200,12 +200,12 @@ static const int tcg_target_reg_alloc_order[] = { /* No call arguments via registers. All will be stored on the "stack". */ static const int tcg_target_call_iarg_regs[] = { }; -static const int tcg_target_call_oarg_regs[] = { - TCG_REG_R0, -#if TCG_TARGET_REG_BITS == 32 - TCG_REG_R1 -#endif -}; +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot < 64 / TCG_TARGET_REG_BITS); + return TCG_REG_R0 + slot; +} #ifdef CONFIG_DEBUG_TCG static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { From c6556aa0c8de8718813fea0ca61232632bf33c42 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 01:13:52 +1000 Subject: [PATCH 09/40] tcg: Add TCG_CALL_RET_BY_VEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used by _WIN64 to return i128. Not yet used, because allocation is not yet enabled. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/tcg-internal.h | 1 + tcg/tcg.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index 2ec1ea01df..33f1d8b411 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -37,6 +37,7 @@ typedef enum { TCG_CALL_RET_NORMAL, /* by registers */ TCG_CALL_RET_BY_REF, /* for i128, by reference */ + TCG_CALL_RET_BY_VEC, /* for i128, by vector register */ } TCGCallReturnKind; typedef enum { diff --git a/tcg/tcg.c b/tcg/tcg.c index a77483eee8..098be83b00 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -752,6 +752,10 @@ static void init_call_layout(TCGHelperInfo *info) /* Query the last register now to trigger any assert early. */ tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); break; + case TCG_CALL_RET_BY_VEC: + /* Query the single register now to trigger any assert early. */ + tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0); + break; case TCG_CALL_RET_BY_REF: /* * Allocate the first argument to the output. @@ -4605,6 +4609,21 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) } break; + case TCG_CALL_RET_BY_VEC: + { + TCGTemp *ts = arg_temp(op->args[0]); + + tcg_debug_assert(ts->base_type == TCG_TYPE_I128); + tcg_debug_assert(ts->temp_subindex == 0); + if (!ts->mem_allocated) { + temp_allocate_frame(s, ts); + } + tcg_out_st(s, TCG_TYPE_V128, + tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0), + ts->mem_base->reg, ts->mem_offset); + } + /* fall through to mark all parts in memory */ + case TCG_CALL_RET_BY_REF: /* The callee has performed a write through the reference. */ for (i = 0; i < nb_oargs; i++) { From b959822c94e6d32b36fad038e79c14f841e585c1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2022 09:17:20 +1000 Subject: [PATCH 10/40] include/qemu/int128: Use Int128 structure for TCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are about to allow passing Int128 to/from tcg helper functions, but libffi doesn't support __int128_t, so use the structure. In order for atomic128.h to continue working, we must provide a mechanism to frob between real __int128_t and the structure. Provide a new union, Int128Alias, for this. We cannot modify Int128 itself, as any changed alignment would also break libffi. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/qemu/atomic128.h | 29 +++++++++++++++++++++------ include/qemu/int128.h | 25 +++++++++++++++++++++--- util/int128.c | 42 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h index adb9a1a260..d0ba0b9c65 100644 --- a/include/qemu/atomic128.h +++ b/include/qemu/atomic128.h @@ -44,13 +44,23 @@ #if defined(CONFIG_ATOMIC128) static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) { - return qatomic_cmpxchg__nocheck(ptr, cmp, new); + Int128Alias r, c, n; + + c.s = cmp; + n.s = new; + r.i = qatomic_cmpxchg__nocheck((__int128_t *)ptr, c.i, n.i); + return r.s; } # define HAVE_CMPXCHG128 1 #elif defined(CONFIG_CMPXCHG128) static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new) { - return __sync_val_compare_and_swap_16(ptr, cmp, new); + Int128Alias r, c, n; + + c.s = cmp; + n.s = new; + r.i = __sync_val_compare_and_swap_16((__int128_t *)ptr, c.i, n.i); + return r.s; } # define HAVE_CMPXCHG128 1 #elif defined(__aarch64__) @@ -89,12 +99,18 @@ Int128 QEMU_ERROR("unsupported atomic") #if defined(CONFIG_ATOMIC128) static inline Int128 atomic16_read(Int128 *ptr) { - return qatomic_read__nocheck(ptr); + Int128Alias r; + + r.i = qatomic_read__nocheck((__int128_t *)ptr); + return r.s; } static inline void atomic16_set(Int128 *ptr, Int128 val) { - qatomic_set__nocheck(ptr, val); + Int128Alias v; + + v.s = val; + qatomic_set__nocheck((__int128_t *)ptr, v.i); } # define HAVE_ATOMIC128 1 @@ -132,7 +148,8 @@ static inline void atomic16_set(Int128 *ptr, Int128 val) static inline Int128 atomic16_read(Int128 *ptr) { /* Maybe replace 0 with 0, returning the old value. */ - return atomic16_cmpxchg(ptr, 0, 0); + Int128 z = int128_make64(0); + return atomic16_cmpxchg(ptr, z, z); } static inline void atomic16_set(Int128 *ptr, Int128 val) @@ -141,7 +158,7 @@ static inline void atomic16_set(Int128 *ptr, Int128 val) do { cmp = old; old = atomic16_cmpxchg(ptr, cmp, val); - } while (old != cmp); + } while (int128_ne(old, cmp)); } # define HAVE_ATOMIC128 1 diff --git a/include/qemu/int128.h b/include/qemu/int128.h index d2b76ca6ac..f62a46b48c 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -3,7 +3,12 @@ #include "qemu/bswap.h" -#ifdef CONFIG_INT128 +/* + * With TCI, we need to use libffi for interfacing with TCG helpers. + * But libffi does not support __int128_t, and therefore cannot pass + * or return values of this type, force use of the Int128 struct. + */ +#if defined(CONFIG_INT128) && !defined(CONFIG_TCG_INTERPRETER) typedef __int128_t Int128; static inline Int128 int128_make64(uint64_t a) @@ -460,8 +465,7 @@ Int128 int128_divu(Int128, Int128); Int128 int128_remu(Int128, Int128); Int128 int128_divs(Int128, Int128); Int128 int128_rems(Int128, Int128); - -#endif /* CONFIG_INT128 */ +#endif /* CONFIG_INT128 && !CONFIG_TCG_INTERPRETER */ static inline void bswap128s(Int128 *s) { @@ -472,4 +476,19 @@ static inline void bswap128s(Int128 *s) #define INT128_MAX int128_make128(UINT64_MAX, INT64_MAX) #define INT128_MIN int128_make128(0, INT64_MIN) +/* + * When compiler supports a 128-bit type, define a combination of + * a possible structure and the native types. Ease parameter passing + * via use of the transparent union extension. + */ +#ifdef CONFIG_INT128 +typedef union { + Int128 s; + __int128_t i; + __uint128_t u; +} Int128Alias __attribute__((transparent_union)); +#else +typedef Int128 Int128Alias; +#endif /* CONFIG_INT128 */ + #endif /* INT128_H */ diff --git a/util/int128.c b/util/int128.c index ed8f25fef1..df6c6331bd 100644 --- a/util/int128.c +++ b/util/int128.c @@ -144,4 +144,46 @@ Int128 int128_rems(Int128 a, Int128 b) return r; } +#elif defined(CONFIG_TCG_INTERPRETER) + +Int128 int128_divu(Int128 a_s, Int128 b_s) +{ + Int128Alias r, a, b; + + a.s = a_s; + b.s = b_s; + r.u = a.u / b.u; + return r.s; +} + +Int128 int128_remu(Int128 a_s, Int128 b_s) +{ + Int128Alias r, a, b; + + a.s = a_s; + b.s = b_s; + r.u = a.u % b.u; + return r.s; +} + +Int128 int128_divs(Int128 a_s, Int128 b_s) +{ + Int128Alias r, a, b; + + a.s = a_s; + b.s = b_s; + r.i = a.i / b.i; + return r.s; +} + +Int128 int128_rems(Int128 a_s, Int128 b_s) +{ + Int128Alias r, a, b; + + a.s = a_s; + b.s = b_s; + r.i = a.i % b.i; + return r.s; +} + #endif From c4f4a00ac7d947c9b100e3cb62755a9a157df1fa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 10:16:28 +1000 Subject: [PATCH 11/40] tcg/i386: Add TCG_TARGET_CALL_{RET,ARG}_I128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fill in the parameters for the host ABI for Int128. Adjust tcg_target_call_oarg_reg for _WIN64, and tcg_out_call for i386 sysv. Allow TCG_TYPE_V128 stores without AVX enabled. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 30 +++++++++++++++++++++++++++++- tcg/i386/tcg-target.h | 10 ++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 2f0a9521bf..883ced8168 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -115,6 +115,11 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) case TCG_CALL_RET_NORMAL: tcg_debug_assert(slot >= 0 && slot <= 1); return slot ? TCG_REG_EDX : TCG_REG_EAX; +#ifdef _WIN64 + case TCG_CALL_RET_BY_VEC: + tcg_debug_assert(slot == 0); + return TCG_REG_XMM0; +#endif default: g_assert_not_reached(); } @@ -1188,9 +1193,16 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, * The gvec infrastructure is asserts that v128 vector loads * and stores use a 16-byte aligned offset. Validate that the * final pointer is aligned by using an insn that will SIGSEGV. + * + * This specific instance is also used by TCG_CALL_RET_BY_VEC, + * for _WIN64, which must have SSE2 but may not have AVX. */ tcg_debug_assert(arg >= 16); - tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); + if (have_avx1) { + tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2); + } else { + tcg_out_modrm_offset(s, OPC_MOVDQA_WxVx, arg, arg1, arg2); + } break; case TCG_TYPE_V256: /* @@ -1677,6 +1689,22 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest, const TCGHelperInfo *info) { tcg_out_branch(s, 1, dest); + +#ifndef _WIN32 + if (TCG_TARGET_REG_BITS == 32 && info->out_kind == TCG_CALL_RET_BY_REF) { + /* + * The sysv i386 abi for struct return places a reference as the + * first argument of the stack, and pops that argument with the + * return statement. Since we want to retain the aligned stack + * pointer for the callee, we do not want to actually push that + * argument before the call but rely on the normal store to the + * stack slot. But we do need to compensate for the pop in order + * to reset our correct stack pointer value. + * Pushing a garbage value back onto the stack is quickest. + */ + tcg_out_push(s, TCG_REG_EAX); + } +#endif } static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 5797a55ea0..d4f2a6f8c2 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -100,6 +100,16 @@ typedef enum { #endif #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#if defined(_WIN64) +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF +# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_VEC +#elif TCG_TARGET_REG_BITS == 64 +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL +#else +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +# define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF +#endif extern bool have_bmi1; extern bool have_popcnt; From 896c76e6ba5d9a3444fb8528fdc407747ecc82f2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 10:34:21 +1000 Subject: [PATCH 12/40] tcg/tci: Fix big-endian return register ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We expect the backend to require register pairs in host-endian ordering, thus for big-endian the first register of a pair contains the high part. We were forcing R0 to contain the low part for calls. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/tci.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tcg/tci.c b/tcg/tci.c index 05a24163d3..eeccdde8bc 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -520,27 +520,28 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, ffi_call(pptr[1], pptr[0], stack, call_slots); } - /* Any result winds up "left-aligned" in the stack[0] slot. */ switch (len) { case 0: /* void */ break; case 1: /* uint32_t */ /* + * The result winds up "left-aligned" in the stack[0] slot. * Note that libffi has an odd special case in that it will * always widen an integral result to ffi_arg. */ - if (sizeof(ffi_arg) == 4) { - regs[TCG_REG_R0] = *(uint32_t *)stack; - break; - } - /* fall through */ - case 2: /* uint64_t */ - if (TCG_TARGET_REG_BITS == 32) { - tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]); + if (sizeof(ffi_arg) == 8) { + regs[TCG_REG_R0] = (uint32_t)stack[0]; } else { - regs[TCG_REG_R0] = stack[0]; + regs[TCG_REG_R0] = *(uint32_t *)stack; } break; + case 2: /* uint64_t */ + /* + * For TCG_TARGET_REG_BITS == 32, the register pair + * must stay in host memory order. + */ + memcpy(®s[TCG_REG_R0], stack, 8); + break; default: g_assert_not_reached(); } From e9709e17ac88f16c60004c4160c9a131d36ed564 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 10:47:54 +1000 Subject: [PATCH 13/40] tcg/tci: Add TCG_TARGET_CALL_{RET,ARG}_I128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fill in the parameters for libffi for Int128. Adjust the interpreter to allow for 16-byte return values. Adjust tcg_out_call to record the return value length. Call parameters are no longer all the same size, so we cannot reuse the same call_slots array for every function. Compute it each time now, but only fill in slots required for the call we're about to make. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/tcg.c | 19 +++++++++++++++++ tcg/tci.c | 44 ++++++++++++++++++++-------------------- tcg/tci/tcg-target.c.inc | 10 ++++----- tcg/tci/tcg-target.h | 3 +++ 4 files changed, 49 insertions(+), 27 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 098be83b00..865ed5ea0f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -570,6 +570,22 @@ static GHashTable *helper_table; #ifdef CONFIG_TCG_INTERPRETER static ffi_type *typecode_to_ffi(int argmask) { + /* + * libffi does not support __int128_t, so we have forced Int128 + * to use the structure definition instead of the builtin type. + */ + static ffi_type *ffi_type_i128_elements[3] = { + &ffi_type_uint64, + &ffi_type_uint64, + NULL + }; + static ffi_type ffi_type_i128 = { + .size = 16, + .alignment = __alignof__(Int128), + .type = FFI_TYPE_STRUCT, + .elements = ffi_type_i128_elements, + }; + switch (argmask) { case dh_typecode_void: return &ffi_type_void; @@ -583,6 +599,8 @@ static ffi_type *typecode_to_ffi(int argmask) return &ffi_type_sint64; case dh_typecode_ptr: return &ffi_type_pointer; + case dh_typecode_i128: + return &ffi_type_i128; } g_assert_not_reached(); } @@ -613,6 +631,7 @@ static void init_ffi_layouts(void) /* Ignoring the return type, find the last non-zero field. */ nargs = 32 - clz32(typemask >> 3); nargs = DIV_ROUND_UP(nargs, 3); + assert(nargs <= MAX_CALL_IARGS); ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *)); ca->cif.rtype = typecode_to_ffi(typemask & 7); diff --git a/tcg/tci.c b/tcg/tci.c index eeccdde8bc..022fe9d0f8 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -470,12 +470,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, tcg_target_ulong regs[TCG_TARGET_NB_REGS]; uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) / sizeof(uint64_t)]; - void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)]; regs[TCG_AREG0] = (tcg_target_ulong)env; regs[TCG_REG_CALL_STACK] = (uintptr_t)stack; - /* Other call_slots entries initialized at first use (see below). */ - call_slots[0] = NULL; tci_assert(tb_ptr); for (;;) { @@ -498,26 +495,26 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, switch (opc) { case INDEX_op_call: - /* - * Set up the ffi_avalue array once, delayed until now - * because many TB's do not make any calls. In tcg_gen_callN, - * we arranged for every real argument to be "left-aligned" - * in each 64-bit slot. - */ - if (unlikely(call_slots[0] == NULL)) { - for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) { - call_slots[i] = &stack[i]; - } - } - - tci_args_nl(insn, tb_ptr, &len, &ptr); - - /* Helper functions may need to access the "return address" */ - tci_tb_ptr = (uintptr_t)tb_ptr; - { - void **pptr = ptr; - ffi_call(pptr[1], pptr[0], stack, call_slots); + void *call_slots[MAX_CALL_IARGS]; + ffi_cif *cif; + void *func; + unsigned i, s, n; + + tci_args_nl(insn, tb_ptr, &len, &ptr); + func = ((void **)ptr)[0]; + cif = ((void **)ptr)[1]; + + n = cif->nargs; + for (i = s = 0; i < n; ++i) { + ffi_type *t = cif->arg_types[i]; + call_slots[i] = &stack[s]; + s += DIV_ROUND_UP(t->size, 8); + } + + /* Helper functions may need to access the "return address" */ + tci_tb_ptr = (uintptr_t)tb_ptr; + ffi_call(cif, func, stack, call_slots); } switch (len) { @@ -542,6 +539,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, */ memcpy(®s[TCG_REG_R0], stack, 8); break; + case 3: /* Int128 */ + memcpy(®s[TCG_REG_R0], stack, 16); + break; default: g_assert_not_reached(); } diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index e3b0ff303f..c1d34d7bd1 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -203,7 +203,7 @@ static const int tcg_target_call_iarg_regs[] = { }; static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) { tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); - tcg_debug_assert(slot >= 0 && slot < 64 / TCG_TARGET_REG_BITS); + tcg_debug_assert(slot >= 0 && slot < 128 / TCG_TARGET_REG_BITS); return TCG_REG_R0 + slot; } @@ -573,11 +573,11 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, if (cif->rtype == &ffi_type_void) { which = 0; - } else if (cif->rtype->size == 4) { - which = 1; } else { - tcg_debug_assert(cif->rtype->size == 8); - which = 2; + tcg_debug_assert(cif->rtype->size == 4 || + cif->rtype->size == 8 || + cif->rtype->size == 16); + which = ctz32(cif->rtype->size) - 1; } new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif); insn = deposit32(insn, 0, 8, INDEX_op_call); diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 1414ab4d5b..7140a76a73 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -160,10 +160,13 @@ typedef enum { #if TCG_TARGET_REG_BITS == 32 # define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EVEN # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN #else # define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +# define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL #endif +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL #define HAVE_TCG_QEMU_TB_EXEC #define TCG_TARGET_NEED_POOL_LABELS From 5427a9a76041029730775292995e87c3edd06515 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 07:54:48 +1000 Subject: [PATCH 14/40] tcg: Add TCG_TARGET_CALL_{RET,ARG}_I128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fill in the parameters for the host ABI for Int128 for those backends which require no extra modification. Reviewed-by: Alex Bennée Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.h | 2 ++ tcg/arm/tcg-target.h | 2 ++ tcg/loongarch64/tcg-target.h | 2 ++ tcg/mips/tcg-target.h | 2 ++ tcg/ppc/tcg-target.c.inc | 3 +++ tcg/riscv/tcg-target.h | 3 +++ tcg/s390x/tcg-target.h | 2 ++ tcg/sparc64/tcg-target.h | 2 ++ tcg/tcg.c | 6 +++--- 9 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 8d244292aa..c0b0f614ba 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -54,6 +54,8 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 91b8954804..def2a189e6 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -91,6 +91,8 @@ extern bool use_neon_instructions; #define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF /* optional instructions */ #define TCG_TARGET_HAS_ext8s_i32 1 diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 8b151e7f6f..17b8193aa5 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -92,6 +92,8 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL /* optional instructions */ #define TCG_TARGET_HAS_movcond_i32 1 diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 7bc8e15293..68b11e4d48 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -89,6 +89,8 @@ typedef enum { # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL #endif #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL /* MOVN/MOVZ instructions detection */ #if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index f3fec14118..afadf9a1e3 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -54,6 +54,9 @@ #else # define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL #endif +/* Note sysv arg alignment applies only to 2-word types, not more. */ +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL /* For some memory operations, we need a scratch that isn't R0. For the AIX calling convention, we can re-use the TOC register since we'll be reloading diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 1337bc1f1e..0deb33701f 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -85,9 +85,12 @@ typedef enum { #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL #if TCG_TARGET_REG_BITS == 32 #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_EVEN +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN #else #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL #endif +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL /* optional instructions */ #define TCG_TARGET_HAS_movcond_i32 0 diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index e597e47e60..a05b473117 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -169,6 +169,8 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_CALL_STACK_OFFSET 160 #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF #define TCG_TARGET_HAS_MEMORY_BSWAP 1 diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index 1d6a5c8b07..ffe22b1d21 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -73,6 +73,8 @@ typedef enum { #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) #define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_EXTEND #define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL #if defined(__VIS__) && __VIS__ >= 0x300 #define use_vis3_instructions 1 diff --git a/tcg/tcg.c b/tcg/tcg.c index 865ed5ea0f..163913c95f 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -765,8 +765,8 @@ static void init_call_layout(TCGHelperInfo *info) break; case dh_typecode_i128: info->nr_out = 128 / TCG_TARGET_REG_BITS; - info->out_kind = TCG_CALL_RET_NORMAL; /* TODO */ - switch (/* TODO */ TCG_CALL_RET_NORMAL) { + info->out_kind = TCG_TARGET_CALL_RET_I128; + switch (TCG_TARGET_CALL_RET_I128) { case TCG_CALL_RET_NORMAL: /* Query the last register now to trigger any assert early. */ tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1); @@ -854,7 +854,7 @@ static void init_call_layout(TCGHelperInfo *info) break; case TCG_TYPE_I128: - switch (/* TODO */ TCG_CALL_ARG_NORMAL) { + switch (TCG_TARGET_CALL_ARG_I128) { case TCG_CALL_ARG_EVEN: layout_arg_even(&cum); /* fall through */ From 43eef72f41093ae4a94ffddc94aeef80a2fb5c69 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 08:03:41 +1000 Subject: [PATCH 15/40] tcg: Add temp allocation for TCGv_i128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables allocation of i128. The type is not yet usable, as we have not yet added data movement ops. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 32 +++++++++++++++++++++++++ tcg/tcg.c | 60 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 4d7e4107a9..59854f95b1 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -687,6 +687,11 @@ static inline TCGTemp *tcgv_i64_temp(TCGv_i64 v) return tcgv_i32_temp((TCGv_i32)v); } +static inline TCGTemp *tcgv_i128_temp(TCGv_i128 v) +{ + return tcgv_i32_temp((TCGv_i32)v); +} + static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v) { return tcgv_i32_temp((TCGv_i32)v); @@ -707,6 +712,11 @@ static inline TCGArg tcgv_i64_arg(TCGv_i64 v) return temp_arg(tcgv_i64_temp(v)); } +static inline TCGArg tcgv_i128_arg(TCGv_i128 v) +{ + return temp_arg(tcgv_i128_temp(v)); +} + static inline TCGArg tcgv_ptr_arg(TCGv_ptr v) { return temp_arg(tcgv_ptr_temp(v)); @@ -728,6 +738,11 @@ static inline TCGv_i64 temp_tcgv_i64(TCGTemp *t) return (TCGv_i64)temp_tcgv_i32(t); } +static inline TCGv_i128 temp_tcgv_i128(TCGTemp *t) +{ + return (TCGv_i128)temp_tcgv_i32(t); +} + static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t) { return (TCGv_ptr)temp_tcgv_i32(t); @@ -853,6 +868,11 @@ static inline void tcg_temp_free_i64(TCGv_i64 arg) tcg_temp_free_internal(tcgv_i64_temp(arg)); } +static inline void tcg_temp_free_i128(TCGv_i128 arg) +{ + tcg_temp_free_internal(tcgv_i128_temp(arg)); +} + static inline void tcg_temp_free_ptr(TCGv_ptr arg) { tcg_temp_free_internal(tcgv_ptr_temp(arg)); @@ -901,6 +921,18 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void) return temp_tcgv_i64(t); } +static inline TCGv_i128 tcg_temp_new_i128(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, false); + return temp_tcgv_i128(t); +} + +static inline TCGv_i128 tcg_temp_local_new_i128(void) +{ + TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I128, true); + return temp_tcgv_i128(t); +} + static inline TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t offset, const char *name) { diff --git a/tcg/tcg.c b/tcg/tcg.c index 163913c95f..a4a3da6804 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1273,26 +1273,45 @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) tcg_debug_assert(ts->base_type == type); tcg_debug_assert(ts->kind == kind); } else { + int i, n; + + switch (type) { + case TCG_TYPE_I32: + case TCG_TYPE_V64: + case TCG_TYPE_V128: + case TCG_TYPE_V256: + n = 1; + break; + case TCG_TYPE_I64: + n = 64 / TCG_TARGET_REG_BITS; + break; + case TCG_TYPE_I128: + n = 128 / TCG_TARGET_REG_BITS; + break; + default: + g_assert_not_reached(); + } + ts = tcg_temp_alloc(s); - if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) { - TCGTemp *ts2 = tcg_temp_alloc(s); + ts->base_type = type; + ts->temp_allocated = 1; + ts->kind = kind; - ts->base_type = type; - ts->type = TCG_TYPE_I32; - ts->temp_allocated = 1; - ts->kind = kind; - - tcg_debug_assert(ts2 == ts + 1); - ts2->base_type = TCG_TYPE_I64; - ts2->type = TCG_TYPE_I32; - ts2->temp_allocated = 1; - ts2->temp_subindex = 1; - ts2->kind = kind; - } else { - ts->base_type = type; + if (n == 1) { ts->type = type; - ts->temp_allocated = 1; - ts->kind = kind; + } else { + ts->type = TCG_TYPE_REG; + + for (i = 1; i < n; ++i) { + TCGTemp *ts2 = tcg_temp_alloc(s); + + tcg_debug_assert(ts2 == ts + i); + ts2->base_type = type; + ts2->type = TCG_TYPE_REG; + ts2->temp_allocated = 1; + ts2->temp_subindex = i; + ts2->kind = kind; + } } } @@ -3384,9 +3403,14 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) case TCG_TYPE_V64: align = 8; break; + case TCG_TYPE_I128: case TCG_TYPE_V128: case TCG_TYPE_V256: - /* Note that we do not require aligned storage for V256. */ + /* + * Note that we do not require aligned storage for V256, + * and that we provide alignment for I128 to match V128, + * even if that's above what the host ABI requires. + */ align = 16; break; default: From 4771e71c28eb0cece2a17a2d891bbd724bdc158d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 08:00:51 +1000 Subject: [PATCH 16/40] tcg: Add basic data movement for TCGv_i128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add code generation functions for data movement between TCGv_i128 (mov) and to/from TCGv_i64 (concat, extract). Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/tcg/tcg-op.h | 4 ++++ tcg/tcg-internal.h | 13 +++++++++++++ tcg/tcg-op.c | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 79b1cf786f..c4276767d1 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -712,6 +712,10 @@ void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); +void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src); +void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg); +void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi); + static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) { tcg_gen_deposit_i64(ret, lo, hi, 32, 32); diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h index 33f1d8b411..e542a4e9b7 100644 --- a/tcg/tcg-internal.h +++ b/tcg/tcg-internal.h @@ -117,4 +117,17 @@ extern TCGv_i32 TCGV_LOW(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); extern TCGv_i32 TCGV_HIGH(TCGv_i64) QEMU_ERROR("32-bit code path is reachable"); #endif +static inline TCGv_i64 TCGV128_LOW(TCGv_i128 t) +{ + /* For 32-bit, offset by 2, which may then have TCGV_{LOW,HIGH} applied. */ + int o = HOST_BIG_ENDIAN ? 64 / TCG_TARGET_REG_BITS : 0; + return temp_tcgv_i64(tcgv_i128_temp(t) + o); +} + +static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t) +{ + int o = HOST_BIG_ENDIAN ? 0 : 64 / TCG_TARGET_REG_BITS; + return temp_tcgv_i64(tcgv_i128_temp(t) + o); +} + #endif /* TCG_INTERNAL_H */ diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 326a9180ef..cb83d2375d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2747,6 +2747,26 @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) tcg_gen_shri_i64(hi, arg, 32); } +void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg) +{ + tcg_gen_mov_i64(lo, TCGV128_LOW(arg)); + tcg_gen_mov_i64(hi, TCGV128_HIGH(arg)); +} + +void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi) +{ + tcg_gen_mov_i64(TCGV128_LOW(ret), lo); + tcg_gen_mov_i64(TCGV128_HIGH(ret), hi); +} + +void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src) +{ + if (dst != src) { + tcg_gen_mov_i64(TCGV128_LOW(dst), TCGV128_LOW(src)); + tcg_gen_mov_i64(TCGV128_HIGH(dst), TCGV128_HIGH(src)); + } +} + /* QEMU specific operations. */ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) From cb48f3654e290ee5d7cbf1fb31888463fa2a180c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 7 Nov 2022 19:48:14 +1100 Subject: [PATCH 17/40] tcg: Add guest load/store primitives for TCGv_i128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are not yet considering atomicity of the 16-byte value; this is a direct replacement for the current target code which uses a pair of 8-byte operations. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cputlb.c | 112 +++++++++++++++++++++++++++++++++ accel/tcg/user-exec.c | 66 ++++++++++++++++++++ include/exec/cpu_ldst.h | 10 +++ include/tcg/tcg-op.h | 2 + tcg/tcg-op.c | 134 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 324 insertions(+) diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 04e270742e..4812d83961 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2192,6 +2192,64 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu); } +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) +{ + MemOp mop = get_memop(oi); + int mmu_idx = get_mmuidx(oi); + MemOpIdx new_oi; + unsigned a_bits; + uint64_t h, l; + + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128)); + a_bits = get_alignment_bits(mop); + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD, + mmu_idx, ra); + } + + /* Construct an unaligned 64-bit replacement MemOpIdx. */ + mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN; + new_oi = make_memop_idx(mop, mmu_idx); + + h = helper_be_ldq_mmu(env, addr, new_oi, ra); + l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra); + + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + return int128_make128(l, h); +} + +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) +{ + MemOp mop = get_memop(oi); + int mmu_idx = get_mmuidx(oi); + MemOpIdx new_oi; + unsigned a_bits; + uint64_t h, l; + + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128)); + a_bits = get_alignment_bits(mop); + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD, + mmu_idx, ra); + } + + /* Construct an unaligned 64-bit replacement MemOpIdx. */ + mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN; + new_oi = make_memop_idx(mop, mmu_idx); + + l = helper_le_ldq_mmu(env, addr, new_oi, ra); + h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra); + + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + return int128_make128(l, h); +} + /* * Store Helpers */ @@ -2546,6 +2604,60 @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val, cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu); } +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val, + MemOpIdx oi, uintptr_t ra) +{ + MemOp mop = get_memop(oi); + int mmu_idx = get_mmuidx(oi); + MemOpIdx new_oi; + unsigned a_bits; + + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128)); + a_bits = get_alignment_bits(mop); + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, + mmu_idx, ra); + } + + /* Construct an unaligned 64-bit replacement MemOpIdx. */ + mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN; + new_oi = make_memop_idx(mop, mmu_idx); + + helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra); + + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); +} + +void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val, + MemOpIdx oi, uintptr_t ra) +{ + MemOp mop = get_memop(oi); + int mmu_idx = get_mmuidx(oi); + MemOpIdx new_oi; + unsigned a_bits; + + tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128)); + a_bits = get_alignment_bits(mop); + + /* Handle CPU specific unaligned behaviour */ + if (addr & ((1 << a_bits) - 1)) { + cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE, + mmu_idx, ra); + } + + /* Construct an unaligned 64-bit replacement MemOpIdx. */ + mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN; + new_oi = make_memop_idx(mop, mmu_idx); + + helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra); + + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); +} + #include "ldst_common.c.inc" /* diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index a8eb63ab96..ae67d84638 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -1031,6 +1031,42 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, return ret; } +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) +{ + void *haddr; + Int128 ret; + + validate_memop(oi, MO_128 | MO_BE); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + memcpy(&ret, haddr, 16); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + + if (!HOST_BIG_ENDIAN) { + ret = bswap128(ret); + } + return ret; +} + +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) +{ + void *haddr; + Int128 ret; + + validate_memop(oi, MO_128 | MO_LE); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + memcpy(&ret, haddr, 16); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); + + if (HOST_BIG_ENDIAN) { + ret = bswap128(ret); + } + return ret; +} + void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, MemOpIdx oi, uintptr_t ra) { @@ -1115,6 +1151,36 @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, + Int128 val, MemOpIdx oi, uintptr_t ra) +{ + void *haddr; + + validate_memop(oi, MO_128 | MO_BE); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + if (!HOST_BIG_ENDIAN) { + val = bswap128(val); + } + memcpy(haddr, &val, 16); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); +} + +void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, + Int128 val, MemOpIdx oi, uintptr_t ra) +{ + void *haddr; + + validate_memop(oi, MO_128 | MO_LE); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + if (HOST_BIG_ENDIAN) { + val = bswap128(val); + } + memcpy(haddr, &val, 16); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); +} + uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr) { uint32_t ret; diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index d0c7c0d5fe..09b55cc0ee 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -220,6 +220,11 @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra); +Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra); +Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra); + void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val, MemOpIdx oi, uintptr_t ra); void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val, @@ -235,6 +240,11 @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val, void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val, MemOpIdx oi, uintptr_t ra); +void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val, + MemOpIdx oi, uintptr_t ra); +void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val, + MemOpIdx oi, uintptr_t ra); + uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, uint32_t cmpv, uint32_t newv, MemOpIdx oi, uintptr_t retaddr); diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index c4276767d1..e5f5b63c37 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -845,6 +845,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp); void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp); void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp); void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp); +void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp); +void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp); static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index) { diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index cb83d2375d..33ef325f6e 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -3109,6 +3109,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) } } +static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig) +{ + MemOp mop_1 = orig, mop_2; + + tcg_debug_assert((orig & MO_SIZE) == MO_128); + tcg_debug_assert((orig & MO_SIGN) == 0); + + /* Use a memory ordering implemented by the host. */ + if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) { + mop_1 &= ~MO_BSWAP; + } + + /* Reduce the size to 64-bit. */ + mop_1 = (mop_1 & ~MO_SIZE) | MO_64; + + /* Retain the alignment constraints of the original. */ + switch (orig & MO_AMASK) { + case MO_UNALN: + case MO_ALIGN_2: + case MO_ALIGN_4: + mop_2 = mop_1; + break; + case MO_ALIGN_8: + /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */ + mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN; + mop_2 = mop_1; + break; + case MO_ALIGN: + /* Second has 8-byte alignment; first has 16-byte alignment. */ + mop_2 = mop_1; + mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16; + break; + case MO_ALIGN_16: + case MO_ALIGN_32: + case MO_ALIGN_64: + /* Second has 8-byte alignment; first retains original. */ + mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN; + break; + default: + g_assert_not_reached(); + } + ret[0] = mop_1; + ret[1] = mop_2; +} + +void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop) +{ + MemOp mop[2]; + TCGv addr_p8; + TCGv_i64 x, y; + + canonicalize_memop_i128_as_i64(mop, memop); + + tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + addr = plugin_prep_mem_callbacks(addr); + + /* TODO: respect atomicity of the operation. */ + /* TODO: allow the tcg backend to see the whole operation. */ + + /* + * Since there are no global TCGv_i128, there is no visible state + * changed if the second load faults. Load directly into the two + * subwords. + */ + if ((memop & MO_BSWAP) == MO_LE) { + x = TCGV128_LOW(val); + y = TCGV128_HIGH(val); + } else { + x = TCGV128_HIGH(val); + y = TCGV128_LOW(val); + } + + gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx); + + if ((mop[0] ^ memop) & MO_BSWAP) { + tcg_gen_bswap64_i64(x, x); + } + + addr_p8 = tcg_temp_new(); + tcg_gen_addi_tl(addr_p8, addr, 8); + gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx); + tcg_temp_free(addr_p8); + + if ((mop[0] ^ memop) & MO_BSWAP) { + tcg_gen_bswap64_i64(y, y); + } + + plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx), + QEMU_PLUGIN_MEM_R); +} + +void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop) +{ + MemOp mop[2]; + TCGv addr_p8; + TCGv_i64 x, y; + + canonicalize_memop_i128_as_i64(mop, memop); + + tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST); + addr = plugin_prep_mem_callbacks(addr); + + /* TODO: respect atomicity of the operation. */ + /* TODO: allow the tcg backend to see the whole operation. */ + + if ((memop & MO_BSWAP) == MO_LE) { + x = TCGV128_LOW(val); + y = TCGV128_HIGH(val); + } else { + x = TCGV128_HIGH(val); + y = TCGV128_LOW(val); + } + + addr_p8 = tcg_temp_new(); + if ((mop[0] ^ memop) & MO_BSWAP) { + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_bswap64_i64(t, x); + gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx); + tcg_gen_bswap64_i64(t, y); + tcg_gen_addi_tl(addr_p8, addr, 8); + gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx); + tcg_temp_free_i64(t); + } else { + gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx); + tcg_gen_addi_tl(addr_p8, addr, 8); + gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx); + } + tcg_temp_free(addr_p8); + + plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx), + QEMU_PLUGIN_MEM_W); +} + static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc) { switch (opc & MO_SSIZE) { From 123ae5683c9e7815857304fd2f21664621c90a13 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 9 Nov 2022 00:23:44 +1100 Subject: [PATCH 18/40] tcg: Add tcg_gen_{non}atomic_cmpxchg_i128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow targets to avoid rolling their own. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/atomic_common.c.inc | 45 +++++++++++++++++++ accel/tcg/tcg-runtime.h | 11 +++++ include/tcg/tcg-op.h | 5 +++ tcg/tcg-op.c | 85 +++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+) diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc index 6602d7689f..8f2ce43ee6 100644 --- a/accel/tcg/atomic_common.c.inc +++ b/accel/tcg/atomic_common.c.inc @@ -55,8 +55,53 @@ CMPXCHG_HELPER(cmpxchgq_be, uint64_t) CMPXCHG_HELPER(cmpxchgq_le, uint64_t) #endif +#ifdef CONFIG_CMPXCHG128 +CMPXCHG_HELPER(cmpxchgo_be, Int128) +CMPXCHG_HELPER(cmpxchgo_le, Int128) +#endif + #undef CMPXCHG_HELPER +Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, uint32_t oi) +{ +#if TCG_TARGET_REG_BITS == 32 + uintptr_t ra = GETPC(); + Int128 oldv; + + oldv = cpu_ld16_be_mmu(env, addr, oi, ra); + if (int128_eq(oldv, cmpv)) { + cpu_st16_be_mmu(env, addr, newv, oi, ra); + } else { + /* Even with comparison failure, still need a write cycle. */ + probe_write(env, addr, 16, get_mmuidx(oi), ra); + } + return oldv; +#else + g_assert_not_reached(); +#endif +} + +Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, uint32_t oi) +{ +#if TCG_TARGET_REG_BITS == 32 + uintptr_t ra = GETPC(); + Int128 oldv; + + oldv = cpu_ld16_le_mmu(env, addr, oi, ra); + if (int128_eq(oldv, cmpv)) { + cpu_st16_le_mmu(env, addr, newv, oi, ra); + } else { + /* Even with comparison failure, still need a write cycle. */ + probe_write(env, addr, 16, get_mmuidx(oi), ra); + } + return oldv; +#else + g_assert_not_reached(); +#endif +} + #define ATOMIC_HELPER(OP, TYPE) \ TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \ TYPE val, uint32_t oi) \ diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index 37cbd722bf..e141a6ab24 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -55,6 +55,17 @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64, i32) #endif +#ifdef CONFIG_CMPXCHG128 +DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG, + i128, env, tl, i128, i128, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG, + i128, env, tl, i128, i128, i32) +#endif + +DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG, + i128, env, tl, i128, i128, i32) +DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG, + i128, env, tl, i128, i128, i32) #ifdef CONFIG_ATOMIC64 #define GEN_ATOMIC_HELPERS(NAME) \ diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index e5f5b63c37..31bf3d287e 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -907,6 +907,11 @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32, TCGArg, MemOp); void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64, TCGArg, MemOp); +void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128, + TCGArg, MemOp); + +void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128, + TCGArg, MemOp); void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp); void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 33ef325f6e..5811ecd3e7 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -3295,6 +3295,8 @@ typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32, TCGv_i32); typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64, TCGv_i32); +typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv, + TCGv_i128, TCGv_i128, TCGv_i32); typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32); typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, @@ -3305,6 +3307,11 @@ typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, #else # define WITH_ATOMIC64(X) #endif +#ifdef CONFIG_CMPXCHG128 +# define WITH_ATOMIC128(X) X, +#else +# define WITH_ATOMIC128(X) +#endif static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = { [MO_8] = gen_helper_atomic_cmpxchgb, @@ -3314,6 +3321,8 @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = { [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be, WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le) WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be) + WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le) + WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be) }; void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, @@ -3412,6 +3421,82 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, } } +void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv, + TCGv_i128 newv, TCGArg idx, MemOp memop) +{ + if (TCG_TARGET_REG_BITS == 32) { + /* Inline expansion below is simply too large for 32-bit hosts. */ + gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE + ? gen_helper_nonatomic_cmpxchgo_le + : gen_helper_nonatomic_cmpxchgo_be); + MemOpIdx oi = make_memop_idx(memop, idx); + + tcg_debug_assert((memop & MO_SIZE) == MO_128); + tcg_debug_assert((memop & MO_SIGN) == 0); + + gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi)); + } else { + TCGv_i128 oldv = tcg_temp_new_i128(); + TCGv_i128 tmpv = tcg_temp_new_i128(); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 z = tcg_constant_i64(0); + + tcg_gen_qemu_ld_i128(oldv, addr, idx, memop); + + /* Compare i128 */ + tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv)); + tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv)); + tcg_gen_or_i64(t0, t0, t1); + + /* tmpv = equal ? newv : oldv */ + tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z, + TCGV128_LOW(newv), TCGV128_LOW(oldv)); + tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z, + TCGV128_HIGH(newv), TCGV128_HIGH(oldv)); + + /* Unconditional writeback. */ + tcg_gen_qemu_st_i128(tmpv, addr, idx, memop); + tcg_gen_mov_i128(retv, oldv); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i128(tmpv); + tcg_temp_free_i128(oldv); + } +} + +void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv, + TCGv_i128 newv, TCGArg idx, MemOp memop) +{ + gen_atomic_cx_i128 gen; + + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { + tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop); + return; + } + + tcg_debug_assert((memop & MO_SIZE) == MO_128); + tcg_debug_assert((memop & MO_SIGN) == 0); + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + + if (gen) { + MemOpIdx oi = make_memop_idx(memop, idx); + gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi)); + return; + } + + gen_helper_exit_atomic(cpu_env); + + /* + * Produce a result for a well-formed opcode stream. This satisfies + * liveness for set before used, which happens before this dead code + * is removed. + */ + tcg_gen_movi_i64(TCGV128_LOW(retv), 0); + tcg_gen_movi_i64(TCGV128_HIGH(retv), 0); +} + static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop, bool new_val, void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32)) From d1beee4da1dbbc0ce1bc42b38752366eed4babec Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 Nov 2022 16:07:04 +1000 Subject: [PATCH 19/40] tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32,64} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally this is automatically handled by the CF_PARALLEL checks with in tcg_gen_atomic_cmpxchg_i{32,64}, but x86 has a special case of !PREFIX_LOCK where it always wants the non-atomic version. Split these out so that x86 does not have to roll its own. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- include/tcg/tcg-op.h | 4 ++ tcg/tcg-op.c | 154 +++++++++++++++++++++++++++---------------- 2 files changed, 101 insertions(+), 57 deletions(-) diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 31bf3d287e..839d91c0c7 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -910,6 +910,10 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64, void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128, TCGArg, MemOp); +void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32, + TCGArg, MemOp); +void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64, + TCGArg, MemOp); void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128, TCGArg, MemOp); diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 5811ecd3e7..c581ae77c4 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -3325,82 +3325,122 @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = { WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be) }; +void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, + TCGv_i32 newv, TCGArg idx, MemOp memop) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i32(t2, addr, idx, memop); + tcg_temp_free_i32(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, t1, memop); + } else { + tcg_gen_mov_i32(retv, t1); + } + tcg_temp_free_i32(t1); +} + void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, TCGv_i32 newv, TCGArg idx, MemOp memop) { - memop = tcg_canonicalize_memop(memop, 0, 0); + gen_atomic_cx_i32 gen; + MemOpIdx oi; if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { - TCGv_i32 t1 = tcg_temp_new_i32(); - TCGv_i32 t2 = tcg_temp_new_i32(); - - tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE); - - tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); - tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1); - tcg_gen_qemu_st_i32(t2, addr, idx, memop); - tcg_temp_free_i32(t2); - - if (memop & MO_SIGN) { - tcg_gen_ext_i32(retv, t1, memop); - } else { - tcg_gen_mov_i32(retv, t1); - } - tcg_temp_free_i32(t1); - } else { - gen_atomic_cx_i32 gen; - MemOpIdx oi; - - gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; - tcg_debug_assert(gen != NULL); - - oi = make_memop_idx(memop & ~MO_SIGN, idx); - gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi)); - - if (memop & MO_SIGN) { - tcg_gen_ext_i32(retv, retv, memop); - } + tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop); + return; } + + memop = tcg_canonicalize_memop(memop, 0, 0); + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + + oi = make_memop_idx(memop & ~MO_SIGN, idx); + gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi)); + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, retv, memop); + } +} + +void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, + TCGv_i64 newv, TCGArg idx, MemOp memop) +{ + TCGv_i64 t1, t2; + + if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { + tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv), + TCGV_LOW(newv), idx, memop); + if (memop & MO_SIGN) { + tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31); + } else { + tcg_gen_movi_i32(TCGV_HIGH(retv), 0); + } + return; + } + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + + tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i64(t2, addr, idx, memop); + tcg_temp_free_i64(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(retv, t1, memop); + } else { + tcg_gen_mov_i64(retv, t1); + } + tcg_temp_free_i64(t1); } void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, TCGv_i64 newv, TCGArg idx, MemOp memop) { - memop = tcg_canonicalize_memop(memop, 1, 0); - if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { - TCGv_i64 t1 = tcg_temp_new_i64(); - TCGv_i64 t2 = tcg_temp_new_i64(); + tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop); + return; + } - tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE); - - tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); - tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1); - tcg_gen_qemu_st_i64(t2, addr, idx, memop); - tcg_temp_free_i64(t2); - - if (memop & MO_SIGN) { - tcg_gen_ext_i64(retv, t1, memop); - } else { - tcg_gen_mov_i64(retv, t1); - } - tcg_temp_free_i64(t1); - } else if ((memop & MO_SIZE) == MO_64) { -#ifdef CONFIG_ATOMIC64 + if ((memop & MO_SIZE) == MO_64) { gen_atomic_cx_i64 gen; - MemOpIdx oi; + memop = tcg_canonicalize_memop(memop, 1, 0); gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; - tcg_debug_assert(gen != NULL); + if (gen) { + MemOpIdx oi = make_memop_idx(memop, idx); + gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi)); + return; + } - oi = make_memop_idx(memop, idx); - gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi)); -#else gen_helper_exit_atomic(cpu_env); - /* Produce a result, so that we have a well-formed opcode stream - with respect to uses of the result in the (dead) code following. */ + + /* + * Produce a result for a well-formed opcode stream. This satisfies + * liveness for set before used, which happens before this dead code + * is removed. + */ tcg_gen_movi_i64(retv, 0); -#endif /* CONFIG_ATOMIC64 */ + return; + } + + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv), + TCGV_LOW(newv), idx, memop); + if (memop & MO_SIGN) { + tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31); + } else { + tcg_gen_movi_i32(TCGV_HIGH(retv), 0); + } } else { TCGv_i32 c32 = tcg_temp_new_i32(); TCGv_i32 n32 = tcg_temp_new_i32(); From 546789c7df8866c55cae8d3195e8e58328a35d51 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 12 Nov 2022 14:25:54 +1000 Subject: [PATCH 20/40] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for STXP Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-Id: <20221112042555.2622152-2-richard.henderson@linaro.org> --- target/arm/helper-a64.c | 104 ------------------------------------- target/arm/helper-a64.h | 6 --- target/arm/translate-a64.c | 60 ++++++++++++--------- 3 files changed, 35 insertions(+), 135 deletions(-) diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 77a8502b6b..7dbdb2c233 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -505,110 +505,6 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - Int128 newv = int128_make128(new_lo, new_hi); - Int128 oldv; - uintptr_t ra = GETPC(); - uint64_t o0, o1; - bool success; - int mem_idx = cpu_mmu_index(env, false); - MemOpIdx oi0 = make_memop_idx(MO_LEUQ | MO_ALIGN_16, mem_idx); - MemOpIdx oi1 = make_memop_idx(MO_LEUQ, mem_idx); - - o0 = cpu_ldq_le_mmu(env, addr + 0, oi0, ra); - o1 = cpu_ldq_le_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - cpu_stq_le_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); - cpu_stq_le_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); - } - - return !success; -} - -uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - bool success; - int mem_idx; - MemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx); - - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - - success = int128_eq(oldv, cmpv); - return !success; -} - -uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - /* - * High and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - Int128 newv = int128_make128(new_hi, new_lo); - Int128 oldv; - uintptr_t ra = GETPC(); - uint64_t o0, o1; - bool success; - int mem_idx = cpu_mmu_index(env, false); - MemOpIdx oi0 = make_memop_idx(MO_BEUQ | MO_ALIGN_16, mem_idx); - MemOpIdx oi1 = make_memop_idx(MO_BEUQ, mem_idx); - - o1 = cpu_ldq_be_mmu(env, addr + 0, oi0, ra); - o0 = cpu_ldq_be_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - cpu_stq_be_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); - cpu_stq_be_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); - } - - return !success; -} - -uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - bool success; - int mem_idx; - MemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_BE | MO_128 | MO_ALIGN, mem_idx); - - /* - * High and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - newv = int128_make128(new_hi, new_lo); - oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - - success = int128_eq(oldv, cmpv); - return !success; -} - /* Writes back the old data into Rs. */ void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, uint64_t new_lo, uint64_t new_hi) diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 7b706571bb..94065d1917 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -50,12 +50,6 @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG, - i64, env, i64, i64, i64) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG, - i64, env, i64, i64, i64) DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64) DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64) DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index bbfadb7c2e..951b64c9b1 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -2601,32 +2601,42 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, get_mem_index(s), MO_64 | MO_ALIGN | s->be_data); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - if (!HAVE_CMPXCHG128) { - gen_helper_exit_atomic(cpu_env); - /* - * Produce a result so we have a well-formed opcode - * stream when the following (dead) code uses 'tmp'. - * TCG will remove the dead ops for us. - */ - tcg_gen_movi_i64(tmp, 0); - } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, - cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); - } else { - gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, - cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); - } - } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); + TCGv_i128 t16 = tcg_temp_new_i128(); + TCGv_i128 c16 = tcg_temp_new_i128(); + TCGv_i64 a, b; + + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, + cpu_exclusive_high); + } else { + tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); + tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, + cpu_exclusive_val); + } + + tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, + get_mem_index(s), + MO_128 | MO_ALIGN | s->be_data); + tcg_temp_free_i128(c16); + + a = tcg_temp_new_i64(); + b = tcg_temp_new_i64(); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(a, b, t16); + } else { + tcg_gen_extr_i128_i64(b, a, t16); + } + + tcg_gen_xor_i64(a, a, cpu_exclusive_val); + tcg_gen_xor_i64(b, b, cpu_exclusive_high); + tcg_gen_or_i64(tmp, a, b); + tcg_temp_free_i64(a); + tcg_temp_free_i64(b); + tcg_temp_free_i128(t16); + + tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); } } else { tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, From 9c32396debee91a87867abc562bb8e2b458c958a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 12 Nov 2022 14:25:55 +1000 Subject: [PATCH 21/40] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for CASP Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-Id: <20221112042555.2622152-3-richard.henderson@linaro.org> --- target/arm/helper-a64.c | 43 --------------------------- target/arm/helper-a64.h | 2 -- target/arm/translate-a64.c | 61 +++++++++++--------------------------- 3 files changed, 18 insertions(+), 88 deletions(-) diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c index 7dbdb2c233..0972a4bdd0 100644 --- a/target/arm/helper-a64.c +++ b/target/arm/helper-a64.c @@ -505,49 +505,6 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -/* Writes back the old data into Rs. */ -void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - int mem_idx; - MemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx); - - cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - - env->xregs[rs] = int128_getlo(oldv); - env->xregs[rs + 1] = int128_gethi(oldv); -} - -void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, - uint64_t new_hi, uint64_t new_lo) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - int mem_idx; - MemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx); - - cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - - env->xregs[rs + 1] = int128_getlo(oldv); - env->xregs[rs] = int128_gethi(oldv); -} - /* * AdvSIMD half-precision */ diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 94065d1917..ff56807247 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -50,8 +50,6 @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) -DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64) -DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64) DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 951b64c9b1..da9f877476 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -2709,53 +2709,28 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, tcg_gen_extr32_i64(s2, s1, cmp); } tcg_temp_free_i64(cmp); - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - if (HAVE_CMPXCHG128) { - TCGv_i32 tcg_rs = tcg_constant_i32(rs); - if (s->be_data == MO_LE) { - gen_helper_casp_le_parallel(cpu_env, tcg_rs, - clean_addr, t1, t2); - } else { - gen_helper_casp_be_parallel(cpu_env, tcg_rs, - clean_addr, t1, t2); - } - } else { - gen_helper_exit_atomic(cpu_env); - s->base.is_jmp = DISAS_NORETURN; - } } else { - TCGv_i64 d1 = tcg_temp_new_i64(); - TCGv_i64 d2 = tcg_temp_new_i64(); - TCGv_i64 a2 = tcg_temp_new_i64(); - TCGv_i64 c1 = tcg_temp_new_i64(); - TCGv_i64 c2 = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i128 cmp = tcg_temp_new_i128(); + TCGv_i128 val = tcg_temp_new_i128(); - /* Load the two words, in memory order. */ - tcg_gen_qemu_ld_i64(d1, clean_addr, memidx, - MO_64 | MO_ALIGN_16 | s->be_data); - tcg_gen_addi_i64(a2, clean_addr, 8); - tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data); + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(val, t1, t2); + tcg_gen_concat_i64_i128(cmp, s1, s2); + } else { + tcg_gen_concat_i64_i128(val, t2, t1); + tcg_gen_concat_i64_i128(cmp, s2, s1); + } - /* Compare the two words, also in memory order. */ - tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1); - tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2); - tcg_gen_and_i64(c2, c2, c1); + tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, + MO_128 | MO_ALIGN | s->be_data); + tcg_temp_free_i128(val); - /* If compare equal, write back new data, else write back old data. */ - tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1); - tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2); - tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data); - tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data); - tcg_temp_free_i64(a2); - tcg_temp_free_i64(c1); - tcg_temp_free_i64(c2); - - /* Write back the data from memory to Rs. */ - tcg_gen_mov_i64(s1, d1); - tcg_gen_mov_i64(s2, d2); - tcg_temp_free_i64(d1); - tcg_temp_free_i64(d2); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(s1, s2, cmp); + } else { + tcg_gen_extr_i128_i64(s2, s1, cmp); + } + tcg_temp_free_i128(cmp); } } From 894448ae7dce4269c4b3c152a7091520317ea397 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 12 Nov 2022 16:11:22 +1000 Subject: [PATCH 22/40] target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX Note that the previous direct reference to reserve_val, - tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode - ? offsetof(CPUPPCState, reserve_val2) - : offsetof(CPUPPCState, reserve_val))); was incorrect because all references should have gone through cpu_reserve_val. Create a cpu_reserve_val2 tcg temp to fix this. Signed-off-by: Richard Henderson Reviewed-by: Daniel Henrique Barboza Message-Id: <20221112061122.2720163-2-richard.henderson@linaro.org> --- target/ppc/helper.h | 2 - target/ppc/mem_helper.c | 44 ----------------- target/ppc/translate.c | 102 ++++++++++++++++++---------------------- 3 files changed, 47 insertions(+), 101 deletions(-) diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 8dd22a35e4..0beaca5c7a 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -818,6 +818,4 @@ DEF_HELPER_FLAGS_5(stq_le_parallel, TCG_CALL_NO_WG, void, env, tl, i64, i64, i32) DEF_HELPER_FLAGS_5(stq_be_parallel, TCG_CALL_NO_WG, void, env, tl, i64, i64, i32) -DEF_HELPER_5(stqcx_le_parallel, i32, env, tl, i64, i64, i32) -DEF_HELPER_5(stqcx_be_parallel, i32, env, tl, i64, i64, i32) #endif diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index d1163f316c..1578887a8f 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -413,50 +413,6 @@ void helper_stq_be_parallel(CPUPPCState *env, target_ulong addr, val = int128_make128(lo, hi); cpu_atomic_sto_be_mmu(env, addr, val, opidx, GETPC()); } - -uint32_t helper_stqcx_le_parallel(CPUPPCState *env, target_ulong addr, - uint64_t new_lo, uint64_t new_hi, - uint32_t opidx) -{ - bool success = false; - - /* We will have raised EXCP_ATOMIC from the translator. */ - assert(HAVE_CMPXCHG128); - - if (likely(addr == env->reserve_addr)) { - Int128 oldv, cmpv, newv; - - cmpv = int128_make128(env->reserve_val2, env->reserve_val); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, - opidx, GETPC()); - success = int128_eq(oldv, cmpv); - } - env->reserve_addr = -1; - return env->so + success * CRF_EQ_BIT; -} - -uint32_t helper_stqcx_be_parallel(CPUPPCState *env, target_ulong addr, - uint64_t new_lo, uint64_t new_hi, - uint32_t opidx) -{ - bool success = false; - - /* We will have raised EXCP_ATOMIC from the translator. */ - assert(HAVE_CMPXCHG128); - - if (likely(addr == env->reserve_addr)) { - Int128 oldv, cmpv, newv; - - cmpv = int128_make128(env->reserve_val2, env->reserve_val); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, - opidx, GETPC()); - success = int128_eq(oldv, cmpv); - } - env->reserve_addr = -1; - return env->so + success * CRF_EQ_BIT; -} #endif /*****************************************************************************/ diff --git a/target/ppc/translate.c b/target/ppc/translate.c index edb3daa9b5..1c17d5a558 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -72,6 +72,7 @@ static TCGv cpu_cfar; static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; static TCGv cpu_reserve; static TCGv cpu_reserve_val; +static TCGv cpu_reserve_val2; static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; @@ -141,8 +142,11 @@ void ppc_translate_init(void) offsetof(CPUPPCState, reserve_addr), "reserve_addr"); cpu_reserve_val = tcg_global_mem_new(cpu_env, - offsetof(CPUPPCState, reserve_val), - "reserve_val"); + offsetof(CPUPPCState, reserve_val), + "reserve_val"); + cpu_reserve_val2 = tcg_global_mem_new(cpu_env, + offsetof(CPUPPCState, reserve_val2), + "reserve_val2"); cpu_fpscr = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, fpscr), "fpscr"); @@ -3998,78 +4002,66 @@ static void gen_lqarx(DisasContext *ctx) /* stqcx. */ static void gen_stqcx_(DisasContext *ctx) { + TCGLabel *lab_fail, *lab_over; int rs = rS(ctx->opcode); - TCGv EA, hi, lo; + TCGv EA, t0, t1; + TCGv_i128 cmp, val; if (unlikely(rs & 1)) { gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); return; } + lab_fail = gen_new_label(); + lab_over = gen_new_label(); + gen_set_access_type(ctx, ACCESS_RES); EA = tcg_temp_new(); gen_addr_reg_index(ctx, EA); + tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail); + tcg_temp_free(EA); + + cmp = tcg_temp_new_i128(); + val = tcg_temp_new_i128(); + + tcg_gen_concat_i64_i128(cmp, cpu_reserve_val2, cpu_reserve_val); + /* Note that the low part is always in RS+1, even in LE mode. */ - lo = cpu_gpr[rs + 1]; - hi = cpu_gpr[rs]; + tcg_gen_concat_i64_i128(val, cpu_gpr[rs + 1], cpu_gpr[rs]); - if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { - if (HAVE_CMPXCHG128) { - TCGv_i32 oi = tcg_const_i32(DEF_MEMOP(MO_128) | MO_ALIGN); - if (ctx->le_mode) { - gen_helper_stqcx_le_parallel(cpu_crf[0], cpu_env, - EA, lo, hi, oi); - } else { - gen_helper_stqcx_be_parallel(cpu_crf[0], cpu_env, - EA, lo, hi, oi); - } - tcg_temp_free_i32(oi); - } else { - /* Restart with exclusive lock. */ - gen_helper_exit_atomic(cpu_env); - ctx->base.is_jmp = DISAS_NORETURN; - } - tcg_temp_free(EA); - } else { - TCGLabel *lab_fail = gen_new_label(); - TCGLabel *lab_over = gen_new_label(); - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_atomic_cmpxchg_i128(val, cpu_reserve, cmp, val, ctx->mem_idx, + DEF_MEMOP(MO_128 | MO_ALIGN)); + tcg_temp_free_i128(cmp); - tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, lab_fail); - tcg_temp_free(EA); + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + tcg_gen_extr_i128_i64(t1, t0, val); + tcg_temp_free_i128(val); - gen_qemu_ld64_i64(ctx, t0, cpu_reserve); - tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode - ? offsetof(CPUPPCState, reserve_val2) - : offsetof(CPUPPCState, reserve_val))); - tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail); + tcg_gen_xor_tl(t1, t1, cpu_reserve_val2); + tcg_gen_xor_tl(t0, t0, cpu_reserve_val); + tcg_gen_or_tl(t0, t0, t1); + tcg_temp_free(t1); - tcg_gen_addi_i64(t0, cpu_reserve, 8); - gen_qemu_ld64_i64(ctx, t0, t0); - tcg_gen_ld_i64(t1, cpu_env, (ctx->le_mode - ? offsetof(CPUPPCState, reserve_val) - : offsetof(CPUPPCState, reserve_val2))); - tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, lab_fail); + tcg_gen_setcondi_tl(TCG_COND_EQ, t0, t0, 0); + tcg_gen_shli_tl(t0, t0, CRF_EQ_BIT); + tcg_gen_or_tl(t0, t0, cpu_so); + tcg_gen_trunc_tl_i32(cpu_crf[0], t0); + tcg_temp_free(t0); - /* Success */ - gen_qemu_st64_i64(ctx, ctx->le_mode ? lo : hi, cpu_reserve); - tcg_gen_addi_i64(t0, cpu_reserve, 8); - gen_qemu_st64_i64(ctx, ctx->le_mode ? hi : lo, t0); + tcg_gen_br(lab_over); + gen_set_label(lab_fail); - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); - tcg_gen_br(lab_over); + /* + * Address mismatch implies failure. But we still need to provide + * the memory barrier semantics of the instruction. + */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); - gen_set_label(lab_fail); - tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); - - gen_set_label(lab_over); - tcg_gen_movi_tl(cpu_reserve, -1); - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); - } + gen_set_label(lab_over); + tcg_gen_movi_tl(cpu_reserve, -1); } #endif /* defined(TARGET_PPC64) */ From 29b8de001f8ea2f36d4de5a250d1150492311529 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 1 Nov 2022 12:13:00 +0100 Subject: [PATCH 23/40] tests/tcg/s390x: Add div.c Add a basic test to prevent regressions. Signed-off-by: Ilya Leoshkevich Message-Id: <20221101111300.2539919-1-iii@linux.ibm.com> Signed-off-by: Richard Henderson --- tests/tcg/s390x/Makefile.target | 1 + tests/tcg/s390x/div.c | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tests/tcg/s390x/div.c diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 07fcc6d0ce..ab7a3bcfb2 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -24,6 +24,7 @@ TESTS+=trap TESTS+=signals-s390x TESTS+=branch-relative-long TESTS+=noexec +TESTS+=div Z13_TESTS=vistr $(Z13_TESTS): CFLAGS+=-march=z13 -O2 diff --git a/tests/tcg/s390x/div.c b/tests/tcg/s390x/div.c new file mode 100644 index 0000000000..5807295614 --- /dev/null +++ b/tests/tcg/s390x/div.c @@ -0,0 +1,40 @@ +#include +#include + +static void test_dr(void) +{ + register int32_t r0 asm("r0") = -1; + register int32_t r1 asm("r1") = -4241; + int32_t b = 101, q, r; + + asm("dr %[r0],%[b]" + : [r0] "+r" (r0), [r1] "+r" (r1) + : [b] "r" (b) + : "cc"); + q = r1; + r = r0; + assert(q == -41); + assert(r == -100); +} + +static void test_dlr(void) +{ + register uint32_t r0 asm("r0") = 0; + register uint32_t r1 asm("r1") = 4243; + uint32_t b = 101, q, r; + + asm("dlr %[r0],%[b]" + : [r0] "+r" (r0), [r1] "+r" (r1) + : [b] "r" (b) + : "cc"); + q = r1; + r = r0; + assert(q == 42); + assert(r == 1); +} + +int main(void) +{ + test_dr(); + test_dlr(); +} From c432198ab09205d06ba9cf53cb4610d5fae22aa7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 25 Oct 2022 23:30:08 +0200 Subject: [PATCH 24/40] tests/tcg/s390x: Add clst.c Add a basic test to prevent regressions. Signed-off-by: Ilya Leoshkevich Message-Id: <20221025213008.2209006-2-iii@linux.ibm.com> Signed-off-by: Richard Henderson --- tests/tcg/s390x/Makefile.target | 1 + tests/tcg/s390x/clst.c | 82 +++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 tests/tcg/s390x/clst.c diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index ab7a3bcfb2..79250f31dd 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -25,6 +25,7 @@ TESTS+=signals-s390x TESTS+=branch-relative-long TESTS+=noexec TESTS+=div +TESTS+=clst Z13_TESTS=vistr $(Z13_TESTS): CFLAGS+=-march=z13 -O2 diff --git a/tests/tcg/s390x/clst.c b/tests/tcg/s390x/clst.c new file mode 100644 index 0000000000..ed2fe7326c --- /dev/null +++ b/tests/tcg/s390x/clst.c @@ -0,0 +1,82 @@ +#define _GNU_SOURCE +#include +#include + +static int clst(char sep, const char **s1, const char **s2) +{ + const char *r1 = *s1; + const char *r2 = *s2; + int cc; + + do { + register int r0 asm("r0") = sep; + + asm("clst %[r1],%[r2]\n" + "ipm %[cc]\n" + "srl %[cc],28" + : [r1] "+r" (r1), [r2] "+r" (r2), "+r" (r0), [cc] "=r" (cc) + : + : "cc"); + *s1 = r1; + *s2 = r2; + } while (cc == 3); + + return cc; +} + +static const struct test { + const char *name; + char sep; + const char *s1; + const char *s2; + int exp_cc; + int exp_off; +} tests[] = { + { + .name = "cc0", + .sep = 0, + .s1 = "aa", + .s2 = "aa", + .exp_cc = 0, + .exp_off = 0, + }, + { + .name = "cc1", + .sep = 1, + .s1 = "a\x01", + .s2 = "aa\x01", + .exp_cc = 1, + .exp_off = 1, + }, + { + .name = "cc2", + .sep = 2, + .s1 = "abc\x02", + .s2 = "abb\x02", + .exp_cc = 2, + .exp_off = 2, + }, +}; + +int main(void) +{ + const struct test *t; + const char *s1, *s2; + size_t i; + int cc; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + t = &tests[i]; + s1 = t->s1; + s2 = t->s2; + cc = clst(t->sep, &s1, &s2); + if (cc != t->exp_cc || + s1 != t->s1 + t->exp_off || + s2 != t->s2 + t->exp_off) { + fprintf(stderr, "%s\n", t->name); + return EXIT_FAILURE; + } + } + + return EXIT_SUCCESS; +} From 521d38ec9b4da82576dfcd3c5b6a2172cda25736 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 16:09:30 +1000 Subject: [PATCH 25/40] tests/tcg/s390x: Add long-double.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Ilya Leoshkevich Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tests/tcg/s390x/Makefile.target | 1 + tests/tcg/s390x/long-double.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/tcg/s390x/long-double.c diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 79250f31dd..1d454270c0 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -26,6 +26,7 @@ TESTS+=branch-relative-long TESTS+=noexec TESTS+=div TESTS+=clst +TESTS+=long-double Z13_TESTS=vistr $(Z13_TESTS): CFLAGS+=-march=z13 -O2 diff --git a/tests/tcg/s390x/long-double.c b/tests/tcg/s390x/long-double.c new file mode 100644 index 0000000000..757a6262fd --- /dev/null +++ b/tests/tcg/s390x/long-double.c @@ -0,0 +1,24 @@ +/* + * Perform some basic arithmetic with long double, as a sanity check. + * With small integral numbers, we can cross-check with integers. + */ + +#include + +int main() +{ + int i, j; + + for (i = 1; i < 5; i++) { + for (j = 1; j < 5; j++) { + long double la = (long double)i + j; + long double lm = (long double)i * j; + long double ls = (long double)i - j; + + assert(la == i + j); + assert(lm == i * j); + assert(ls == i - j); + } + } + return 0; +} From 82f6584c9b1345489a6446b3bc4086e00e8d67d1 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 1 Feb 2023 14:32:57 +0100 Subject: [PATCH 26/40] tests/tcg/s390x: Add cdsg.c Add a simple test to prevent regressions. Signed-off-by: Ilya Leoshkevich Message-Id: <20230201133257.3223115-1-iii@linux.ibm.com> Signed-off-by: Richard Henderson --- tests/tcg/s390x/Makefile.target | 4 ++ tests/tcg/s390x/cdsg.c | 93 +++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 tests/tcg/s390x/cdsg.c diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 1d454270c0..72ad309b27 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -27,6 +27,10 @@ TESTS+=noexec TESTS+=div TESTS+=clst TESTS+=long-double +TESTS+=cdsg + +cdsg: CFLAGS+=-pthread +cdsg: LDFLAGS+=-pthread Z13_TESTS=vistr $(Z13_TESTS): CFLAGS+=-march=z13 -O2 diff --git a/tests/tcg/s390x/cdsg.c b/tests/tcg/s390x/cdsg.c new file mode 100644 index 0000000000..800618ff4b --- /dev/null +++ b/tests/tcg/s390x/cdsg.c @@ -0,0 +1,93 @@ +/* + * Test CDSG instruction. + * + * Increment the first half of aligned_quadword by 1, and the second half by 2 + * from 2 threads. Verify that the result is consistent. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include +#include +#include +#include + +static volatile bool start; +typedef unsigned long aligned_quadword[2] __attribute__((__aligned__(16))); +static aligned_quadword val; +static const int n_iterations = 1000000; + +static inline int cdsg(unsigned long *orig0, unsigned long *orig1, + unsigned long new0, unsigned long new1, + aligned_quadword *mem) +{ + register unsigned long r0 asm("r0"); + register unsigned long r1 asm("r1"); + register unsigned long r2 asm("r2"); + register unsigned long r3 asm("r3"); + int cc; + + r0 = *orig0; + r1 = *orig1; + r2 = new0; + r3 = new1; + asm("cdsg %[r0],%[r2],%[db2]\n" + "ipm %[cc]" + : [r0] "+r" (r0) + , [r1] "+r" (r1) + , [db2] "+m" (*mem) + , [cc] "=r" (cc) + : [r2] "r" (r2) + , [r3] "r" (r3) + : "cc"); + *orig0 = r0; + *orig1 = r1; + + return (cc >> 28) & 3; +} + +void *cdsg_loop(void *arg) +{ + unsigned long orig0, orig1, new0, new1; + int cc; + int i; + + while (!start) { + } + + orig0 = val[0]; + orig1 = val[1]; + for (i = 0; i < n_iterations;) { + new0 = orig0 + 1; + new1 = orig1 + 2; + + cc = cdsg(&orig0, &orig1, new0, new1, &val); + + if (cc == 0) { + orig0 = new0; + orig1 = new1; + i++; + } else { + assert(cc == 1); + } + } + + return NULL; +} + +int main(void) +{ + pthread_t thread; + int ret; + + ret = pthread_create(&thread, NULL, cdsg_loop, NULL); + assert(ret == 0); + start = true; + cdsg_loop(NULL); + ret = pthread_join(thread, NULL); + assert(ret == 0); + + assert(val[0] == n_iterations * 2); + assert(val[1] == n_iterations * 4); + + return EXIT_SUCCESS; +} From 6d28ff406c71c2c6f8a79edb1ccfc17978aa95fb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 08:18:59 +1000 Subject: [PATCH 27/40] target/s390x: Use a single return for helper_divs32/u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pack the quotient and remainder into a single uint64_t. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- v2: Fix operand ordering; use tcg_extr32_i64. --- target/s390x/helper.h | 2 +- target/s390x/tcg/int_helper.c | 26 +++++++++++++------------- target/s390x/tcg/translate.c | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 93923ca153..bc828d976b 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -10,7 +10,7 @@ DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64) DEF_HELPER_3(mvcl, i32, env, i32, i32) DEF_HELPER_3(clcl, i32, env, i32, i32) DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64) -DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, s64, env, s64, s64) +DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, i64, env, s64, s64) DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64) DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64) diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c index 954542388a..7260583cf2 100644 --- a/target/s390x/tcg/int_helper.c +++ b/target/s390x/tcg/int_helper.c @@ -34,45 +34,45 @@ #endif /* 64/32 -> 32 signed division */ -int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64) +uint64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64) { - int32_t ret, b = b64; - int64_t q; + int32_t b = b64; + int64_t q, r; if (b == 0) { tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); } - ret = q = a / b; - env->retxl = a % b; + q = a / b; + r = a % b; /* Catch non-representable quotient. */ - if (ret != q) { + if (q != (int32_t)q) { tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); } - return ret; + return deposit64(q, 32, 32, r); } /* 64/32 -> 32 unsigned division */ uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64) { - uint32_t ret, b = b64; - uint64_t q; + uint32_t b = b64; + uint64_t q, r; if (b == 0) { tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); } - ret = q = a / b; - env->retxl = a % b; + q = a / b; + r = a % b; /* Catch non-representable quotient. */ - if (ret != q) { + if (q != (uint32_t)q) { tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); } - return ret; + return deposit64(q, 32, 32, r); } /* 64/64 -> 64 signed division */ diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index a339b277e9..169f7ee1b2 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -2395,15 +2395,15 @@ static DisasJumpType op_diag(DisasContext *s, DisasOps *o) static DisasJumpType op_divs32(DisasContext *s, DisasOps *o) { - gen_helper_divs32(o->out2, cpu_env, o->in1, o->in2); - return_low128(o->out); + gen_helper_divs32(o->out, cpu_env, o->in1, o->in2); + tcg_gen_extr32_i64(o->out2, o->out, o->out); return DISAS_NEXT; } static DisasJumpType op_divu32(DisasContext *s, DisasOps *o) { - gen_helper_divu32(o->out2, cpu_env, o->in1, o->in2); - return_low128(o->out); + gen_helper_divu32(o->out, cpu_env, o->in1, o->in2); + tcg_gen_extr32_i64(o->out2, o->out, o->out); return DISAS_NEXT; } From 4e5712f9037c34bbd9ffd78baa9d1ebea13a430d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 09:08:52 +1000 Subject: [PATCH 28/40] target/s390x: Use a single return for helper_divs64/u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pack the quotient and remainder into a single Int128. Use the divu128 primitive to remove the cpu_abort on 32-bit hosts. Reviewed-by: Philippe Mathieu-Daudé Acked-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- v2: Extended div test case to cover these insns. --- target/s390x/helper.h | 4 ++-- target/s390x/tcg/int_helper.c | 38 +++++++++-------------------------- target/s390x/tcg/translate.c | 14 +++++++++---- tests/tcg/s390x/div.c | 35 ++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 35 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index bc828d976b..593f3c8bee 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -12,8 +12,8 @@ DEF_HELPER_3(clcl, i32, env, i32, i32) DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64) DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, i64, env, s64, s64) DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64) -DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, i128, env, s64, s64) +DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i128, env, i64, i64, i64) DEF_HELPER_3(srst, void, env, i32, i32) DEF_HELPER_3(srstu, void, env, i32, i32) DEF_HELPER_4(clst, i64, env, i64, i64, i64) diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c index 7260583cf2..eb8e6dd1b5 100644 --- a/target/s390x/tcg/int_helper.c +++ b/target/s390x/tcg/int_helper.c @@ -76,46 +76,26 @@ uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64) } /* 64/64 -> 64 signed division */ -int64_t HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b) +Int128 HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b) { /* Catch divide by zero, and non-representable quotient (MIN / -1). */ if (b == 0 || (b == -1 && a == (1ll << 63))) { tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); } - env->retxl = a % b; - return a / b; + return int128_make128(a / b, a % b); } /* 128 -> 64/64 unsigned division */ -uint64_t HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t b) +Int128 HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t b) { - uint64_t ret; - /* Signal divide by zero. */ - if (b == 0) { - tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); - } - if (ah == 0) { - /* 64 -> 64/64 case */ - env->retxl = al % b; - ret = al / b; - } else { - /* ??? Move i386 idivq helper to host-utils. */ -#ifdef CONFIG_INT128 - __uint128_t a = ((__uint128_t)ah << 64) | al; - __uint128_t q = a / b; - env->retxl = a % b; - ret = q; - if (ret != q) { - tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + if (b != 0) { + uint64_t r = divu128(&al, &ah, b); + if (ah == 0) { + return int128_make128(al, r); } -#else - /* 32-bit hosts would need special wrapper functionality - just abort if - we encounter such a case; it's very unlikely anyways. */ - cpu_abort(env_cpu(env), "128 -> 64/64 division not implemented\n"); -#endif } - return ret; + /* divide by zero or overflow */ + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); } uint64_t HELPER(cvd)(int32_t reg) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 169f7ee1b2..6953b81de7 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -2409,15 +2409,21 @@ static DisasJumpType op_divu32(DisasContext *s, DisasOps *o) static DisasJumpType op_divs64(DisasContext *s, DisasOps *o) { - gen_helper_divs64(o->out2, cpu_env, o->in1, o->in2); - return_low128(o->out); + TCGv_i128 t = tcg_temp_new_i128(); + + gen_helper_divs64(t, cpu_env, o->in1, o->in2); + tcg_gen_extr_i128_i64(o->out2, o->out, t); + tcg_temp_free_i128(t); return DISAS_NEXT; } static DisasJumpType op_divu64(DisasContext *s, DisasOps *o) { - gen_helper_divu64(o->out2, cpu_env, o->out, o->out2, o->in2); - return_low128(o->out); + TCGv_i128 t = tcg_temp_new_i128(); + + gen_helper_divu64(t, cpu_env, o->out, o->out2, o->in2); + tcg_gen_extr_i128_i64(o->out2, o->out, t); + tcg_temp_free_i128(t); return DISAS_NEXT; } diff --git a/tests/tcg/s390x/div.c b/tests/tcg/s390x/div.c index 5807295614..6ad9900e08 100644 --- a/tests/tcg/s390x/div.c +++ b/tests/tcg/s390x/div.c @@ -33,8 +33,43 @@ static void test_dlr(void) assert(r == 1); } +static void test_dsgr(void) +{ + register int64_t r0 asm("r0") = -1; + register int64_t r1 asm("r1") = -4241; + int64_t b = 101, q, r; + + asm("dsgr %[r0],%[b]" + : [r0] "+r" (r0), [r1] "+r" (r1) + : [b] "r" (b) + : "cc"); + q = r1; + r = r0; + assert(q == -41); + assert(r == -100); +} + +static void test_dlgr(void) +{ + register uint64_t r0 asm("r0") = 0; + register uint64_t r1 asm("r1") = 4243; + uint64_t b = 101, q, r; + + asm("dlgr %[r0],%[b]" + : [r0] "+r" (r0), [r1] "+r" (r1) + : [b] "r" (b) + : "cc"); + q = r1; + r = r0; + assert(q == 42); + assert(r == 1); +} + int main(void) { test_dr(); test_dlr(); + test_dsgr(); + test_dlgr(); + return 0; } From b71dd2a51e898ee91bee3e23708e8d4d14ac6812 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 11:46:06 +1000 Subject: [PATCH 29/40] target/s390x: Use Int128 for return from CLST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Acked-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- target/s390x/helper.h | 2 +- target/s390x/tcg/mem_helper.c | 11 ++++------- target/s390x/tcg/translate.c | 8 ++++++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 593f3c8bee..25c2dd0b3c 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -16,7 +16,7 @@ DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, i128, env, s64, s64) DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i128, env, i64, i64, i64) DEF_HELPER_3(srst, void, env, i32, i32) DEF_HELPER_3(srstu, void, env, i32, i32) -DEF_HELPER_4(clst, i64, env, i64, i64, i64) +DEF_HELPER_4(clst, i128, env, i64, i64, i64) DEF_HELPER_FLAGS_4(mvn, TCG_CALL_NO_WG, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(mvo, TCG_CALL_NO_WG, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(mvpg, TCG_CALL_NO_WG, i32, env, i64, i32, i32) diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index cb82cd1c1d..9be42851d8 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -886,7 +886,7 @@ void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2) } /* unsigned string compare (c is string terminator) */ -uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2) +Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2) { uintptr_t ra = GETPC(); uint32_t len; @@ -904,23 +904,20 @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2) if (v1 == c) { /* Equal. CC=0, and don't advance the registers. */ env->cc_op = 0; - env->retxl = s2; - return s1; + return int128_make128(s2, s1); } } else { /* Unequal. CC={1,2}, and advance the registers. Note that the terminator need not be zero, but the string that contains the terminator is by definition "low". */ env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2); - env->retxl = s2 + len; - return s1 + len; + return int128_make128(s2 + len, s1 + len); } } /* CPU-determined bytes equal; advance the registers. */ env->cc_op = 3; - env->retxl = s2 + len; - return s1 + len; + return int128_make128(s2 + len, s1 + len); } /* move page */ diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 6953b81de7..8397fe2bd8 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -2164,9 +2164,13 @@ static DisasJumpType op_clm(DisasContext *s, DisasOps *o) static DisasJumpType op_clst(DisasContext *s, DisasOps *o) { - gen_helper_clst(o->in1, cpu_env, regs[0], o->in1, o->in2); + TCGv_i128 pair = tcg_temp_new_i128(); + + gen_helper_clst(pair, cpu_env, regs[0], o->in1, o->in2); + tcg_gen_extr_i128_i64(o->in2, o->in1, pair); + tcg_temp_free_i128(pair); + set_cc_static(s); - return_low128(o->in2); return DISAS_NEXT; } From c91192245ac32c219ba698e3ca5e976cbed3359b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 11:51:10 +1000 Subject: [PATCH 30/40] target/s390x: Use Int128 for return from CKSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Ilya Leoshkevich Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/s390x/helper.h | 2 +- target/s390x/tcg/mem_helper.c | 7 +++---- target/s390x/tcg/translate.c | 6 ++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 25c2dd0b3c..03b29efa3e 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -103,7 +103,7 @@ DEF_HELPER_4(tre, i64, env, i64, i64, i64) DEF_HELPER_4(trt, i32, env, i32, i64, i64) DEF_HELPER_4(trtr, i32, env, i32, i64, i64) DEF_HELPER_5(trXX, i32, env, i32, i32, i32, i32) -DEF_HELPER_4(cksm, i64, env, i64, i64, i64) +DEF_HELPER_4(cksm, i128, env, i64, i64, i64) DEF_HELPER_FLAGS_5(calc_cc, TCG_CALL_NO_RWG_SE, i32, env, i32, i64, i64, i64) DEF_HELPER_FLAGS_2(sfpc, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64) diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 9be42851d8..b0b403e23a 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -1350,8 +1350,8 @@ uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2, } /* checksum */ -uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1, - uint64_t src, uint64_t src_len) +Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1, + uint64_t src, uint64_t src_len) { uintptr_t ra = GETPC(); uint64_t max_len, len; @@ -1392,8 +1392,7 @@ uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1, env->cc_op = (len == src_len ? 0 : 3); /* Return both cksm and processed length. */ - env->retxl = cksm; - return len; + return int128_make128(cksm, len); } void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 8397fe2bd8..1a7aa9e4ae 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -2041,11 +2041,13 @@ static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o) static DisasJumpType op_cksm(DisasContext *s, DisasOps *o) { int r2 = get_field(s, r2); + TCGv_i128 pair = tcg_temp_new_i128(); TCGv_i64 len = tcg_temp_new_i64(); - gen_helper_cksm(len, cpu_env, o->in1, o->in2, regs[r2 + 1]); + gen_helper_cksm(pair, cpu_env, o->in1, o->in2, regs[r2 + 1]); set_cc_static(s); - return_low128(o->out); + tcg_gen_extr_i128_i64(o->out, len, pair); + tcg_temp_free_i128(pair); tcg_gen_add_i64(regs[r2], regs[r2], len); tcg_gen_sub_i64(regs[r2 + 1], regs[r2 + 1], len); From ef45f5b998126205e0362c7af4c5d6ee65801450 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 12:00:07 +1000 Subject: [PATCH 31/40] target/s390x: Use Int128 for return from TRE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Ilya Leoshkevich Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/s390x/helper.h | 2 +- target/s390x/tcg/mem_helper.c | 7 +++---- target/s390x/tcg/translate.c | 7 +++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 03b29efa3e..b4170a4256 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -99,7 +99,7 @@ DEF_HELPER_FLAGS_4(unpka, TCG_CALL_NO_WG, i32, env, i64, i32, i64) DEF_HELPER_FLAGS_4(unpku, TCG_CALL_NO_WG, i32, env, i64, i32, i64) DEF_HELPER_FLAGS_3(tp, TCG_CALL_NO_WG, i32, env, i64, i32) DEF_HELPER_FLAGS_4(tr, TCG_CALL_NO_WG, void, env, i32, i64, i64) -DEF_HELPER_4(tre, i64, env, i64, i64, i64) +DEF_HELPER_4(tre, i128, env, i64, i64, i64) DEF_HELPER_4(trt, i32, env, i32, i64, i64) DEF_HELPER_4(trtr, i32, env, i32, i64, i64) DEF_HELPER_5(trXX, i32, env, i32, i32, i32, i32) diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index b0b403e23a..49969abda7 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -1632,8 +1632,8 @@ void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array, do_helper_tr(env, len, array, trans, GETPC()); } -uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array, - uint64_t len, uint64_t trans) +Int128 HELPER(tre)(CPUS390XState *env, uint64_t array, + uint64_t len, uint64_t trans) { uintptr_t ra = GETPC(); uint8_t end = env->regs[0] & 0xff; @@ -1668,8 +1668,7 @@ uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array, } env->cc_op = cc; - env->retxl = len - i; - return array + i; + return int128_make128(len - i, array + i); } static inline uint32_t do_helper_trt(CPUS390XState *env, int len, diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 1a7aa9e4ae..f3e4b70ed9 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -4905,8 +4905,11 @@ static DisasJumpType op_tr(DisasContext *s, DisasOps *o) static DisasJumpType op_tre(DisasContext *s, DisasOps *o) { - gen_helper_tre(o->out, cpu_env, o->out, o->out2, o->in2); - return_low128(o->out2); + TCGv_i128 pair = tcg_temp_new_i128(); + + gen_helper_tre(pair, cpu_env, o->out, o->out2, o->in2); + tcg_gen_extr_i128_i64(o->out2, o->out, pair); + tcg_temp_free_i128(pair); set_cc_static(s); return DISAS_NEXT; } From f4031d9664d6db63dc384e1bca38739b2cb50acd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 15:18:56 +1000 Subject: [PATCH 32/40] target/s390x: Copy wout_x1 to wout_x1_P MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make a copy of wout_x1 before modifying it, as wout_x1_P emphasizing that it operates on the out/out2 pair. The insns that use x1_P are data movement that will not change to Int128. Acked-by: Ilya Leoshkevich Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/s390x/tcg/insn-data.h.inc | 12 ++++++------ target/s390x/tcg/translate.c | 8 ++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc index 79c6ab509a..d0814cb218 100644 --- a/target/s390x/tcg/insn-data.h.inc +++ b/target/s390x/tcg/insn-data.h.inc @@ -422,7 +422,7 @@ F(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 | IF_AFP2) F(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1) F(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1) - F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1, movx, 0, IF_AFP1) + F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1_P, movx, 0, IF_AFP1) /* LOAD IMMEDIATE */ C(0xc001, LGFI, RIL_a, EI, 0, i2, 0, r1, mov2, 0) /* LOAD RELATIVE LONG */ @@ -461,7 +461,7 @@ C(0xe332, LTGF, RXY_a, GIE, 0, a2, r1, 0, ld32s, s64) F(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32, IF_BFP) F(0xb312, LTDBR, RRE, Z, 0, f2, 0, f1, mov2, f64, IF_BFP) - F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1, movx, f128, IF_BFP) + F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1_P, movx, f128, IF_BFP) /* LOAD AND TRAP */ C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0) C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0) @@ -483,7 +483,7 @@ C(0xb913, LCGFR, RRE, Z, 0, r2_32s, r1, 0, neg, neg64) F(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32, IF_BFP) F(0xb313, LCDBR, RRE, Z, 0, f2, new, f1, negf64, f64, IF_BFP) - F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1, negf128, f128, IF_BFP) + F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1_P, negf128, f128, IF_BFP) F(0xb373, LCDFR, RRE, FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | IF_AFP2) /* LOAD COUNT TO BLOCK BOUNDARY */ C(0xe727, LCBB, RXE, V, la2, 0, r1, 0, lcbb, 0) @@ -552,7 +552,7 @@ C(0xb911, LNGFR, RRE, Z, 0, r2_32s, r1, 0, nabs, nabs64) F(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32, IF_BFP) F(0xb311, LNDBR, RRE, Z, 0, f2, new, f1, nabsf64, f64, IF_BFP) - F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1, nabsf128, f128, IF_BFP) + F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1_P, nabsf128, f128, IF_BFP) F(0xb371, LNDFR, RRE, FPSSH, 0, f2, new, f1, nabsf64, 0, IF_AFP1 | IF_AFP2) /* LOAD ON CONDITION */ C(0xb9f2, LOCR, RRF_c, LOC, r1, r2, new, r1_32, loc, 0) @@ -577,7 +577,7 @@ C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64) F(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32, IF_BFP) F(0xb310, LPDBR, RRE, Z, 0, f2, new, f1, absf64, f64, IF_BFP) - F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1, absf128, f128, IF_BFP) + F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1_P, absf128, f128, IF_BFP) F(0xb370, LPDFR, RRE, FPSSH, 0, f2, new, f1, absf64, 0, IF_AFP1 | IF_AFP2) /* LOAD REVERSED */ C(0xb91f, LRVR, RRE, Z, 0, r2_32u, new, r1_32, rev32, 0) @@ -588,7 +588,7 @@ /* LOAD ZERO */ F(0xb374, LZER, RRE, Z, 0, 0, 0, e1, zero, 0, IF_AFP1) F(0xb375, LZDR, RRE, Z, 0, 0, 0, f1, zero, 0, IF_AFP1) - F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1, zero2, 0, IF_AFP1) + F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1_P, zero2, 0, IF_AFP1) /* LOAD FPC */ F(0xb29d, LFPC, S, Z, 0, m2_32u, 0, 0, sfpc, 0, IF_BFP) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index f3e4b70ed9..d25b6f3c03 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -5518,6 +5518,14 @@ static void wout_x1(DisasContext *s, DisasOps *o) } #define SPEC_wout_x1 SPEC_r1_f128 +static void wout_x1_P(DisasContext *s, DisasOps *o) +{ + int f1 = get_field(s, r1); + store_freg(f1, o->out); + store_freg(f1 + 2, o->out2); +} +#define SPEC_wout_x1_P SPEC_r1_f128 + static void wout_cond_r1r2_32(DisasContext *s, DisasOps *o) { if (get_field(s, r1) != get_field(s, r2)) { From ee5e866fd2304a08172c3674dd7c7e7a97b046ed Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Oct 2022 10:15:49 +1000 Subject: [PATCH 33/40] target/s390x: Use Int128 for returning float128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: David Hildenbrand Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- v2: Remove extraneous return_low128. --- target/s390x/helper.h | 22 +++++++------- target/s390x/tcg/fpu_helper.c | 29 +++++++++--------- target/s390x/tcg/insn-data.h.inc | 20 ++++++------- target/s390x/tcg/translate.c | 51 +++++++++++++++++--------------- 4 files changed, 63 insertions(+), 59 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index b4170a4256..d40aeb471f 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -31,32 +31,32 @@ DEF_HELPER_4(clcle, i32, env, i32, i64, i32) DEF_HELPER_4(clclu, i32, env, i32, i64, i32) DEF_HELPER_3(cegb, i64, env, s64, i32) DEF_HELPER_3(cdgb, i64, env, s64, i32) -DEF_HELPER_3(cxgb, i64, env, s64, i32) +DEF_HELPER_3(cxgb, i128, env, s64, i32) DEF_HELPER_3(celgb, i64, env, i64, i32) DEF_HELPER_3(cdlgb, i64, env, i64, i32) -DEF_HELPER_3(cxlgb, i64, env, i64, i32) +DEF_HELPER_3(cxlgb, i128, env, i64, i32) DEF_HELPER_4(cdsg, void, env, i64, i32, i32) DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32) DEF_HELPER_4(csst, i32, env, i32, i64, i64) DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64) DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) DEF_HELPER_FLAGS_3(seb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(sdb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i64, env, i64, i64, i64, i64) -DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i128, env, i64, i64, i64) DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_4(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64, i32) -DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i128, env, i64) +DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i128, env, i64) DEF_HELPER_FLAGS_3(ledb, TCG_CALL_NO_WG, i64, env, i64, i32) DEF_HELPER_FLAGS_4(lexb, TCG_CALL_NO_WG, i64, env, i64, i64, i32) DEF_HELPER_FLAGS_3(ceb, TCG_CALL_NO_WG_SE, i32, env, i64, i64) @@ -79,7 +79,7 @@ DEF_HELPER_3(clfdb, i64, env, i64, i32) DEF_HELPER_4(clfxb, i64, env, i64, i64, i32) DEF_HELPER_FLAGS_3(fieb, TCG_CALL_NO_WG, i64, env, i64, i32) DEF_HELPER_FLAGS_3(fidb, TCG_CALL_NO_WG, i64, env, i64, i32) -DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i64, env, i64, i64, i32) +DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i128, env, i64, i64, i32) DEF_HELPER_FLAGS_4(maeb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) DEF_HELPER_FLAGS_4(madb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) DEF_HELPER_FLAGS_4(mseb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) @@ -89,7 +89,7 @@ DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64) DEF_HELPER_FLAGS_4(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64, i64) DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i128, env, i64, i64) DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32) DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(pka, TCG_CALL_NO_WG, void, env, i64, i64, i32) diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c index be80b2373c..13be44499b 100644 --- a/target/s390x/tcg/fpu_helper.c +++ b/target/s390x/tcg/fpu_helper.c @@ -34,7 +34,10 @@ #define HELPER_LOG(x...) #endif -#define RET128(F) (env->retxl = F.low, F.high) +static inline Int128 RET128(float128 f) +{ + return int128_make128(f.low, f.high); +} uint8_t s390_softfloat_exc_to_ieee(unsigned int exc) { @@ -224,7 +227,7 @@ uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP addition */ -uint64_t HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al, +Int128 HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { float128 ret = float128_add(make_float128(ah, al), @@ -251,7 +254,7 @@ uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP subtraction */ -uint64_t HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al, +Int128 HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { float128 ret = float128_sub(make_float128(ah, al), @@ -278,7 +281,7 @@ uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP division */ -uint64_t HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al, +Int128 HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { float128 ret = float128_div(make_float128(ah, al), @@ -314,7 +317,7 @@ uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP multiplication */ -uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al, +Int128 HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl) { float128 ret = float128_mul(make_float128(ah, al), @@ -325,8 +328,7 @@ uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al, } /* 128/64-bit FP multiplication */ -uint64_t HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t f2) +Int128 HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t f2) { float128 ret = float64_to_float128(f2, &env->fpu_status); ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status); @@ -355,7 +357,7 @@ uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al, } /* convert 64-bit float to 128-bit float */ -uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2) +Int128 HELPER(lxdb)(CPUS390XState *env, uint64_t f2) { float128 ret = float64_to_float128(f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); @@ -363,7 +365,7 @@ uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2) } /* convert 32-bit float to 128-bit float */ -uint64_t HELPER(lxeb)(CPUS390XState *env, uint64_t f2) +Int128 HELPER(lxeb)(CPUS390XState *env, uint64_t f2) { float128 ret = float32_to_float128(f2, &env->fpu_status); handle_exceptions(env, false, GETPC()); @@ -486,7 +488,7 @@ uint64_t HELPER(cdgb)(CPUS390XState *env, int64_t v2, uint32_t m34) } /* convert 64-bit int to 128-bit float */ -uint64_t HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34) +Int128 HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); float128 ret = int64_to_float128(v2, &env->fpu_status); @@ -519,7 +521,7 @@ uint64_t HELPER(cdlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) } /* convert 64-bit uint to 128-bit float */ -uint64_t HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +Int128 HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); float128 ret = uint64_to_float128(v2, &env->fpu_status); @@ -748,8 +750,7 @@ uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34) } /* round to integer 128-bit */ -uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint32_t m34) +Int128 HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); float128 ret = float128_round_to_int(make_float128(ah, al), @@ -890,7 +891,7 @@ uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2) } /* square root 128-bit */ -uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al) +Int128 HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al) { float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status); handle_exceptions(env, false, GETPC()); diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc index d0814cb218..517a4500ae 100644 --- a/target/s390x/tcg/insn-data.h.inc +++ b/target/s390x/tcg/insn-data.h.inc @@ -306,10 +306,10 @@ /* CONVERT FROM FIXED */ F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP) F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP) - F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_P, x1, cxgb, 0, IF_BFP) + F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_x, x1, cxgb, 0, IF_BFP) F(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0, IF_BFP) F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, new, f1, cdgb, 0, IF_BFP) - F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_P, x1, cxgb, 0, IF_BFP) + F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_x, x1, cxgb, 0, IF_BFP) /* CONVERT TO LOGICAL */ F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP) F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP) @@ -320,10 +320,10 @@ /* CONVERT FROM LOGICAL */ F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP) F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP) - F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_P, x1, cxlgb, 0, IF_BFP) + F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_x, x1, cxlgb, 0, IF_BFP) F(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP) F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, new, f1, cdlgb, 0, IF_BFP) - F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_P, x1, cxlgb, 0, IF_BFP) + F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_x, x1, cxlgb, 0, IF_BFP) /* CONVERT UTF-8 TO UTF-16 */ D(0xb2a7, CU12, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 12) @@ -597,15 +597,15 @@ /* LOAD FP INTEGER */ F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP) F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP) - F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_P, x1, fixb, 0, IF_BFP) + F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_x, x1, fixb, 0, IF_BFP) /* LOAD LENGTHENED */ F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP) - F(0xb305, LXDBR, RRE, Z, 0, f2, new_P, x1, lxdb, 0, IF_BFP) - F(0xb306, LXEBR, RRE, Z, 0, e2, new_P, x1, lxeb, 0, IF_BFP) + F(0xb305, LXDBR, RRE, Z, 0, f2, new_x, x1, lxdb, 0, IF_BFP) + F(0xb306, LXEBR, RRE, Z, 0, e2, new_x, x1, lxeb, 0, IF_BFP) F(0xed04, LDEB, RXE, Z, 0, m2_32u, new, f1, ldeb, 0, IF_BFP) - F(0xed05, LXDB, RXE, Z, 0, m2_64, new_P, x1, lxdb, 0, IF_BFP) - F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_P, x1, lxeb, 0, IF_BFP) + F(0xed05, LXDB, RXE, Z, 0, m2_64, new_x, x1, lxdb, 0, IF_BFP) + F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_x, x1, lxeb, 0, IF_BFP) F(0xb324, LDER, RXE, Z, 0, e2, new, f1, lde, 0, IF_AFP1) F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1) /* LOAD ROUNDED */ @@ -835,7 +835,7 @@ /* SQUARE ROOT */ F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP) F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP) - F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP) + F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_x, x1, sqxb, 0, IF_BFP) F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP) F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index d25b6f3c03..0a750a5467 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -1103,6 +1103,7 @@ typedef struct { bool g_out, g_out2, g_in1, g_in2; TCGv_i64 out, out2, in1, in2; TCGv_i64 addr1; + TCGv_i128 out_128; } DisasOps; /* Instructions can place constraints on their operands, raising specification @@ -1461,8 +1462,7 @@ static DisasJumpType op_adb(DisasContext *s, DisasOps *o) static DisasJumpType op_axb(DisasContext *s, DisasOps *o) { - gen_helper_axb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); - return_low128(o->out2); + gen_helper_axb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); return DISAS_NEXT; } @@ -1995,9 +1995,8 @@ static DisasJumpType op_cxgb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_cxgb(o->out, cpu_env, o->in2, m34); + gen_helper_cxgb(o->out_128, cpu_env, o->in2, m34); tcg_temp_free_i32(m34); - return_low128(o->out2); return DISAS_NEXT; } @@ -2032,9 +2031,8 @@ static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_cxlgb(o->out, cpu_env, o->in2, m34); + gen_helper_cxlgb(o->out_128, cpu_env, o->in2, m34); tcg_temp_free_i32(m34); - return_low128(o->out2); return DISAS_NEXT; } @@ -2447,8 +2445,7 @@ static DisasJumpType op_ddb(DisasContext *s, DisasOps *o) static DisasJumpType op_dxb(DisasContext *s, DisasOps *o) { - gen_helper_dxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); - return_low128(o->out2); + gen_helper_dxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); return DISAS_NEXT; } @@ -2553,8 +2550,7 @@ static DisasJumpType op_fixb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_fixb(o->out, cpu_env, o->in1, o->in2, m34); - return_low128(o->out2); + gen_helper_fixb(o->out_128, cpu_env, o->in1, o->in2, m34); tcg_temp_free_i32(m34); return DISAS_NEXT; } @@ -2866,15 +2862,13 @@ static DisasJumpType op_lexb(DisasContext *s, DisasOps *o) static DisasJumpType op_lxdb(DisasContext *s, DisasOps *o) { - gen_helper_lxdb(o->out, cpu_env, o->in2); - return_low128(o->out2); + gen_helper_lxdb(o->out_128, cpu_env, o->in2); return DISAS_NEXT; } static DisasJumpType op_lxeb(DisasContext *s, DisasOps *o) { - gen_helper_lxeb(o->out, cpu_env, o->in2); - return_low128(o->out2); + gen_helper_lxeb(o->out_128, cpu_env, o->in2); return DISAS_NEXT; } @@ -3590,15 +3584,13 @@ static DisasJumpType op_mdb(DisasContext *s, DisasOps *o) static DisasJumpType op_mxb(DisasContext *s, DisasOps *o) { - gen_helper_mxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); - return_low128(o->out2); + gen_helper_mxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); return DISAS_NEXT; } static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o) { - gen_helper_mxdb(o->out, cpu_env, o->out, o->out2, o->in2); - return_low128(o->out2); + gen_helper_mxdb(o->out_128, cpu_env, o->out, o->out2, o->in2); return DISAS_NEXT; } @@ -4063,8 +4055,7 @@ static DisasJumpType op_sdb(DisasContext *s, DisasOps *o) static DisasJumpType op_sxb(DisasContext *s, DisasOps *o) { - gen_helper_sxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); - return_low128(o->out2); + gen_helper_sxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); return DISAS_NEXT; } @@ -4082,8 +4073,7 @@ static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o) static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o) { - gen_helper_sqxb(o->out, cpu_env, o->in1, o->in2); - return_low128(o->out2); + gen_helper_sqxb(o->out_128, cpu_env, o->in1, o->in2); return DISAS_NEXT; } @@ -5395,6 +5385,14 @@ static void prep_new_P(DisasContext *s, DisasOps *o) } #define SPEC_prep_new_P 0 +static void prep_new_x(DisasContext *s, DisasOps *o) +{ + o->out = tcg_temp_new_i64(); + o->out2 = tcg_temp_new_i64(); + o->out_128 = tcg_temp_new_i128(); +} +#define SPEC_prep_new_x 0 + static void prep_r1(DisasContext *s, DisasOps *o) { o->out = regs[get_field(s, r1)]; @@ -5411,11 +5409,12 @@ static void prep_r1_P(DisasContext *s, DisasOps *o) } #define SPEC_prep_r1_P SPEC_r1_even -/* Whenever we need x1 in addition to other inputs, we'll load it to out/out2 */ static void prep_x1(DisasContext *s, DisasOps *o) { o->out = load_freg(get_field(s, r1)); o->out2 = load_freg(get_field(s, r1) + 2); + o->out_128 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(o->out_128, o->out2, o->out); } #define SPEC_prep_x1 SPEC_r1_f128 @@ -5513,6 +5512,8 @@ static void wout_f1(DisasContext *s, DisasOps *o) static void wout_x1(DisasContext *s, DisasOps *o) { int f1 = get_field(s, r1); + + tcg_gen_extr_i128_i64(o->out2, o->out, o->out_128); store_freg(f1, o->out); store_freg(f1 + 2, o->out2); } @@ -6588,7 +6589,9 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) if (o.addr1) { tcg_temp_free_i64(o.addr1); } - + if (o.out_128) { + tcg_temp_free_i128(o.out_128); + } /* io should be the last instruction in tb when icount is enabled */ if (unlikely(icount && ret == DISAS_NEXT)) { ret = DISAS_TOO_MANY; From 2b91240f95fdb9acfa35ccac6cda2a42a16ac7f2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 21 Oct 2022 13:05:45 +1000 Subject: [PATCH 34/40] target/s390x: Use Int128 for passing float128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: David Hildenbrand Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- v2: Fix SPEC_in1_x1. --- target/s390x/helper.h | 32 ++++++------ target/s390x/tcg/fpu_helper.c | 88 ++++++++++++++------------------ target/s390x/tcg/insn-data.h.inc | 30 +++++------ target/s390x/tcg/translate.c | 76 ++++++++++++++++++--------- 4 files changed, 121 insertions(+), 105 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index d40aeb471f..bccd3bfca6 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -41,55 +41,55 @@ DEF_HELPER_4(csst, i32, env, i32, i64, i64) DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64) DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(adb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(axb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_3(axb, TCG_CALL_NO_WG, i128, env, i128, i128) DEF_HELPER_FLAGS_3(seb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(sdb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(sxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_3(sxb, TCG_CALL_NO_WG, i128, env, i128, i128) DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(dxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_3(dxb, TCG_CALL_NO_WG, i128, env, i128, i128) DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64) -DEF_HELPER_FLAGS_5(mxb, TCG_CALL_NO_WG, i128, env, i64, i64, i64, i64) -DEF_HELPER_FLAGS_4(mxdb, TCG_CALL_NO_WG, i128, env, i64, i64, i64) +DEF_HELPER_FLAGS_3(mxb, TCG_CALL_NO_WG, i128, env, i128, i128) +DEF_HELPER_FLAGS_3(mxdb, TCG_CALL_NO_WG, i128, env, i128, i64) DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_4(ldxb, TCG_CALL_NO_WG, i64, env, i64, i64, i32) +DEF_HELPER_FLAGS_3(ldxb, TCG_CALL_NO_WG, i64, env, i128, i32) DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i128, env, i64) DEF_HELPER_FLAGS_2(lxeb, TCG_CALL_NO_WG, i128, env, i64) DEF_HELPER_FLAGS_3(ledb, TCG_CALL_NO_WG, i64, env, i64, i32) -DEF_HELPER_FLAGS_4(lexb, TCG_CALL_NO_WG, i64, env, i64, i64, i32) +DEF_HELPER_FLAGS_3(lexb, TCG_CALL_NO_WG, i64, env, i128, i32) DEF_HELPER_FLAGS_3(ceb, TCG_CALL_NO_WG_SE, i32, env, i64, i64) DEF_HELPER_FLAGS_3(cdb, TCG_CALL_NO_WG_SE, i32, env, i64, i64) -DEF_HELPER_FLAGS_5(cxb, TCG_CALL_NO_WG_SE, i32, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_3(cxb, TCG_CALL_NO_WG_SE, i32, env, i128, i128) DEF_HELPER_FLAGS_3(keb, TCG_CALL_NO_WG, i32, env, i64, i64) DEF_HELPER_FLAGS_3(kdb, TCG_CALL_NO_WG, i32, env, i64, i64) -DEF_HELPER_FLAGS_5(kxb, TCG_CALL_NO_WG, i32, env, i64, i64, i64, i64) +DEF_HELPER_FLAGS_3(kxb, TCG_CALL_NO_WG, i32, env, i128, i128) DEF_HELPER_3(cgeb, i64, env, i64, i32) DEF_HELPER_3(cgdb, i64, env, i64, i32) -DEF_HELPER_4(cgxb, i64, env, i64, i64, i32) +DEF_HELPER_3(cgxb, i64, env, i128, i32) DEF_HELPER_3(cfeb, i64, env, i64, i32) DEF_HELPER_3(cfdb, i64, env, i64, i32) -DEF_HELPER_4(cfxb, i64, env, i64, i64, i32) +DEF_HELPER_3(cfxb, i64, env, i128, i32) DEF_HELPER_3(clgeb, i64, env, i64, i32) DEF_HELPER_3(clgdb, i64, env, i64, i32) -DEF_HELPER_4(clgxb, i64, env, i64, i64, i32) +DEF_HELPER_3(clgxb, i64, env, i128, i32) DEF_HELPER_3(clfeb, i64, env, i64, i32) DEF_HELPER_3(clfdb, i64, env, i64, i32) -DEF_HELPER_4(clfxb, i64, env, i64, i64, i32) +DEF_HELPER_3(clfxb, i64, env, i128, i32) DEF_HELPER_FLAGS_3(fieb, TCG_CALL_NO_WG, i64, env, i64, i32) DEF_HELPER_FLAGS_3(fidb, TCG_CALL_NO_WG, i64, env, i64, i32) -DEF_HELPER_FLAGS_4(fixb, TCG_CALL_NO_WG, i128, env, i64, i64, i32) +DEF_HELPER_FLAGS_3(fixb, TCG_CALL_NO_WG, i128, env, i128, i32) DEF_HELPER_FLAGS_4(maeb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) DEF_HELPER_FLAGS_4(madb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) DEF_HELPER_FLAGS_4(mseb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) DEF_HELPER_FLAGS_4(msdb, TCG_CALL_NO_WG, i64, env, i64, i64, i64) DEF_HELPER_FLAGS_3(tceb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64) DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64) -DEF_HELPER_FLAGS_4(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i64, i64, i64) +DEF_HELPER_FLAGS_3(tcxb, TCG_CALL_NO_RWG_SE, i32, env, i128, i64) DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64) -DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i128, env, i64, i64) +DEF_HELPER_FLAGS_2(sqxb, TCG_CALL_NO_WG, i128, env, i128) DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32) DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64) DEF_HELPER_FLAGS_4(pka, TCG_CALL_NO_WG, void, env, i64, i64, i32) diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c index 13be44499b..0bdab5bcf7 100644 --- a/target/s390x/tcg/fpu_helper.c +++ b/target/s390x/tcg/fpu_helper.c @@ -39,6 +39,11 @@ static inline Int128 RET128(float128 f) return int128_make128(f.low, f.high); } +static inline float128 ARG128(Int128 i) +{ + return make_float128(int128_gethi(i), int128_getlo(i)); +} + uint8_t s390_softfloat_exc_to_ieee(unsigned int exc) { uint8_t s390_exc = 0; @@ -227,12 +232,9 @@ uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP addition */ -Int128 HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t bh, uint64_t bl) +Int128 HELPER(axb)(CPUS390XState *env, Int128 a, Int128 b) { - float128 ret = float128_add(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + float128 ret = float128_add(ARG128(a), ARG128(b), &env->fpu_status); handle_exceptions(env, false, GETPC()); return RET128(ret); } @@ -254,12 +256,9 @@ uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP subtraction */ -Int128 HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t bh, uint64_t bl) +Int128 HELPER(sxb)(CPUS390XState *env, Int128 a, Int128 b) { - float128 ret = float128_sub(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + float128 ret = float128_sub(ARG128(a), ARG128(b), &env->fpu_status); handle_exceptions(env, false, GETPC()); return RET128(ret); } @@ -281,12 +280,9 @@ uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP division */ -Int128 HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t bh, uint64_t bl) +Int128 HELPER(dxb)(CPUS390XState *env, Int128 a, Int128 b) { - float128 ret = float128_div(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + float128 ret = float128_div(ARG128(a), ARG128(b), &env->fpu_status); handle_exceptions(env, false, GETPC()); return RET128(ret); } @@ -317,21 +313,18 @@ uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP multiplication */ -Int128 HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t bh, uint64_t bl) +Int128 HELPER(mxb)(CPUS390XState *env, Int128 a, Int128 b) { - float128 ret = float128_mul(make_float128(ah, al), - make_float128(bh, bl), - &env->fpu_status); + float128 ret = float128_mul(ARG128(a), ARG128(b), &env->fpu_status); handle_exceptions(env, false, GETPC()); return RET128(ret); } /* 128/64-bit FP multiplication */ -Int128 HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t f2) +Int128 HELPER(mxdb)(CPUS390XState *env, Int128 a, uint64_t f2) { float128 ret = float64_to_float128(f2, &env->fpu_status); - ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status); + ret = float128_mul(ARG128(a), ret, &env->fpu_status); handle_exceptions(env, false, GETPC()); return RET128(ret); } @@ -345,11 +338,10 @@ uint64_t HELPER(ldeb)(CPUS390XState *env, uint64_t f2) } /* convert 128-bit float to 64-bit float */ -uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint32_t m34) +uint64_t HELPER(ldxb)(CPUS390XState *env, Int128 a, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); - float64 ret = float128_to_float64(make_float128(ah, al), &env->fpu_status); + float64 ret = float128_to_float64(ARG128(a), &env->fpu_status); s390_restore_bfp_rounding_mode(env, old_mode); handle_exceptions(env, xxc_from_m34(m34), GETPC()); @@ -384,11 +376,10 @@ uint64_t HELPER(ledb)(CPUS390XState *env, uint64_t f2, uint32_t m34) } /* convert 128-bit float to 32-bit float */ -uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint32_t m34) +uint64_t HELPER(lexb)(CPUS390XState *env, Int128 a, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); - float32 ret = float128_to_float32(make_float128(ah, al), &env->fpu_status); + float32 ret = float128_to_float32(ARG128(a), &env->fpu_status); s390_restore_bfp_rounding_mode(env, old_mode); handle_exceptions(env, xxc_from_m34(m34), GETPC()); @@ -412,11 +403,9 @@ uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP compare */ -uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t bh, uint64_t bl) +uint32_t HELPER(cxb)(CPUS390XState *env, Int128 a, Int128 b) { - FloatRelation cmp = float128_compare_quiet(make_float128(ah, al), - make_float128(bh, bl), + FloatRelation cmp = float128_compare_quiet(ARG128(a), ARG128(b), &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); @@ -564,10 +553,10 @@ uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) } /* convert 128-bit float to 64-bit int */ -uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +uint64_t HELPER(cgxb)(CPUS390XState *env, Int128 i2, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); - float128 v2 = make_float128(h, l); + float128 v2 = ARG128(i2); int64_t ret = float128_to_int64(v2, &env->fpu_status); uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); @@ -613,10 +602,10 @@ uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) } /* convert 128-bit float to 32-bit int */ -uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +uint64_t HELPER(cfxb)(CPUS390XState *env, Int128 i2, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); - float128 v2 = make_float128(h, l); + float128 v2 = ARG128(i2); int32_t ret = float128_to_int32(v2, &env->fpu_status); uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); @@ -662,10 +651,10 @@ uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) } /* convert 128-bit float to 64-bit uint */ -uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +uint64_t HELPER(clgxb)(CPUS390XState *env, Int128 i2, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); - float128 v2 = make_float128(h, l); + float128 v2 = ARG128(i2); uint64_t ret = float128_to_uint64(v2, &env->fpu_status); uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); @@ -711,10 +700,10 @@ uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) } /* convert 128-bit float to 32-bit uint */ -uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +uint64_t HELPER(clfxb)(CPUS390XState *env, Int128 i2, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); - float128 v2 = make_float128(h, l); + float128 v2 = ARG128(i2); uint32_t ret = float128_to_uint32(v2, &env->fpu_status); uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); @@ -750,11 +739,10 @@ uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34) } /* round to integer 128-bit */ -Int128 HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint32_t m34) +Int128 HELPER(fixb)(CPUS390XState *env, Int128 a, uint32_t m34) { int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); - float128 ret = float128_round_to_int(make_float128(ah, al), - &env->fpu_status); + float128 ret = float128_round_to_int(ARG128(a), &env->fpu_status); s390_restore_bfp_rounding_mode(env, old_mode); handle_exceptions(env, xxc_from_m34(m34), GETPC()); @@ -778,11 +766,9 @@ uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) } /* 128-bit FP compare and signal */ -uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al, - uint64_t bh, uint64_t bl) +uint32_t HELPER(kxb)(CPUS390XState *env, Int128 a, Int128 b) { - FloatRelation cmp = float128_compare(make_float128(ah, al), - make_float128(bh, bl), + FloatRelation cmp = float128_compare(ARG128(a), ARG128(b), &env->fpu_status); handle_exceptions(env, false, GETPC()); return float_comp_to_cc(env, cmp); @@ -869,9 +855,9 @@ uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2) } /* test data class 128-bit */ -uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2) +uint32_t HELPER(tcxb)(CPUS390XState *env, Int128 a, uint64_t m2) { - return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0; + return (m2 & float128_dcmask(env, ARG128(a))) != 0; } /* square root 32-bit */ @@ -891,9 +877,9 @@ uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2) } /* square root 128-bit */ -Int128 HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al) +Int128 HELPER(sqxb)(CPUS390XState *env, Int128 a) { - float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status); + float128 ret = float128_sqrt(ARG128(a), &env->fpu_status); handle_exceptions(env, false, GETPC()); return RET128(ret); } diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc index 517a4500ae..893f4b48db 100644 --- a/target/s390x/tcg/insn-data.h.inc +++ b/target/s390x/tcg/insn-data.h.inc @@ -34,7 +34,7 @@ C(0xe318, AGF, RXY_a, Z, r1, m2_32s, r1, 0, add, adds64) F(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32, IF_BFP) F(0xb31a, ADBR, RRE, Z, f1, f2, new, f1, adb, f64, IF_BFP) - F(0xb34a, AXBR, RRE, Z, x2h, x2l, x1, x1, axb, f128, IF_BFP) + F(0xb34a, AXBR, RRE, Z, x1, x2, new_x, x1, axb, f128, IF_BFP) F(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32, IF_BFP) F(0xed1a, ADB, RXE, Z, f1, m2_64, new, f1, adb, f64, IF_BFP) /* ADD HIGH */ @@ -172,13 +172,13 @@ C(0xe330, CGF, RXY_a, Z, r1_o, m2_32s, 0, 0, 0, cmps64) F(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0, IF_BFP) F(0xb319, CDBR, RRE, Z, f1, f2, 0, 0, cdb, 0, IF_BFP) - F(0xb349, CXBR, RRE, Z, x2h, x2l, x1, 0, cxb, 0, IF_BFP) + F(0xb349, CXBR, RRE, Z, x1, x2, 0, 0, cxb, 0, IF_BFP) F(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0, IF_BFP) F(0xed19, CDB, RXE, Z, f1, m2_64, 0, 0, cdb, 0, IF_BFP) /* COMPARE AND SIGNAL */ F(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0, IF_BFP) F(0xb318, KDBR, RRE, Z, f1, f2, 0, 0, kdb, 0, IF_BFP) - F(0xb348, KXBR, RRE, Z, x2h, x2l, x1, 0, kxb, 0, IF_BFP) + F(0xb348, KXBR, RRE, Z, x1, x2, 0, 0, kxb, 0, IF_BFP) F(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0, IF_BFP) F(0xed18, KDB, RXE, Z, f1, m2_64, 0, 0, kdb, 0, IF_BFP) /* COMPARE IMMEDIATE */ @@ -299,10 +299,10 @@ /* CONVERT TO FIXED */ F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP) F(0xb399, CFDBR, RRF_e, Z, 0, f2, new, r1_32, cfdb, 0, IF_BFP) - F(0xb39a, CFXBR, RRF_e, Z, x2h, x2l, new, r1_32, cfxb, 0, IF_BFP) + F(0xb39a, CFXBR, RRF_e, Z, 0, x2, new, r1_32, cfxb, 0, IF_BFP) F(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0, IF_BFP) F(0xb3a9, CGDBR, RRF_e, Z, 0, f2, r1, 0, cgdb, 0, IF_BFP) - F(0xb3aa, CGXBR, RRF_e, Z, x2h, x2l, r1, 0, cgxb, 0, IF_BFP) + F(0xb3aa, CGXBR, RRF_e, Z, 0, x2, r1, 0, cgxb, 0, IF_BFP) /* CONVERT FROM FIXED */ F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP) F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP) @@ -313,10 +313,10 @@ /* CONVERT TO LOGICAL */ F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP) F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP) - F(0xb39e, CLFXBR, RRF_e, FPE, x2h, x2l, new, r1_32, clfxb, 0, IF_BFP) + F(0xb39e, CLFXBR, RRF_e, FPE, 0, x2, new, r1_32, clfxb, 0, IF_BFP) F(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP) F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2, r1, 0, clgdb, 0, IF_BFP) - F(0xb3ae, CLGXBR, RRF_e, FPE, x2h, x2l, r1, 0, clgxb, 0, IF_BFP) + F(0xb3ae, CLGXBR, RRF_e, FPE, 0, x2, r1, 0, clgxb, 0, IF_BFP) /* CONVERT FROM LOGICAL */ F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP) F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP) @@ -343,7 +343,7 @@ C(0x5d00, D, RX_a, Z, r1_D32, m2_32s, new_P, r1_P32, divs32, 0) F(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0, IF_BFP) F(0xb31d, DDBR, RRE, Z, f1, f2, new, f1, ddb, 0, IF_BFP) - F(0xb34d, DXBR, RRE, Z, x2h, x2l, x1, x1, dxb, 0, IF_BFP) + F(0xb34d, DXBR, RRE, Z, x1, x2, new_x, x1, dxb, 0, IF_BFP) F(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0, IF_BFP) F(0xed1d, DDB, RXE, Z, f1, m2_64, new, f1, ddb, 0, IF_BFP) /* DIVIDE LOGICAL */ @@ -597,7 +597,7 @@ /* LOAD FP INTEGER */ F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP) F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP) - F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_x, x1, fixb, 0, IF_BFP) + F(0xb347, FIXBR, RRF_e, Z, 0, x2, new_x, x1, fixb, 0, IF_BFP) /* LOAD LENGTHENED */ F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP) @@ -610,8 +610,8 @@ F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1) /* LOAD ROUNDED */ F(0xb344, LEDBR, RRF_e, Z, 0, f2, new, e1, ledb, 0, IF_BFP) - F(0xb345, LDXBR, RRF_e, Z, x2h, x2l, new, f1, ldxb, 0, IF_BFP) - F(0xb346, LEXBR, RRF_e, Z, x2h, x2l, new, e1, lexb, 0, IF_BFP) + F(0xb345, LDXBR, RRF_e, Z, 0, x2, new, f1, ldxb, 0, IF_BFP) + F(0xb346, LEXBR, RRF_e, Z, 0, x2, new, e1, lexb, 0, IF_BFP) /* LOAD MULTIPLE */ C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0) @@ -666,7 +666,7 @@ C(0xe384, MG, RXY_a, MIE2,r1p1_o, m2_64, r1_P, 0, muls128, 0) F(0xb317, MEEBR, RRE, Z, e1, e2, new, e1, meeb, 0, IF_BFP) F(0xb31c, MDBR, RRE, Z, f1, f2, new, f1, mdb, 0, IF_BFP) - F(0xb34c, MXBR, RRE, Z, x2h, x2l, x1, x1, mxb, 0, IF_BFP) + F(0xb34c, MXBR, RRE, Z, x1, x2, new_x, x1, mxb, 0, IF_BFP) F(0xb30c, MDEBR, RRE, Z, f1, e2, new, f1, mdeb, 0, IF_BFP) F(0xb307, MXDBR, RRE, Z, 0, f2, x1, x1, mxdb, 0, IF_BFP) F(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0, IF_BFP) @@ -835,7 +835,7 @@ /* SQUARE ROOT */ F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP) F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP) - F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_x, x1, sqxb, 0, IF_BFP) + F(0xb316, SQXBR, RRE, Z, 0, x2, new_x, x1, sqxb, 0, IF_BFP) F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP) F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP) @@ -913,7 +913,7 @@ C(0xe319, SGF, RXY_a, Z, r1, m2_32s, r1, 0, sub, subs64) F(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32, IF_BFP) F(0xb31b, SDBR, RRE, Z, f1, f2, new, f1, sdb, f64, IF_BFP) - F(0xb34b, SXBR, RRE, Z, x2h, x2l, x1, x1, sxb, f128, IF_BFP) + F(0xb34b, SXBR, RRE, Z, x1, x2, new_x, x1, sxb, f128, IF_BFP) F(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32, IF_BFP) F(0xed1b, SDB, RXE, Z, f1, m2_64, new, f1, sdb, f64, IF_BFP) /* SUBTRACT HALFWORD */ @@ -957,7 +957,7 @@ /* TEST DATA CLASS */ F(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0, IF_BFP) F(0xed11, TCDB, RXE, Z, f1, a2, 0, 0, tcdb, 0, IF_BFP) - F(0xed12, TCXB, RXE, Z, 0, a2, x1, 0, tcxb, 0, IF_BFP) + F(0xed12, TCXB, RXE, Z, x1, a2, 0, 0, tcxb, 0, IF_BFP) /* TEST DECIMAL */ C(0xebc0, TP, RSL, E2, la1, 0, 0, 0, tp, 0) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 0a750a5467..d422a1e62b 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -305,6 +305,18 @@ static TCGv_i64 load_freg32_i64(int reg) return r; } +static TCGv_i128 load_freg_128(int reg) +{ + TCGv_i64 h = load_freg(reg); + TCGv_i64 l = load_freg(reg + 2); + TCGv_i128 r = tcg_temp_new_i128(); + + tcg_gen_concat_i64_i128(r, l, h); + tcg_temp_free_i64(h); + tcg_temp_free_i64(l); + return r; +} + static void store_reg(int reg, TCGv_i64 v) { tcg_gen_mov_i64(regs[reg], v); @@ -1103,7 +1115,7 @@ typedef struct { bool g_out, g_out2, g_in1, g_in2; TCGv_i64 out, out2, in1, in2; TCGv_i64 addr1; - TCGv_i128 out_128; + TCGv_i128 out_128, in1_128, in2_128; } DisasOps; /* Instructions can place constraints on their operands, raising specification @@ -1462,7 +1474,7 @@ static DisasJumpType op_adb(DisasContext *s, DisasOps *o) static DisasJumpType op_axb(DisasContext *s, DisasOps *o) { - gen_helper_axb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); + gen_helper_axb(o->out_128, cpu_env, o->in1_128, o->in2_128); return DISAS_NEXT; } @@ -1778,7 +1790,7 @@ static DisasJumpType op_cdb(DisasContext *s, DisasOps *o) static DisasJumpType op_cxb(DisasContext *s, DisasOps *o) { - gen_helper_cxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2); + gen_helper_cxb(cc_op, cpu_env, o->in1_128, o->in2_128); set_cc_static(s); return DISAS_NEXT; } @@ -1841,7 +1853,7 @@ static DisasJumpType op_cfxb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m34); + gen_helper_cfxb(o->out, cpu_env, o->in2_128, m34); tcg_temp_free_i32(m34); set_cc_static(s); return DISAS_NEXT; @@ -1880,7 +1892,7 @@ static DisasJumpType op_cgxb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m34); + gen_helper_cgxb(o->out, cpu_env, o->in2_128, m34); tcg_temp_free_i32(m34); set_cc_static(s); return DISAS_NEXT; @@ -1919,7 +1931,7 @@ static DisasJumpType op_clfxb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m34); + gen_helper_clfxb(o->out, cpu_env, o->in2_128, m34); tcg_temp_free_i32(m34); set_cc_static(s); return DISAS_NEXT; @@ -1958,7 +1970,7 @@ static DisasJumpType op_clgxb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m34); + gen_helper_clgxb(o->out, cpu_env, o->in2_128, m34); tcg_temp_free_i32(m34); set_cc_static(s); return DISAS_NEXT; @@ -2445,7 +2457,7 @@ static DisasJumpType op_ddb(DisasContext *s, DisasOps *o) static DisasJumpType op_dxb(DisasContext *s, DisasOps *o) { - gen_helper_dxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); + gen_helper_dxb(o->out_128, cpu_env, o->in1_128, o->in2_128); return DISAS_NEXT; } @@ -2550,7 +2562,7 @@ static DisasJumpType op_fixb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_fixb(o->out_128, cpu_env, o->in1, o->in2, m34); + gen_helper_fixb(o->out_128, cpu_env, o->in2_128, m34); tcg_temp_free_i32(m34); return DISAS_NEXT; } @@ -2769,7 +2781,7 @@ static DisasJumpType op_kdb(DisasContext *s, DisasOps *o) static DisasJumpType op_kxb(DisasContext *s, DisasOps *o) { - gen_helper_kxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2); + gen_helper_kxb(cc_op, cpu_env, o->in1_128, o->in2_128); set_cc_static(s); return DISAS_NEXT; } @@ -2843,7 +2855,7 @@ static DisasJumpType op_ldxb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_ldxb(o->out, cpu_env, o->in1, o->in2, m34); + gen_helper_ldxb(o->out, cpu_env, o->in2_128, m34); tcg_temp_free_i32(m34); return DISAS_NEXT; } @@ -2855,7 +2867,7 @@ static DisasJumpType op_lexb(DisasContext *s, DisasOps *o) if (!m34) { return DISAS_NORETURN; } - gen_helper_lexb(o->out, cpu_env, o->in1, o->in2, m34); + gen_helper_lexb(o->out, cpu_env, o->in2_128, m34); tcg_temp_free_i32(m34); return DISAS_NEXT; } @@ -3584,13 +3596,13 @@ static DisasJumpType op_mdb(DisasContext *s, DisasOps *o) static DisasJumpType op_mxb(DisasContext *s, DisasOps *o) { - gen_helper_mxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); + gen_helper_mxb(o->out_128, cpu_env, o->in1_128, o->in2_128); return DISAS_NEXT; } static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o) { - gen_helper_mxdb(o->out_128, cpu_env, o->out, o->out2, o->in2); + gen_helper_mxdb(o->out_128, cpu_env, o->in1_128, o->in2); return DISAS_NEXT; } @@ -4055,7 +4067,7 @@ static DisasJumpType op_sdb(DisasContext *s, DisasOps *o) static DisasJumpType op_sxb(DisasContext *s, DisasOps *o) { - gen_helper_sxb(o->out_128, cpu_env, o->out, o->out2, o->in1, o->in2); + gen_helper_sxb(o->out_128, cpu_env, o->in1_128, o->in2_128); return DISAS_NEXT; } @@ -4073,7 +4085,7 @@ static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o) static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o) { - gen_helper_sqxb(o->out_128, cpu_env, o->in1, o->in2); + gen_helper_sqxb(o->out_128, cpu_env, o->in2_128); return DISAS_NEXT; } @@ -4852,7 +4864,7 @@ static DisasJumpType op_tcdb(DisasContext *s, DisasOps *o) static DisasJumpType op_tcxb(DisasContext *s, DisasOps *o) { - gen_helper_tcxb(cc_op, cpu_env, o->out, o->out2, o->in2); + gen_helper_tcxb(cc_op, cpu_env, o->in1_128, o->in2); set_cc_static(s); return DISAS_NEXT; } @@ -5387,8 +5399,6 @@ static void prep_new_P(DisasContext *s, DisasOps *o) static void prep_new_x(DisasContext *s, DisasOps *o) { - o->out = tcg_temp_new_i64(); - o->out2 = tcg_temp_new_i64(); o->out_128 = tcg_temp_new_i128(); } #define SPEC_prep_new_x 0 @@ -5411,10 +5421,7 @@ static void prep_r1_P(DisasContext *s, DisasOps *o) static void prep_x1(DisasContext *s, DisasOps *o) { - o->out = load_freg(get_field(s, r1)); - o->out2 = load_freg(get_field(s, r1) + 2); - o->out_128 = tcg_temp_new_i128(); - tcg_gen_concat_i64_i128(o->out_128, o->out2, o->out); + o->out_128 = load_freg_128(get_field(s, r1)); } #define SPEC_prep_x1 SPEC_r1_f128 @@ -5513,6 +5520,11 @@ static void wout_x1(DisasContext *s, DisasOps *o) { int f1 = get_field(s, r1); + /* Split out_128 into out+out2 for cout_f128. */ + tcg_debug_assert(o->out == NULL); + o->out = tcg_temp_new_i64(); + o->out2 = tcg_temp_new_i64(); + tcg_gen_extr_i128_i64(o->out2, o->out, o->out_128); store_freg(f1, o->out); store_freg(f1 + 2, o->out2); @@ -5755,6 +5767,12 @@ static void in1_f1(DisasContext *s, DisasOps *o) } #define SPEC_in1_f1 0 +static void in1_x1(DisasContext *s, DisasOps *o) +{ + o->in1_128 = load_freg_128(get_field(s, r1)); +} +#define SPEC_in1_x1 SPEC_r1_f128 + /* Load the high double word of an extended (128-bit) format FP number */ static void in1_x2h(DisasContext *s, DisasOps *o) { @@ -5964,6 +5982,12 @@ static void in2_f2(DisasContext *s, DisasOps *o) } #define SPEC_in2_f2 0 +static void in2_x2(DisasContext *s, DisasOps *o) +{ + o->in2_128 = load_freg_128(get_field(s, r2)); +} +#define SPEC_in2_x2 SPEC_r2_f128 + /* Load the low double word of an extended (128-bit) format FP number */ static void in2_x2l(DisasContext *s, DisasOps *o) { @@ -6592,6 +6616,12 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) if (o.out_128) { tcg_temp_free_i128(o.out_128); } + if (o.in1_128) { + tcg_temp_free_i128(o.in1_128); + } + if (o.in2_128) { + tcg_temp_free_i128(o.in2_128); + } /* io should be the last instruction in tb when icount is enabled */ if (unlikely(icount && ret == DISAS_NEXT)) { ret = DISAS_TOO_MANY; From 1fcd84fa0d610c1215cced54e64046a47148a388 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 9 Nov 2022 13:54:35 +1100 Subject: [PATCH 35/40] target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG Acked-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- target/s390x/helper.h | 2 -- target/s390x/tcg/insn-data.h.inc | 2 +- target/s390x/tcg/mem_helper.c | 52 ------------------------------ target/s390x/tcg/translate.c | 55 +++++++++++++++++++------------- 4 files changed, 33 insertions(+), 78 deletions(-) diff --git a/target/s390x/helper.h b/target/s390x/helper.h index bccd3bfca6..341bc51ec2 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -35,8 +35,6 @@ DEF_HELPER_3(cxgb, i128, env, s64, i32) DEF_HELPER_3(celgb, i64, env, i64, i32) DEF_HELPER_3(cdlgb, i64, env, i64, i32) DEF_HELPER_3(cxlgb, i128, env, i64, i32) -DEF_HELPER_4(cdsg, void, env, i64, i32, i32) -DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32) DEF_HELPER_4(csst, i32, env, i32, i64, i64) DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64) DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc index 893f4b48db..9d2d35f084 100644 --- a/target/s390x/tcg/insn-data.h.inc +++ b/target/s390x/tcg/insn-data.h.inc @@ -276,7 +276,7 @@ /* COMPARE DOUBLE AND SWAP */ D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ) D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ) - C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0) + C(0xeb3e, CDSG, RSY_a, Z, la2, r3_D64, 0, r1_D64, cdsg, 0) /* COMPARE AND SWAP AND STORE */ C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0) diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 49969abda7..d6725fd18c 100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -1771,58 +1771,6 @@ uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, return cc; } -void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - uintptr_t ra = GETPC(); - Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); - Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); - Int128 oldv; - uint64_t oldh, oldl; - bool fail; - - check_alignment(env, addr, 16, ra); - - oldh = cpu_ldq_data_ra(env, addr + 0, ra); - oldl = cpu_ldq_data_ra(env, addr + 8, ra); - - oldv = int128_make128(oldl, oldh); - fail = !int128_eq(oldv, cmpv); - if (fail) { - newv = oldv; - } - - cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); - cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); - - env->cc_op = fail; - env->regs[r1] = int128_gethi(oldv); - env->regs[r1 + 1] = int128_getlo(oldv); -} - -void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, - uint32_t r1, uint32_t r3) -{ - uintptr_t ra = GETPC(); - Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); - Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); - int mem_idx; - MemOpIdx oi; - Int128 oldv; - bool fail; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx); - oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - fail = !int128_eq(oldv, cmpv); - - env->cc_op = fail; - env->regs[r1] = int128_gethi(oldv); - env->regs[r1 + 1] = int128_getlo(oldv); -} - static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2, bool parallel) { diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index d422a1e62b..9ea28b3e52 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -2224,31 +2224,25 @@ static DisasJumpType op_cs(DisasContext *s, DisasOps *o) static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) { int r1 = get_field(s, r1); - int r3 = get_field(s, r3); - int d2 = get_field(s, d2); - int b2 = get_field(s, b2); - DisasJumpType ret = DISAS_NEXT; - TCGv_i64 addr; - TCGv_i32 t_r1, t_r3; - /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. */ - addr = get_address(s, 0, b2, d2); - t_r1 = tcg_const_i32(r1); - t_r3 = tcg_const_i32(r3); - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); - } else if (HAVE_CMPXCHG128) { - gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); - } else { - gen_helper_exit_atomic(cpu_env); - ret = DISAS_NORETURN; - } - tcg_temp_free_i64(addr); - tcg_temp_free_i32(t_r1); - tcg_temp_free_i32(t_r3); + o->out_128 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]); - set_cc_static(s); - return ret; + /* Note out (R1:R1+1) = expected value and in2 (R3:R3+1) = new value. */ + tcg_gen_atomic_cmpxchg_i128(o->out_128, o->addr1, o->out_128, o->in2_128, + get_mem_index(s), MO_BE | MO_128 | MO_ALIGN); + + /* + * Extract result into cc_dst:cc_src, compare vs the expected value + * in the as yet unmodified input registers, then update CC_OP. + */ + tcg_gen_extr_i128_i64(cc_src, cc_dst, o->out_128); + tcg_gen_xor_i64(cc_dst, cc_dst, regs[r1]); + tcg_gen_xor_i64(cc_src, cc_src, regs[r1 + 1]); + tcg_gen_or_i64(cc_dst, cc_dst, cc_src); + set_cc_nz_u64(s, cc_dst); + + return DISAS_NEXT; } static DisasJumpType op_csst(DisasContext *s, DisasOps *o) @@ -5488,6 +5482,13 @@ static void wout_r1_D32(DisasContext *s, DisasOps *o) } #define SPEC_wout_r1_D32 SPEC_r1_even +static void wout_r1_D64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_extr_i128_i64(regs[r1 + 1], regs[r1], o->out_128); +} +#define SPEC_wout_r1_D64 SPEC_r1_even + static void wout_r3_P32(DisasContext *s, DisasOps *o) { int r3 = get_field(s, r3); @@ -5935,6 +5936,14 @@ static void in2_r3(DisasContext *s, DisasOps *o) } #define SPEC_in2_r3 0 +static void in2_r3_D64(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + o->in2_128 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(o->in2_128, regs[r3 + 1], regs[r3]); +} +#define SPEC_in2_r3_D64 SPEC_r3_even + static void in2_r3_sr32(DisasContext *s, DisasOps *o) { o->in2 = tcg_temp_new_i64(); From b5deff74d1b1cb33b65a6c8db44fc87e972b53f7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 Nov 2022 18:12:09 +1000 Subject: [PATCH 36/40] target/s390x: Implement CC_OP_NZ in gen_op_calc_cc This case is trivial to implement inline. Reviewed-by: David Hildenbrand Signed-off-by: Richard Henderson --- target/s390x/tcg/translate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 9ea28b3e52..ac5bd98f04 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -625,6 +625,9 @@ static void gen_op_calc_cc(DisasContext *s) /* env->cc_op already is the cc value */ break; case CC_OP_NZ: + tcg_gen_setcondi_i64(TCG_COND_NE, cc_dst, cc_dst, 0); + tcg_gen_extrl_i64_i32(cc_op, cc_dst); + break; case CC_OP_ABS_64: case CC_OP_NABS_64: case CC_OP_ABS_32: From 6218c177afb341e5a64428fcc17decbc9d6247a6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 9 Nov 2022 15:22:15 +1100 Subject: [PATCH 37/40] target/i386: Split out gen_cmpxchg8b, gen_cmpxchg16b MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/i386/tcg/translate.c | 48 ++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 7e0b2a709a..a82131d635 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2993,6 +2993,34 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align) #include "emit.c.inc" #include "decode-new.c.inc" +static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm) +{ + gen_lea_modrm(env, s, modrm); + + if ((s->prefix & PREFIX_LOCK) && + (tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cmpxchg8b(cpu_env, s->A0); + } else { + gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0); + } + set_cc_op(s, CC_OP_EFLAGS); +} + +#ifdef TARGET_X86_64 +static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm) +{ + gen_lea_modrm(env, s, modrm); + + if ((s->prefix & PREFIX_LOCK) && + (tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cmpxchg16b(cpu_env, s->A0); + } else { + gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0); + } + set_cc_op(s, CC_OP_EFLAGS); +} +#endif + /* convert one instruction. s->base.is_jmp is set if the translation must be stopped. Return the next pc value */ static bool disas_insn(DisasContext *s, CPUState *cpu) @@ -3844,28 +3872,14 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) { goto illegal_op; } - gen_lea_modrm(env, s, modrm); - if ((s->prefix & PREFIX_LOCK) && - (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg16b(cpu_env, s->A0); - } else { - gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0); - } - set_cc_op(s, CC_OP_EFLAGS); + gen_cmpxchg16b(s, env, modrm); break; } -#endif +#endif if (!(s->cpuid_features & CPUID_CX8)) { goto illegal_op; } - gen_lea_modrm(env, s, modrm); - if ((s->prefix & PREFIX_LOCK) && - (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg8b(cpu_env, s->A0); - } else { - gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0); - } - set_cc_op(s, CC_OP_EFLAGS); + gen_cmpxchg8b(s, env, modrm); break; case 7: /* RDSEED */ From 326ad06cf5b2cf6f4ed7ca635269e89fd189e1a4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 9 Nov 2022 15:59:03 +1100 Subject: [PATCH 38/40] target/i386: Inline cmpxchg8b MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_gen_atomic_cmpxchg_i64 for the atomic case, and tcg_gen_nonatomic_cmpxchg_i64 otherwise. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/i386/helper.h | 2 -- target/i386/tcg/mem_helper.c | 57 ------------------------------------ target/i386/tcg/translate.c | 54 ++++++++++++++++++++++++++++++---- 3 files changed, 49 insertions(+), 64 deletions(-) diff --git a/target/i386/helper.h b/target/i386/helper.h index b7de5429ef..2df8049f91 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -66,8 +66,6 @@ DEF_HELPER_1(rsm, void, env) #endif /* !CONFIG_USER_ONLY */ DEF_HELPER_2(into, void, env, int) -DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl) -DEF_HELPER_2(cmpxchg8b, void, env, tl) #ifdef TARGET_X86_64 DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg16b, void, env, tl) diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c index e3cdafd2d4..814786bb87 100644 --- a/target/i386/tcg/mem_helper.c +++ b/target/i386/tcg/mem_helper.c @@ -27,63 +27,6 @@ #include "tcg/tcg.h" #include "helper-tcg.h" -void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) -{ - uintptr_t ra = GETPC(); - uint64_t oldv, cmpv, newv; - int eflags; - - eflags = cpu_cc_compute_all(env, CC_OP); - - cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); - newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); - - oldv = cpu_ldq_data_ra(env, a0, ra); - newv = (cmpv == oldv ? newv : oldv); - /* always do the store */ - cpu_stq_data_ra(env, a0, newv, ra); - - if (oldv == cmpv) { - eflags |= CC_Z; - } else { - env->regs[R_EAX] = (uint32_t)oldv; - env->regs[R_EDX] = (uint32_t)(oldv >> 32); - eflags &= ~CC_Z; - } - CC_SRC = eflags; -} - -void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) -{ -#ifdef CONFIG_ATOMIC64 - uint64_t oldv, cmpv, newv; - int eflags; - - eflags = cpu_cc_compute_all(env, CC_OP); - - cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); - newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); - - { - uintptr_t ra = GETPC(); - int mem_idx = cpu_mmu_index(env, false); - MemOpIdx oi = make_memop_idx(MO_TEUQ, mem_idx); - oldv = cpu_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra); - } - - if (oldv == cmpv) { - eflags |= CC_Z; - } else { - env->regs[R_EAX] = (uint32_t)oldv; - env->regs[R_EDX] = (uint32_t)(oldv >> 32); - eflags &= ~CC_Z; - } - CC_SRC = eflags; -#else - cpu_loop_exit_atomic(env_cpu(env), GETPC()); -#endif /* CONFIG_ATOMIC64 */ -} - #ifdef TARGET_X86_64 void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0) { diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a82131d635..b542b084a6 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2995,15 +2995,59 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align) static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm) { + TCGv_i64 cmp, val, old; + TCGv Z; + gen_lea_modrm(env, s, modrm); - if ((s->prefix & PREFIX_LOCK) && - (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg8b(cpu_env, s->A0); + cmp = tcg_temp_new_i64(); + val = tcg_temp_new_i64(); + old = tcg_temp_new_i64(); + + /* Construct the comparison values from the register pair. */ + tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); + tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); + + /* Only require atomic with LOCK; non-parallel handled in generator. */ + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); } else { - gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0); + tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, + s->mem_index, MO_TEUQ); } - set_cc_op(s, CC_OP_EFLAGS); + tcg_temp_free_i64(val); + + /* Set tmp0 to match the required value of Z. */ + tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp); + Z = tcg_temp_new(); + tcg_gen_trunc_i64_tl(Z, cmp); + tcg_temp_free_i64(cmp); + + /* + * Extract the result values for the register pair. + * For 32-bit, we may do this unconditionally, because on success (Z=1), + * the old value matches the previous value in EDX:EAX. For x86_64, + * the store must be conditional, because we must leave the source + * registers unchanged on success, and zero-extend the writeback + * on failure (Z=0). + */ + if (TARGET_LONG_BITS == 32) { + tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old); + } else { + TCGv zero = tcg_constant_tl(0); + + tcg_gen_extr_i64_tl(s->T0, s->T1, old); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero, + s->T0, cpu_regs[R_EAX]); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero, + s->T1, cpu_regs[R_EDX]); + } + tcg_temp_free_i64(old); + + /* Update Z. */ + gen_compute_eflags(s); + tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1); + tcg_temp_free(Z); } #ifdef TARGET_X86_64 From 5f0dd8cd33cb6c753ed4435e13bd0622a38a9967 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 9 Nov 2022 23:53:10 +1100 Subject: [PATCH 39/40] target/i386: Inline cmpxchg16b MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_gen_atomic_cmpxchg_i128 for the atomic case, and tcg_gen_qemu_ld/st_i128 otherwise. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/i386/helper.h | 4 --- target/i386/tcg/mem_helper.c | 69 ------------------------------------ target/i386/tcg/translate.c | 44 ++++++++++++++++++++--- 3 files changed, 39 insertions(+), 78 deletions(-) diff --git a/target/i386/helper.h b/target/i386/helper.h index 2df8049f91..e627a93107 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -66,10 +66,6 @@ DEF_HELPER_1(rsm, void, env) #endif /* !CONFIG_USER_ONLY */ DEF_HELPER_2(into, void, env, int) -#ifdef TARGET_X86_64 -DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl) -DEF_HELPER_2(cmpxchg16b, void, env, tl) -#endif DEF_HELPER_FLAGS_1(single_step, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_1(rechecking_single_step, void, env) DEF_HELPER_1(cpuid, void, env) diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c index 814786bb87..3ef84e90d9 100644 --- a/target/i386/tcg/mem_helper.c +++ b/target/i386/tcg/mem_helper.c @@ -27,75 +27,6 @@ #include "tcg/tcg.h" #include "helper-tcg.h" -#ifdef TARGET_X86_64 -void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0) -{ - uintptr_t ra = GETPC(); - Int128 oldv, cmpv, newv; - uint64_t o0, o1; - int eflags; - bool success; - - if ((a0 & 0xf) != 0) { - raise_exception_ra(env, EXCP0D_GPF, GETPC()); - } - eflags = cpu_cc_compute_all(env, CC_OP); - - cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); - newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); - - o0 = cpu_ldq_data_ra(env, a0 + 0, ra); - o1 = cpu_ldq_data_ra(env, a0 + 8, ra); - - oldv = int128_make128(o0, o1); - success = int128_eq(oldv, cmpv); - if (!success) { - newv = oldv; - } - - cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra); - cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra); - - if (success) { - eflags |= CC_Z; - } else { - env->regs[R_EAX] = int128_getlo(oldv); - env->regs[R_EDX] = int128_gethi(oldv); - eflags &= ~CC_Z; - } - CC_SRC = eflags; -} - -void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) -{ - uintptr_t ra = GETPC(); - - if ((a0 & 0xf) != 0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } else if (HAVE_CMPXCHG128) { - int eflags = cpu_cc_compute_all(env, CC_OP); - - Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); - Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); - - int mem_idx = cpu_mmu_index(env, false); - MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx); - Int128 oldv = cpu_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra); - - if (int128_eq(oldv, cmpv)) { - eflags |= CC_Z; - } else { - env->regs[R_EAX] = int128_getlo(oldv); - env->regs[R_EDX] = int128_gethi(oldv); - eflags &= ~CC_Z; - } - CC_SRC = eflags; - } else { - cpu_loop_exit_atomic(env_cpu(env), ra); - } -} -#endif - void helper_boundw(CPUX86State *env, target_ulong a0, int v) { int low, high; diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index b542b084a6..9d9392b009 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -3053,15 +3053,49 @@ static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm) #ifdef TARGET_X86_64 static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm) { + MemOp mop = MO_TE | MO_128 | MO_ALIGN; + TCGv_i64 t0, t1; + TCGv_i128 cmp, val; + gen_lea_modrm(env, s, modrm); - if ((s->prefix & PREFIX_LOCK) && - (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg16b(cpu_env, s->A0); + cmp = tcg_temp_new_i128(); + val = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); + tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); + + /* Only require atomic with LOCK; non-parallel handled in generator. */ + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); } else { - gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0); + tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); } - set_cc_op(s, CC_OP_EFLAGS); + + tcg_gen_extr_i128_i64(s->T0, s->T1, val); + tcg_temp_free_i128(cmp); + tcg_temp_free_i128(val); + + /* Determine success after the fact. */ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]); + tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]); + tcg_gen_or_i64(t0, t0, t1); + tcg_temp_free_i64(t1); + + /* Update Z. */ + gen_compute_eflags(s); + tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0); + tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1); + tcg_temp_free_i64(t0); + + /* + * Extract the result values for the register pair. We may do this + * unconditionally, because on success (Z=1), the old value matches + * the previous value in RDX:RAX. + */ + tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0); + tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1); } #endif From a2495ede07498ee36b18b03e7038ba30c9871bb2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 3 Feb 2023 17:16:31 +0000 Subject: [PATCH 40/40] tcg/aarch64: Fix patching of LDR in tb_target_set_jmp_target 'offset' should be bits [23:5] of LDR instruction, rather than [4:0]. Fixes: d59d83a1c388 ("tcg/aarch64: Reorg goto_tb implementation") Reviewed-by: Zenghui Yu Reported-by: Zenghui Yu Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index fde3b30ad1..a091326f84 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1914,7 +1914,7 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, ptrdiff_t i_offset = i_addr - jmp_rx; /* Note that we asserted this in range in tcg_out_goto_tb. */ - insn = deposit32(I3305_LDR | TCG_REG_TMP, 0, 5, i_offset >> 2); + insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2); } qatomic_set((uint32_t *)jmp_rw, insn); flush_idcache_range(jmp_rx, jmp_rw, 4);