tcg: Add INDEX_op_qemu_{ld,st}_i128

Add opcodes for backend support for 128-bit memory operations.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-11-07 10:42:56 +11:00
parent 7b88010719
commit 12fde9bcdb
15 changed files with 108 additions and 11 deletions

View File

@@ -672,19 +672,20 @@ QEMU specific operations
| This operation is optional. If the TCG backend does not implement the | This operation is optional. If the TCG backend does not implement the
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0). goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
* - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx* * - qemu_ld_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx* qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
qemu_st8_i32 *t0*, *t1*, *flags*, *memidx* qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
- | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
address *t1*. The _i32/_i64 size applies to the size of the input/output address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output
register *t0* only. The address *t1* is always sized according to the guest, register *t0* only. The address *t1* is always sized according to the guest,
and the width of the memory operation is controlled by *flags*. and the width of the memory operation is controlled by *flags*.
| |
| Both *t0* and *t1* may be split into little-endian ordered pairs of registers | Both *t0* and *t1* may be split into little-endian ordered pairs of registers
if dealing with 64-bit quantities on a 32-bit host. if dealing with 64-bit quantities on a 32-bit host, or 128-bit quantities on
a 64-bit host.
| |
| The *memidx* selects the qemu tlb index to use (e.g. user or kernel access). | The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
The flags are the MemOp bits, selecting the sign, width, and endianness The flags are the MemOp bits, selecting the sign, width, and endianness
@@ -693,6 +694,8 @@ QEMU specific operations
| For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a | For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in *flags*. 64-bit memory access specified in *flags*.
| |
| For qemu_ld/st_i128, these are only supported for a 64-bit host.
|
| For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
the memory operation is known to be 8-bit. This allows the backend to the memory operation is known to be 8-bit. This allows the backend to
provide a different set of register constraints. provide a different set of register constraints.

View File

@@ -213,6 +213,14 @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
IMPL(TCG_TARGET_HAS_qemu_st8_i32)) IMPL(TCG_TARGET_HAS_qemu_st8_i32))
/* Only for 64-bit hosts at the moment. */
DEF(qemu_ld_i128, 2, 1, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
DEF(qemu_st_i128, 0, 3, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
/* Host vector support. */ /* Host vector support. */
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec) #define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)

View File

@@ -129,6 +129,8 @@ extern bool have_lse2;
#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_v64 1 #define TCG_TARGET_HAS_v64 1
#define TCG_TARGET_HAS_v128 1 #define TCG_TARGET_HAS_v128 1
#define TCG_TARGET_HAS_v256 0 #define TCG_TARGET_HAS_v256 0

View File

@@ -125,6 +125,8 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_rem_i32 0 #define TCG_TARGET_HAS_rem_i32 0
#define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_v64 use_neon_instructions #define TCG_TARGET_HAS_v64 use_neon_instructions
#define TCG_TARGET_HAS_v128 use_neon_instructions #define TCG_TARGET_HAS_v128 use_neon_instructions
#define TCG_TARGET_HAS_v256 0 #define TCG_TARGET_HAS_v256 0

View File

@@ -194,6 +194,8 @@ extern bool have_atomic16;
#define TCG_TARGET_HAS_qemu_st8_i32 1 #define TCG_TARGET_HAS_qemu_st8_i32 1
#endif #endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
/* We do not support older SSE systems, only beginning with AVX1. */ /* We do not support older SSE systems, only beginning with AVX1. */
#define TCG_TARGET_HAS_v64 have_avx1 #define TCG_TARGET_HAS_v64 have_avx1
#define TCG_TARGET_HAS_v128 have_avx1 #define TCG_TARGET_HAS_v128 have_avx1

View File

@@ -168,6 +168,7 @@ typedef enum {
#define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_DEFAULT_MO (0)

View File

@@ -204,6 +204,8 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */ #define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
#endif #endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_DEFAULT_MO 0 #define TCG_TARGET_DEFAULT_MO 0
#define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_LDST_LABELS

View File

@@ -2186,11 +2186,13 @@ void tcg_optimize(TCGContext *s)
break; break;
case INDEX_op_qemu_ld_i32: case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_ld_i128:
done = fold_qemu_ld(&ctx, op); done = fold_qemu_ld(&ctx, op);
break; break;
case INDEX_op_qemu_st_i32: case INDEX_op_qemu_st_i32:
case INDEX_op_qemu_st8_i32: case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_st_i128:
done = fold_qemu_st(&ctx, op); done = fold_qemu_st(&ctx, op);
break; break;
CASE_OP_32_64(rem): CASE_OP_32_64(rem):

View File

@@ -149,6 +149,8 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1
#endif #endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
/* /*
* While technically Altivec could support V64, it has no 64-bit store * While technically Altivec could support V64, it has no 64-bit store
* instruction and substituting two 32-bit stores makes the generated * instruction and substituting two 32-bit stores makes the generated

View File

@@ -163,6 +163,8 @@ typedef enum {
#define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_muluh_i64 1
#define TCG_TARGET_HAS_mulsh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_LDST_LABELS

View File

@@ -140,6 +140,8 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR) #define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR) #define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
#define TCG_TARGET_HAS_v256 0 #define TCG_TARGET_HAS_v256 0

View File

@@ -151,6 +151,8 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions #define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
#define TCG_TARGET_HAS_mulsh_i64 0 #define TCG_TARGET_HAS_mulsh_i64 0
#define TCG_TARGET_HAS_qemu_ldst_i128 0
#define TCG_AREG0 TCG_REG_I0 #define TCG_AREG0 TCG_REG_I0
#define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_DEFAULT_MO (0)

View File

@@ -3205,7 +3205,7 @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop) void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{ {
MemOpIdx oi = make_memop_idx(memop, idx); const MemOpIdx oi = make_memop_idx(memop, idx);
tcg_debug_assert((memop & MO_SIZE) == MO_128); tcg_debug_assert((memop & MO_SIZE) == MO_128);
tcg_debug_assert((memop & MO_SIGN) == 0); tcg_debug_assert((memop & MO_SIGN) == 0);
@@ -3213,9 +3213,36 @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
addr = plugin_prep_mem_callbacks(addr); addr = plugin_prep_mem_callbacks(addr);
/* TODO: allow the tcg backend to see the whole operation. */ /* TODO: For now, force 32-bit hosts to use the helper. */
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
TCGv_i64 lo, hi;
TCGArg addr_arg;
MemOpIdx adj_oi;
bool need_bswap = false;
if (use_two_i64_for_i128(memop)) { if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
lo = TCGV128_HIGH(val);
hi = TCGV128_LOW(val);
adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
need_bswap = true;
} else {
lo = TCGV128_LOW(val);
hi = TCGV128_HIGH(val);
adj_oi = oi;
}
#if TARGET_LONG_BITS == 32
addr_arg = tcgv_i32_arg(addr);
#else
addr_arg = tcgv_i64_arg(addr);
#endif
tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
if (need_bswap) {
tcg_gen_bswap64_i64(lo, lo);
tcg_gen_bswap64_i64(hi, hi);
}
} else if (use_two_i64_for_i128(memop)) {
MemOp mop[2]; MemOp mop[2];
TCGv addr_p8; TCGv addr_p8;
TCGv_i64 x, y; TCGv_i64 x, y;
@@ -3258,7 +3285,7 @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop) void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{ {
MemOpIdx oi = make_memop_idx(memop, idx); const MemOpIdx oi = make_memop_idx(memop, idx);
tcg_debug_assert((memop & MO_SIZE) == MO_128); tcg_debug_assert((memop & MO_SIZE) == MO_128);
tcg_debug_assert((memop & MO_SIGN) == 0); tcg_debug_assert((memop & MO_SIGN) == 0);
@@ -3266,9 +3293,39 @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST); tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
addr = plugin_prep_mem_callbacks(addr); addr = plugin_prep_mem_callbacks(addr);
/* TODO: allow the tcg backend to see the whole operation. */ /* TODO: For now, force 32-bit hosts to use the helper. */
if (use_two_i64_for_i128(memop)) { if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
TCGv_i64 lo, hi;
TCGArg addr_arg;
MemOpIdx adj_oi;
bool need_bswap = false;
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
lo = tcg_temp_new_i64();
hi = tcg_temp_new_i64();
tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
need_bswap = true;
} else {
lo = TCGV128_LOW(val);
hi = TCGV128_HIGH(val);
adj_oi = oi;
}
#if TARGET_LONG_BITS == 32
addr_arg = tcgv_i32_arg(addr);
#else
addr_arg = tcgv_i64_arg(addr);
#endif
tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
if (need_bswap) {
tcg_temp_free_i64(lo);
tcg_temp_free_i64(hi);
}
} else if (use_two_i64_for_i128(memop)) {
MemOp mop[2]; MemOp mop[2];
TCGv addr_p8; TCGv addr_p8;
TCGv_i64 x, y; TCGv_i64 x, y;

View File

@@ -1735,6 +1735,10 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_qemu_st8_i32: case INDEX_op_qemu_st8_i32:
return TCG_TARGET_HAS_qemu_st8_i32; return TCG_TARGET_HAS_qemu_st8_i32;
case INDEX_op_qemu_ld_i128:
case INDEX_op_qemu_st_i128:
return TCG_TARGET_HAS_qemu_ldst_i128;
case INDEX_op_mov_i32: case INDEX_op_mov_i32:
case INDEX_op_setcond_i32: case INDEX_op_setcond_i32:
case INDEX_op_brcond_i32: case INDEX_op_brcond_i32:
@@ -2187,7 +2191,7 @@ static const char * const cond_name[] =
[TCG_COND_GTU] = "gtu" [TCG_COND_GTU] = "gtu"
}; };
static const char * const ldst_name[] = static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{ {
[MO_UB] = "ub", [MO_UB] = "ub",
[MO_SB] = "sb", [MO_SB] = "sb",
@@ -2201,6 +2205,8 @@ static const char * const ldst_name[] =
[MO_BEUL] = "beul", [MO_BEUL] = "beul",
[MO_BESL] = "besl", [MO_BESL] = "besl",
[MO_BEUQ] = "beq", [MO_BEUQ] = "beq",
[MO_128 + MO_BE] = "beo",
[MO_128 + MO_LE] = "leo",
}; };
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = { static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
@@ -2357,6 +2363,8 @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
case INDEX_op_qemu_st8_i32: case INDEX_op_qemu_st8_i32:
case INDEX_op_qemu_ld_i64: case INDEX_op_qemu_ld_i64:
case INDEX_op_qemu_st_i64: case INDEX_op_qemu_st_i64:
case INDEX_op_qemu_ld_i128:
case INDEX_op_qemu_st_i128:
{ {
const char *s_al, *s_op, *s_at; const char *s_al, *s_op, *s_at;
MemOpIdx oi = op->args[k++]; MemOpIdx oi = op->args[k++];

View File

@@ -127,6 +127,8 @@
#define TCG_TARGET_HAS_mulu2_i32 1 #define TCG_TARGET_HAS_mulu2_i32 1
#endif /* TCG_TARGET_REG_BITS == 64 */ #endif /* TCG_TARGET_REG_BITS == 64 */
#define TCG_TARGET_HAS_qemu_ldst_i128 0
/* Number of registers available. */ /* Number of registers available. */
#define TCG_TARGET_NB_REGS 16 #define TCG_TARGET_NB_REGS 16