diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index 4eda24e63a..2e36d6eb0c 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -28,6 +28,17 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env) DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) +#ifndef IN_HELPER_PROTO +/* + * Pass calls to memset directly to libc, without a thunk in qemu. + * Do not re-declare memset, especially since we fudge the type here; + * we assume sizeof(void *) == sizeof(size_t), which is true for + * all supported hosts. + */ +#define helper_memset memset +DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr) +#endif /* IN_HELPER_PROTO */ + #ifdef CONFIG_SOFTMMU DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h index a0a8d9aa46..659f9298e8 100644 --- a/include/exec/helper-proto.h +++ b/include/exec/helper-proto.h @@ -35,11 +35,15 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \ dh_ctype(t7)); +#define IN_HELPER_PROTO + #include "helper.h" #include "trace/generated-helpers.h" #include "tcg-runtime.h" #include "plugin-helpers.h" +#undef IN_HELPER_PROTO + #undef DEF_HELPER_FLAGS_0 #undef DEF_HELPER_FLAGS_1 #undef DEF_HELPER_FLAGS_2 diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index 5abf17fecc..5b3bdacc39 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -1085,6 +1085,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_bswap16_tl tcg_gen_bswap16_i64 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i64 #define tcg_gen_bswap64_tl tcg_gen_bswap64_i64 +#define tcg_gen_bswap_tl tcg_gen_bswap64_i64 #define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64 #define tcg_gen_extr_i64_tl tcg_gen_extr32_i64 #define tcg_gen_andc_tl tcg_gen_andc_i64 @@ -1197,6 +1198,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_ext32s_tl tcg_gen_mov_i32 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 #define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_bswap_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 #define tcg_gen_andc_tl tcg_gen_andc_i32 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index d536f3ccc1..4089e29cd9 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1462,14 +1462,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_shl_i32: if (c2) { - tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f); } else { tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2); } break; case INDEX_op_shl_i64: if (c2) { - tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f); } else { tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2); } @@ -1477,14 +1477,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_shr_i32: if (c2) { - tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f); } else { tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2); } break; case INDEX_op_shr_i64: if (c2) { - tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f); } else { tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2); } @@ -1492,14 +1492,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sar_i32: if (c2) { - tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f); } else { tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2); } break; case INDEX_op_sar_i64: if (c2) { - tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2); + tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f); } else { tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2); } diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index ddbe06b71a..1a41dfa908 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -547,6 +547,9 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, in_c = dup_const(vece, in_c); if (in_c == 0) { oprsz = maxsz; + vece = MO_8; + } else if (in_c == dup_const(MO_8, in_c)) { + vece = MO_8; } } @@ -628,6 +631,35 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, /* Otherwise implement out of line. */ t_ptr = tcg_temp_new_ptr(); tcg_gen_addi_ptr(t_ptr, cpu_env, dofs); + + /* + * This may be expand_clr for the tail of an operation, e.g. + * oprsz == 8 && maxsz == 64. The size of the clear is misaligned + * wrt simd_desc and will assert. Simply pass all replicated byte + * stores through to memset. + */ + if (oprsz == maxsz && vece == MO_8) { + TCGv_ptr t_size = tcg_const_ptr(oprsz); + TCGv_i32 t_val; + + if (in_32) { + t_val = in_32; + } else if (in_64) { + t_val = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t_val, in_64); + } else { + t_val = tcg_const_i32(in_c); + } + gen_helper_memset(t_ptr, t_ptr, t_val, t_size); + + if (!in_32) { + tcg_temp_free_i32(t_val); + } + tcg_temp_free_ptr(t_size); + tcg_temp_free_ptr(t_ptr); + return; + } + t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0)); if (vece == MO_64) {