tcg: Use memset for large vector byte replication
Inf47db80cc0
, we handled odd-sized tail clearing for the case of hosts that have vector operations, but did not handle the case of hosts that do not have vector ops. This was ok untile2e7168a21
, which changed the encoding of simd_desc such that the odd sizes are impossible. Add memset as a tcg helper, and use that for all out-of-line byte stores to vectors. This includes, but is not limited to, the tail clearing operation in question. Cc: qemu-stable@nongnu.org Buglink: https://bugs.launchpad.net/bugs/1907817 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
41192db338
commit
6d3ef04893
@ -28,6 +28,17 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
|
||||
|
||||
DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
|
||||
|
||||
#ifndef IN_HELPER_PROTO
|
||||
/*
|
||||
* Pass calls to memset directly to libc, without a thunk in qemu.
|
||||
* Do not re-declare memset, especially since we fudge the type here;
|
||||
* we assume sizeof(void *) == sizeof(size_t), which is true for
|
||||
* all supported hosts.
|
||||
*/
|
||||
#define helper_memset memset
|
||||
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
|
||||
#endif /* IN_HELPER_PROTO */
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
|
||||
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
|
||||
|
@ -35,11 +35,15 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
|
||||
dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
|
||||
dh_ctype(t7));
|
||||
|
||||
#define IN_HELPER_PROTO
|
||||
|
||||
#include "helper.h"
|
||||
#include "trace/generated-helpers.h"
|
||||
#include "tcg-runtime.h"
|
||||
#include "plugin-helpers.h"
|
||||
|
||||
#undef IN_HELPER_PROTO
|
||||
|
||||
#undef DEF_HELPER_FLAGS_0
|
||||
#undef DEF_HELPER_FLAGS_1
|
||||
#undef DEF_HELPER_FLAGS_2
|
||||
|
@ -547,6 +547,9 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
|
||||
in_c = dup_const(vece, in_c);
|
||||
if (in_c == 0) {
|
||||
oprsz = maxsz;
|
||||
vece = MO_8;
|
||||
} else if (in_c == dup_const(MO_8, in_c)) {
|
||||
vece = MO_8;
|
||||
}
|
||||
}
|
||||
|
||||
@ -628,6 +631,35 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
|
||||
/* Otherwise implement out of line. */
|
||||
t_ptr = tcg_temp_new_ptr();
|
||||
tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
|
||||
|
||||
/*
|
||||
* This may be expand_clr for the tail of an operation, e.g.
|
||||
* oprsz == 8 && maxsz == 64. The size of the clear is misaligned
|
||||
* wrt simd_desc and will assert. Simply pass all replicated byte
|
||||
* stores through to memset.
|
||||
*/
|
||||
if (oprsz == maxsz && vece == MO_8) {
|
||||
TCGv_ptr t_size = tcg_const_ptr(oprsz);
|
||||
TCGv_i32 t_val;
|
||||
|
||||
if (in_32) {
|
||||
t_val = in_32;
|
||||
} else if (in_64) {
|
||||
t_val = tcg_temp_new_i32();
|
||||
tcg_gen_extrl_i64_i32(t_val, in_64);
|
||||
} else {
|
||||
t_val = tcg_const_i32(in_c);
|
||||
}
|
||||
gen_helper_memset(t_ptr, t_ptr, t_val, t_size);
|
||||
|
||||
if (!in_32) {
|
||||
tcg_temp_free_i32(t_val);
|
||||
}
|
||||
tcg_temp_free_ptr(t_size);
|
||||
tcg_temp_free_ptr(t_ptr);
|
||||
return;
|
||||
}
|
||||
|
||||
t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
|
||||
|
||||
if (vece == MO_64) {
|
||||
|
Loading…
Reference in New Issue
Block a user