target/arm: Implement SME LD1, ST1
We cannot reuse the SVE functions for LD[1-4] and ST[1-4], because those functions accept only a Zreg register number. For SME, we want to pass a pointer into ZA storage. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20220708151540.18136-21-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
e9ad3ef19e
commit
7390e0e9ab
@ -33,3 +33,85 @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
|
||||
|
@ -37,3 +37,12 @@ MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
|
||||
&mova to_vec=1 rs=%mova_rs
|
||||
MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
|
||||
&mova to_vec=1 rs=%mova_rs esz=4
|
||||
|
||||
### SME Memory
|
||||
|
||||
&ldst esz rs pg rn rm za_imm v:bool st:bool
|
||||
|
||||
LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
|
||||
&ldst rs=%mova_rs
|
||||
LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
|
||||
&ldst esz=4 rs=%mova_rs
|
||||
|
@ -19,10 +19,14 @@
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "cpu.h"
|
||||
#include "internals.h"
|
||||
#include "tcg/tcg-gvec-desc.h"
|
||||
#include "exec/helper-proto.h"
|
||||
#include "exec/cpu_ldst.h"
|
||||
#include "exec/exec-all.h"
|
||||
#include "qemu/int128.h"
|
||||
#include "vec_internal.h"
|
||||
#include "sve_ldst_internal.h"
|
||||
|
||||
/* ResetSVEState */
|
||||
void arm_reset_sve_state(CPUARMState *env)
|
||||
@ -233,3 +237,594 @@ void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
|
||||
}
|
||||
|
||||
#undef DO_MOVA_Z
|
||||
|
||||
/*
|
||||
* Clear elements in a tile slice comprising len bytes.
|
||||
*/
|
||||
|
||||
typedef void ClearFn(void *ptr, size_t off, size_t len);
|
||||
|
||||
static void clear_horizontal(void *ptr, size_t off, size_t len)
|
||||
{
|
||||
memset(ptr + off, 0, len);
|
||||
}
|
||||
|
||||
static void clear_vertical_b(void *vptr, size_t off, size_t len)
|
||||
{
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
*(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void clear_vertical_h(void *vptr, size_t off, size_t len)
|
||||
{
|
||||
for (size_t i = 0; i < len; i += 2) {
|
||||
*(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void clear_vertical_s(void *vptr, size_t off, size_t len)
|
||||
{
|
||||
for (size_t i = 0; i < len; i += 4) {
|
||||
*(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void clear_vertical_d(void *vptr, size_t off, size_t len)
|
||||
{
|
||||
for (size_t i = 0; i < len; i += 8) {
|
||||
*(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void clear_vertical_q(void *vptr, size_t off, size_t len)
|
||||
{
|
||||
for (size_t i = 0; i < len; i += 16) {
|
||||
memset(vptr + tile_vslice_offset(i + off), 0, 16);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy elements from an array into a tile slice comprising len bytes.
|
||||
*/
|
||||
|
||||
typedef void CopyFn(void *dst, const void *src, size_t len);
|
||||
|
||||
static void copy_horizontal(void *dst, const void *src, size_t len)
|
||||
{
|
||||
memcpy(dst, src, len);
|
||||
}
|
||||
|
||||
static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
|
||||
{
|
||||
const uint8_t *src = vsrc;
|
||||
uint8_t *dst = vdst;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < len; ++i) {
|
||||
dst[tile_vslice_index(i)] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
|
||||
{
|
||||
const uint16_t *src = vsrc;
|
||||
uint16_t *dst = vdst;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < len / 2; ++i) {
|
||||
dst[tile_vslice_index(i)] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
|
||||
{
|
||||
const uint32_t *src = vsrc;
|
||||
uint32_t *dst = vdst;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < len / 4; ++i) {
|
||||
dst[tile_vslice_index(i)] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
|
||||
{
|
||||
const uint64_t *src = vsrc;
|
||||
uint64_t *dst = vdst;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < len / 8; ++i) {
|
||||
dst[tile_vslice_index(i)] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
|
||||
{
|
||||
for (size_t i = 0; i < len; i += 16) {
|
||||
memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Host and TLB primitives for vertical tile slice addressing.
|
||||
*/
|
||||
|
||||
#define DO_LD(NAME, TYPE, HOST, TLB) \
|
||||
static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
|
||||
{ \
|
||||
TYPE val = HOST(host); \
|
||||
*(TYPE *)(za + tile_vslice_offset(off)) = val; \
|
||||
} \
|
||||
static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
|
||||
intptr_t off, target_ulong addr, uintptr_t ra) \
|
||||
{ \
|
||||
TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
|
||||
*(TYPE *)(za + tile_vslice_offset(off)) = val; \
|
||||
}
|
||||
|
||||
#define DO_ST(NAME, TYPE, HOST, TLB) \
|
||||
static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
|
||||
{ \
|
||||
TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
|
||||
HOST(host, val); \
|
||||
} \
|
||||
static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
|
||||
intptr_t off, target_ulong addr, uintptr_t ra) \
|
||||
{ \
|
||||
TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
|
||||
TLB(env, useronly_clean_ptr(addr), val, ra); \
|
||||
}
|
||||
|
||||
/*
|
||||
* The ARMVectorReg elements are stored in host-endian 64-bit units.
|
||||
* For 128-bit quantities, the sequence defined by the Elem[] pseudocode
|
||||
* corresponds to storing the two 64-bit pieces in little-endian order.
|
||||
*/
|
||||
#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
|
||||
static inline void HNAME##_host(void *za, intptr_t off, void *host) \
|
||||
{ \
|
||||
uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
|
||||
uint64_t *ptr = za + off; \
|
||||
ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
|
||||
} \
|
||||
static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
|
||||
{ \
|
||||
HNAME##_host(za, tile_vslice_offset(off), host); \
|
||||
} \
|
||||
static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
|
||||
target_ulong addr, uintptr_t ra) \
|
||||
{ \
|
||||
uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
|
||||
uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
|
||||
uint64_t *ptr = za + off; \
|
||||
ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
|
||||
} \
|
||||
static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
|
||||
target_ulong addr, uintptr_t ra) \
|
||||
{ \
|
||||
HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
|
||||
}
|
||||
|
||||
#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
|
||||
static inline void HNAME##_host(void *za, intptr_t off, void *host) \
|
||||
{ \
|
||||
uint64_t *ptr = za + off; \
|
||||
HOST(host, ptr[BE]); \
|
||||
HOST(host + 1, ptr[!BE]); \
|
||||
} \
|
||||
static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
|
||||
{ \
|
||||
HNAME##_host(za, tile_vslice_offset(off), host); \
|
||||
} \
|
||||
static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
|
||||
target_ulong addr, uintptr_t ra) \
|
||||
{ \
|
||||
uint64_t *ptr = za + off; \
|
||||
TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
|
||||
TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
|
||||
} \
|
||||
static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
|
||||
target_ulong addr, uintptr_t ra) \
|
||||
{ \
|
||||
HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
|
||||
}
|
||||
|
||||
DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
|
||||
DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
|
||||
DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
|
||||
DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
|
||||
DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
|
||||
DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
|
||||
DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
|
||||
|
||||
DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
|
||||
DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
|
||||
|
||||
DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
|
||||
DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
|
||||
DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
|
||||
DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
|
||||
DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
|
||||
DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
|
||||
DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
|
||||
|
||||
DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
|
||||
DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
|
||||
|
||||
#undef DO_LD
|
||||
#undef DO_ST
|
||||
#undef DO_LDQ
|
||||
#undef DO_STQ
|
||||
|
||||
/*
|
||||
* Common helper for all contiguous predicated loads.
|
||||
*/
|
||||
|
||||
static inline QEMU_ALWAYS_INLINE
|
||||
void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
|
||||
const target_ulong addr, uint32_t desc, const uintptr_t ra,
|
||||
const int esz, uint32_t mtedesc, bool vertical,
|
||||
sve_ldst1_host_fn *host_fn,
|
||||
sve_ldst1_tlb_fn *tlb_fn,
|
||||
ClearFn *clr_fn,
|
||||
CopyFn *cpy_fn)
|
||||
{
|
||||
const intptr_t reg_max = simd_oprsz(desc);
|
||||
const intptr_t esize = 1 << esz;
|
||||
intptr_t reg_off, reg_last;
|
||||
SVEContLdSt info;
|
||||
void *host;
|
||||
int flags;
|
||||
|
||||
/* Find the active elements. */
|
||||
if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
|
||||
/* The entire predicate was false; no load occurs. */
|
||||
clr_fn(za, 0, reg_max);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Probe the page(s). Exit with exception for any invalid page. */
|
||||
sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
|
||||
|
||||
/* Handle watchpoints for all active elements. */
|
||||
sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
|
||||
BP_MEM_READ, ra);
|
||||
|
||||
/*
|
||||
* Handle mte checks for all active elements.
|
||||
* Since TBI must be set for MTE, !mtedesc => !mte_active.
|
||||
*/
|
||||
if (mtedesc) {
|
||||
sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
|
||||
mtedesc, ra);
|
||||
}
|
||||
|
||||
flags = info.page[0].flags | info.page[1].flags;
|
||||
if (unlikely(flags != 0)) {
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
g_assert_not_reached();
|
||||
#else
|
||||
/*
|
||||
* At least one page includes MMIO.
|
||||
* Any bus operation can fail with cpu_transaction_failed,
|
||||
* which for ARM will raise SyncExternal. Perform the load
|
||||
* into scratch memory to preserve register state until the end.
|
||||
*/
|
||||
ARMVectorReg scratch = { };
|
||||
|
||||
reg_off = info.reg_off_first[0];
|
||||
reg_last = info.reg_off_last[1];
|
||||
if (reg_last < 0) {
|
||||
reg_last = info.reg_off_split;
|
||||
if (reg_last < 0) {
|
||||
reg_last = info.reg_off_last[0];
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
uint64_t pg = vg[reg_off >> 6];
|
||||
do {
|
||||
if ((pg >> (reg_off & 63)) & 1) {
|
||||
tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
|
||||
}
|
||||
reg_off += esize;
|
||||
} while (reg_off & 63);
|
||||
} while (reg_off <= reg_last);
|
||||
|
||||
cpy_fn(za, &scratch, reg_max);
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* The entire operation is in RAM, on valid pages. */
|
||||
|
||||
reg_off = info.reg_off_first[0];
|
||||
reg_last = info.reg_off_last[0];
|
||||
host = info.page[0].host;
|
||||
|
||||
if (!vertical) {
|
||||
memset(za, 0, reg_max);
|
||||
} else if (reg_off) {
|
||||
clr_fn(za, 0, reg_off);
|
||||
}
|
||||
|
||||
while (reg_off <= reg_last) {
|
||||
uint64_t pg = vg[reg_off >> 6];
|
||||
do {
|
||||
if ((pg >> (reg_off & 63)) & 1) {
|
||||
host_fn(za, reg_off, host + reg_off);
|
||||
} else if (vertical) {
|
||||
clr_fn(za, reg_off, esize);
|
||||
}
|
||||
reg_off += esize;
|
||||
} while (reg_off <= reg_last && (reg_off & 63));
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the slow path to manage the cross-page misalignment.
|
||||
* But we know this is RAM and cannot trap.
|
||||
*/
|
||||
reg_off = info.reg_off_split;
|
||||
if (unlikely(reg_off >= 0)) {
|
||||
tlb_fn(env, za, reg_off, addr + reg_off, ra);
|
||||
}
|
||||
|
||||
reg_off = info.reg_off_first[1];
|
||||
if (unlikely(reg_off >= 0)) {
|
||||
reg_last = info.reg_off_last[1];
|
||||
host = info.page[1].host;
|
||||
|
||||
do {
|
||||
uint64_t pg = vg[reg_off >> 6];
|
||||
do {
|
||||
if ((pg >> (reg_off & 63)) & 1) {
|
||||
host_fn(za, reg_off, host + reg_off);
|
||||
} else if (vertical) {
|
||||
clr_fn(za, reg_off, esize);
|
||||
}
|
||||
reg_off += esize;
|
||||
} while (reg_off & 63);
|
||||
} while (reg_off <= reg_last);
|
||||
}
|
||||
}
|
||||
|
||||
static inline QEMU_ALWAYS_INLINE
|
||||
void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
|
||||
target_ulong addr, uint32_t desc, uintptr_t ra,
|
||||
const int esz, bool vertical,
|
||||
sve_ldst1_host_fn *host_fn,
|
||||
sve_ldst1_tlb_fn *tlb_fn,
|
||||
ClearFn *clr_fn,
|
||||
CopyFn *cpy_fn)
|
||||
{
|
||||
uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
|
||||
int bit55 = extract64(addr, 55, 1);
|
||||
|
||||
/* Remove mtedesc from the normal sve descriptor. */
|
||||
desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
|
||||
|
||||
/* Perform gross MTE suppression early. */
|
||||
if (!tbi_check(desc, bit55) ||
|
||||
tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
|
||||
mtedesc = 0;
|
||||
}
|
||||
|
||||
sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
|
||||
host_fn, tlb_fn, clr_fn, cpy_fn);
|
||||
}
|
||||
|
||||
#define DO_LD(L, END, ESZ) \
|
||||
void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
|
||||
sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
|
||||
clear_horizontal, copy_horizontal); \
|
||||
} \
|
||||
void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
|
||||
sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
|
||||
clear_vertical_##L, copy_vertical_##L); \
|
||||
} \
|
||||
void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
|
||||
sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
|
||||
clear_horizontal, copy_horizontal); \
|
||||
} \
|
||||
void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
|
||||
sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
|
||||
clear_vertical_##L, copy_vertical_##L); \
|
||||
}
|
||||
|
||||
DO_LD(b, , MO_8)
|
||||
DO_LD(h, _be, MO_16)
|
||||
DO_LD(h, _le, MO_16)
|
||||
DO_LD(s, _be, MO_32)
|
||||
DO_LD(s, _le, MO_32)
|
||||
DO_LD(d, _be, MO_64)
|
||||
DO_LD(d, _le, MO_64)
|
||||
DO_LD(q, _be, MO_128)
|
||||
DO_LD(q, _le, MO_128)
|
||||
|
||||
#undef DO_LD
|
||||
|
||||
/*
|
||||
* Common helper for all contiguous predicated stores.
|
||||
*/
|
||||
|
||||
static inline QEMU_ALWAYS_INLINE
|
||||
void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
|
||||
const target_ulong addr, uint32_t desc, const uintptr_t ra,
|
||||
const int esz, uint32_t mtedesc, bool vertical,
|
||||
sve_ldst1_host_fn *host_fn,
|
||||
sve_ldst1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const intptr_t reg_max = simd_oprsz(desc);
|
||||
const intptr_t esize = 1 << esz;
|
||||
intptr_t reg_off, reg_last;
|
||||
SVEContLdSt info;
|
||||
void *host;
|
||||
int flags;
|
||||
|
||||
/* Find the active elements. */
|
||||
if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
|
||||
/* The entire predicate was false; no store occurs. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* Probe the page(s). Exit with exception for any invalid page. */
|
||||
sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
|
||||
|
||||
/* Handle watchpoints for all active elements. */
|
||||
sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
|
||||
BP_MEM_WRITE, ra);
|
||||
|
||||
/*
|
||||
* Handle mte checks for all active elements.
|
||||
* Since TBI must be set for MTE, !mtedesc => !mte_active.
|
||||
*/
|
||||
if (mtedesc) {
|
||||
sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
|
||||
mtedesc, ra);
|
||||
}
|
||||
|
||||
flags = info.page[0].flags | info.page[1].flags;
|
||||
if (unlikely(flags != 0)) {
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
g_assert_not_reached();
|
||||
#else
|
||||
/*
|
||||
* At least one page includes MMIO.
|
||||
* Any bus operation can fail with cpu_transaction_failed,
|
||||
* which for ARM will raise SyncExternal. We cannot avoid
|
||||
* this fault and will leave with the store incomplete.
|
||||
*/
|
||||
reg_off = info.reg_off_first[0];
|
||||
reg_last = info.reg_off_last[1];
|
||||
if (reg_last < 0) {
|
||||
reg_last = info.reg_off_split;
|
||||
if (reg_last < 0) {
|
||||
reg_last = info.reg_off_last[0];
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
uint64_t pg = vg[reg_off >> 6];
|
||||
do {
|
||||
if ((pg >> (reg_off & 63)) & 1) {
|
||||
tlb_fn(env, za, reg_off, addr + reg_off, ra);
|
||||
}
|
||||
reg_off += esize;
|
||||
} while (reg_off & 63);
|
||||
} while (reg_off <= reg_last);
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
reg_off = info.reg_off_first[0];
|
||||
reg_last = info.reg_off_last[0];
|
||||
host = info.page[0].host;
|
||||
|
||||
while (reg_off <= reg_last) {
|
||||
uint64_t pg = vg[reg_off >> 6];
|
||||
do {
|
||||
if ((pg >> (reg_off & 63)) & 1) {
|
||||
host_fn(za, reg_off, host + reg_off);
|
||||
}
|
||||
reg_off += 1 << esz;
|
||||
} while (reg_off <= reg_last && (reg_off & 63));
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the slow path to manage the cross-page misalignment.
|
||||
* But we know this is RAM and cannot trap.
|
||||
*/
|
||||
reg_off = info.reg_off_split;
|
||||
if (unlikely(reg_off >= 0)) {
|
||||
tlb_fn(env, za, reg_off, addr + reg_off, ra);
|
||||
}
|
||||
|
||||
reg_off = info.reg_off_first[1];
|
||||
if (unlikely(reg_off >= 0)) {
|
||||
reg_last = info.reg_off_last[1];
|
||||
host = info.page[1].host;
|
||||
|
||||
do {
|
||||
uint64_t pg = vg[reg_off >> 6];
|
||||
do {
|
||||
if ((pg >> (reg_off & 63)) & 1) {
|
||||
host_fn(za, reg_off, host + reg_off);
|
||||
}
|
||||
reg_off += 1 << esz;
|
||||
} while (reg_off & 63);
|
||||
} while (reg_off <= reg_last);
|
||||
}
|
||||
}
|
||||
|
||||
static inline QEMU_ALWAYS_INLINE
|
||||
void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
|
||||
uint32_t desc, uintptr_t ra, int esz, bool vertical,
|
||||
sve_ldst1_host_fn *host_fn,
|
||||
sve_ldst1_tlb_fn *tlb_fn)
|
||||
{
|
||||
uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
|
||||
int bit55 = extract64(addr, 55, 1);
|
||||
|
||||
/* Remove mtedesc from the normal sve descriptor. */
|
||||
desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
|
||||
|
||||
/* Perform gross MTE suppression early. */
|
||||
if (!tbi_check(desc, bit55) ||
|
||||
tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
|
||||
mtedesc = 0;
|
||||
}
|
||||
|
||||
sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
|
||||
vertical, host_fn, tlb_fn);
|
||||
}
|
||||
|
||||
#define DO_ST(L, END, ESZ) \
|
||||
void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
|
||||
sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
|
||||
} \
|
||||
void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
|
||||
sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
|
||||
} \
|
||||
void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
|
||||
sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
|
||||
} \
|
||||
void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
|
||||
sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
|
||||
}
|
||||
|
||||
DO_ST(b, , MO_8)
|
||||
DO_ST(h, _be, MO_16)
|
||||
DO_ST(h, _le, MO_16)
|
||||
DO_ST(s, _be, MO_32)
|
||||
DO_ST(s, _le, MO_32)
|
||||
DO_ST(d, _be, MO_64)
|
||||
DO_ST(d, _le, MO_64)
|
||||
DO_ST(q, _be, MO_128)
|
||||
DO_ST(q, _le, MO_128)
|
||||
|
||||
#undef DO_ST
|
||||
|
@ -173,3 +173,73 @@ static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
|
||||
{
|
||||
typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
|
||||
|
||||
/*
|
||||
* Indexed by [esz][be][v][mte][st], which is (except for load/store)
|
||||
* also the order in which the elements appear in the function names,
|
||||
* and so how we must concatenate the pieces.
|
||||
*/
|
||||
|
||||
#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
|
||||
#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
|
||||
#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
|
||||
#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
|
||||
|
||||
static GenLdSt1 * const fns[5][2][2][2][2] = {
|
||||
FN_END(b, b),
|
||||
FN_END(h_le, h_be),
|
||||
FN_END(s_le, s_be),
|
||||
FN_END(d_le, d_be),
|
||||
FN_END(q_le, q_be),
|
||||
};
|
||||
|
||||
#undef FN_LS
|
||||
#undef FN_MTE
|
||||
#undef FN_HV
|
||||
#undef FN_END
|
||||
|
||||
TCGv_ptr t_za, t_pg;
|
||||
TCGv_i64 addr;
|
||||
int svl, desc = 0;
|
||||
bool be = s->be_data == MO_BE;
|
||||
bool mte = s->mte_active[0];
|
||||
|
||||
if (!dc_isar_feature(aa64_sme, s)) {
|
||||
return false;
|
||||
}
|
||||
if (!sme_smza_enabled_check(s)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
|
||||
t_pg = pred_full_reg_ptr(s, a->pg);
|
||||
addr = tcg_temp_new_i64();
|
||||
|
||||
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
|
||||
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
|
||||
|
||||
if (mte) {
|
||||
desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
|
||||
desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
|
||||
desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
|
||||
desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
|
||||
desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
|
||||
desc <<= SVE_MTEDESC_SHIFT;
|
||||
} else {
|
||||
addr = clean_data_tbi(s, addr);
|
||||
}
|
||||
svl = streaming_vec_reg_size(s);
|
||||
desc = simd_desc(svl, svl, desc);
|
||||
|
||||
fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
|
||||
tcg_constant_i32(desc));
|
||||
|
||||
tcg_temp_free_ptr(t_za);
|
||||
tcg_temp_free_ptr(t_pg);
|
||||
tcg_temp_free_i64(addr);
|
||||
return true;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user