tcg queued patches
-----BEGIN PGP SIGNATURE-----

iQEcBAABAgAGBQJX3AxMAAoJEK0ScMxN0CebW1MIAMcmQ31/Za4/9H5PQdTs1rVh
GuK3HKK8jiTIvr8RGLMPZT3439rYm45axHIgoUx82UMcu2UkWuxFkt0KhMd6dnDY
Y5YeFeuYDhq0L84iXP54UVKsI3ePy6TwuoDNc+KRzT5vxyRWWmfm5s4V8b2dj+L0
FhS7w+qs0nio58ptX7C65KBJEuWSSQRoO3d8me/c77GVK2hgW6oOoul+E0BN1Xip
+02BThRZJNaaoeRgHK29UNHj0DWZErOxq/Ti82rSmLy3TjKGKav9GRPb5I8nIjEt
nNHgB+N+qlv45h/WELa+dxLXfbLwUX6hultTp0vA+nrCIqdeiGQcMeVgz2F9zmo=
=zDXe
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20160916' into staging

tcg queued patches

# gpg: Signature made Fri 16 Sep 2016 16:14:20 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC 16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-tcg-20160916:
  tcg: Optimize fence instructions
  target-i386: Generate fences for x86
  target-aarch64: Generate fences for aarch64
  target-arm: Generate fences in ARMv7 frontend
  target-alpha: Generate fence op
  tcg/tci: Add support for fence
  tcg/sparc: Add support for fence
  tcg/s390: Add support for fence
  tcg/ppc: Add support for fence
  tcg/mips: Add support for fence
  tcg/ia64: Add support for fence
  tcg/arm: Add support for fence
  tcg/aarch64: Add support for fence
  tcg/i386: Add support for fence
  Introduce TCGOpcode for memory barrier
  cpu-exec: Check -dfilter for -d cpu
  tcg: Merge GETPC and GETRA
  tcg: Support arbitrary size + alignment

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit e3571ae30c
cpu-exec.c
@@ -147,7 +147,8 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
                            itb->tc_ptr, itb->pc, lookup_symbol(itb->pc));
 
 #if defined(DEBUG_DISAS)
-    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
+    if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
+        && qemu_log_in_addr_range(itb->pc)) {
 #if defined(TARGET_I386)
         log_cpu_state(cpu, CPU_DUMP_CCOP);
 #elif defined(TARGET_M68K)
cputlb.c | 6
@@ -543,10 +543,8 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
 #undef MMUSUFFIX
 
 #define MMUSUFFIX _cmmu
-#undef GETPC_ADJ
-#define GETPC_ADJ 0
-#undef GETRA
-#define GETRA() ((uintptr_t)0)
+#undef GETPC
+#define GETPC() ((uintptr_t)0)
 #define SOFTMMU_CODE_ACCESS
 
 #define SHIFT 0
include/exec/exec-all.h
@@ -349,13 +349,12 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
     tb_next->jmp_list_first = (uintptr_t)tb | n;
 }
 
-/* GETRA is the true target of the return instruction that we'll execute,
-   defined here for simplicity of defining the follow-up macros.  */
+/* GETPC is the true target of the return instruction that we'll execute.  */
 #if defined(CONFIG_TCG_INTERPRETER)
 extern uintptr_t tci_tb_ptr;
-# define GETRA() tci_tb_ptr
+# define GETPC() tci_tb_ptr
 #else
-# define GETRA() \
+# define GETPC() \
     ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0)))
 #endif
 
@@ -368,8 +367,6 @@ extern uintptr_t tci_tb_ptr;
    smaller than 4 bytes, so we don't worry about special-casing this.  */
 #define GETPC_ADJ 2
 
-#define GETPC() (GETRA() - GETPC_ADJ)
-
 #if !defined(CONFIG_USER_ONLY)
 
 struct MemoryRegion *iotlb_to_region(CPUState *cpu,
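Note (added for context, not part of the pull request): the standalone sketch below only illustrates the compiler builtin that the unified GETPC() macro above is built on. GETPC_DEMO and helper_like_function are invented names for the demo; QEMU itself looks the returned address up against its translation blocks instead of printing it.

/* Standalone sketch of the mechanism behind GETPC().  Compile with GCC or
 * Clang; the builtin returns the address the current function will return
 * to, which QEMU uses to map a fault back to the generated code that
 * called the helper. */
#include <inttypes.h>
#include <stdio.h>

#define GETPC_DEMO() \
    ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0)))

static __attribute__((noinline)) void helper_like_function(void)
{
    /* In QEMU this value would identify the calling TB; here we print it. */
    printf("return address: 0x%" PRIxPTR "\n", GETPC_DEMO());
}

int main(void)
{
    helper_like_function();
    return 0;
}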
@ -146,14 +146,11 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
|
||||
unsigned mmu_idx = get_mmuidx(oi);
|
||||
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
|
||||
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
|
||||
int a_bits = get_alignment_bits(get_memop(oi));
|
||||
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
||||
uintptr_t haddr;
|
||||
DATA_TYPE res;
|
||||
|
||||
/* Adjust the given return address. */
|
||||
retaddr -= GETPC_ADJ;
|
||||
|
||||
if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
|
||||
if (addr & ((1 << a_bits) - 1)) {
|
||||
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
|
||||
mmu_idx, retaddr);
|
||||
}
|
||||
@ -193,10 +190,8 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
|
||||
do_unaligned_access:
|
||||
addr1 = addr & ~(DATA_SIZE - 1);
|
||||
addr2 = addr1 + DATA_SIZE;
|
||||
/* Note the adjustment at the beginning of the function.
|
||||
Undo that for the recursion. */
|
||||
res1 = helper_le_ld_name(env, addr1, oi, retaddr + GETPC_ADJ);
|
||||
res2 = helper_le_ld_name(env, addr2, oi, retaddr + GETPC_ADJ);
|
||||
res1 = helper_le_ld_name(env, addr1, oi, retaddr);
|
||||
res2 = helper_le_ld_name(env, addr2, oi, retaddr);
|
||||
shift = (addr & (DATA_SIZE - 1)) * 8;
|
||||
|
||||
/* Little-endian combine. */
|
||||
@ -220,14 +215,11 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
|
||||
unsigned mmu_idx = get_mmuidx(oi);
|
||||
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
|
||||
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
|
||||
int a_bits = get_alignment_bits(get_memop(oi));
|
||||
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
||||
uintptr_t haddr;
|
||||
DATA_TYPE res;
|
||||
|
||||
/* Adjust the given return address. */
|
||||
retaddr -= GETPC_ADJ;
|
||||
|
||||
if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
|
||||
if (addr & ((1 << a_bits) - 1)) {
|
||||
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
|
||||
mmu_idx, retaddr);
|
||||
}
|
||||
@ -267,10 +259,8 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
|
||||
do_unaligned_access:
|
||||
addr1 = addr & ~(DATA_SIZE - 1);
|
||||
addr2 = addr1 + DATA_SIZE;
|
||||
/* Note the adjustment at the beginning of the function.
|
||||
Undo that for the recursion. */
|
||||
res1 = helper_be_ld_name(env, addr1, oi, retaddr + GETPC_ADJ);
|
||||
res2 = helper_be_ld_name(env, addr2, oi, retaddr + GETPC_ADJ);
|
||||
res1 = helper_be_ld_name(env, addr1, oi, retaddr);
|
||||
res2 = helper_be_ld_name(env, addr2, oi, retaddr);
|
||||
shift = (addr & (DATA_SIZE - 1)) * 8;
|
||||
|
||||
/* Big-endian combine. */
|
||||
@ -331,13 +321,10 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
|
||||
unsigned mmu_idx = get_mmuidx(oi);
|
||||
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
|
||||
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
|
||||
int a_bits = get_alignment_bits(get_memop(oi));
|
||||
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
||||
uintptr_t haddr;
|
||||
|
||||
/* Adjust the given return address. */
|
||||
retaddr -= GETPC_ADJ;
|
||||
|
||||
if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
|
||||
if (addr & ((1 << a_bits) - 1)) {
|
||||
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
|
||||
mmu_idx, retaddr);
|
||||
}
|
||||
@ -391,10 +378,8 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
|
||||
for (i = 0; i < DATA_SIZE; ++i) {
|
||||
/* Little-endian extract. */
|
||||
uint8_t val8 = val >> (i * 8);
|
||||
/* Note the adjustment at the beginning of the function.
|
||||
Undo that for the recursion. */
|
||||
glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
|
||||
oi, retaddr + GETPC_ADJ);
|
||||
oi, retaddr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -414,13 +399,10 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
|
||||
unsigned mmu_idx = get_mmuidx(oi);
|
||||
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
|
||||
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
|
||||
int a_bits = get_alignment_bits(get_memop(oi));
|
||||
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
||||
uintptr_t haddr;
|
||||
|
||||
/* Adjust the given return address. */
|
||||
retaddr -= GETPC_ADJ;
|
||||
|
||||
if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
|
||||
if (addr & ((1 << a_bits) - 1)) {
|
||||
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
|
||||
mmu_idx, retaddr);
|
||||
}
|
||||
@ -474,10 +456,8 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
|
||||
for (i = 0; i < DATA_SIZE; ++i) {
|
||||
/* Big-endian extract. */
|
||||
uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
|
||||
/* Note the adjustment at the beginning of the function.
|
||||
Undo that for the recursion. */
|
||||
glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
|
||||
oi, retaddr + GETPC_ADJ);
|
||||
oi, retaddr);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
target-alpha/translate.c
@@ -2338,11 +2338,11 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x4000:
             /* MB */
-            /* No-op */
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
             break;
         case 0x4400:
             /* WMB */
-            /* No-op */
+            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
             break;
         case 0x8000:
             /* FETCH */
@ -8310,12 +8310,12 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
|
||||
* this purpose use the actual register value passed to us
|
||||
* so that we get the fault address right.
|
||||
*/
|
||||
helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETRA());
|
||||
helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
|
||||
/* Now we can populate the other TLB entries, if any */
|
||||
for (i = 0; i < maxidx; i++) {
|
||||
uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
|
||||
if (va != (vaddr_in & TARGET_PAGE_MASK)) {
|
||||
helper_ret_stb_mmu(env, va, 0, oi, GETRA());
|
||||
helper_ret_stb_mmu(env, va, 0, oi, GETPC());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -8332,7 +8332,7 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
|
||||
* bounce buffer was in use
|
||||
*/
|
||||
for (i = 0; i < blocklen; i++) {
|
||||
helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETRA());
|
||||
helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
@ -1294,6 +1294,8 @@ static void gen_clrex(DisasContext *s, uint32_t insn)
|
||||
static void handle_sync(DisasContext *s, uint32_t insn,
|
||||
unsigned int op1, unsigned int op2, unsigned int crm)
|
||||
{
|
||||
TCGBar bar;
|
||||
|
||||
if (op1 != 3) {
|
||||
unallocated_encoding(s);
|
||||
return;
|
||||
@ -1305,7 +1307,18 @@ static void handle_sync(DisasContext *s, uint32_t insn,
|
||||
return;
|
||||
case 4: /* DSB */
|
||||
case 5: /* DMB */
|
||||
/* We don't emulate caches so barriers are no-ops */
|
||||
switch (crm & 3) {
|
||||
case 1: /* MBReqTypes_Reads */
|
||||
bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
|
||||
break;
|
||||
case 2: /* MBReqTypes_Writes */
|
||||
bar = TCG_BAR_SC | TCG_MO_ST_ST;
|
||||
break;
|
||||
default: /* MBReqTypes_All */
|
||||
bar = TCG_BAR_SC | TCG_MO_ALL;
|
||||
break;
|
||||
}
|
||||
tcg_gen_mb(bar);
|
||||
return;
|
||||
case 6: /* ISB */
|
||||
/* We need to break the TB after this insn to execute
|
||||
@ -1934,7 +1947,13 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
|
||||
if (!is_store) {
|
||||
s->is_ldex = true;
|
||||
gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
|
||||
if (is_lasr) {
|
||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
|
||||
}
|
||||
} else {
|
||||
if (is_lasr) {
|
||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
|
||||
}
|
||||
gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
|
||||
}
|
||||
} else {
|
||||
@ -1943,11 +1962,17 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
|
||||
|
||||
/* Generate ISS for non-exclusive accesses including LASR. */
|
||||
if (is_store) {
|
||||
if (is_lasr) {
|
||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
|
||||
}
|
||||
do_gpr_st(s, tcg_rt, tcg_addr, size,
|
||||
true, rt, iss_sf, is_lasr);
|
||||
} else {
|
||||
do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
|
||||
true, rt, iss_sf, is_lasr);
|
||||
if (is_lasr) {
|
||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8083,7 +8083,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
|
||||
case 4: /* dsb */
|
||||
case 5: /* dmb */
|
||||
ARCH(7);
|
||||
/* We don't emulate caches so these are a no-op. */
|
||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
|
||||
return;
|
||||
case 6: /* isb */
|
||||
/* We need to break the TB after this insn to execute
|
||||
@ -10432,7 +10432,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
|
||||
break;
|
||||
case 4: /* dsb */
|
||||
case 5: /* dmb */
|
||||
/* These execute as NOPs. */
|
||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
|
||||
break;
|
||||
case 6: /* isb */
|
||||
/* We need to break the TB after this insn
|
||||
|
@ -8012,13 +8012,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
|
||||
|| (prefixes & PREFIX_LOCK)) {
|
||||
goto illegal_op;
|
||||
}
|
||||
tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
|
||||
break;
|
||||
case 0xe8 ... 0xef: /* lfence */
|
||||
if (!(s->cpuid_features & CPUID_SSE)
|
||||
|| (prefixes & PREFIX_LOCK)) {
|
||||
goto illegal_op;
|
||||
}
|
||||
tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
|
||||
break;
|
||||
case 0xf0 ... 0xf7: /* mfence */
|
||||
if (!(s->cpuid_features & CPUID_SSE2)
|
||||
|| (prefixes & PREFIX_LOCK)) {
|
||||
goto illegal_op;
|
||||
}
|
||||
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -4122,10 +4122,10 @@ void helper_msa_ld_ ## TYPE(CPUMIPSState *env, uint32_t wd, \
|
||||
}
|
||||
|
||||
#if !defined(CONFIG_USER_ONLY)
|
||||
MSA_LD_DF(DF_BYTE, b, helper_ret_ldub_mmu, oi, GETRA())
|
||||
MSA_LD_DF(DF_HALF, h, helper_ret_lduw_mmu, oi, GETRA())
|
||||
MSA_LD_DF(DF_WORD, w, helper_ret_ldul_mmu, oi, GETRA())
|
||||
MSA_LD_DF(DF_DOUBLE, d, helper_ret_ldq_mmu, oi, GETRA())
|
||||
MSA_LD_DF(DF_BYTE, b, helper_ret_ldub_mmu, oi, GETPC())
|
||||
MSA_LD_DF(DF_HALF, h, helper_ret_lduw_mmu, oi, GETPC())
|
||||
MSA_LD_DF(DF_WORD, w, helper_ret_ldul_mmu, oi, GETPC())
|
||||
MSA_LD_DF(DF_DOUBLE, d, helper_ret_ldq_mmu, oi, GETPC())
|
||||
#else
|
||||
MSA_LD_DF(DF_BYTE, b, cpu_ldub_data)
|
||||
MSA_LD_DF(DF_HALF, h, cpu_lduw_data)
|
||||
@ -4161,17 +4161,17 @@ void helper_msa_st_ ## TYPE(CPUMIPSState *env, uint32_t wd, \
|
||||
int mmu_idx = cpu_mmu_index(env, false); \
|
||||
int i; \
|
||||
MEMOP_IDX(DF) \
|
||||
ensure_writable_pages(env, addr, mmu_idx, GETRA()); \
|
||||
ensure_writable_pages(env, addr, mmu_idx, GETPC()); \
|
||||
for (i = 0; i < DF_ELEMENTS(DF); i++) { \
|
||||
ST_INSN(env, addr + (i << DF), pwd->TYPE[i], ##__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
|
||||
#if !defined(CONFIG_USER_ONLY)
|
||||
MSA_ST_DF(DF_BYTE, b, helper_ret_stb_mmu, oi, GETRA())
|
||||
MSA_ST_DF(DF_HALF, h, helper_ret_stw_mmu, oi, GETRA())
|
||||
MSA_ST_DF(DF_WORD, w, helper_ret_stl_mmu, oi, GETRA())
|
||||
MSA_ST_DF(DF_DOUBLE, d, helper_ret_stq_mmu, oi, GETRA())
|
||||
MSA_ST_DF(DF_BYTE, b, helper_ret_stb_mmu, oi, GETPC())
|
||||
MSA_ST_DF(DF_HALF, h, helper_ret_stw_mmu, oi, GETPC())
|
||||
MSA_ST_DF(DF_WORD, w, helper_ret_stl_mmu, oi, GETPC())
|
||||
MSA_ST_DF(DF_DOUBLE, d, helper_ret_stq_mmu, oi, GETPC())
|
||||
#else
|
||||
MSA_ST_DF(DF_BYTE, b, cpu_stb_data)
|
||||
MSA_ST_DF(DF_HALF, h, cpu_stw_data)
|
||||
|
tcg/README | 17
@@ -402,6 +402,23 @@ double-word product T0.  The later is returned in two single-word outputs.
 
 Similar to mulu2, except the two inputs T1 and T2 are signed.
 
+********* Memory Barrier support
+
+* mb <$arg>
+
+Generate a target memory barrier instruction to ensure memory ordering as being
+enforced by a corresponding guest memory barrier instruction. The ordering
+enforced by the backend may be stricter than the ordering required by the guest.
+It cannot be weaker. This opcode takes a constant argument which is required to
+generate the appropriate barrier instruction. The backend should take care to
+emit the target barrier instruction only when necessary i.e., for SMP guests and
+when MTTCG is enabled.
+
+The guest translators should generate this opcode for all guest instructions
+which have ordering side effects.
+
+Please see docs/atomics.txt for more information on memory barriers.
+
 ********* 64-bit guest on 32-bit host support
 
 The following opcodes are internal to TCG.  Thus they are to be implemented by
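Added for illustration (not part of the pull request): a standalone sketch of how the constant argument to the mb opcode composes. The values mirror the TCG_MO_*/TCG_BAR_* constants introduced in tcg/tcg.h later in this series; describe_barrier() is an invented helper, and the three example arguments are the ones the x86 frontend emits for mfence, sfence and lfence in this same series.

/* Standalone sketch of the mb argument encoding used with tcg_gen_mb().
 * Low nibble: which access pairs must stay ordered; high nibble: barrier
 * kind.  Constants mirror tcg/tcg.h; this program only decodes them. */
#include <stdio.h>

enum {
    TCG_MO_LD_LD = 0x01, TCG_MO_ST_LD = 0x02,
    TCG_MO_LD_ST = 0x04, TCG_MO_ST_ST = 0x08,
    TCG_MO_ALL   = 0x0F,
    TCG_BAR_LDAQ = 0x10, TCG_BAR_STRL = 0x20, TCG_BAR_SC = 0x30,
};

static void describe_barrier(int arg)
{
    printf("orders:%s%s%s%s kind:0x%x\n",
           (arg & TCG_MO_LD_LD) ? " ld-ld" : "",
           (arg & TCG_MO_ST_LD) ? " st-ld" : "",
           (arg & TCG_MO_LD_ST) ? " ld-st" : "",
           (arg & TCG_MO_ST_ST) ? " st-st" : "",
           (unsigned)(arg & TCG_BAR_SC));
}

int main(void)
{
    describe_barrier(TCG_MO_ALL | TCG_BAR_SC);    /* e.g. x86 mfence */
    describe_barrier(TCG_MO_ST_ST | TCG_BAR_SC);  /* e.g. x86 sfence */
    describe_barrier(TCG_MO_LD_LD | TCG_BAR_SC);  /* e.g. x86 lfence */
    return 0;
}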
@ -372,6 +372,11 @@ typedef enum {
|
||||
I3510_EOR = 0x4a000000,
|
||||
I3510_EON = 0x4a200000,
|
||||
I3510_ANDS = 0x6a000000,
|
||||
|
||||
/* System instructions. */
|
||||
DMB_ISH = 0xd50338bf,
|
||||
DMB_LD = 0x00000100,
|
||||
DMB_ST = 0x00000200,
|
||||
} AArch64Insn;
|
||||
|
||||
static inline uint32_t tcg_in32(TCGContext *s)
|
||||
@ -981,6 +986,18 @@ static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
|
||||
tcg_out_mov(s, ext, orig_rl, rl);
|
||||
}
|
||||
|
||||
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
|
||||
{
|
||||
static const uint32_t sync[] = {
|
||||
[0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
|
||||
[TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
|
||||
[TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
|
||||
[TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
|
||||
[TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
|
||||
};
|
||||
tcg_out32(s, sync[a0 & TCG_MO_ALL]);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
|
||||
* TCGMemOpIdx oi, uintptr_t ra)
|
||||
@ -1081,23 +1098,24 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
|
||||
int tlb_offset = is_read ?
|
||||
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
|
||||
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
|
||||
int a_bits = get_alignment_bits(opc);
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_mask = (1u << a_bits) - 1;
|
||||
unsigned s_mask = (1u << s_bits) - 1;
|
||||
TCGReg base = TCG_AREG0, x3;
|
||||
uint64_t tlb_mask;
|
||||
|
||||
/* For aligned accesses, we check the first byte and include the alignment
|
||||
bits within the address. For unaligned access, we check that we don't
|
||||
cross pages using the address of the last byte of the access. */
|
||||
if (a_bits >= 0) {
|
||||
/* A byte access or an alignment check required */
|
||||
tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
|
||||
if (a_bits >= s_bits) {
|
||||
x3 = addr_reg;
|
||||
} else {
|
||||
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
|
||||
TCG_REG_X3, addr_reg, (1 << (opc & MO_SIZE)) - 1);
|
||||
tlb_mask = TARGET_PAGE_MASK;
|
||||
TCG_REG_X3, addr_reg, s_mask - a_mask);
|
||||
x3 = TCG_REG_X3;
|
||||
}
|
||||
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
|
||||
|
||||
/* Extract the TLB index from the address into X0.
|
||||
X0<CPU_TLB_BITS:0> =
|
||||
@ -1648,6 +1666,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
tcg_out_mb(s, a0);
|
||||
break;
|
||||
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
@ -1772,6 +1794,7 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
|
||||
{ INDEX_op_muluh_i64, { "r", "r", "r" } },
|
||||
{ INDEX_op_mulsh_i64, { "r", "r", "r" } },
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
|
@ -313,6 +313,10 @@ typedef enum {
|
||||
INSN_LDRD_REG = 0x000000d0,
|
||||
INSN_STRD_IMM = 0x004000f0,
|
||||
INSN_STRD_REG = 0x000000f0,
|
||||
|
||||
INSN_DMB_ISH = 0x5bf07ff5,
|
||||
INSN_DMB_MCR = 0xba0f07ee,
|
||||
|
||||
} ARMInsn;
|
||||
|
||||
#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
|
||||
@ -1066,6 +1070,15 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
|
||||
{
|
||||
if (use_armv7_instructions) {
|
||||
tcg_out32(s, INSN_DMB_ISH);
|
||||
} else if (use_armv6_instructions) {
|
||||
tcg_out32(s, INSN_DMB_MCR);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
|
||||
* int mmu_idx, uintptr_t ra)
|
||||
@ -1168,7 +1181,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
|
||||
containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
|
||||
|
||||
static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
|
||||
TCGMemOp s_bits, int mem_index, bool is_load)
|
||||
TCGMemOp opc, int mem_index, bool is_load)
|
||||
{
|
||||
TCGReg base = TCG_AREG0;
|
||||
int cmp_off =
|
||||
@ -1176,6 +1189,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
|
||||
? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
|
||||
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
|
||||
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
|
||||
/* Should generate something like the following:
|
||||
* shr tmp, addrlo, #TARGET_PAGE_BITS (1)
|
||||
@ -1216,10 +1231,13 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
|
||||
}
|
||||
}
|
||||
|
||||
/* Check alignment. */
|
||||
if (s_bits) {
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_TST,
|
||||
0, addrlo, (1 << s_bits) - 1);
|
||||
/* Check alignment. We don't support inline unaligned acceses,
|
||||
but we can easily support overalignment checks. */
|
||||
if (a_bits < s_bits) {
|
||||
a_bits = s_bits;
|
||||
}
|
||||
if (a_bits) {
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
|
||||
}
|
||||
|
||||
/* Load the tlb addend. */
|
||||
@ -1499,7 +1517,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
mem_index = get_mmuidx(oi);
|
||||
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
|
||||
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
|
||||
|
||||
/* This a conditional BL only to load a pointer within this opcode into LR
|
||||
for the slow path. We will not be using the value for a tail call. */
|
||||
@ -1630,7 +1648,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
mem_index = get_mmuidx(oi);
|
||||
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
|
||||
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
|
||||
|
||||
tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
|
||||
|
||||
@ -1923,6 +1941,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
tcg_out_mb(s, args[0]);
|
||||
break;
|
||||
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
case INDEX_op_call: /* Always emitted via tcg_out_call. */
|
||||
@ -1997,6 +2019,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
|
||||
{ INDEX_op_div_i32, { "r", "r", "r" } },
|
||||
{ INDEX_op_divu_i32, { "r", "r", "r" } },
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
|
@ -686,6 +686,18 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
|
||||
{
|
||||
/* Given the strength of x86 memory ordering, we only need care for
|
||||
store-load ordering. Experimentally, "lock orl $0,0(%esp)" is
|
||||
faster than "mfence", so don't bother with the sse insn. */
|
||||
if (a0 & TCG_MO_ST_LD) {
|
||||
tcg_out8(s, 0xf0);
|
||||
tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
|
||||
tcg_out8(s, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void tcg_out_push(TCGContext *s, int reg)
|
||||
{
|
||||
tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
|
||||
@ -1202,7 +1214,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
|
||||
TCGType ttype = TCG_TYPE_I32;
|
||||
TCGType tlbtype = TCG_TYPE_I32;
|
||||
int trexw = 0, hrexw = 0, tlbrexw = 0;
|
||||
int a_bits = get_alignment_bits(opc);
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_mask = (1 << a_bits) - 1;
|
||||
unsigned s_mask = (1 << s_bits) - 1;
|
||||
target_ulong tlb_mask;
|
||||
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
@ -1220,17 +1235,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
|
||||
}
|
||||
|
||||
tcg_out_mov(s, tlbtype, r0, addrlo);
|
||||
if (a_bits >= 0) {
|
||||
/* A byte access or an alignment check required */
|
||||
/* If the required alignment is at least as large as the access, simply
|
||||
copy the address and mask. For lesser alignments, check that we don't
|
||||
cross pages for the complete access. */
|
||||
if (a_bits >= s_bits) {
|
||||
tcg_out_mov(s, ttype, r1, addrlo);
|
||||
tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
|
||||
} else {
|
||||
/* For unaligned access check that we don't cross pages using
|
||||
the page address of the last byte. */
|
||||
tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo,
|
||||
(1 << (opc & MO_SIZE)) - 1);
|
||||
tlb_mask = TARGET_PAGE_MASK;
|
||||
tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
|
||||
}
|
||||
tlb_mask = TARGET_PAGE_MASK | a_mask;
|
||||
|
||||
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
|
||||
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
|
||||
@ -2130,6 +2143,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
}
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
tcg_out_mb(s, args[0]);
|
||||
break;
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
@ -2195,6 +2211,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
|
||||
{ INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
|
||||
{ INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 32
|
||||
{ INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
|
||||
{ INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
|
||||
|
@ -247,6 +247,7 @@ enum {
|
||||
OPC_LD4_M3 = 0x0a080000000ull,
|
||||
OPC_LD8_M1 = 0x080c0000000ull,
|
||||
OPC_LD8_M3 = 0x0a0c0000000ull,
|
||||
OPC_MF_M24 = 0x00110000000ull,
|
||||
OPC_MUX1_I3 = 0x0eca0000000ull,
|
||||
OPC_NOP_B9 = 0x04008000000ull,
|
||||
OPC_NOP_F16 = 0x00008000000ull,
|
||||
@ -1496,10 +1497,18 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
|
||||
R1, R3 are clobbered, leaving R56 free for...
|
||||
BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */
|
||||
static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
|
||||
TCGMemOp s_bits, int off_rw, int off_add,
|
||||
TCGMemOp opc, int off_rw, int off_add,
|
||||
uint64_t bswap1, uint64_t bswap2)
|
||||
{
|
||||
/*
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
|
||||
/* We don't support unaligned accesses, but overalignment is easy. */
|
||||
if (a_bits < s_bits) {
|
||||
a_bits = s_bits;
|
||||
}
|
||||
|
||||
/*
|
||||
.mii
|
||||
mov r2 = off_rw
|
||||
extr.u r3 = addr_reg, ... # extract tlb page
|
||||
@ -1521,7 +1530,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
|
||||
cmp.eq p6, p7 = r3, r58
|
||||
nop
|
||||
;;
|
||||
*/
|
||||
*/
|
||||
tcg_out_bundle(s, miI,
|
||||
tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw),
|
||||
tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3,
|
||||
@ -1536,8 +1545,8 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
|
||||
TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS,
|
||||
63 - CPU_TLB_ENTRY_BITS),
|
||||
tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0,
|
||||
TCG_REG_R57, 63 - s_bits,
|
||||
TARGET_PAGE_BITS - s_bits - 1));
|
||||
TCG_REG_R57, 63 - a_bits,
|
||||
TARGET_PAGE_BITS - a_bits - 1));
|
||||
tcg_out_bundle(s, MmI,
|
||||
tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
|
||||
TCG_REG_R2, TCG_REG_R2, TCG_REG_R3),
|
||||
@ -1661,7 +1670,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
|
||||
s_bits = opc & MO_SIZE;
|
||||
|
||||
/* Read the TLB entry */
|
||||
tcg_out_qemu_tlb(s, addr_reg, s_bits,
|
||||
tcg_out_qemu_tlb(s, addr_reg, opc,
|
||||
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
|
||||
offsetof(CPUArchState, tlb_table[mem_index][0].addend),
|
||||
INSN_NOP_I, INSN_NOP_I);
|
||||
@ -1739,7 +1748,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
|
||||
pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg);
|
||||
}
|
||||
|
||||
tcg_out_qemu_tlb(s, addr_reg, s_bits,
|
||||
tcg_out_qemu_tlb(s, addr_reg, opc,
|
||||
offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
|
||||
offsetof(CPUArchState, tlb_table[mem_index][0].addend),
|
||||
pre1, pre2);
|
||||
@ -2223,6 +2232,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_qemu_st(s, args);
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
tcg_out_bundle(s, mmI, OPC_MF_M24, INSN_NOP_M, INSN_NOP_I);
|
||||
break;
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
@ -2336,6 +2348,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
|
||||
{ INDEX_op_qemu_st_i32, { "SZ", "r" } },
|
||||
{ INDEX_op_qemu_st_i64, { "SZ", "r" } },
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
|
@ -292,6 +292,7 @@ typedef enum {
|
||||
OPC_JALR = OPC_SPECIAL | 0x09,
|
||||
OPC_MOVZ = OPC_SPECIAL | 0x0A,
|
||||
OPC_MOVN = OPC_SPECIAL | 0x0B,
|
||||
OPC_SYNC = OPC_SPECIAL | 0x0F,
|
||||
OPC_MFHI = OPC_SPECIAL | 0x10,
|
||||
OPC_MFLO = OPC_SPECIAL | 0x12,
|
||||
OPC_MULT = OPC_SPECIAL | 0x18,
|
||||
@ -339,6 +340,14 @@ typedef enum {
|
||||
* backwards-compatible at the assembly level.
|
||||
*/
|
||||
OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5,
|
||||
|
||||
/* MIPS r6 introduced names for weaker variants of SYNC. These are
|
||||
backward compatible to previous architecture revisions. */
|
||||
OPC_SYNC_WMB = OPC_SYNC | 0x04 << 5,
|
||||
OPC_SYNC_MB = OPC_SYNC | 0x10 << 5,
|
||||
OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 5,
|
||||
OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 5,
|
||||
OPC_SYNC_RMB = OPC_SYNC | 0x13 << 5,
|
||||
} MIPSInsn;
|
||||
|
||||
/*
|
||||
@ -1040,7 +1049,9 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
|
||||
TCGReg addrh, TCGMemOpIdx oi,
|
||||
tcg_insn_unit *label_ptr[2], bool is_load)
|
||||
{
|
||||
TCGMemOp s_bits = get_memop(oi) & MO_SIZE;
|
||||
TCGMemOp opc = get_memop(oi);
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
int mem_index = get_mmuidx(oi);
|
||||
int cmp_off
|
||||
= (is_load
|
||||
@ -1071,10 +1082,15 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
|
||||
tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
|
||||
cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));
|
||||
|
||||
/* We don't currently support unaligned accesses.
|
||||
We could do so with mips32r6. */
|
||||
if (a_bits < s_bits) {
|
||||
a_bits = s_bits;
|
||||
}
|
||||
/* Mask the page bits, keeping the alignment bits to compare against.
|
||||
In between on 32-bit targets, load the tlb addend for the fast path. */
|
||||
tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
|
||||
TARGET_PAGE_MASK | ((1 << s_bits) - 1));
|
||||
TARGET_PAGE_MASK | ((1 << a_bits) - 1));
|
||||
if (TARGET_LONG_BITS == 32) {
|
||||
tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
|
||||
}
|
||||
@ -1377,6 +1393,22 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
|
||||
#endif
|
||||
}
|
||||
|
||||
static void tcg_out_mb(TCGContext *s, TCGArg a0)
|
||||
{
|
||||
static const MIPSInsn sync[] = {
|
||||
/* Note that SYNC_MB is a slightly weaker than SYNC 0,
|
||||
as the former is an ordering barrier and the latter
|
||||
is a completion barrier. */
|
||||
[0 ... TCG_MO_ALL] = OPC_SYNC_MB,
|
||||
[TCG_MO_LD_LD] = OPC_SYNC_RMB,
|
||||
[TCG_MO_ST_ST] = OPC_SYNC_WMB,
|
||||
[TCG_MO_LD_ST] = OPC_SYNC_RELEASE,
|
||||
[TCG_MO_LD_ST | TCG_MO_ST_ST] = OPC_SYNC_RELEASE,
|
||||
[TCG_MO_LD_ST | TCG_MO_LD_LD] = OPC_SYNC_ACQUIRE,
|
||||
};
|
||||
tcg_out32(s, sync[a0 & TCG_MO_ALL]);
|
||||
}
|
||||
|
||||
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
const TCGArg *args, const int *const_args)
|
||||
{
|
||||
@ -1646,6 +1678,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
const_args[4], const_args[5], true);
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
tcg_out_mb(s, a0);
|
||||
break;
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
case INDEX_op_call: /* Always emitted via tcg_out_call. */
|
||||
@ -1726,6 +1761,8 @@ static const TCGTargetOpDef mips_op_defs[] = {
|
||||
{ INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } },
|
||||
{ INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } },
|
||||
#endif
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
|
tcg/optimize.c
@@ -542,6 +542,7 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 void tcg_optimize(TCGContext *s)
 {
     int oi, oi_next, nb_temps, nb_globals;
+    TCGArg *prev_mb_args = NULL;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -1295,5 +1296,43 @@ void tcg_optimize(TCGContext *s)
             }
             break;
         }
+
+        /* Eliminate duplicate and redundant fence instructions.  */
+        if (prev_mb_args) {
+            switch (opc) {
+            case INDEX_op_mb:
+                /* Merge two barriers of the same type into one,
+                 * or a weaker barrier into a stronger one,
+                 * or two weaker barriers into a stronger one.
+                 *   mb X; mb Y => mb X|Y
+                 *   mb; strl => mb; st
+                 *   ldaq; mb => ld; mb
+                 *   ldaq; strl => ld; mb; st
+                 * Other combinations are also merged into a strong
+                 * barrier.  This is stricter than specified but for
+                 * the purposes of TCG is better than not optimizing.
+                 */
+                prev_mb_args[0] |= args[0];
+                tcg_op_remove(s, op);
+                break;
+
+            default:
+                /* Opcodes that end the block stop the optimization.  */
+                if ((def->flags & TCG_OPF_BB_END) == 0) {
+                    break;
+                }
+                /* fallthru */
+            case INDEX_op_qemu_ld_i32:
+            case INDEX_op_qemu_ld_i64:
+            case INDEX_op_qemu_st_i32:
+            case INDEX_op_qemu_st_i64:
+            case INDEX_op_call:
+                /* Opcodes that touch guest memory stop the optimization.  */
+                prev_mb_args = NULL;
+                break;
+            }
+        } else if (opc == INDEX_op_mb) {
+            prev_mb_args = args;
+        }
     }
 }
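Added for illustration (not QEMU code): a toy model of the pass above. Two mb ops separated only by non-memory, non-branch ops are folded by OR-ing their arguments, so the surviving barrier is never weaker than either original; a guest memory access resets the state. All names below are invented for the demo.

/* Toy model of the fence-merging rule: mb X; mb Y => mb X|Y. */
#include <stdio.h>

enum { OP_MB, OP_QEMU_LD, OP_ADD };

struct op { int opc; int arg; int live; };

static void merge_fences(struct op *ops, int n)
{
    struct op *prev_mb = NULL;
    for (int i = 0; i < n; i++) {
        if (ops[i].opc == OP_MB) {
            if (prev_mb) {
                prev_mb->arg |= ops[i].arg;  /* fold into earlier barrier */
                ops[i].live = 0;             /* drop the redundant mb */
            } else {
                prev_mb = &ops[i];
            }
        } else if (ops[i].opc == OP_QEMU_LD) {
            prev_mb = NULL;                  /* guest memory access: stop */
        }
    }
}

int main(void)
{
    struct op ops[] = {
        { OP_MB, 0x10 | 0x0F, 1 },  /* ldaq-style barrier */
        { OP_ADD, 0, 1 },           /* non-memory op: merging continues */
        { OP_MB, 0x20 | 0x08, 1 },  /* strl-style barrier */
    };
    merge_fences(ops, 3);
    for (int i = 0; i < 3; i++) {
        printf("op %d live=%d arg=0x%x\n", i, ops[i].live, (unsigned)ops[i].arg);
    }
    return 0;
}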
@ -469,6 +469,10 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
||||
#define STHX XO31(407)
|
||||
#define STWX XO31(151)
|
||||
|
||||
#define EIEIO XO31(854)
|
||||
#define HWSYNC XO31(598)
|
||||
#define LWSYNC (HWSYNC | (1u << 21))
|
||||
|
||||
#define SPR(a, b) ((((a)<<5)|(b))<<11)
|
||||
#define LR SPR(8, 0)
|
||||
#define CTR SPR(9, 0)
|
||||
@ -1243,6 +1247,18 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
|
||||
tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
|
||||
}
|
||||
|
||||
static void tcg_out_mb(TCGContext *s, TCGArg a0)
|
||||
{
|
||||
uint32_t insn = HWSYNC;
|
||||
a0 &= TCG_MO_ALL;
|
||||
if (a0 == TCG_MO_LD_LD) {
|
||||
insn = LWSYNC;
|
||||
} else if (a0 == TCG_MO_ST_ST) {
|
||||
insn = EIEIO;
|
||||
}
|
||||
tcg_out32(s, insn);
|
||||
}
|
||||
|
||||
#ifdef __powerpc64__
|
||||
void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
|
||||
{
|
||||
@ -1404,8 +1420,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
|
||||
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
|
||||
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
|
||||
TCGReg base = TCG_AREG0;
|
||||
TCGMemOp s_bits = opc & MO_SIZE;
|
||||
int a_bits = get_alignment_bits(opc);
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
|
||||
/* Extract the page index, shifted into place for tlb index. */
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
@ -1458,39 +1474,43 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
|
||||
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);
|
||||
|
||||
/* Clear the non-page, non-alignment bits from the address */
|
||||
if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
|
||||
/* We don't support unaligned accesses on 32-bits, preserve
|
||||
* the bottom bits and thus trigger a comparison failure on
|
||||
* unaligned accesses
|
||||
if (TCG_TARGET_REG_BITS == 32) {
|
||||
/* We don't support unaligned accesses on 32-bits.
|
||||
* Preserve the bottom bits and thus trigger a comparison
|
||||
* failure on unaligned accesses.
|
||||
*/
|
||||
if (a_bits < 0) {
|
||||
if (a_bits < s_bits) {
|
||||
a_bits = s_bits;
|
||||
}
|
||||
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
|
||||
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
|
||||
} else if (a_bits) {
|
||||
/* More than byte access, we need to handle alignment */
|
||||
if (a_bits > 0) {
|
||||
/* Alignment required by the front-end, same as 32-bits */
|
||||
tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
|
||||
} else {
|
||||
TCGReg t = addrlo;
|
||||
|
||||
/* If the access is unaligned, we need to make sure we fail if we
|
||||
* cross a page boundary. The trick is to add the access size-1
|
||||
* to the address before masking the low bits. That will make the
|
||||
* address overflow to the next page if we cross a page boundary,
|
||||
* which will then force a mismatch of the TLB compare.
|
||||
*/
|
||||
if (a_bits < s_bits) {
|
||||
unsigned a_mask = (1 << a_bits) - 1;
|
||||
unsigned s_mask = (1 << s_bits) - 1;
|
||||
tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
|
||||
t = TCG_REG_R0;
|
||||
}
|
||||
|
||||
/* Mask the address for the requested alignment. */
|
||||
if (TARGET_LONG_BITS == 32) {
|
||||
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
|
||||
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
|
||||
} else if (a_bits == 0) {
|
||||
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
|
||||
} else {
|
||||
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
|
||||
64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
|
||||
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
|
||||
} else {
|
||||
/* We support unaligned accesses, we need to make sure we fail
|
||||
* if we cross a page boundary. The trick is to add the
|
||||
* access_size-1 to the address before masking the low bits.
|
||||
* That will make the address overflow to the next page if we
|
||||
* cross a page boundary which will then force a mismatch of
|
||||
* the TLB compare since the next page cannot possibly be in
|
||||
* the same TLB index.
|
||||
*/
|
||||
tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1));
|
||||
tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0,
|
||||
0, 63 - TARGET_PAGE_BITS);
|
||||
}
|
||||
} else {
|
||||
/* Byte access, just chop off the bits below the page index */
|
||||
tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS);
|
||||
}
|
||||
|
||||
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
|
||||
@ -2449,6 +2469,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
|
||||
tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
tcg_out_mb(s, args[0]);
|
||||
break;
|
||||
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
@ -2596,6 +2620,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
|
||||
{ INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } },
|
||||
#endif
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
|
@ -343,6 +343,7 @@ static tcg_insn_unit *tb_ret_addr;
|
||||
#define FACILITY_EXT_IMM (1ULL << (63 - 21))
|
||||
#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
|
||||
#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
|
||||
#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
|
||||
|
||||
static uint64_t facilities;
|
||||
|
||||
@ -1505,21 +1506,18 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
|
||||
static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
|
||||
int mem_index, bool is_ld)
|
||||
{
|
||||
int a_bits = get_alignment_bits(opc);
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
unsigned s_mask = (1 << s_bits) - 1;
|
||||
unsigned a_mask = (1 << a_bits) - 1;
|
||||
int ofs, a_off;
|
||||
uint64_t tlb_mask;
|
||||
|
||||
/* For aligned accesses, we check the first byte and include the alignment
|
||||
bits within the address. For unaligned access, we check that we don't
|
||||
cross pages using the address of the last byte of the access. */
|
||||
if (a_bits >= 0) {
|
||||
/* A byte access or an alignment check required */
|
||||
a_off = 0;
|
||||
tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
|
||||
} else {
|
||||
a_off = (1 << (opc & MO_SIZE)) - 1;
|
||||
tlb_mask = TARGET_PAGE_MASK;
|
||||
}
|
||||
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
|
||||
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
|
||||
|
||||
if (facilities & FACILITY_GEN_INST_EXT) {
|
||||
tcg_out_risbg(s, TCG_REG_R2, addr_reg,
|
||||
@ -2172,6 +2170,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tgen_deposit(s, args[0], args[2], args[3], args[4]);
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
/* The host memory model is quite strong, we simply need to
|
||||
serialize the instruction stream. */
|
||||
if (args[0] & TCG_MO_ST_LD) {
|
||||
tcg_out_insn(s, RR, BCR,
|
||||
facilities & FACILITY_FAST_BCR_SER ? 14 : 15, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
@ -2293,6 +2300,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
|
||||
{ INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
|
||||
{ INDEX_op_deposit_i64, { "r", "0", "r" } },
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
|
@ -249,6 +249,8 @@ static const int tcg_target_call_oarg_regs[] = {
|
||||
#define STWA (INSN_OP(3) | INSN_OP3(0x14))
|
||||
#define STXA (INSN_OP(3) | INSN_OP3(0x1e))
|
||||
|
||||
#define MEMBAR (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13))
|
||||
|
||||
#ifndef ASI_PRIMARY_LITTLE
|
||||
#define ASI_PRIMARY_LITTLE 0x88
|
||||
#endif
|
||||
@ -835,6 +837,12 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
|
||||
tcg_out_nop(s);
|
||||
}
|
||||
|
||||
static void tcg_out_mb(TCGContext *s, TCGArg a0)
|
||||
{
|
||||
/* Note that the TCG memory order constants mirror the Sparc MEMBAR. */
|
||||
tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
static tcg_insn_unit *qemu_ld_trampoline[16];
|
||||
static tcg_insn_unit *qemu_st_trampoline[16];
|
||||
@ -996,19 +1004,25 @@ static void tcg_target_qemu_prologue(TCGContext *s)
|
||||
is in the returned register, maybe %o0. The TLB addend is in %o1. */
|
||||
|
||||
static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
|
||||
TCGMemOp s_bits, int which)
|
||||
TCGMemOp opc, int which)
|
||||
{
|
||||
const TCGReg r0 = TCG_REG_O0;
|
||||
const TCGReg r1 = TCG_REG_O1;
|
||||
const TCGReg r2 = TCG_REG_O2;
|
||||
unsigned s_bits = opc & MO_SIZE;
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
int tlb_ofs;
|
||||
|
||||
/* Shift the page number down. */
|
||||
tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);
|
||||
|
||||
/* Mask out the page offset, except for the required alignment. */
|
||||
/* Mask out the page offset, except for the required alignment.
|
||||
We don't support unaligned accesses. */
|
||||
if (a_bits < s_bits) {
|
||||
a_bits = s_bits;
|
||||
}
|
||||
tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
|
||||
TARGET_PAGE_MASK | ((1 << s_bits) - 1));
|
||||
TARGET_PAGE_MASK | ((1 << a_bits) - 1));
|
||||
|
||||
/* Mask the tlb index. */
|
||||
tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND);
|
||||
@ -1087,7 +1101,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
|
||||
tcg_insn_unit *func;
|
||||
tcg_insn_unit *label_ptr;
|
||||
|
||||
addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
|
||||
addrz = tcg_out_tlb_load(s, addr, memi, memop,
|
||||
offsetof(CPUTLBEntry, addr_read));
|
||||
|
||||
/* The fast path is exactly one insn. Thus we can perform the
|
||||
@ -1169,7 +1183,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
|
||||
tcg_insn_unit *func;
|
||||
tcg_insn_unit *label_ptr;
|
||||
|
||||
addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE,
|
||||
addrz = tcg_out_tlb_load(s, addr, memi, memop,
|
||||
offsetof(CPUTLBEntry, addr_write));
|
||||
|
||||
/* The fast path is exactly one insn. Thus we can perform the entire
|
||||
@ -1460,6 +1474,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
tcg_out_mb(s, a0);
|
||||
break;
|
||||
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
@ -1561,6 +1579,7 @@ static const TCGTargetOpDef sparc_op_defs[] = {
|
||||
{ INDEX_op_qemu_st_i32, { "sZ", "A" } },
|
||||
{ INDEX_op_qemu_st_i64, { "SZ", "A" } },
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
|
tcg/tcg-op.c | 17
@@ -148,6 +148,23 @@ void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2,
     tcg_emit_op(ctx, opc, pi);
 }
 
+void tcg_gen_mb(TCGBar mb_type)
+{
+    bool emit_barriers = true;
+
+#ifndef CONFIG_USER_ONLY
+    /* TODO: When MTTCG is available for system mode, we will check
+     * the following condition and enable emit_barriers
+     * (qemu_tcg_mttcg_enabled() && smp_cpus > 1)
+     */
+    emit_barriers = false;
+#endif
+
+    if (emit_barriers) {
+        tcg_gen_op1(&tcg_ctx, INDEX_op_mb, mb_type);
+    }
+}
+
 /* 32 bit ops */
 
 void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
tcg/tcg-op.h
@@ -261,6 +261,8 @@ static inline void tcg_gen_br(TCGLabel *l)
     tcg_gen_op1(&tcg_ctx, INDEX_op_br, label_arg(l));
 }
 
+void tcg_gen_mb(TCGBar);
+
 /* Helper calls. */
 
 /* 32 bit ops */
tcg/tcg-opc.h
@@ -42,6 +42,8 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END)
 # define IMPL64  TCG_OPF_64BIT
 #endif
 
+DEF(mb, 0, 0, 1, 0)
+
 DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
 DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
 DEF(setcond_i32, 1, 2, 1, 0)
tcg/tcg.h | 68
@@ -287,20 +287,19 @@ typedef enum TCGMemOp {
      * MO_ALIGN accesses will result in a call to the CPU's
      * do_unaligned_access hook if the guest address is not aligned.
      * The default depends on whether the target CPU defines ALIGNED_ONLY.
+     *
      * Some architectures (e.g. ARMv8) need the address which is aligned
      * to a size more than the size of the memory access.
-     * To support such check it's enough the current costless alignment
-     * check implementation in QEMU, but we need to support
-     * an alignment size specifying.
-     * MO_ALIGN supposes a natural alignment
-     * (i.e. the alignment size is the size of a memory access).
-     * Note that an alignment size must be equal or greater
-     * than an access size.
+     * Some architectures (e.g. SPARCv9) need an address which is aligned,
+     * but less strictly than the natural alignment.
+     *
+     * MO_ALIGN supposes the alignment size is the size of a memory access.
+     *
      * There are three options:
-     * - an alignment to the size of an access (MO_ALIGN);
-     * - an alignment to the specified size that is equal or greater than
-     *   an access size (MO_ALIGN_x where 'x' is a size in bytes);
      * - unaligned access permitted (MO_UNALN).
+     * - an alignment to the size of an access (MO_ALIGN);
+     * - an alignment to a specified size, which may be more or less than
+     *   the access size (MO_ALIGN_x where 'x' is a size in bytes);
      */
     MO_ASHIFT = 4,
     MO_AMASK = 7 << MO_ASHIFT,
@@ -353,38 +352,26 @@ typedef enum TCGMemOp {
  * @memop: TCGMemOp value
  *
  * Extract the alignment size from the memop.
- *
- * Returns: 0 in case of byte access (which is always aligned);
- *          positive value - number of alignment bits;
- *          negative value if unaligned access enabled
- *          and this is not a byte access.
  */
-static inline int get_alignment_bits(TCGMemOp memop)
+static inline unsigned get_alignment_bits(TCGMemOp memop)
 {
-    int a = memop & MO_AMASK;
-    int s = memop & MO_SIZE;
-    int r;
+    unsigned a = memop & MO_AMASK;
 
     if (a == MO_UNALN) {
-        /* Negative value if unaligned access enabled,
-         * or zero value in case of byte access.
-         */
-        return -s;
+        /* No alignment required.  */
+        a = 0;
     } else if (a == MO_ALIGN) {
-        /* A natural alignment: return a number of access size bits */
-        r = s;
+        /* A natural alignment requirement.  */
+        a = memop & MO_SIZE;
     } else {
-        /* Specific alignment size. It must be equal or greater
-         * than the access size.
-         */
-        r = a >> MO_ASHIFT;
-        tcg_debug_assert(r >= s);
+        /* A specific alignment requirement.  */
+        a = a >> MO_ASHIFT;
    }
 #if defined(CONFIG_SOFTMMU)
     /* The requested alignment cannot overlap the TLB flags.  */
-    tcg_debug_assert((TLB_FLAGS_MASK & ((1 << r) - 1)) == 0);
+    tcg_debug_assert((TLB_FLAGS_MASK & ((1 << a) - 1)) == 0);
 #endif
-    return r;
+    return a;
 }
 
 typedef tcg_target_ulong TCGArg;
@@ -478,6 +465,23 @@ static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_PTR(TCGv_ptr t)
 #define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1)
 #define TCG_CALL_DUMMY_ARG ((TCGArg)(-1))
 
+typedef enum {
+    /* Used to indicate the type of accesses on which ordering
+       is to be ensured.  Modeled after SPARC barriers.  */
+    TCG_MO_LD_LD  = 0x01,
+    TCG_MO_ST_LD  = 0x02,
+    TCG_MO_LD_ST  = 0x04,
+    TCG_MO_ST_ST  = 0x08,
+    TCG_MO_ALL    = 0x0F,  /* OR of the above */
+
+    /* Used to indicate the kind of ordering which is to be ensured by the
+       instruction.  These types are derived from x86/aarch64 instructions.
+       It should be noted that these are different from C11 semantics.  */
+    TCG_BAR_LDAQ  = 0x10,  /* Following ops will not come forward */
+    TCG_BAR_STRL  = 0x20,  /* Previous ops will not be delayed */
+    TCG_BAR_SC    = 0x30,  /* No ops cross barrier; OR of the above */
+} TCGBar;
+
 /* Conditions.  Note that these are laid out for easy manipulation by
    the functions below:
      bit 0 is used for inverting;
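Added for illustration (not part of the patch): a standalone model of the softmmu fast-path comparison that the backend changes in this pull build on top of get_alignment_bits(). For an access of 1 << s_bits bytes that only has to be aligned to 1 << a_bits bytes, the backends add (s_mask - a_mask) to the address and mask with page_mask | a_mask, so an under-aligned or page-crossing access fails the TLB compare and takes the slow path. PAGE_BITS and tlb_compare_value() are invented for the demo.

/* Standalone model of the alignment/page-cross check emitted by the
 * backends: misaligned or page-crossing accesses mismatch the TLB tag. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_BITS 12
#define PAGE_MASK (~((uint64_t)(1 << PAGE_BITS) - 1))

static uint64_t tlb_compare_value(uint64_t addr, unsigned s_bits, unsigned a_bits)
{
    uint64_t s_mask = (1u << s_bits) - 1;
    uint64_t a_mask = (1u << a_bits) - 1;

    if (a_bits < s_bits) {
        addr += s_mask - a_mask;   /* overflows into the next page if crossing */
    }
    return addr & (PAGE_MASK | a_mask);
}

int main(void)
{
    /* 8-byte load, no alignment required (a_bits = 0). */
    printf("0x%llx\n", (unsigned long long)tlb_compare_value(0x1000, 3, 0)); /* in-page */
    printf("0x%llx\n", (unsigned long long)tlb_compare_value(0x1ffa, 3, 0)); /* crosses */
    /* 8-byte load, natural alignment required (a_bits = 3). */
    printf("0x%llx\n", (unsigned long long)tlb_compare_value(0x1004, 3, 3)); /* misaligned */
    return 0;
}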
@ -255,6 +255,7 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
|
||||
{ INDEX_op_bswap32_i32, { R, R } },
|
||||
#endif
|
||||
|
||||
{ INDEX_op_mb, { } },
|
||||
{ -1 },
|
||||
};
|
||||
|
||||
@ -800,6 +801,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
|
||||
}
|
||||
tcg_out_i(s, *args++);
|
||||
break;
|
||||
case INDEX_op_mb:
|
||||
break;
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
|
tci.c | 4
@@ -1236,6 +1236,10 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
             tcg_abort();
         }
         break;
+    case INDEX_op_mb:
+        /* Ensure ordering for all kinds */
+        smp_mb();
+        break;
     default:
         TODO();
         break;
translate-all.c
@@ -260,6 +260,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     int64_t ti = profile_getclock();
 #endif
 
+    searched_pc -= GETPC_ADJ;
+
     if (searched_pc < host_pc) {
         return -1;
     }
user-exec.c
@@ -105,8 +105,11 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
     if (ret == 0) {
         return 1; /* the MMU fault was handled without causing real CPU fault */
     }
-    /* now we have a real cpu fault */
-    cpu_restore_state(cpu, pc);
+
+    /* Now we have a real cpu fault.  Since this is the exact location of
+     * the exception, we must undo the adjustment done by cpu_restore_state
+     * for handling call return addresses.  */
+    cpu_restore_state(cpu, pc + GETPC_ADJ);
 
     sigprocmask(SIG_SETMASK, old_set, NULL);
     cpu_loop_exit(cpu);
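Added for illustration (not QEMU code): a small standalone model of the GETPC_ADJ bookkeeping the two hunks above rely on. A call's return address points just past the call, so cpu_restore_state_from_tb() subtracts GETPC_ADJ (2, per exec-all.h) before searching; a synchronous signal already delivers the faulting PC itself, which is why handle_cpu_signal() adds GETPC_ADJ back before calling cpu_restore_state(). The addresses below are made up.

/* Standalone sketch of the +/- GETPC_ADJ adjustment. */
#include <stdint.h>
#include <stdio.h>

#define GETPC_ADJ 2

static uintptr_t restore_search_pc(uintptr_t retaddr)
{
    return retaddr - GETPC_ADJ;           /* what cpu_restore_state_from_tb does */
}

int main(void)
{
    uintptr_t call_return_addr = 0x4010;  /* hypothetical host return address */
    uintptr_t fault_pc = 0x4008;          /* hypothetical faulting host insn  */

    printf("from helper call: search at 0x%lx\n",
           (unsigned long)restore_search_pc(call_return_addr));
    printf("from signal:      search at 0x%lx\n",
           (unsigned long)restore_search_pc(fault_pc + GETPC_ADJ));
    return 0;
}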