3e8f1628e8
Use g2h_untagged in contexts that have no cpu, e.g. the binary loaders that operate before the primary cpu is created. As a colollary, target_mmap and friends must use untagged addresses, since they are used by the loaders. Use g2h_untagged on values returned from target_mmap, as the kernel never applies a tag itself. Use g2h_untagged on all pc values. The only current user of tags, aarch64, removes tags from code addresses upon branch, so "pc" is always untagged. Use g2h with the cpu context on hand wherever possible. Use g2h_untagged in lock_user, which will be updated soon. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20210212184902.1251044-13-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
444 lines
13 KiB
C
444 lines
13 KiB
C
/*
|
|
* qemu user cpu loop
|
|
*
|
|
* Copyright (c) 2003-2008 Fabrice Bellard
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu-common.h"
|
|
#include "qemu.h"
|
|
#include "cpu_loop-common.h"
|
|
|
|
/***********************************************************/
|
|
/* CPUX86 core interface */
|
|
|
|
uint64_t cpu_get_tsc(CPUX86State *env)
|
|
{
|
|
return cpu_get_host_ticks();
|
|
}
|
|
|
|
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
|
|
int flags)
|
|
{
|
|
unsigned int e1, e2;
|
|
uint32_t *p;
|
|
e1 = (addr << 16) | (limit & 0xffff);
|
|
e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
|
|
e2 |= flags;
|
|
p = ptr;
|
|
p[0] = tswap32(e1);
|
|
p[1] = tswap32(e2);
|
|
}
|
|
|
|
static uint64_t *idt_table;
|
|
#ifdef TARGET_X86_64
|
|
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
|
|
uint64_t addr, unsigned int sel)
|
|
{
|
|
uint32_t *p, e1, e2;
|
|
e1 = (addr & 0xffff) | (sel << 16);
|
|
e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
|
|
p = ptr;
|
|
p[0] = tswap32(e1);
|
|
p[1] = tswap32(e2);
|
|
p[2] = tswap32(addr >> 32);
|
|
p[3] = 0;
|
|
}
|
|
/* only dpl matters as we do only user space emulation */
|
|
static void set_idt(int n, unsigned int dpl)
|
|
{
|
|
set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
|
|
}
|
|
#else
|
|
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
|
|
uint32_t addr, unsigned int sel)
|
|
{
|
|
uint32_t *p, e1, e2;
|
|
e1 = (addr & 0xffff) | (sel << 16);
|
|
e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
|
|
p = ptr;
|
|
p[0] = tswap32(e1);
|
|
p[1] = tswap32(e2);
|
|
}
|
|
|
|
/* only dpl matters as we do only user space emulation */
|
|
static void set_idt(int n, unsigned int dpl)
|
|
{
|
|
set_gate(idt_table + n, 0, dpl, 0, 0);
|
|
}
|
|
#endif
|
|
|
|
static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
|
|
{
|
|
target_siginfo_t info = {
|
|
.si_signo = sig,
|
|
.si_code = code,
|
|
._sifields._sigfault._addr = addr
|
|
};
|
|
|
|
queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
|
|
}
|
|
|
|
#ifdef TARGET_X86_64
|
|
static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
|
|
{
|
|
/*
|
|
* For all the vsyscalls, NULL means "don't write anything" not
|
|
* "write it at address 0".
|
|
*/
|
|
if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) {
|
|
return true;
|
|
}
|
|
|
|
env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
|
|
gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Since v3.1, the kernel traps and emulates the vsyscall page.
|
|
* Entry points other than the official generate SIGSEGV.
|
|
*/
|
|
static void emulate_vsyscall(CPUX86State *env)
|
|
{
|
|
int syscall;
|
|
abi_ulong ret;
|
|
uint64_t caller;
|
|
|
|
/*
|
|
* Validate the entry point. We have already validated the page
|
|
* during translation to get here; now verify the offset.
|
|
*/
|
|
switch (env->eip & ~TARGET_PAGE_MASK) {
|
|
case 0x000:
|
|
syscall = TARGET_NR_gettimeofday;
|
|
break;
|
|
case 0x400:
|
|
syscall = TARGET_NR_time;
|
|
break;
|
|
case 0x800:
|
|
syscall = TARGET_NR_getcpu;
|
|
break;
|
|
default:
|
|
goto sigsegv;
|
|
}
|
|
|
|
/*
|
|
* Validate the return address.
|
|
* Note that the kernel treats this the same as an invalid entry point.
|
|
*/
|
|
if (get_user_u64(caller, env->regs[R_ESP])) {
|
|
goto sigsegv;
|
|
}
|
|
|
|
/*
|
|
* Validate the the pointer arguments.
|
|
*/
|
|
switch (syscall) {
|
|
case TARGET_NR_gettimeofday:
|
|
if (!write_ok_or_segv(env, env->regs[R_EDI],
|
|
sizeof(struct target_timeval)) ||
|
|
!write_ok_or_segv(env, env->regs[R_ESI],
|
|
sizeof(struct target_timezone))) {
|
|
return;
|
|
}
|
|
break;
|
|
case TARGET_NR_time:
|
|
if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
|
|
return;
|
|
}
|
|
break;
|
|
case TARGET_NR_getcpu:
|
|
if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
|
|
!write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
|
|
return;
|
|
}
|
|
break;
|
|
default:
|
|
g_assert_not_reached();
|
|
}
|
|
|
|
/*
|
|
* Perform the syscall. None of the vsyscalls should need restarting.
|
|
*/
|
|
ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
|
|
env->regs[R_EDX], env->regs[10], env->regs[8],
|
|
env->regs[9], 0, 0);
|
|
g_assert(ret != -TARGET_ERESTARTSYS);
|
|
g_assert(ret != -TARGET_QEMU_ESIGRETURN);
|
|
if (ret == -TARGET_EFAULT) {
|
|
goto sigsegv;
|
|
}
|
|
env->regs[R_EAX] = ret;
|
|
|
|
/* Emulate a ret instruction to leave the vsyscall page. */
|
|
env->eip = caller;
|
|
env->regs[R_ESP] += 8;
|
|
return;
|
|
|
|
sigsegv:
|
|
/* Like force_sig(SIGSEGV). */
|
|
gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
|
|
}
|
|
#endif
|
|
|
|
void cpu_loop(CPUX86State *env)
|
|
{
|
|
CPUState *cs = env_cpu(env);
|
|
int trapnr;
|
|
abi_ulong pc;
|
|
abi_ulong ret;
|
|
|
|
for(;;) {
|
|
cpu_exec_start(cs);
|
|
trapnr = cpu_exec(cs);
|
|
cpu_exec_end(cs);
|
|
process_queued_cpu_work(cs);
|
|
|
|
switch(trapnr) {
|
|
case 0x80:
|
|
/* linux syscall from int $0x80 */
|
|
ret = do_syscall(env,
|
|
env->regs[R_EAX],
|
|
env->regs[R_EBX],
|
|
env->regs[R_ECX],
|
|
env->regs[R_EDX],
|
|
env->regs[R_ESI],
|
|
env->regs[R_EDI],
|
|
env->regs[R_EBP],
|
|
0, 0);
|
|
if (ret == -TARGET_ERESTARTSYS) {
|
|
env->eip -= 2;
|
|
} else if (ret != -TARGET_QEMU_ESIGRETURN) {
|
|
env->regs[R_EAX] = ret;
|
|
}
|
|
break;
|
|
#ifndef TARGET_ABI32
|
|
case EXCP_SYSCALL:
|
|
/* linux syscall from syscall instruction */
|
|
ret = do_syscall(env,
|
|
env->regs[R_EAX],
|
|
env->regs[R_EDI],
|
|
env->regs[R_ESI],
|
|
env->regs[R_EDX],
|
|
env->regs[10],
|
|
env->regs[8],
|
|
env->regs[9],
|
|
0, 0);
|
|
if (ret == -TARGET_ERESTARTSYS) {
|
|
env->eip -= 2;
|
|
} else if (ret != -TARGET_QEMU_ESIGRETURN) {
|
|
env->regs[R_EAX] = ret;
|
|
}
|
|
break;
|
|
#endif
|
|
#ifdef TARGET_X86_64
|
|
case EXCP_VSYSCALL:
|
|
emulate_vsyscall(env);
|
|
break;
|
|
#endif
|
|
case EXCP0B_NOSEG:
|
|
case EXCP0C_STACK:
|
|
gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
|
|
break;
|
|
case EXCP0D_GPF:
|
|
/* XXX: potential problem if ABI32 */
|
|
#ifndef TARGET_X86_64
|
|
if (env->eflags & VM_MASK) {
|
|
handle_vm86_fault(env);
|
|
break;
|
|
}
|
|
#endif
|
|
gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
|
|
break;
|
|
case EXCP0E_PAGE:
|
|
gen_signal(env, TARGET_SIGSEGV,
|
|
(env->error_code & 1 ?
|
|
TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
|
|
env->cr[2]);
|
|
break;
|
|
case EXCP00_DIVZ:
|
|
#ifndef TARGET_X86_64
|
|
if (env->eflags & VM_MASK) {
|
|
handle_vm86_trap(env, trapnr);
|
|
break;
|
|
}
|
|
#endif
|
|
gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
|
|
break;
|
|
case EXCP01_DB:
|
|
case EXCP03_INT3:
|
|
#ifndef TARGET_X86_64
|
|
if (env->eflags & VM_MASK) {
|
|
handle_vm86_trap(env, trapnr);
|
|
break;
|
|
}
|
|
#endif
|
|
if (trapnr == EXCP01_DB) {
|
|
gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
|
|
} else {
|
|
gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
|
|
}
|
|
break;
|
|
case EXCP04_INTO:
|
|
case EXCP05_BOUND:
|
|
#ifndef TARGET_X86_64
|
|
if (env->eflags & VM_MASK) {
|
|
handle_vm86_trap(env, trapnr);
|
|
break;
|
|
}
|
|
#endif
|
|
gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
|
|
break;
|
|
case EXCP06_ILLOP:
|
|
gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
|
|
break;
|
|
case EXCP_INTERRUPT:
|
|
/* just indicate that signals should be handled asap */
|
|
break;
|
|
case EXCP_DEBUG:
|
|
gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
|
|
break;
|
|
case EXCP_ATOMIC:
|
|
cpu_exec_step_atomic(cs);
|
|
break;
|
|
default:
|
|
pc = env->segs[R_CS].base + env->eip;
|
|
EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
|
|
(long)pc, trapnr);
|
|
abort();
|
|
}
|
|
process_pending_signals(env);
|
|
}
|
|
}
|
|
|
|
void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
|
|
{
|
|
env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
|
|
env->hflags |= HF_PE_MASK | HF_CPL_MASK;
|
|
if (env->features[FEAT_1_EDX] & CPUID_SSE) {
|
|
env->cr[4] |= CR4_OSFXSR_MASK;
|
|
env->hflags |= HF_OSFXSR_MASK;
|
|
}
|
|
#ifndef TARGET_ABI32
|
|
/* enable 64 bit mode if possible */
|
|
if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
|
|
fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
env->cr[4] |= CR4_PAE_MASK;
|
|
env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
|
|
env->hflags |= HF_LMA_MASK;
|
|
#endif
|
|
|
|
/* flags setup : we activate the IRQs by default as in user mode */
|
|
env->eflags |= IF_MASK;
|
|
|
|
/* linux register setup */
|
|
#ifndef TARGET_ABI32
|
|
env->regs[R_EAX] = regs->rax;
|
|
env->regs[R_EBX] = regs->rbx;
|
|
env->regs[R_ECX] = regs->rcx;
|
|
env->regs[R_EDX] = regs->rdx;
|
|
env->regs[R_ESI] = regs->rsi;
|
|
env->regs[R_EDI] = regs->rdi;
|
|
env->regs[R_EBP] = regs->rbp;
|
|
env->regs[R_ESP] = regs->rsp;
|
|
env->eip = regs->rip;
|
|
#else
|
|
env->regs[R_EAX] = regs->eax;
|
|
env->regs[R_EBX] = regs->ebx;
|
|
env->regs[R_ECX] = regs->ecx;
|
|
env->regs[R_EDX] = regs->edx;
|
|
env->regs[R_ESI] = regs->esi;
|
|
env->regs[R_EDI] = regs->edi;
|
|
env->regs[R_EBP] = regs->ebp;
|
|
env->regs[R_ESP] = regs->esp;
|
|
env->eip = regs->eip;
|
|
#endif
|
|
|
|
/* linux interrupt setup */
|
|
#ifndef TARGET_ABI32
|
|
env->idt.limit = 511;
|
|
#else
|
|
env->idt.limit = 255;
|
|
#endif
|
|
env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
|
|
PROT_READ|PROT_WRITE,
|
|
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
|
|
idt_table = g2h_untagged(env->idt.base);
|
|
set_idt(0, 0);
|
|
set_idt(1, 0);
|
|
set_idt(2, 0);
|
|
set_idt(3, 3);
|
|
set_idt(4, 3);
|
|
set_idt(5, 0);
|
|
set_idt(6, 0);
|
|
set_idt(7, 0);
|
|
set_idt(8, 0);
|
|
set_idt(9, 0);
|
|
set_idt(10, 0);
|
|
set_idt(11, 0);
|
|
set_idt(12, 0);
|
|
set_idt(13, 0);
|
|
set_idt(14, 0);
|
|
set_idt(15, 0);
|
|
set_idt(16, 0);
|
|
set_idt(17, 0);
|
|
set_idt(18, 0);
|
|
set_idt(19, 0);
|
|
set_idt(0x80, 3);
|
|
|
|
/* linux segment setup */
|
|
{
|
|
uint64_t *gdt_table;
|
|
env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
|
|
PROT_READ|PROT_WRITE,
|
|
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
|
|
env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
|
|
gdt_table = g2h_untagged(env->gdt.base);
|
|
#ifdef TARGET_ABI32
|
|
write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
|
|
DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
|
|
(3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
|
|
#else
|
|
/* 64 bit code segment */
|
|
write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
|
|
DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
|
|
DESC_L_MASK |
|
|
(3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
|
|
#endif
|
|
write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
|
|
DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
|
|
(3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
|
|
}
|
|
cpu_x86_load_seg(env, R_CS, __USER_CS);
|
|
cpu_x86_load_seg(env, R_SS, __USER_DS);
|
|
#ifdef TARGET_ABI32
|
|
cpu_x86_load_seg(env, R_DS, __USER_DS);
|
|
cpu_x86_load_seg(env, R_ES, __USER_DS);
|
|
cpu_x86_load_seg(env, R_FS, __USER_DS);
|
|
cpu_x86_load_seg(env, R_GS, __USER_DS);
|
|
/* This hack makes Wine work... */
|
|
env->segs[R_FS].selector = 0;
|
|
#else
|
|
cpu_x86_load_seg(env, R_DS, 0);
|
|
cpu_x86_load_seg(env, R_ES, 0);
|
|
cpu_x86_load_seg(env, R_FS, 0);
|
|
cpu_x86_load_seg(env, R_GS, 0);
|
|
#endif
|
|
}
|