qemu/linux-user/i386/cpu_loop.c
Richard Henderson 3e8f1628e8 exec: Use cpu_untagged_addr in g2h; split out g2h_untagged
Use g2h_untagged in contexts that have no cpu, e.g. the binary
loaders that operate before the primary cpu is created.  As a
corollary, target_mmap and friends must use untagged addresses,
since they are used by the loaders.

Use g2h_untagged on values returned from target_mmap, as the
kernel never applies a tag itself.

Use g2h_untagged on all pc values.  The only current user of
tags, aarch64, removes tags from code addresses upon branch,
so "pc" is always untagged.

Use g2h with the cpu context on hand wherever possible.

Use g2h_untagged in lock_user, which will be updated soon.
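
For reference, the shape of the split is roughly as below (a sketch,
not the exact definitions; cpu_untagged_addr is a per-cpu hook that
is the identity for targets without address tags):

    /* Translate a guest address to a host pointer, no tag handling. */
    static inline void *g2h_untagged(abi_ptr x)
    {
        return (void *)((uintptr_t)(x) + guest_base);
    }

    /* Strip the cpu's address tag, then translate. */
    static inline void *g2h(CPUState *cs, abi_ptr x)
    {
        return g2h_untagged(cpu_untagged_addr(cs, x));
    }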

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210212184902.1251044-13-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2021-02-16 11:04:53 +00:00

/*
 * qemu user cpu loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu.h"
#include "cpu_loop-common.h"

/***********************************************************/
/* CPUX86 core interface */

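/* The guest TSC is backed by the host tick counter. */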
uint64_t cpu_get_tsc(CPUX86State *env)
{
    return cpu_get_host_ticks();
}
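
/* Encode an 8-byte x86 segment descriptor from base, limit and flags. */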
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    unsigned int e1, e2;
    uint32_t *p;
    e1 = (addr << 16) | (limit & 0xffff);
    e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
    e2 |= flags;
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

static uint64_t *idt_table;

#ifdef TARGET_X86_64
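/*
 * Encode a 16-byte long-mode interrupt gate: offset[15:0] with selector,
 * then present bit, DPL and type with offset[31:16], then offset[63:32].
 */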
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
    p[2] = tswap32(addr >> 32);
    p[3] = 0;
}

/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
}
#else
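/* Encode an 8-byte protected-mode interrupt gate. */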
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    set_gate(idt_table + n, 0, dpl, 0, 0);
}
#endif
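
/* Deliver a synchronous fault signal carrying the faulting address. */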
static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
{
    target_siginfo_t info = {
        .si_signo = sig,
        .si_code = code,
        ._sifields._sigfault._addr = addr
    };

    queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
}
#ifdef TARGET_X86_64
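/* Verify that a vsyscall out-parameter is writable; raise SIGSEGV if not. */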
static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
{
    /*
     * For all the vsyscalls, NULL means "don't write anything" not
     * "write it at address 0".
     */
    if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) {
        return true;
    }

    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
    gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
    return false;
}

/*
 * Since v3.1, the kernel traps and emulates the vsyscall page.
 * Entry points other than the official ones generate SIGSEGV.
 */
static void emulate_vsyscall(CPUX86State *env)
{
    int syscall;
    abi_ulong ret;
    uint64_t caller;

    /*
     * Validate the entry point.  We have already validated the page
     * during translation to get here; now verify the offset.
     */
    switch (env->eip & ~TARGET_PAGE_MASK) {
    case 0x000:
        syscall = TARGET_NR_gettimeofday;
        break;
    case 0x400:
        syscall = TARGET_NR_time;
        break;
    case 0x800:
        syscall = TARGET_NR_getcpu;
        break;
    default:
        goto sigsegv;
    }

    /*
     * Validate the return address.
     * Note that the kernel treats this the same as an invalid entry point.
     */
    if (get_user_u64(caller, env->regs[R_ESP])) {
        goto sigsegv;
    }

    /*
     * Validate the pointer arguments.
     */
    switch (syscall) {
    case TARGET_NR_gettimeofday:
        if (!write_ok_or_segv(env, env->regs[R_EDI],
                              sizeof(struct target_timeval)) ||
            !write_ok_or_segv(env, env->regs[R_ESI],
                              sizeof(struct target_timezone))) {
            return;
        }
        break;
    case TARGET_NR_time:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
            return;
        }
        break;
    case TARGET_NR_getcpu:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
            return;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Perform the syscall.  None of the vsyscalls should need restarting.
     */
    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
                     env->regs[R_EDX], env->regs[10], env->regs[8],
                     env->regs[9], 0, 0);
    g_assert(ret != -TARGET_ERESTARTSYS);
    g_assert(ret != -TARGET_QEMU_ESIGRETURN);
    if (ret == -TARGET_EFAULT) {
        goto sigsegv;
    }
    env->regs[R_EAX] = ret;

    /* Emulate a ret instruction to leave the vsyscall page. */
    env->eip = caller;
    env->regs[R_ESP] += 8;
    return;

 sigsegv:
    /* Like force_sig(SIGSEGV). */
    gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
}
#endif
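
/*
 * Main user-mode loop: execute translated code, then convert each trap
 * into a syscall, a queued signal, or an abort.
 */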
void cpu_loop(CPUX86State *env)
{
    CPUState *cs = env_cpu(env);
    int trapnr;
    abi_ulong pc;
    abi_ulong ret;

    for(;;) {
        cpu_exec_start(cs);
        trapnr = cpu_exec(cs);
        cpu_exec_end(cs);
        process_queued_cpu_work(cs);

        switch(trapnr) {
        case 0x80:
            /* linux syscall from int $0x80 */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EBX],
                             env->regs[R_ECX],
                             env->regs[R_EDX],
                             env->regs[R_ESI],
                             env->regs[R_EDI],
                             env->regs[R_EBP],
                             0, 0);
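            /*
             * Both "int $0x80" and "syscall" encode in two bytes, so
             * backing up eip by 2 re-executes the insn after a restart.
             */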
            if (ret == -TARGET_ERESTARTSYS) {
                env->eip -= 2;
            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#ifndef TARGET_ABI32
        case EXCP_SYSCALL:
            /* linux syscall from syscall instruction */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EDI],
                             env->regs[R_ESI],
                             env->regs[R_EDX],
                             env->regs[10],
                             env->regs[8],
                             env->regs[9],
                             0, 0);
            if (ret == -TARGET_ERESTARTSYS) {
                env->eip -= 2;
            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#endif
#ifdef TARGET_X86_64
        case EXCP_VSYSCALL:
            emulate_vsyscall(env);
            break;
#endif
        case EXCP0B_NOSEG:
        case EXCP0C_STACK:
            gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
            break;
        case EXCP0D_GPF:
            /* XXX: potential problem if ABI32 */
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_fault(env);
                break;
            }
#endif
            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
            break;
        case EXCP0E_PAGE:
            gen_signal(env, TARGET_SIGSEGV,
                       (env->error_code & 1 ?
                        TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
                       env->cr[2]);
            break;
        case EXCP00_DIVZ:
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_trap(env, trapnr);
                break;
            }
#endif
            gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
            break;
        case EXCP01_DB:
        case EXCP03_INT3:
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_trap(env, trapnr);
                break;
            }
#endif
            if (trapnr == EXCP01_DB) {
                gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            } else {
                gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
            }
            break;
        case EXCP04_INTO:
        case EXCP05_BOUND:
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_trap(env, trapnr);
                break;
            }
#endif
            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
            break;
        case EXCP06_ILLOP:
            gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
            break;
        case EXCP_INTERRUPT:
            /* just indicate that signals should be handled asap */
            break;
        case EXCP_DEBUG:
            gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
            break;
        case EXCP_ATOMIC:
            cpu_exec_step_atomic(cs);
            break;
        default:
            pc = env->segs[R_CS].base + env->eip;
            EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
                      (long)pc, trapnr);
            abort();
        }
        process_pending_signals(env);
    }
}
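
/*
 * Build the initial CPU state for user-mode emulation: control registers,
 * the registers handed over by the loader, and a minimal IDT and GDT.
 */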
void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
{
    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
        env->cr[4] |= CR4_OSFXSR_MASK;
        env->hflags |= HF_OSFXSR_MASK;
    }
#ifndef TARGET_ABI32
    /* enable 64 bit mode if possible */
    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
        fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
        exit(EXIT_FAILURE);
    }
    env->cr[4] |= CR4_PAE_MASK;
    env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
    env->hflags |= HF_LMA_MASK;
#endif

    /* flags setup : we activate the IRQs by default as in user mode */
    env->eflags |= IF_MASK;

    /* linux register setup */
#ifndef TARGET_ABI32
    env->regs[R_EAX] = regs->rax;
    env->regs[R_EBX] = regs->rbx;
    env->regs[R_ECX] = regs->rcx;
    env->regs[R_EDX] = regs->rdx;
    env->regs[R_ESI] = regs->rsi;
    env->regs[R_EDI] = regs->rdi;
    env->regs[R_EBP] = regs->rbp;
    env->regs[R_ESP] = regs->rsp;
    env->eip = regs->rip;
#else
    env->regs[R_EAX] = regs->eax;
    env->regs[R_EBX] = regs->ebx;
    env->regs[R_ECX] = regs->ecx;
    env->regs[R_EDX] = regs->edx;
    env->regs[R_ESI] = regs->esi;
    env->regs[R_EDI] = regs->edi;
    env->regs[R_EBP] = regs->ebp;
    env->regs[R_ESP] = regs->esp;
    env->eip = regs->eip;
#endif

    /* linux interrupt setup */
#ifndef TARGET_ABI32
    env->idt.limit = 511;
#else
    env->idt.limit = 255;
#endif
    env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
                                PROT_READ|PROT_WRITE,
                                MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    idt_table = g2h_untagged(env->idt.base);
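
    /*
     * Vectors 3 (int3), 4 (into) and 0x80 (syscall) must be reachable
     * from user code, hence DPL 3; all other exception vectors get DPL 0.
     */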
    set_idt(0, 0);
    set_idt(1, 0);
    set_idt(2, 0);
    set_idt(3, 3);
    set_idt(4, 3);
    set_idt(5, 0);
    set_idt(6, 0);
    set_idt(7, 0);
    set_idt(8, 0);
    set_idt(9, 0);
    set_idt(10, 0);
    set_idt(11, 0);
    set_idt(12, 0);
    set_idt(13, 0);
    set_idt(14, 0);
    set_idt(15, 0);
    set_idt(16, 0);
    set_idt(17, 0);
    set_idt(18, 0);
    set_idt(19, 0);
    set_idt(0x80, 3);

    /* linux segment setup */
    {
        uint64_t *gdt_table;
        env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
                                    PROT_READ|PROT_WRITE,
                                    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
        env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
        gdt_table = g2h_untagged(env->gdt.base);
#ifdef TARGET_ABI32
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#else
        /* 64 bit code segment */
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 DESC_L_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#endif
        write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
    }
    cpu_x86_load_seg(env, R_CS, __USER_CS);
    cpu_x86_load_seg(env, R_SS, __USER_DS);
#ifdef TARGET_ABI32
    cpu_x86_load_seg(env, R_DS, __USER_DS);
    cpu_x86_load_seg(env, R_ES, __USER_DS);
    cpu_x86_load_seg(env, R_FS, __USER_DS);
    cpu_x86_load_seg(env, R_GS, __USER_DS);
    /* This hack makes Wine work... */
    env->segs[R_FS].selector = 0;
#else
    cpu_x86_load_seg(env, R_DS, 0);
    cpu_x86_load_seg(env, R_ES, 0);
    cpu_x86_load_seg(env, R_FS, 0);
    cpu_x86_load_seg(env, R_GS, 0);
#endif
}