qemu/target/i386/misc_helper.c

648 lines
17 KiB
C
Raw Normal View History

/*
* x86 misc helpers
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/address-spaces.h"
void helper_outb(CPUX86State *env, uint32_t port, uint32_t data)
{
#ifdef CONFIG_USER_ONLY
fprintf(stderr, "outb: port=0x%04x, data=%02x\n", port, data);
#else
address_space_stb(&address_space_io, port, data,
cpu_get_mem_attrs(env), NULL);
#endif
}
target_ulong helper_inb(CPUX86State *env, uint32_t port)
{
#ifdef CONFIG_USER_ONLY
fprintf(stderr, "inb: port=0x%04x\n", port);
return 0;
#else
return address_space_ldub(&address_space_io, port,
cpu_get_mem_attrs(env), NULL);
#endif
}
void helper_outw(CPUX86State *env, uint32_t port, uint32_t data)
{
#ifdef CONFIG_USER_ONLY
fprintf(stderr, "outw: port=0x%04x, data=%04x\n", port, data);
#else
address_space_stw(&address_space_io, port, data,
cpu_get_mem_attrs(env), NULL);
#endif
}
target_ulong helper_inw(CPUX86State *env, uint32_t port)
{
#ifdef CONFIG_USER_ONLY
fprintf(stderr, "inw: port=0x%04x\n", port);
return 0;
#else
return address_space_lduw(&address_space_io, port,
cpu_get_mem_attrs(env), NULL);
#endif
}
void helper_outl(CPUX86State *env, uint32_t port, uint32_t data)
{
#ifdef CONFIG_USER_ONLY
fprintf(stderr, "outl: port=0x%04x, data=%08x\n", port, data);
#else
address_space_stl(&address_space_io, port, data,
cpu_get_mem_attrs(env), NULL);
#endif
}
target_ulong helper_inl(CPUX86State *env, uint32_t port)
{
#ifdef CONFIG_USER_ONLY
fprintf(stderr, "inl: port=0x%04x\n", port);
return 0;
#else
return address_space_ldl(&address_space_io, port,
cpu_get_mem_attrs(env), NULL);
#endif
}
void helper_into(CPUX86State *env, int next_eip_addend)
{
int eflags;
eflags = cpu_cc_compute_all(env, CC_OP);
if (eflags & CC_O) {
raise_interrupt(env, EXCP04_INTO, 1, 0, next_eip_addend);
}
}
void helper_cpuid(CPUX86State *env)
{
uint32_t eax, ebx, ecx, edx;
cpu_svm_check_intercept_param(env, SVM_EXIT_CPUID, 0, GETPC());
cpu_x86_cpuid(env, (uint32_t)env->regs[R_EAX], (uint32_t)env->regs[R_ECX],
&eax, &ebx, &ecx, &edx);
env->regs[R_EAX] = eax;
env->regs[R_EBX] = ebx;
env->regs[R_ECX] = ecx;
env->regs[R_EDX] = edx;
}
#if defined(CONFIG_USER_ONLY)
target_ulong helper_read_crN(CPUX86State *env, int reg)
{
return 0;
}
void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
{
}
#else
target_ulong helper_read_crN(CPUX86State *env, int reg)
{
target_ulong val;
cpu_svm_check_intercept_param(env, SVM_EXIT_READ_CR0 + reg, 0, GETPC());
switch (reg) {
default:
val = env->cr[reg];
break;
case 8:
if (!(env->hflags2 & HF2_VINTR_MASK)) {
val = cpu_get_apic_tpr(env_archcpu(env)->apic_state);
} else {
val = env->v_tpr;
}
break;
}
return val;
}
void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
{
cpu_svm_check_intercept_param(env, SVM_EXIT_WRITE_CR0 + reg, 0, GETPC());
switch (reg) {
case 0:
cpu_x86_update_cr0(env, t0);
break;
case 3:
cpu_x86_update_cr3(env, t0);
break;
case 4:
cpu_x86_update_cr4(env, t0);
break;
case 8:
if (!(env->hflags2 & HF2_VINTR_MASK)) {
qemu_mutex_lock_iothread();
cpu_set_apic_tpr(env_archcpu(env)->apic_state, t0);
qemu_mutex_unlock_iothread();
}
env->v_tpr = t0 & 0x0f;
break;
default:
env->cr[reg] = t0;
break;
}
}
#endif
void helper_lmsw(CPUX86State *env, target_ulong t0)
{
/* only 4 lower bits of CR0 are modified. PE cannot be set to zero
if already set to one. */
t0 = (env->cr[0] & ~0xe) | (t0 & 0xf);
helper_write_crN(env, 0, t0);
}
void helper_invlpg(CPUX86State *env, target_ulong addr)
{
X86CPU *cpu = env_archcpu(env);
cpu_svm_check_intercept_param(env, SVM_EXIT_INVLPG, 0, GETPC());
tlb_flush_page(CPU(cpu), addr);
}
void helper_rdtsc(CPUX86State *env)
{
uint64_t val;
if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
cpu_svm_check_intercept_param(env, SVM_EXIT_RDTSC, 0, GETPC());
val = cpu_get_tsc(env) + env->tsc_offset;
env->regs[R_EAX] = (uint32_t)(val);
env->regs[R_EDX] = (uint32_t)(val >> 32);
}
void helper_rdtscp(CPUX86State *env)
{
helper_rdtsc(env);
env->regs[R_ECX] = (uint32_t)(env->tsc_aux);
}
void helper_rdpmc(CPUX86State *env)
{
if ((env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) {
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
cpu_svm_check_intercept_param(env, SVM_EXIT_RDPMC, 0, GETPC());
/* currently unimplemented */
qemu_log_mask(LOG_UNIMP, "x86: unimplemented rdpmc\n");
raise_exception_err(env, EXCP06_ILLOP, 0);
}
#if defined(CONFIG_USER_ONLY)
void helper_wrmsr(CPUX86State *env)
{
}
void helper_rdmsr(CPUX86State *env)
{
}
#else
void helper_wrmsr(CPUX86State *env)
{
uint64_t val;
cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC());
val = ((uint32_t)env->regs[R_EAX]) |
((uint64_t)((uint32_t)env->regs[R_EDX]) << 32);
switch ((uint32_t)env->regs[R_ECX]) {
case MSR_IA32_SYSENTER_CS:
env->sysenter_cs = val & 0xffff;
break;
case MSR_IA32_SYSENTER_ESP:
env->sysenter_esp = val;
break;
case MSR_IA32_SYSENTER_EIP:
env->sysenter_eip = val;
break;
case MSR_IA32_APICBASE:
cpu_set_apic_base(env_archcpu(env)->apic_state, val);
break;
case MSR_EFER:
{
uint64_t update_mask;
update_mask = 0;
if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_SYSCALL) {
update_mask |= MSR_EFER_SCE;
}
if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
update_mask |= MSR_EFER_LME;
}
if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_FFXSR) {
update_mask |= MSR_EFER_FFXSR;
}
if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_NX) {
update_mask |= MSR_EFER_NXE;
}
if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) {
update_mask |= MSR_EFER_SVME;
}
if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_FFXSR) {
update_mask |= MSR_EFER_FFXSR;
}
cpu_load_efer(env, (env->efer & ~update_mask) |
(val & update_mask));
}
break;
case MSR_STAR:
env->star = val;
break;
case MSR_PAT:
env->pat = val;
break;
case MSR_VM_HSAVE_PA:
env->vm_hsave = val;
break;
#ifdef TARGET_X86_64
case MSR_LSTAR:
env->lstar = val;
break;
case MSR_CSTAR:
env->cstar = val;
break;
case MSR_FMASK:
env->fmask = val;
break;
case MSR_FSBASE:
env->segs[R_FS].base = val;
break;
case MSR_GSBASE:
env->segs[R_GS].base = val;
break;
case MSR_KERNELGSBASE:
env->kernelgsbase = val;
break;
#endif
case MSR_MTRRphysBase(0):
case MSR_MTRRphysBase(1):
case MSR_MTRRphysBase(2):
case MSR_MTRRphysBase(3):
case MSR_MTRRphysBase(4):
case MSR_MTRRphysBase(5):
case MSR_MTRRphysBase(6):
case MSR_MTRRphysBase(7):
env->mtrr_var[((uint32_t)env->regs[R_ECX] -
MSR_MTRRphysBase(0)) / 2].base = val;
break;
case MSR_MTRRphysMask(0):
case MSR_MTRRphysMask(1):
case MSR_MTRRphysMask(2):
case MSR_MTRRphysMask(3):
case MSR_MTRRphysMask(4):
case MSR_MTRRphysMask(5):
case MSR_MTRRphysMask(6):
case MSR_MTRRphysMask(7):
env->mtrr_var[((uint32_t)env->regs[R_ECX] -
MSR_MTRRphysMask(0)) / 2].mask = val;
break;
case MSR_MTRRfix64K_00000:
env->mtrr_fixed[(uint32_t)env->regs[R_ECX] -
MSR_MTRRfix64K_00000] = val;
break;
case MSR_MTRRfix16K_80000:
case MSR_MTRRfix16K_A0000:
env->mtrr_fixed[(uint32_t)env->regs[R_ECX] -
MSR_MTRRfix16K_80000 + 1] = val;
break;
case MSR_MTRRfix4K_C0000:
case MSR_MTRRfix4K_C8000:
case MSR_MTRRfix4K_D0000:
case MSR_MTRRfix4K_D8000:
case MSR_MTRRfix4K_E0000:
case MSR_MTRRfix4K_E8000:
case MSR_MTRRfix4K_F0000:
case MSR_MTRRfix4K_F8000:
env->mtrr_fixed[(uint32_t)env->regs[R_ECX] -
MSR_MTRRfix4K_C0000 + 3] = val;
break;
case MSR_MTRRdefType:
env->mtrr_deftype = val;
break;
case MSR_MCG_STATUS:
env->mcg_status = val;
break;
case MSR_MCG_CTL:
if ((env->mcg_cap & MCG_CTL_P)
&& (val == 0 || val == ~(uint64_t)0)) {
env->mcg_ctl = val;
}
break;
case MSR_TSC_AUX:
env->tsc_aux = val;
break;
case MSR_IA32_MISC_ENABLE:
env->msr_ia32_misc_enable = val;
break;
case MSR_IA32_BNDCFGS:
/* FIXME: #GP if reserved bits are set. */
/* FIXME: Extend highest implemented bit of linear address. */
env->msr_bndcfgs = val;
cpu_sync_bndcs_hflags(env);
break;
default:
if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL
&& (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL +
(4 * env->mcg_cap & 0xff)) {
uint32_t offset = (uint32_t)env->regs[R_ECX] - MSR_MC0_CTL;
if ((offset & 0x3) != 0
|| (val == 0 || val == ~(uint64_t)0)) {
env->mce_banks[offset] = val;
}
break;
}
/* XXX: exception? */
break;
}
}
void helper_rdmsr(CPUX86State *env)
{
X86CPU *x86_cpu = env_archcpu(env);
uint64_t val;
cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC());
switch ((uint32_t)env->regs[R_ECX]) {
case MSR_IA32_SYSENTER_CS:
val = env->sysenter_cs;
break;
case MSR_IA32_SYSENTER_ESP:
val = env->sysenter_esp;
break;
case MSR_IA32_SYSENTER_EIP:
val = env->sysenter_eip;
break;
case MSR_IA32_APICBASE:
val = cpu_get_apic_base(env_archcpu(env)->apic_state);
break;
case MSR_EFER:
val = env->efer;
break;
case MSR_STAR:
val = env->star;
break;
case MSR_PAT:
val = env->pat;
break;
case MSR_VM_HSAVE_PA:
val = env->vm_hsave;
break;
case MSR_IA32_PERF_STATUS:
/* tsc_increment_by_tick */
val = 1000ULL;
/* CPU multiplier */
val |= (((uint64_t)4ULL) << 40);
break;
#ifdef TARGET_X86_64
case MSR_LSTAR:
val = env->lstar;
break;
case MSR_CSTAR:
val = env->cstar;
break;
case MSR_FMASK:
val = env->fmask;
break;
case MSR_FSBASE:
val = env->segs[R_FS].base;
break;
case MSR_GSBASE:
val = env->segs[R_GS].base;
break;
case MSR_KERNELGSBASE:
val = env->kernelgsbase;
break;
case MSR_TSC_AUX:
val = env->tsc_aux;
break;
#endif
case MSR_SMI_COUNT:
val = env->msr_smi_count;
break;
case MSR_MTRRphysBase(0):
case MSR_MTRRphysBase(1):
case MSR_MTRRphysBase(2):
case MSR_MTRRphysBase(3):
case MSR_MTRRphysBase(4):
case MSR_MTRRphysBase(5):
case MSR_MTRRphysBase(6):
case MSR_MTRRphysBase(7):
val = env->mtrr_var[((uint32_t)env->regs[R_ECX] -
MSR_MTRRphysBase(0)) / 2].base;
break;
case MSR_MTRRphysMask(0):
case MSR_MTRRphysMask(1):
case MSR_MTRRphysMask(2):
case MSR_MTRRphysMask(3):
case MSR_MTRRphysMask(4):
case MSR_MTRRphysMask(5):
case MSR_MTRRphysMask(6):
case MSR_MTRRphysMask(7):
val = env->mtrr_var[((uint32_t)env->regs[R_ECX] -
MSR_MTRRphysMask(0)) / 2].mask;
break;
case MSR_MTRRfix64K_00000:
val = env->mtrr_fixed[0];
break;
case MSR_MTRRfix16K_80000:
case MSR_MTRRfix16K_A0000:
val = env->mtrr_fixed[(uint32_t)env->regs[R_ECX] -
MSR_MTRRfix16K_80000 + 1];
break;
case MSR_MTRRfix4K_C0000:
case MSR_MTRRfix4K_C8000:
case MSR_MTRRfix4K_D0000:
case MSR_MTRRfix4K_D8000:
case MSR_MTRRfix4K_E0000:
case MSR_MTRRfix4K_E8000:
case MSR_MTRRfix4K_F0000:
case MSR_MTRRfix4K_F8000:
val = env->mtrr_fixed[(uint32_t)env->regs[R_ECX] -
MSR_MTRRfix4K_C0000 + 3];
break;
case MSR_MTRRdefType:
val = env->mtrr_deftype;
break;
case MSR_MTRRcap:
if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
val = MSR_MTRRcap_VCNT | MSR_MTRRcap_FIXRANGE_SUPPORT |
MSR_MTRRcap_WC_SUPPORTED;
} else {
/* XXX: exception? */
val = 0;
}
break;
case MSR_MCG_CAP:
val = env->mcg_cap;
break;
case MSR_MCG_CTL:
if (env->mcg_cap & MCG_CTL_P) {
val = env->mcg_ctl;
} else {
val = 0;
}
break;
case MSR_MCG_STATUS:
val = env->mcg_status;
break;
case MSR_IA32_MISC_ENABLE:
val = env->msr_ia32_misc_enable;
break;
case MSR_IA32_BNDCFGS:
val = env->msr_bndcfgs;
break;
case MSR_IA32_UCODE_REV:
val = x86_cpu->ucode_rev;
break;
default:
if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL
&& (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL +
(4 * env->mcg_cap & 0xff)) {
uint32_t offset = (uint32_t)env->regs[R_ECX] - MSR_MC0_CTL;
val = env->mce_banks[offset];
break;
}
/* XXX: exception? */
val = 0;
break;
}
env->regs[R_EAX] = (uint32_t)(val);
env->regs[R_EDX] = (uint32_t)(val >> 32);
}
#endif
target-i386: yield to another VCPU on PAUSE After commit b1bbfe7 (aio / timers: On timer modification, qemu_notify or aio_notify, 2013-08-21) FreeBSD guests report a huge slowdown. The problem shows up as soon as FreeBSD turns out its periodic (~1 ms) tick, but the timers are only the trigger for a pre-existing problem. Before the offending patch, setting a timer did a timer_settime system call. After, setting the timer exits the event loop (which uses poll) and reenters it with a new deadline. This does not cause any slowdown; the difference is between one system call (timer_settime and a signal delivery (SIGALRM) before the patch, and two system calls afterwards (write to a pipe or eventfd + calling poll again when re-entering the event loop). Unfortunately, the exit/enter causes the main loop to grab the iothread lock, which in turns kicks the VCPU thread out of execution. This causes TCG to execute the next VCPU in its round-robin scheduling of VCPUS. When the second VCPU is mostly unused, FreeBSD runs a "pause" instruction in its idle loop which only burns cycles without any progress. As soon as the timer tick expires, the first VCPU runs the interrupt handler but very soon it sets it again---and QEMU then goes back doing nothing in the second VCPU. The fix is to make the pause instruction do "cpu_loop_exit". Cc: Richard Henderson <rth@twiddle.net> Reported-by: Luigi Rizzo <rizzo@iet.unipi.it> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1384948442-24217-1-git-send-email-pbonzini@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
2013-11-20 15:54:02 +04:00
static void do_pause(X86CPU *cpu)
{
CPUState *cs = CPU(cpu);
target-i386: yield to another VCPU on PAUSE After commit b1bbfe7 (aio / timers: On timer modification, qemu_notify or aio_notify, 2013-08-21) FreeBSD guests report a huge slowdown. The problem shows up as soon as FreeBSD turns out its periodic (~1 ms) tick, but the timers are only the trigger for a pre-existing problem. Before the offending patch, setting a timer did a timer_settime system call. After, setting the timer exits the event loop (which uses poll) and reenters it with a new deadline. This does not cause any slowdown; the difference is between one system call (timer_settime and a signal delivery (SIGALRM) before the patch, and two system calls afterwards (write to a pipe or eventfd + calling poll again when re-entering the event loop). Unfortunately, the exit/enter causes the main loop to grab the iothread lock, which in turns kicks the VCPU thread out of execution. This causes TCG to execute the next VCPU in its round-robin scheduling of VCPUS. When the second VCPU is mostly unused, FreeBSD runs a "pause" instruction in its idle loop which only burns cycles without any progress. As soon as the timer tick expires, the first VCPU runs the interrupt handler but very soon it sets it again---and QEMU then goes back doing nothing in the second VCPU. The fix is to make the pause instruction do "cpu_loop_exit". Cc: Richard Henderson <rth@twiddle.net> Reported-by: Luigi Rizzo <rizzo@iet.unipi.it> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1384948442-24217-1-git-send-email-pbonzini@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
2013-11-20 15:54:02 +04:00
/* Just let another CPU run. */
cs->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cs);
target-i386: yield to another VCPU on PAUSE After commit b1bbfe7 (aio / timers: On timer modification, qemu_notify or aio_notify, 2013-08-21) FreeBSD guests report a huge slowdown. The problem shows up as soon as FreeBSD turns out its periodic (~1 ms) tick, but the timers are only the trigger for a pre-existing problem. Before the offending patch, setting a timer did a timer_settime system call. After, setting the timer exits the event loop (which uses poll) and reenters it with a new deadline. This does not cause any slowdown; the difference is between one system call (timer_settime and a signal delivery (SIGALRM) before the patch, and two system calls afterwards (write to a pipe or eventfd + calling poll again when re-entering the event loop). Unfortunately, the exit/enter causes the main loop to grab the iothread lock, which in turns kicks the VCPU thread out of execution. This causes TCG to execute the next VCPU in its round-robin scheduling of VCPUS. When the second VCPU is mostly unused, FreeBSD runs a "pause" instruction in its idle loop which only burns cycles without any progress. As soon as the timer tick expires, the first VCPU runs the interrupt handler but very soon it sets it again---and QEMU then goes back doing nothing in the second VCPU. The fix is to make the pause instruction do "cpu_loop_exit". Cc: Richard Henderson <rth@twiddle.net> Reported-by: Luigi Rizzo <rizzo@iet.unipi.it> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1384948442-24217-1-git-send-email-pbonzini@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
2013-11-20 15:54:02 +04:00
}
static void do_hlt(X86CPU *cpu)
{
CPUState *cs = CPU(cpu);
CPUX86State *env = &cpu->env;
env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
cs->halted = 1;
cs->exception_index = EXCP_HLT;
cpu_loop_exit(cs);
}
void helper_hlt(CPUX86State *env, int next_eip_addend)
{
X86CPU *cpu = env_archcpu(env);
cpu_svm_check_intercept_param(env, SVM_EXIT_HLT, 0, GETPC());
env->eip += next_eip_addend;
do_hlt(cpu);
}
void helper_monitor(CPUX86State *env, target_ulong ptr)
{
if ((uint32_t)env->regs[R_ECX] != 0) {
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
/* XXX: store address? */
cpu_svm_check_intercept_param(env, SVM_EXIT_MONITOR, 0, GETPC());
}
void helper_mwait(CPUX86State *env, int next_eip_addend)
{
CPUState *cs = env_cpu(env);
X86CPU *cpu = env_archcpu(env);
if ((uint32_t)env->regs[R_ECX] != 0) {
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
cpu_svm_check_intercept_param(env, SVM_EXIT_MWAIT, 0, GETPC());
env->eip += next_eip_addend;
/* XXX: not complete but not completely erroneous */
if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) {
target-i386: yield to another VCPU on PAUSE After commit b1bbfe7 (aio / timers: On timer modification, qemu_notify or aio_notify, 2013-08-21) FreeBSD guests report a huge slowdown. The problem shows up as soon as FreeBSD turns out its periodic (~1 ms) tick, but the timers are only the trigger for a pre-existing problem. Before the offending patch, setting a timer did a timer_settime system call. After, setting the timer exits the event loop (which uses poll) and reenters it with a new deadline. This does not cause any slowdown; the difference is between one system call (timer_settime and a signal delivery (SIGALRM) before the patch, and two system calls afterwards (write to a pipe or eventfd + calling poll again when re-entering the event loop). Unfortunately, the exit/enter causes the main loop to grab the iothread lock, which in turns kicks the VCPU thread out of execution. This causes TCG to execute the next VCPU in its round-robin scheduling of VCPUS. When the second VCPU is mostly unused, FreeBSD runs a "pause" instruction in its idle loop which only burns cycles without any progress. As soon as the timer tick expires, the first VCPU runs the interrupt handler but very soon it sets it again---and QEMU then goes back doing nothing in the second VCPU. The fix is to make the pause instruction do "cpu_loop_exit". Cc: Richard Henderson <rth@twiddle.net> Reported-by: Luigi Rizzo <rizzo@iet.unipi.it> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1384948442-24217-1-git-send-email-pbonzini@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
2013-11-20 15:54:02 +04:00
do_pause(cpu);
} else {
do_hlt(cpu);
}
}
target-i386: yield to another VCPU on PAUSE After commit b1bbfe7 (aio / timers: On timer modification, qemu_notify or aio_notify, 2013-08-21) FreeBSD guests report a huge slowdown. The problem shows up as soon as FreeBSD turns out its periodic (~1 ms) tick, but the timers are only the trigger for a pre-existing problem. Before the offending patch, setting a timer did a timer_settime system call. After, setting the timer exits the event loop (which uses poll) and reenters it with a new deadline. This does not cause any slowdown; the difference is between one system call (timer_settime and a signal delivery (SIGALRM) before the patch, and two system calls afterwards (write to a pipe or eventfd + calling poll again when re-entering the event loop). Unfortunately, the exit/enter causes the main loop to grab the iothread lock, which in turns kicks the VCPU thread out of execution. This causes TCG to execute the next VCPU in its round-robin scheduling of VCPUS. When the second VCPU is mostly unused, FreeBSD runs a "pause" instruction in its idle loop which only burns cycles without any progress. As soon as the timer tick expires, the first VCPU runs the interrupt handler but very soon it sets it again---and QEMU then goes back doing nothing in the second VCPU. The fix is to make the pause instruction do "cpu_loop_exit". Cc: Richard Henderson <rth@twiddle.net> Reported-by: Luigi Rizzo <rizzo@iet.unipi.it> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1384948442-24217-1-git-send-email-pbonzini@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
2013-11-20 15:54:02 +04:00
void helper_pause(CPUX86State *env, int next_eip_addend)
{
X86CPU *cpu = env_archcpu(env);
target-i386: yield to another VCPU on PAUSE After commit b1bbfe7 (aio / timers: On timer modification, qemu_notify or aio_notify, 2013-08-21) FreeBSD guests report a huge slowdown. The problem shows up as soon as FreeBSD turns out its periodic (~1 ms) tick, but the timers are only the trigger for a pre-existing problem. Before the offending patch, setting a timer did a timer_settime system call. After, setting the timer exits the event loop (which uses poll) and reenters it with a new deadline. This does not cause any slowdown; the difference is between one system call (timer_settime and a signal delivery (SIGALRM) before the patch, and two system calls afterwards (write to a pipe or eventfd + calling poll again when re-entering the event loop). Unfortunately, the exit/enter causes the main loop to grab the iothread lock, which in turns kicks the VCPU thread out of execution. This causes TCG to execute the next VCPU in its round-robin scheduling of VCPUS. When the second VCPU is mostly unused, FreeBSD runs a "pause" instruction in its idle loop which only burns cycles without any progress. As soon as the timer tick expires, the first VCPU runs the interrupt handler but very soon it sets it again---and QEMU then goes back doing nothing in the second VCPU. The fix is to make the pause instruction do "cpu_loop_exit". Cc: Richard Henderson <rth@twiddle.net> Reported-by: Luigi Rizzo <rizzo@iet.unipi.it> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1384948442-24217-1-git-send-email-pbonzini@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
2013-11-20 15:54:02 +04:00
cpu_svm_check_intercept_param(env, SVM_EXIT_PAUSE, 0, GETPC());
target-i386: yield to another VCPU on PAUSE After commit b1bbfe7 (aio / timers: On timer modification, qemu_notify or aio_notify, 2013-08-21) FreeBSD guests report a huge slowdown. The problem shows up as soon as FreeBSD turns out its periodic (~1 ms) tick, but the timers are only the trigger for a pre-existing problem. Before the offending patch, setting a timer did a timer_settime system call. After, setting the timer exits the event loop (which uses poll) and reenters it with a new deadline. This does not cause any slowdown; the difference is between one system call (timer_settime and a signal delivery (SIGALRM) before the patch, and two system calls afterwards (write to a pipe or eventfd + calling poll again when re-entering the event loop). Unfortunately, the exit/enter causes the main loop to grab the iothread lock, which in turns kicks the VCPU thread out of execution. This causes TCG to execute the next VCPU in its round-robin scheduling of VCPUS. When the second VCPU is mostly unused, FreeBSD runs a "pause" instruction in its idle loop which only burns cycles without any progress. As soon as the timer tick expires, the first VCPU runs the interrupt handler but very soon it sets it again---and QEMU then goes back doing nothing in the second VCPU. The fix is to make the pause instruction do "cpu_loop_exit". Cc: Richard Henderson <rth@twiddle.net> Reported-by: Luigi Rizzo <rizzo@iet.unipi.it> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net> Message-id: 1384948442-24217-1-git-send-email-pbonzini@redhat.com Signed-off-by: Anthony Liguori <aliguori@amazon.com>
2013-11-20 15:54:02 +04:00
env->eip += next_eip_addend;
do_pause(cpu);
}
void helper_debug(CPUX86State *env)
{
CPUState *cs = env_cpu(env);
cs->exception_index = EXCP_DEBUG;
cpu_loop_exit(cs);
}
uint64_t helper_rdpkru(CPUX86State *env, uint32_t ecx)
{
if ((env->cr[4] & CR4_PKE_MASK) == 0) {
raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC());
}
if (ecx != 0) {
raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
}
return env->pkru;
}
void helper_wrpkru(CPUX86State *env, uint32_t ecx, uint64_t val)
{
CPUState *cs = env_cpu(env);
if ((env->cr[4] & CR4_PKE_MASK) == 0) {
raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC());
}
if (ecx != 0 || (val & 0xFFFFFFFF00000000ull)) {
raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
}
env->pkru = val;
tlb_flush(cs);
}