* ac97 cleanups (Zoltan)
* default the amount of prealloc-threads to smp-cpus (Jaroslav) * fix disabling MPX on "-cpu host" with MPX-capable host (Maciej) * thread-pool performance optimizations (myself) * Hyper-V enlightenment enabling and docs (Vitaly) * check ELF header in elf2dmp (Viktor) * tweak LBREn migration (Weijiang) -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmKOgwgUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroOO3Qf7Btcvr2ex9qZ1yThlmZ6hl20WvQZe GlKBq5xJnx2FUpvrH/AiNl2qfiBN5emhzJp1oBieQusDDsWVblmRpWgzUkUZvh0H s5rKsNuOPdhqaxLH4sRCXS2FCVOy81d+lc9yYe5bzy3EHDO/qzMjye+JoBhXtQve 3gOcOb1srIB/xSGNur2iCJkcauhBOipOo77kryfWekfReA3glHGnwhuEO+F+gXT3 hiEO6TuRHjVrVCExbsDJb2pV2sSH6FxOP09BZ84IT0puv/FfgnUGCiNVfVNmMgNq KYysG7vPlRSaDX17bt3UlS4Y6yKb1vZpnvymRRkWxWLIfuAVVNm0vgHBpg== =gX2j -----END PGP SIGNATURE----- Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging * ac97 cleanups (Zoltan) * default the amount of prealloc-threads to smp-cpus (Jaroslav) * fix disabling MPX on "-cpu host" with MPX-capable host (Maciej) * thread-pool performance optimizations (myself) * Hyper-V enlightenment enabling and docs (Vitaly) * check ELF header in elf2dmp (Viktor) * tweak LBREn migration (Weijiang) # -----BEGIN PGP SIGNATURE----- # # iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmKOgwgUHHBib256aW5p # QHJlZGhhdC5jb20ACgkQv/vSX3jHroOO3Qf7Btcvr2ex9qZ1yThlmZ6hl20WvQZe # GlKBq5xJnx2FUpvrH/AiNl2qfiBN5emhzJp1oBieQusDDsWVblmRpWgzUkUZvh0H # s5rKsNuOPdhqaxLH4sRCXS2FCVOy81d+lc9yYe5bzy3EHDO/qzMjye+JoBhXtQve # 3gOcOb1srIB/xSGNur2iCJkcauhBOipOo77kryfWekfReA3glHGnwhuEO+F+gXT3 # hiEO6TuRHjVrVCExbsDJb2pV2sSH6FxOP09BZ84IT0puv/FfgnUGCiNVfVNmMgNq # KYysG7vPlRSaDX17bt3UlS4Y6yKb1vZpnvymRRkWxWLIfuAVVNm0vgHBpg== # =gX2j # -----END PGP SIGNATURE----- # gpg: Signature made Wed 25 May 2022 12:27:04 PM PDT # gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83 # gpg: issuer "pbonzini@redhat.com" # gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [undefined] # gpg: aka 
"Paolo Bonzini <pbonzini@redhat.com>" [undefined] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * tag 'for-upstream' of https://gitlab.com/bonzini/qemu: i386: docs: Convert hyperv.txt to rST i386: Hyper-V Direct TLB flush hypercall i386: Hyper-V Support extended GVA ranges for TLB flush hypercalls i386: Hyper-V XMM fast hypercall input feature i386: Hyper-V Enlightened MSR bitmap feature i386: Use hv_build_cpuid_leaf() for HV_CPUID_NESTED_FEATURES ide_ioport_read: Return lower octet of data register instead of 0xFF target/i386/kvm: Fix disabling MPX on "-cpu host" with MPX-capable host hw/audio/ac97: Remove unneeded local variables hw/audio/ac97: Remove unimplemented reset functions hw/audio/ac97: Coding style fixes to avoid checkpatch errors contrib/elf2dmp: add ELF dump header checking thread-pool: remove stopping variable thread-pool: replace semaphore with condition variable thread-pool: optimize scheduling of completion bottom half hostmem: default the amount of prealloc-threads to smp-cpus target/i386: Remove LBREn bit check when access Arch LBR MSRs Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
commit
58b53669e8
@ -274,7 +274,7 @@ static void host_memory_backend_init(Object *obj)
|
||||
backend->merge = machine_mem_merge(machine);
|
||||
backend->dump = machine_dump_guest_core(machine);
|
||||
backend->reserve = true;
|
||||
backend->prealloc_threads = 1;
|
||||
backend->prealloc_threads = machine->smp.cpus;
|
||||
}
|
||||
|
||||
static void host_memory_backend_post_init(Object *obj)
|
||||
|
@ -118,6 +118,53 @@ static void exit_states(QEMU_Elf *qe)
|
||||
free(qe->state);
|
||||
}
|
||||
|
||||
/*
 * Validate the ELF header found at the start of the mapped input file.
 *
 * The checks run in this order: the mapping is large enough to hold an
 * Elf64_Ehdr, the ELF magic matches, the identification bytes describe a
 * 64-bit little-endian file of the current version, the machine is x86_64,
 * the type is a core file, and at least two program headers are present.
 *
 * On the first failed check a diagnostic is printed with eprintf() and
 * false is returned. Returns true when every check passes.
 */
static bool check_ehdr(QEMU_Elf *qe)
{
    const Elf64_Ehdr *hdr = qe->map;
    const unsigned char *ident;

    /* Nothing below may be read unless a whole header fits in the file. */
    if (qe->size < sizeof(Elf64_Ehdr)) {
        eprintf("Invalid input dump file size\n");
        return false;
    }

    ident = hdr->e_ident;

    if (memcmp(ident, ELFMAG, SELFMAG) != 0) {
        eprintf("Invalid ELF signature, input file is not ELF\n");
        return false;
    }

    if (!(ident[EI_CLASS] == ELFCLASS64 && ident[EI_DATA] == ELFDATA2LSB)) {
        eprintf("Invalid ELF class or byte order, must be 64-bit LE\n");
        return false;
    }

    if (ident[EI_VERSION] != EV_CURRENT) {
        eprintf("Invalid ELF version\n");
        return false;
    }

    if (hdr->e_machine != EM_X86_64) {
        eprintf("Invalid input dump architecture, only x86_64 is supported\n");
        return false;
    }

    if (hdr->e_type != ET_CORE) {
        eprintf("Invalid ELF type, must be core file\n");
        return false;
    }

    /*
     * ELF dump file must contain one PT_NOTE and at least one PT_LOAD to
     * restore physical address space.
     */
    if (hdr->e_phnum < 2) {
        eprintf("Invalid number of ELF program headers\n");
        return false;
    }

    return true;
}
|
||||
|
||||
int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
|
||||
{
|
||||
GError *gerr = NULL;
|
||||
@ -133,6 +180,12 @@ int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
|
||||
qe->map = g_mapped_file_get_contents(qe->gmf);
|
||||
qe->size = g_mapped_file_get_length(qe->gmf);
|
||||
|
||||
if (!check_ehdr(qe)) {
|
||||
eprintf("Input file has the wrong format\n");
|
||||
err = 1;
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
if (init_states(qe)) {
|
||||
eprintf("Failed to extract QEMU CPU states\n");
|
||||
err = 1;
|
||||
|
270
docs/hyperv.txt
270
docs/hyperv.txt
@ -1,270 +0,0 @@
|
||||
Hyper-V Enlightenments
|
||||
======================
|
||||
|
||||
|
||||
1. Description
|
||||
===============
|
||||
In some cases when implementing a hardware interface in software is slow, KVM
|
||||
implements its own paravirtualized interfaces. This works well for Linux as
|
||||
guest support for such features is added simultaneously with the feature itself.
|
||||
It may, however, be hard-to-impossible to add support for these interfaces to
|
||||
proprietary OSes, namely, Microsoft Windows.
|
||||
|
||||
KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features
|
||||
make Windows and Hyper-V guests think they're running on top of a Hyper-V
|
||||
compatible hypervisor and use Hyper-V specific features.
|
||||
|
||||
|
||||
2. Setup
|
||||
=========
|
||||
No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In
|
||||
QEMU, individual enlightenments can be enabled through CPU flags, e.g.:
|
||||
|
||||
qemu-system-x86_64 --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ...
|
||||
|
||||
Sometimes there are dependencies between enlightenments, QEMU is supposed to
|
||||
check that the supplied configuration is sane.
|
||||
|
||||
When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor
|
||||
identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification
|
||||
and features are kept in leaves 0x40000100..0x40000101.
|
||||
|
||||
|
||||
3. Existing enlightenments
|
||||
===========================
|
||||
|
||||
3.1. hv-relaxed
|
||||
================
|
||||
This feature tells guest OS to disable watchdog timeouts as it is running on a
|
||||
hypervisor. It is known that some Windows versions will do this even when they
|
||||
see 'hypervisor' CPU flag.
|
||||
|
||||
3.2. hv-vapic
|
||||
==============
|
||||
Provides so-called VP Assist page MSR to guest allowing it to work with APIC
|
||||
more efficiently. In particular, this enlightenment allows paravirtualized
|
||||
(exit-less) EOI processing.
|
||||
|
||||
3.3. hv-spinlocks=xxx
|
||||
======================
|
||||
Enables paravirtualized spinlocks. The parameter indicates how many times
|
||||
spinlock acquisition should be attempted before indicating the situation to the
|
||||
hypervisor. A special value 0xffffffff indicates "never notify".
|
||||
|
||||
3.4. hv-vpindex
|
||||
================
|
||||
Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual
|
||||
processor index information. This enlightenment makes sense in conjunction with
|
||||
hv-synic, hv-stimer and other enlightenments which require the guest to know its
|
||||
Virtual Processor indices (e.g. when VP index needs to be passed in a
|
||||
hypercall).
|
||||
|
||||
3.5. hv-runtime
|
||||
================
|
||||
Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the
|
||||
virtual processor run time in 100ns units. This gives guest operating system an
|
||||
idea of how much time was 'stolen' from it (when the virtual CPU was preempted
|
||||
to perform some other work).
|
||||
|
||||
3.6. hv-crash
|
||||
==============
|
||||
Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 (0x40000100..0x40000104) and
|
||||
HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to
|
||||
by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 MSRs
|
||||
contain additional crash information. This information is outputted in QEMU log
|
||||
and through QAPI.
|
||||
Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest
|
||||
to shutdown. This effectively blocks crash dump generation by Windows.
|
||||
|
||||
3.7. hv-time
|
||||
=============
|
||||
Enables two Hyper-V-specific clocksources available to the guest: MSR-based
|
||||
Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC
|
||||
page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources
|
||||
are per-guest, Reference TSC page clocksource allows for exit-less time stamp
|
||||
readings. Using this enlightenment leads to significant speedup of all timestamp
|
||||
related operations.
|
||||
|
||||
3.8. hv-synic
|
||||
==============
|
||||
Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC.
|
||||
When enabled, this enlightenment provides additional communication facilities
|
||||
to the guest: SynIC messages and Events. This is a pre-requisite for
|
||||
implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment
|
||||
is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs
|
||||
HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and
|
||||
HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F)
|
||||
|
||||
Requires: hv-vpindex
|
||||
|
||||
3.9. hv-stimer
|
||||
===============
|
||||
Enables Hyper-V synthetic timers. There are four synthetic timers per virtual
|
||||
CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STIMER3_COUNT
|
||||
(0x400000B0..0x400000B7) MSRs. These timers can work either in single-shot or
|
||||
periodic mode. It is known that certain Windows versions revert to using HPET
|
||||
(or even RTC when HPET is unavailable) extensively when this enlightenment is
|
||||
not provided; this can lead to significant CPU consumption, even when virtual
|
||||
CPU is idle.
|
||||
|
||||
Requires: hv-vpindex, hv-synic, hv-time
|
||||
|
||||
3.10. hv-tlbflush
|
||||
==================
|
||||
Enables paravirtualized TLB shoot-down mechanism. On x86 architecture, remote
|
||||
TLB flush procedure requires sending IPIs and waiting for other CPUs to perform
|
||||
local TLB flush. In virtualized environment some virtual CPUs may not even be
|
||||
scheduled at the time of the call and may not require flushing (or, flushing
|
||||
may be postponed until the virtual CPU is scheduled). hv-tlbflush enlightenment
|
||||
implements TLB shoot-down through hypervisor enabling the optimization.
|
||||
|
||||
Requires: hv-vpindex
|
||||
|
||||
3.11. hv-ipi
|
||||
=============
|
||||
Enables paravirtualized IPI send mechanism. HvCallSendSyntheticClusterIpi
|
||||
hypercall may target more than 64 virtual CPUs simultaneously, doing the same
|
||||
through APIC requires more than one access (and thus exit to the hypervisor).
|
||||
|
||||
Requires: hv-vpindex
|
||||
|
||||
3.12. hv-vendor-id=xxx
|
||||
=======================
|
||||
This changes Hyper-V identification in CPUID 0x40000000.EBX-EDX from the default
|
||||
"Microsoft Hv". The parameter should be no longer than 12 characters. According
|
||||
to the specification, guests shouldn't use this information and it is unknown
|
||||
if there is a Windows version which acts differently.
|
||||
Note: hv-vendor-id is not an enlightenment and thus doesn't enable Hyper-V
|
||||
identification when specified without some other enlightenment.
|
||||
|
||||
3.13. hv-reset
|
||||
===============
|
||||
Provides HV_X64_MSR_RESET (0x40000003) MSR to the guest allowing it to reset
|
||||
itself by writing to it. Even when this MSR is enabled, it is not a recommended
|
||||
way for Windows to perform system reboot and thus it may not be used.
|
||||
|
||||
3.14. hv-frequencies
|
||||
============================================
|
||||
Provides HV_X64_MSR_TSC_FREQUENCY (0x40000022) and HV_X64_MSR_APIC_FREQUENCY
|
||||
(0x40000023) allowing the guest to get its TSC/APIC frequencies without doing
|
||||
measurements.
|
||||
|
||||
3.15 hv-reenlightenment
|
||||
========================
|
||||
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
|
||||
enabled, it provides HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106),
|
||||
HV_X64_MSR_TSC_EMULATION_CONTROL (0x40000107) and HV_X64_MSR_TSC_EMULATION_STATUS
|
||||
(0x40000108) MSRs allowing the guest to get notified when TSC frequency changes
|
||||
(only happens on migration) and keep using old frequency (through emulation in
|
||||
the hypervisor) until it is ready to switch to the new one. This, in conjunction
|
||||
with hv-frequencies, allows Hyper-V on KVM to pass stable clocksource (Reference
|
||||
TSC page) to its own guests.
|
||||
|
||||
Note, KVM doesn't fully support re-enlightenment notifications and doesn't
|
||||
emulate TSC accesses after migration so 'tsc-frequency=' CPU option also has to
|
||||
be specified to make migration succeed. The destination host has to either have
|
||||
the same TSC frequency or support TSC scaling CPU feature.
|
||||
|
||||
Recommended: hv-frequencies
|
||||
|
||||
3.16. hv-evmcs
|
||||
===============
|
||||
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
|
||||
enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature
|
||||
implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V)
|
||||
hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only.
|
||||
Note: some virtualization features (e.g. Posted Interrupts) are disabled when
|
||||
hv-evmcs is enabled. It may make sense to measure your nested workload with and
|
||||
without the feature to find out if enabling it is beneficial.
|
||||
|
||||
Requires: hv-vapic
|
||||
|
||||
3.17. hv-stimer-direct
|
||||
=======================
|
||||
Hyper-V specification allows synthetic timer operation in two modes: "classic",
|
||||
when expiration event is delivered as SynIC message and "direct", when the event
|
||||
is delivered via normal interrupt. It is known that nested Hyper-V can only
|
||||
use synthetic timers in direct mode and thus 'hv-stimer-direct' needs to be
|
||||
enabled.
|
||||
|
||||
Requires: hv-vpindex, hv-synic, hv-time, hv-stimer
|
||||
|
||||
3.18. hv-avic (hv-apicv)
|
||||
=======================
|
||||
The enlightenment allows to use Hyper-V SynIC with hardware APICv/AVIC enabled.
|
||||
Normally, Hyper-V SynIC disables these hardware features and suggests the guest
|
||||
to use paravirtualized AutoEOI feature.
|
||||
Note: enabling this feature on old hardware (without APICv/AVIC support) may
|
||||
have negative effect on guest's performance.
|
||||
|
||||
3.19. hv-no-nonarch-coresharing=on/off/auto
|
||||
===========================================
|
||||
This enlightenment tells guest OS that virtual processors will never share a
|
||||
physical core unless they are reported as sibling SMT threads. This information
|
||||
is required by Windows and Hyper-V guests to properly mitigate SMT related CPU
|
||||
vulnerabilities.
|
||||
When the option is set to 'auto' QEMU will enable the feature only when KVM
|
||||
reports that non-architectural coresharing is impossible, this means that
|
||||
hyper-threading is not supported or completely disabled on the host. This
|
||||
setting also prevents migration as SMT settings on the destination may differ.
|
||||
When the option is set to 'on' QEMU will always enable the feature, regardless
|
||||
of host setup. To keep guests secure, this can only be used in conjunction with
|
||||
exposing correct vCPU topology and vCPU pinning.
|
||||
|
||||
3.20. hv-version-id-{build,major,minor,spack,sbranch,snumber}
|
||||
=============================================================
|
||||
This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the
|
||||
default (WS2016).
|
||||
- hv-version-id-build sets 'Build Number' (32 bits)
|
||||
- hv-version-id-major sets 'Major Version' (16 bits)
|
||||
- hv-version-id-minor sets 'Minor Version' (16 bits)
|
||||
- hv-version-id-spack sets 'Service Pack' (32 bits)
|
||||
- hv-version-id-sbranch sets 'Service Branch' (8 bits)
|
||||
- hv-version-id-snumber sets 'Service Number' (24 bits)
|
||||
|
||||
Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V
|
||||
identification when specified without any other enlightenments.
|
||||
|
||||
3.21. hv-syndbg
|
||||
===============
|
||||
Enables Hyper-V synthetic debugger interface, this is a special interface used
|
||||
by Windows Kernel debugger to send the packets through, rather than sending
|
||||
them via serial/network.
|
||||
When enabled, this enlightenment provides additional communication facilities
|
||||
to the guest: SynDbg messages.
|
||||
This new communication is used by Windows Kernel debugger rather than sending
|
||||
packets via serial/network, adding significant performance boost over the other
|
||||
comm channels.
|
||||
This enlightenment requires a VMBus device (-device vmbus-bridge,irq=15)
|
||||
and the following enlightenments to work:
|
||||
hv-relaxed,hv-time,hv-vapic,hv-vpindex,hv-synic,hv-runtime,hv-stimer
|
||||
|
||||
|
||||
4. Supplementary features
|
||||
=========================
|
||||
|
||||
4.1. hv-passthrough
|
||||
===================
|
||||
In some cases (e.g. during development) it may make sense to use QEMU in
|
||||
'pass-through' mode and give Windows guests all enlightenments currently
|
||||
supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU
|
||||
flag.
|
||||
Note: "hv-passthrough" flag only enables enlightenments which are known to QEMU
|
||||
(have corresponding "hv-*" flag) and copies "hv-spinlocks="/"hv-vendor-id="
|
||||
values from KVM to QEMU. "hv-passthrough" overrides all other "hv-*" settings on
|
||||
the command line. Also, enabling this flag effectively prevents migration as the
|
||||
list of enabled enlightenments may differ between target and destination hosts.
|
||||
|
||||
4.2. hv-enforce-cpuid
|
||||
=====================
|
||||
By default, KVM allows the guest to use all currently supported Hyper-V
|
||||
enlightenments when Hyper-V CPUID interface was exposed, regardless of if
|
||||
some features were not announced in guest visible CPUIDs. 'hv-enforce-cpuid'
|
||||
feature alters this behavior and only allows the guest to use exposed Hyper-V
|
||||
enlightenments.
|
||||
|
||||
|
||||
5. Useful links
|
||||
================
|
||||
Hyper-V Top Level Functional specification and other information:
|
||||
https://github.com/MicrosoftDocs/Virtualization-Documentation
|
288
docs/system/i386/hyperv.rst
Normal file
288
docs/system/i386/hyperv.rst
Normal file
@ -0,0 +1,288 @@
|
||||
Hyper-V Enlightenments
|
||||
======================
|
||||
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
In some cases when implementing a hardware interface in software is slow, KVM
|
||||
implements its own paravirtualized interfaces. This works well for Linux as
|
||||
guest support for such features is added simultaneously with the feature itself.
|
||||
It may, however, be hard-to-impossible to add support for these interfaces to
|
||||
proprietary OSes, namely, Microsoft Windows.
|
||||
|
||||
KVM on x86 implements Hyper-V Enlightenments for Windows guests. These features
|
||||
make Windows and Hyper-V guests think they're running on top of a Hyper-V
|
||||
compatible hypervisor and use Hyper-V specific features.
|
||||
|
||||
|
||||
Setup
|
||||
-----
|
||||
|
||||
No Hyper-V enlightenments are enabled by default by either KVM or QEMU. In
|
||||
QEMU, individual enlightenments can be enabled through CPU flags, e.g.:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
|qemu_system| --enable-kvm --cpu host,hv_relaxed,hv_vpindex,hv_time, ...
|
||||
|
||||
Sometimes there are dependencies between enlightenments; QEMU is supposed to
|
||||
check that the supplied configuration is sane.
|
||||
|
||||
When any set of the Hyper-V enlightenments is enabled, QEMU changes hypervisor
|
||||
identification (CPUID 0x40000000..0x4000000A) to Hyper-V. KVM identification
|
||||
and features are kept in leaves 0x40000100..0x40000101.
|
||||
|
||||
|
||||
Existing enlightenments
|
||||
-----------------------
|
||||
|
||||
``hv-relaxed``
|
||||
This feature tells guest OS to disable watchdog timeouts as it is running on a
|
||||
hypervisor. It is known that some Windows versions will do this even when they
|
||||
see 'hypervisor' CPU flag.
|
||||
|
||||
``hv-vapic``
|
||||
Provides so-called VP Assist page MSR to guest allowing it to work with APIC
|
||||
more efficiently. In particular, this enlightenment allows paravirtualized
|
||||
(exit-less) EOI processing.
|
||||
|
||||
``hv-spinlocks`` = xxx
|
||||
Enables paravirtualized spinlocks. The parameter indicates how many times
|
||||
spinlock acquisition should be attempted before indicating the situation to the
|
||||
hypervisor. A special value 0xffffffff indicates "never notify".
|
||||
|
||||
``hv-vpindex``
|
||||
Provides HV_X64_MSR_VP_INDEX (0x40000002) MSR to the guest which has Virtual
|
||||
processor index information. This enlightenment makes sense in conjunction with
|
||||
hv-synic, hv-stimer and other enlightenments which require the guest to know its
|
||||
Virtual Processor indices (e.g. when VP index needs to be passed in a
|
||||
hypercall).
|
||||
|
||||
``hv-runtime``
|
||||
Provides HV_X64_MSR_VP_RUNTIME (0x40000010) MSR to the guest. The MSR keeps the
|
||||
virtual processor run time in 100ns units. This gives guest operating system an
|
||||
idea of how much time was 'stolen' from it (when the virtual CPU was preempted
|
||||
to perform some other work).
|
||||
|
||||
``hv-crash``
|
||||
Provides HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 (0x40000100..0x40000104) and
|
||||
HV_X64_MSR_CRASH_CTL (0x40000105) MSRs to the guest. These MSRs are written to
|
||||
by the guest when it crashes, HV_X64_MSR_CRASH_P0..HV_X64_MSR_CRASH_P4 MSRs
|
||||
contain additional crash information. This information is outputted in QEMU log
|
||||
and through QAPI.
|
||||
Note: unlike under genuine Hyper-V, write to HV_X64_MSR_CRASH_CTL causes guest
|
||||
to shut down. This effectively blocks crash dump generation by Windows.
|
||||
|
||||
``hv-time``
|
||||
Enables two Hyper-V-specific clocksources available to the guest: MSR-based
|
||||
Hyper-V clocksource (HV_X64_MSR_TIME_REF_COUNT, 0x40000020) and Reference TSC
|
||||
page (enabled via MSR HV_X64_MSR_REFERENCE_TSC, 0x40000021). Both clocksources
|
||||
are per-guest, Reference TSC page clocksource allows for exit-less time stamp
|
||||
readings. Using this enlightenment leads to significant speedup of all timestamp
|
||||
related operations.
|
||||
|
||||
``hv-synic``
|
||||
Enables Hyper-V Synthetic interrupt controller - an extension of a local APIC.
|
||||
When enabled, this enlightenment provides additional communication facilities
|
||||
to the guest: SynIC messages and Events. This is a pre-requisite for
|
||||
implementing VMBus devices (not yet in QEMU). Additionally, this enlightenment
|
||||
is needed to enable Hyper-V synthetic timers. SynIC is controlled through MSRs
|
||||
HV_X64_MSR_SCONTROL..HV_X64_MSR_EOM (0x40000080..0x40000084) and
|
||||
HV_X64_MSR_SINT0..HV_X64_MSR_SINT15 (0x40000090..0x4000009F)
|
||||
|
||||
Requires: ``hv-vpindex``
|
||||
|
||||
``hv-stimer``
|
||||
Enables Hyper-V synthetic timers. There are four synthetic timers per virtual
|
||||
CPU controlled through HV_X64_MSR_STIMER0_CONFIG..HV_X64_MSR_STIMER3_COUNT
|
||||
(0x400000B0..0x400000B7) MSRs. These timers can work either in single-shot or
|
||||
periodic mode. It is known that certain Windows versions revert to using HPET
|
||||
(or even RTC when HPET is unavailable) extensively when this enlightenment is
|
||||
not provided; this can lead to significant CPU consumption, even when virtual
|
||||
CPU is idle.
|
||||
|
||||
Requires: ``hv-vpindex``, ``hv-synic``, ``hv-time``
|
||||
|
||||
``hv-tlbflush``
|
||||
Enables paravirtualized TLB shoot-down mechanism. On x86 architecture, remote
|
||||
TLB flush procedure requires sending IPIs and waiting for other CPUs to perform
|
||||
local TLB flush. In virtualized environment some virtual CPUs may not even be
|
||||
scheduled at the time of the call and may not require flushing (or, flushing
|
||||
may be postponed until the virtual CPU is scheduled). hv-tlbflush enlightenment
|
||||
implements TLB shoot-down through hypervisor enabling the optimization.
|
||||
|
||||
Requires: ``hv-vpindex``
|
||||
|
||||
``hv-ipi``
|
||||
Enables paravirtualized IPI send mechanism. HvCallSendSyntheticClusterIpi
|
||||
hypercall may target more than 64 virtual CPUs simultaneously, doing the same
|
||||
through APIC requires more than one access (and thus exit to the hypervisor).
|
||||
|
||||
Requires: ``hv-vpindex``
|
||||
|
||||
``hv-vendor-id`` = xxx
|
||||
This changes Hyper-V identification in CPUID 0x40000000.EBX-EDX from the default
|
||||
"Microsoft Hv". The parameter should be no longer than 12 characters. According
|
||||
to the specification, guests shouldn't use this information and it is unknown
|
||||
if there is a Windows version which acts differently.
|
||||
Note: hv-vendor-id is not an enlightenment and thus doesn't enable Hyper-V
|
||||
identification when specified without some other enlightenment.
|
||||
|
||||
``hv-reset``
|
||||
Provides HV_X64_MSR_RESET (0x40000003) MSR to the guest allowing it to reset
|
||||
itself by writing to it. Even when this MSR is enabled, it is not a recommended
|
||||
way for Windows to perform system reboot and thus it may not be used.
|
||||
|
||||
``hv-frequencies``
|
||||
Provides HV_X64_MSR_TSC_FREQUENCY (0x40000022) and HV_X64_MSR_APIC_FREQUENCY
|
||||
(0x40000023) allowing the guest to get its TSC/APIC frequencies without doing
|
||||
measurements.
|
||||
|
||||
``hv-reenlightenment``
|
||||
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
|
||||
enabled, it provides HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106),
|
||||
HV_X64_MSR_TSC_EMULATION_CONTROL (0x40000107) and HV_X64_MSR_TSC_EMULATION_STATUS
|
||||
(0x40000108) MSRs allowing the guest to get notified when TSC frequency changes
|
||||
(only happens on migration) and keep using old frequency (through emulation in
|
||||
the hypervisor) until it is ready to switch to the new one. This, in conjunction
|
||||
with ``hv-frequencies``, allows Hyper-V on KVM to pass stable clocksource
|
||||
(Reference TSC page) to its own guests.
|
||||
|
||||
Note, KVM doesn't fully support re-enlightenment notifications and doesn't
|
||||
emulate TSC accesses after migration so 'tsc-frequency=' CPU option also has to
|
||||
be specified to make migration succeed. The destination host has to either have
|
||||
the same TSC frequency or support TSC scaling CPU feature.
|
||||
|
||||
Recommended: ``hv-frequencies``
|
||||
|
||||
``hv-evmcs``
|
||||
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
|
||||
enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature
|
||||
implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V)
|
||||
hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only.
|
||||
|
||||
Note: some virtualization features (e.g. Posted Interrupts) are disabled when
|
||||
hv-evmcs is enabled. It may make sense to measure your nested workload with and
|
||||
without the feature to find out if enabling it is beneficial.
|
||||
|
||||
Requires: ``hv-vapic``
|
||||
|
||||
``hv-stimer-direct``
|
||||
Hyper-V specification allows synthetic timer operation in two modes: "classic",
|
||||
when expiration event is delivered as SynIC message and "direct", when the event
|
||||
is delivered via normal interrupt. It is known that nested Hyper-V can only
|
||||
use synthetic timers in direct mode and thus ``hv-stimer-direct`` needs to be
|
||||
enabled.
|
||||
|
||||
Requires: ``hv-vpindex``, ``hv-synic``, ``hv-time``, ``hv-stimer``
|
||||
|
||||
``hv-avic`` (``hv-apicv``)
|
||||
The enlightenment allows using Hyper-V SynIC with hardware APICv/AVIC enabled.
|
||||
Normally, Hyper-V SynIC disables these hardware features and suggests the guest
|
||||
to use paravirtualized AutoEOI feature.
|
||||
Note: enabling this feature on old hardware (without APICv/AVIC support) may
|
||||
have negative effect on guest's performance.
|
||||
|
||||
``hv-no-nonarch-coresharing`` = on/off/auto
|
||||
This enlightenment tells guest OS that virtual processors will never share a
|
||||
physical core unless they are reported as sibling SMT threads. This information
|
||||
is required by Windows and Hyper-V guests to properly mitigate SMT related CPU
|
||||
vulnerabilities.
|
||||
|
||||
When the option is set to 'auto' QEMU will enable the feature only when KVM
|
||||
reports that non-architectural coresharing is impossible; this means that
|
||||
hyper-threading is not supported or completely disabled on the host. This
|
||||
setting also prevents migration as SMT settings on the destination may differ.
|
||||
When the option is set to 'on' QEMU will always enable the feature, regardless
|
||||
of host setup. To keep guests secure, this can only be used in conjunction with
|
||||
exposing correct vCPU topology and vCPU pinning.
|
||||
|
||||
``hv-version-id-build``, ``hv-version-id-major``, ``hv-version-id-minor``, ``hv-version-id-spack``, ``hv-version-id-sbranch``, ``hv-version-id-snumber``
|
||||
This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the
|
||||
default (WS2016).
|
||||
|
||||
- ``hv-version-id-build`` sets 'Build Number' (32 bits)
|
||||
- ``hv-version-id-major`` sets 'Major Version' (16 bits)
|
||||
- ``hv-version-id-minor`` sets 'Minor Version' (16 bits)
|
||||
- ``hv-version-id-spack`` sets 'Service Pack' (32 bits)
|
||||
- ``hv-version-id-sbranch`` sets 'Service Branch' (8 bits)
|
||||
- ``hv-version-id-snumber`` sets 'Service Number' (24 bits)
|
||||
|
||||
Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V
|
||||
identification when specified without any other enlightenments.
|
||||
|
||||
``hv-syndbg``
|
||||
Enables the Hyper-V synthetic debugger interface. This is a special interface used
|
||||
by Windows Kernel debugger to send the packets through, rather than sending
|
||||
them via serial/network.
|
||||
When enabled, this enlightenment provides additional communication facilities
|
||||
to the guest: SynDbg messages.
|
||||
This new communication is used by Windows Kernel debugger rather than sending
|
||||
packets via serial/network, adding significant performance boost over the other
|
||||
comm channels.
|
||||
This enlightenment requires a VMBus device (-device vmbus-bridge,irq=15).
|
||||
|
||||
Requires: ``hv-relaxed``, ``hv-time``, ``hv-vapic``, ``hv-vpindex``, ``hv-synic``, ``hv-runtime``, ``hv-stimer``
|
||||
|
||||
``hv-emsr-bitmap``
|
||||
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
|
||||
enabled, it allows L0 (KVM) and L1 (Hyper-V) hypervisors to collaborate to
|
||||
avoid unnecessary updates to L2 MSR-Bitmap upon vmexits. While the protocol is
|
||||
supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires
|
||||
Enlightened VMCS (``hv-evmcs``) feature to also be enabled.
|
||||
|
||||
Recommended: ``hv-evmcs`` (Intel)
|
||||
|
||||
``hv-xmm-input``
|
||||
The Hyper-V specification allows passing parameters for certain hypercalls using XMM
|
||||
registers ("XMM Fast Hypercall Input"). When the feature is in use, it allows
|
||||
for faster hypercall processing, as KVM can avoid reading the guest's memory.
|
||||
|
||||
``hv-tlbflush-ext``
|
||||
Allow for extended GVA ranges to be passed to Hyper-V TLB flush hypercalls
|
||||
(HvFlushVirtualAddressList/HvFlushVirtualAddressListEx).
|
||||
|
||||
Requires: ``hv-tlbflush``
|
||||
|
||||
``hv-tlbflush-direct``
|
||||
The enlightenment is nested specific, it targets Hyper-V on KVM guests. When
|
||||
enabled, it allows L0 (KVM) to directly handle TLB flush hypercalls from L2
|
||||
guest without the need to exit to L1 (Hyper-V) hypervisor. While the feature is
|
||||
supported for both VMX (Intel) and SVM (AMD), the VMX implementation requires
|
||||
Enlightened VMCS (``hv-evmcs``) feature to also be enabled.
|
||||
|
||||
Requires: ``hv-vapic``
|
||||
|
||||
Recommended: ``hv-evmcs`` (Intel)
|
||||
|
||||
Supplementary features
|
||||
----------------------
|
||||
|
||||
``hv-passthrough``
|
||||
In some cases (e.g. during development) it may make sense to use QEMU in
|
||||
'pass-through' mode and give Windows guests all enlightenments currently
|
||||
supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU
|
||||
flag.
|
||||
|
||||
Note: ``hv-passthrough`` flag only enables enlightenments which are known to QEMU
|
||||
(have corresponding 'hv-' flag) and copies ``hv-spinlocks`` and ``hv-vendor-id``
|
||||
values from KVM to QEMU. ``hv-passthrough`` overrides all other 'hv-' settings on
|
||||
the command line. Also, enabling this flag effectively prevents migration as the
|
||||
list of enabled enlightenments may differ between source and destination hosts.
|
||||
|
||||
``hv-enforce-cpuid``
|
||||
By default, KVM allows the guest to use all currently supported Hyper-V
|
||||
enlightenments when Hyper-V CPUID interface was exposed, regardless of whether
|
||||
some features were not announced in guest visible CPUIDs. ``hv-enforce-cpuid``
|
||||
feature alters this behavior and only allows the guest to use exposed Hyper-V
|
||||
enlightenments.
|
||||
|
||||
|
||||
Useful links
|
||||
------------
|
||||
Hyper-V Top Level Functional specification and other information:
|
||||
|
||||
- https://github.com/MicrosoftDocs/Virtualization-Documentation
|
||||
- https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/tlfs
|
||||
|
@ -26,6 +26,7 @@ Architectural features
|
||||
:maxdepth: 1
|
||||
|
||||
i386/cpu
|
||||
i386/hyperv
|
||||
i386/kvm-pv
|
||||
i386/sgx
|
||||
i386/amd-memory-encryption
|
||||
|
752
hw/audio/ac97.c
752
hw/audio/ac97.c
File diff suppressed because it is too large
Load Diff
@ -2166,7 +2166,11 @@ uint32_t ide_ioport_read(void *opaque, uint32_t addr)
|
||||
hob = bus->cmd & (IDE_CTRL_HOB);
|
||||
switch (reg_num) {
|
||||
case ATA_IOPORT_RR_DATA:
|
||||
ret = 0xff;
|
||||
/*
|
||||
* The pre-GRUB Solaris x86 bootloader relies upon inb
|
||||
* consuming a word from the drive's sector buffer.
|
||||
*/
|
||||
ret = ide_data_readw(bus, addr) & 0xff;
|
||||
break;
|
||||
case ATA_IOPORT_RR_ERROR:
|
||||
if ((!bus->ifs[0].blk && !bus->ifs[1].blk) ||
|
||||
|
@ -267,7 +267,9 @@ static uint64_t pmac_ide_read(void *opaque, hwaddr addr, unsigned size)
|
||||
|
||||
switch (reg) {
|
||||
case 0x0:
|
||||
if (size == 2) {
|
||||
if (size == 1) {
|
||||
retval = ide_data_readw(&d->bus, 0) & 0xFF;
|
||||
} else if (size == 2) {
|
||||
retval = ide_data_readw(&d->bus, 0);
|
||||
} else if (size == 4) {
|
||||
retval = ide_data_readl(&d->bus, 0);
|
||||
|
@ -1355,6 +1355,14 @@ static FeatureDep feature_dependencies[] = {
|
||||
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INVPCID },
|
||||
.to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_INVPCID },
|
||||
},
|
||||
{
|
||||
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX },
|
||||
.to = { FEAT_VMX_EXIT_CTLS, VMX_VM_EXIT_CLEAR_BNDCFGS },
|
||||
},
|
||||
{
|
||||
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX },
|
||||
.to = { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_LOAD_BNDCFGS },
|
||||
},
|
||||
{
|
||||
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_RDSEED },
|
||||
.to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDSEED_EXITING },
|
||||
@ -6960,6 +6968,14 @@ static Property x86_cpu_properties[] = {
|
||||
HYPERV_FEAT_STIMER_DIRECT, 0),
|
||||
DEFINE_PROP_BIT64("hv-avic", X86CPU, hyperv_features,
|
||||
HYPERV_FEAT_AVIC, 0),
|
||||
DEFINE_PROP_BIT64("hv-emsr-bitmap", X86CPU, hyperv_features,
|
||||
HYPERV_FEAT_MSR_BITMAP, 0),
|
||||
DEFINE_PROP_BIT64("hv-xmm-input", X86CPU, hyperv_features,
|
||||
HYPERV_FEAT_XMM_INPUT, 0),
|
||||
DEFINE_PROP_BIT64("hv-tlbflush-ext", X86CPU, hyperv_features,
|
||||
HYPERV_FEAT_TLBFLUSH_EXT, 0),
|
||||
DEFINE_PROP_BIT64("hv-tlbflush-direct", X86CPU, hyperv_features,
|
||||
HYPERV_FEAT_TLBFLUSH_DIRECT, 0),
|
||||
DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU,
|
||||
hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF),
|
||||
DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features,
|
||||
|
@ -1106,6 +1106,10 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
#define HYPERV_FEAT_STIMER_DIRECT 14
|
||||
#define HYPERV_FEAT_AVIC 15
|
||||
#define HYPERV_FEAT_SYNDBG 16
|
||||
#define HYPERV_FEAT_MSR_BITMAP 17
|
||||
#define HYPERV_FEAT_XMM_INPUT 18
|
||||
#define HYPERV_FEAT_TLBFLUSH_EXT 19
|
||||
#define HYPERV_FEAT_TLBFLUSH_DIRECT 20
|
||||
|
||||
#ifndef HYPERV_SPINLOCK_NEVER_NOTIFY
|
||||
#define HYPERV_SPINLOCK_NEVER_NOTIFY 0xFFFFFFFF
|
||||
@ -1804,7 +1808,6 @@ struct ArchCPU {
|
||||
uint32_t hyperv_vendor_id[3];
|
||||
uint32_t hyperv_interface_id[4];
|
||||
uint32_t hyperv_limits[3];
|
||||
uint32_t hyperv_nested[4];
|
||||
bool hyperv_enforce_cpuid;
|
||||
uint32_t hyperv_ver_id_build;
|
||||
uint16_t hyperv_ver_id_major;
|
||||
|
@ -54,11 +54,12 @@
|
||||
#define HV_GUEST_DEBUGGING_AVAILABLE (1u << 1)
|
||||
#define HV_PERF_MONITOR_AVAILABLE (1u << 2)
|
||||
#define HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1u << 3)
|
||||
#define HV_HYPERCALL_PARAMS_XMM_AVAILABLE (1u << 4)
|
||||
#define HV_HYPERCALL_XMM_INPUT_AVAILABLE (1u << 4)
|
||||
#define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5)
|
||||
#define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8)
|
||||
#define HV_GUEST_CRASH_MSR_AVAILABLE (1u << 10)
|
||||
#define HV_FEATURE_DEBUG_MSRS_AVAILABLE (1u << 11)
|
||||
#define HV_EXT_GVA_RANGES_FLUSH_AVAILABLE (1u << 14)
|
||||
#define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19)
|
||||
|
||||
/*
|
||||
@ -86,6 +87,12 @@
|
||||
*/
|
||||
#define HV_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING (1u << 1)
|
||||
|
||||
/*
|
||||
* HV_CPUID_NESTED_FEATURES.EAX bits
|
||||
*/
|
||||
#define HV_NESTED_DIRECT_FLUSH (1u << 17)
|
||||
#define HV_NESTED_MSR_BITMAP (1u << 19)
|
||||
|
||||
/*
|
||||
* Basic virtualized MSRs
|
||||
*/
|
||||
|
@ -831,6 +831,8 @@ static bool tsc_is_stable_and_known(CPUX86State *env)
|
||||
|| env->user_tsc_khz;
|
||||
}
|
||||
|
||||
#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1)
|
||||
|
||||
static struct {
|
||||
const char *desc;
|
||||
struct {
|
||||
@ -971,6 +973,36 @@ static struct {
|
||||
.dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_RELAXED)
|
||||
},
|
||||
#endif
|
||||
[HYPERV_FEAT_MSR_BITMAP] = {
|
||||
.desc = "enlightened MSR-Bitmap (hv-emsr-bitmap)",
|
||||
.flags = {
|
||||
{.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX,
|
||||
.bits = HV_NESTED_MSR_BITMAP}
|
||||
}
|
||||
},
|
||||
[HYPERV_FEAT_XMM_INPUT] = {
|
||||
.desc = "XMM fast hypercall input (hv-xmm-input)",
|
||||
.flags = {
|
||||
{.func = HV_CPUID_FEATURES, .reg = R_EDX,
|
||||
.bits = HV_HYPERCALL_XMM_INPUT_AVAILABLE}
|
||||
}
|
||||
},
|
||||
[HYPERV_FEAT_TLBFLUSH_EXT] = {
|
||||
.desc = "Extended gva ranges for TLB flush hypercalls (hv-tlbflush-ext)",
|
||||
.flags = {
|
||||
{.func = HV_CPUID_FEATURES, .reg = R_EDX,
|
||||
.bits = HV_EXT_GVA_RANGES_FLUSH_AVAILABLE}
|
||||
},
|
||||
.dependencies = BIT(HYPERV_FEAT_TLBFLUSH)
|
||||
},
|
||||
[HYPERV_FEAT_TLBFLUSH_DIRECT] = {
|
||||
.desc = "direct TLB flush (hv-tlbflush-direct)",
|
||||
.flags = {
|
||||
{.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX,
|
||||
.bits = HV_NESTED_DIRECT_FLUSH}
|
||||
},
|
||||
.dependencies = BIT(HYPERV_FEAT_VAPIC)
|
||||
},
|
||||
};
|
||||
|
||||
static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max,
|
||||
@ -1254,6 +1286,13 @@ static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg)
|
||||
}
|
||||
}
|
||||
|
||||
/* HV_CPUID_NESTED_FEATURES.EAX also encodes the supported eVMCS range */
|
||||
if (func == HV_CPUID_NESTED_FEATURES && reg == R_EAX) {
|
||||
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
|
||||
r |= DEFAULT_EVMCS_VERSION;
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
@ -1384,11 +1423,11 @@ static int hyperv_fill_cpuids(CPUState *cs,
|
||||
struct kvm_cpuid_entry2 *c;
|
||||
uint32_t signature[3];
|
||||
uint32_t cpuid_i = 0, max_cpuid_leaf = 0;
|
||||
uint32_t nested_eax =
|
||||
hv_build_cpuid_leaf(cs, HV_CPUID_NESTED_FEATURES, R_EAX);
|
||||
|
||||
max_cpuid_leaf = HV_CPUID_IMPLEMENT_LIMITS;
|
||||
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
|
||||
max_cpuid_leaf = MAX(max_cpuid_leaf, HV_CPUID_NESTED_FEATURES);
|
||||
}
|
||||
max_cpuid_leaf = nested_eax ? HV_CPUID_NESTED_FEATURES :
|
||||
HV_CPUID_IMPLEMENT_LIMITS;
|
||||
|
||||
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
|
||||
max_cpuid_leaf =
|
||||
@ -1461,7 +1500,7 @@ static int hyperv_fill_cpuids(CPUState *cs,
|
||||
c->ecx = cpu->hyperv_limits[1];
|
||||
c->edx = cpu->hyperv_limits[2];
|
||||
|
||||
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
|
||||
if (nested_eax) {
|
||||
uint32_t function;
|
||||
|
||||
/* Create zeroed 0x40000006..0x40000009 leaves */
|
||||
@ -1473,7 +1512,7 @@ static int hyperv_fill_cpuids(CPUState *cs,
|
||||
|
||||
c = &cpuid_ent[cpuid_i++];
|
||||
c->function = HV_CPUID_NESTED_FEATURES;
|
||||
c->eax = cpu->hyperv_nested[0];
|
||||
c->eax = nested_eax;
|
||||
}
|
||||
|
||||
if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) {
|
||||
@ -1522,8 +1561,6 @@ static bool evmcs_version_supported(uint16_t evmcs_version,
|
||||
(max_version <= max_supported_version);
|
||||
}
|
||||
|
||||
#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1)
|
||||
|
||||
static int hyperv_init_vcpu(X86CPU *cpu)
|
||||
{
|
||||
CPUState *cs = CPU(cpu);
|
||||
@ -1620,8 +1657,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
|
||||
supported_evmcs_version >> 8);
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
cpu->hyperv_nested[0] = evmcs_version;
|
||||
}
|
||||
|
||||
if (cpu->hyperv_enforce_cpuid) {
|
||||
@ -3373,15 +3408,14 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
|
||||
int i, ret;
|
||||
|
||||
/*
|
||||
* Only migrate Arch LBR states when: 1) Arch LBR is enabled
|
||||
* for migrated vcpu. 2) the host Arch LBR depth equals that
|
||||
* of source guest's, this is to avoid mismatch of guest/host
|
||||
* config for the msr hence avoid unexpected misbehavior.
|
||||
* Only migrate Arch LBR states when the host Arch LBR depth
|
||||
* equals that of source guest's, this is to avoid mismatch
|
||||
* of guest/host config for the msr hence avoid unexpected
|
||||
* misbehavior.
|
||||
*/
|
||||
ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
|
||||
|
||||
if (ret == 1 && (env->msr_lbr_ctl & 0x1) && !!depth &&
|
||||
depth == env->msr_lbr_depth) {
|
||||
if (ret == 1 && !!depth && depth == env->msr_lbr_depth) {
|
||||
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, env->msr_lbr_ctl);
|
||||
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, env->msr_lbr_depth);
|
||||
|
||||
@ -3801,13 +3835,11 @@ static int kvm_get_msrs(X86CPU *cpu)
|
||||
|
||||
if (kvm_enabled() && cpu->enable_pmu &&
|
||||
(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) {
|
||||
uint64_t ctl, depth;
|
||||
int i, ret2;
|
||||
uint64_t depth;
|
||||
int i, ret;
|
||||
|
||||
ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_CTL, &ctl);
|
||||
ret2 = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
|
||||
if (ret == 1 && ret2 == 1 && (ctl & 0x1) &&
|
||||
depth == ARCH_LBR_NR_ENTRIES) {
|
||||
ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth);
|
||||
if (ret == 1 && depth == ARCH_LBR_NR_ENTRIES) {
|
||||
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, 0);
|
||||
kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, 0);
|
||||
|
||||
|
@ -57,7 +57,7 @@ struct ThreadPool {
|
||||
QEMUBH *completion_bh;
|
||||
QemuMutex lock;
|
||||
QemuCond worker_stopped;
|
||||
QemuSemaphore sem;
|
||||
QemuCond request_cond;
|
||||
QEMUBH *new_thread_bh;
|
||||
|
||||
/* The following variables are only accessed from one AioContext. */
|
||||
@ -69,28 +69,10 @@ struct ThreadPool {
|
||||
int idle_threads;
|
||||
int new_threads; /* backlog of threads we need to create */
|
||||
int pending_threads; /* threads created but not running yet */
|
||||
bool stopping;
|
||||
int min_threads;
|
||||
int max_threads;
|
||||
};
|
||||
|
||||
static inline bool back_to_sleep(ThreadPool *pool, int ret)
|
||||
{
|
||||
/*
|
||||
* The semaphore timed out, we should exit the loop except when:
|
||||
* - There is work to do, we raced with the signal.
|
||||
* - The max threads threshold just changed, we raced with the signal.
|
||||
* - The thread pool forces a minimum number of readily available threads.
|
||||
*/
|
||||
if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) ||
|
||||
pool->cur_threads > pool->max_threads ||
|
||||
pool->cur_threads <= pool->min_threads)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void *worker_thread(void *opaque)
|
||||
{
|
||||
ThreadPool *pool = opaque;
|
||||
@ -99,20 +81,25 @@ static void *worker_thread(void *opaque)
|
||||
pool->pending_threads--;
|
||||
do_spawn_thread(pool);
|
||||
|
||||
while (!pool->stopping) {
|
||||
while (pool->cur_threads <= pool->max_threads) {
|
||||
ThreadPoolElement *req;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
if (QTAILQ_EMPTY(&pool->request_list)) {
|
||||
pool->idle_threads++;
|
||||
qemu_mutex_unlock(&pool->lock);
|
||||
ret = qemu_sem_timedwait(&pool->sem, 10000);
|
||||
qemu_mutex_lock(&pool->lock);
|
||||
ret = qemu_cond_timedwait(&pool->request_cond, &pool->lock, 10000);
|
||||
pool->idle_threads--;
|
||||
} while (back_to_sleep(pool, ret));
|
||||
if (ret == -1 || pool->stopping ||
|
||||
pool->cur_threads > pool->max_threads) {
|
||||
break;
|
||||
if (ret == 0 &&
|
||||
QTAILQ_EMPTY(&pool->request_list) &&
|
||||
pool->cur_threads > pool->min_threads) {
|
||||
/* Timed out + no work to do + no need for warm threads = exit. */
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Even if there was some work to do, check if there aren't
|
||||
* too many worker threads before picking it up.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
req = QTAILQ_FIRST(&pool->request_list);
|
||||
@ -127,14 +114,19 @@ static void *worker_thread(void *opaque)
|
||||
smp_wmb();
|
||||
req->state = THREAD_DONE;
|
||||
|
||||
qemu_mutex_lock(&pool->lock);
|
||||
|
||||
qemu_bh_schedule(pool->completion_bh);
|
||||
qemu_mutex_lock(&pool->lock);
|
||||
}
|
||||
|
||||
pool->cur_threads--;
|
||||
qemu_cond_signal(&pool->worker_stopped);
|
||||
qemu_mutex_unlock(&pool->lock);
|
||||
|
||||
/*
|
||||
* Wake up another thread, in case we got a wakeup but decided
|
||||
* to exit due to pool->cur_threads > pool->max_threads.
|
||||
*/
|
||||
qemu_cond_signal(&pool->request_cond);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -230,13 +222,7 @@ static void thread_pool_cancel(BlockAIOCB *acb)
|
||||
trace_thread_pool_cancel(elem, elem->common.opaque);
|
||||
|
||||
QEMU_LOCK_GUARD(&pool->lock);
|
||||
if (elem->state == THREAD_QUEUED &&
|
||||
/* No thread has yet started working on elem. we can try to "steal"
|
||||
* the item from the worker if we can get a signal from the
|
||||
* semaphore. Because this is non-blocking, we can do it with
|
||||
* the lock taken and ensure that elem will remain THREAD_QUEUED.
|
||||
*/
|
||||
qemu_sem_timedwait(&pool->sem, 0) == 0) {
|
||||
if (elem->state == THREAD_QUEUED) {
|
||||
QTAILQ_REMOVE(&pool->request_list, elem, reqs);
|
||||
qemu_bh_schedule(pool->completion_bh);
|
||||
|
||||
@ -281,7 +267,7 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
|
||||
}
|
||||
QTAILQ_INSERT_TAIL(&pool->request_list, req, reqs);
|
||||
qemu_mutex_unlock(&pool->lock);
|
||||
qemu_sem_post(&pool->sem);
|
||||
qemu_cond_signal(&pool->request_cond);
|
||||
return &req->common;
|
||||
}
|
||||
|
||||
@ -324,7 +310,7 @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
|
||||
* We either have to:
|
||||
* - Increase the number available of threads until over the min_threads
|
||||
* threshold.
|
||||
* - Decrease the number of available threads until under the max_threads
|
||||
* - Bump the worker threads so that they exit, until under the max_threads
|
||||
* threshold.
|
||||
* - Do nothing. The current number of threads fall in between the min and
|
||||
* max thresholds. We'll let the pool manage itself.
|
||||
@ -334,7 +320,7 @@ void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
|
||||
}
|
||||
|
||||
for (int i = pool->cur_threads; i > pool->max_threads; i--) {
|
||||
qemu_sem_post(&pool->sem);
|
||||
qemu_cond_signal(&pool->request_cond);
|
||||
}
|
||||
|
||||
qemu_mutex_unlock(&pool->lock);
|
||||
@ -351,7 +337,7 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
|
||||
pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
|
||||
qemu_mutex_init(&pool->lock);
|
||||
qemu_cond_init(&pool->worker_stopped);
|
||||
qemu_sem_init(&pool->sem, 0);
|
||||
qemu_cond_init(&pool->request_cond);
|
||||
pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
|
||||
|
||||
QLIST_INIT(&pool->head);
|
||||
@ -383,16 +369,16 @@ void thread_pool_free(ThreadPool *pool)
|
||||
pool->new_threads = 0;
|
||||
|
||||
/* Wait for worker threads to terminate */
|
||||
pool->stopping = true;
|
||||
pool->max_threads = 0;
|
||||
qemu_cond_broadcast(&pool->request_cond);
|
||||
while (pool->cur_threads > 0) {
|
||||
qemu_sem_post(&pool->sem);
|
||||
qemu_cond_wait(&pool->worker_stopped, &pool->lock);
|
||||
}
|
||||
|
||||
qemu_mutex_unlock(&pool->lock);
|
||||
|
||||
qemu_bh_delete(pool->completion_bh);
|
||||
qemu_sem_destroy(&pool->sem);
|
||||
qemu_cond_destroy(&pool->request_cond);
|
||||
qemu_cond_destroy(&pool->worker_stopped);
|
||||
qemu_mutex_destroy(&pool->lock);
|
||||
g_free(pool);
|
||||
|
Loading…
Reference in New Issue
Block a user