* target/i386/tcg: fixes for seg_helper.c

* SEV: Don't allow automatic fallback to legacy KVM_SEV_INIT,
  but also don't use it by default
* scsi: honor bootindex again for legacy drives
* hpet, utils, scsi, build, cpu: miscellaneous bugfixes
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmaWoP0UHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroOqfggAg3jxUp6B8dFTEid5aV6qvT4M6nwD
 TAYcAl5kRqTOklEmXiPCoA5PeS0rbr+5xzWLAKgkumjCVXbxMoYSr0xJHVuDwQWv
 XunUm4kpxJBLKK3uTGAIW9A21thOaA5eAoLIcqu2smBMU953TBevMqA7T67h22rp
 y8NnZWWdyQRH0RAaWsCBaHVkkf+DuHSG5LHMYhkdyxzno+UWkTADFppVhaDO78Ba
 Egk49oMO+G6of4+dY//p1OtAkAf4bEHePKgxnbZePInJrkgHzr0TJWf9gERWFzdK
 JiM0q6DeqopZm+vENxS+WOx7AyDzdN0qOrf6t9bziXMg0Rr2Z8bu01yBCQ==
 =cZhV
 -----END PGP SIGNATURE-----

Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* target/i386/tcg: fixes for seg_helper.c
* SEV: Don't allow automatic fallback to legacy KVM_SEV_INIT,
  but also don't use it by default
* scsi: honor bootindex again for legacy drives
* hpet, utils, scsi, build, cpu: miscellaneous bugfixes

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmaWoP0UHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroOqfggAg3jxUp6B8dFTEid5aV6qvT4M6nwD
# TAYcAl5kRqTOklEmXiPCoA5PeS0rbr+5xzWLAKgkumjCVXbxMoYSr0xJHVuDwQWv
# XunUm4kpxJBLKK3uTGAIW9A21thOaA5eAoLIcqu2smBMU953TBevMqA7T67h22rp
# y8NnZWWdyQRH0RAaWsCBaHVkkf+DuHSG5LHMYhkdyxzno+UWkTADFppVhaDO78Ba
# Egk49oMO+G6of4+dY//p1OtAkAf4bEHePKgxnbZePInJrkgHzr0TJWf9gERWFzdK
# JiM0q6DeqopZm+vENxS+WOx7AyDzdN0qOrf6t9bziXMg0Rr2Z8bu01yBCQ==
# =cZhV
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 17 Jul 2024 02:34:05 AM AEST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
  target/i386/tcg: save current task state before loading new one
  target/i386/tcg: use X86Access for TSS access
  target/i386/tcg: check for correct busy state before switching to a new task
  target/i386/tcg: Compute MMU index once
  target/i386/tcg: Introduce x86_mmu_index_{kernel_,}pl
  target/i386/tcg: Reorg push/pop within seg_helper.c
  target/i386/tcg: use PUSHL/PUSHW for error code
  target/i386/tcg: Allow IRET from user mode to user mode with SMAP
  target/i386/tcg: Remove SEG_ADDL
  target/i386/tcg: fix POP to memory in long mode
  hpet: fix HPET_TN_SETVAL for high 32-bits of the comparator
  hpet: fix clamping of period
  docs: Update description of 'user=username' for '-run-with'
  qemu/timer: Add host ticks function for LoongArch
  scsi: fix regression and honor bootindex again for legacy drives
  hw/scsi/lsi53c895a: bump instruction limit in scripts processing to fix regression
  disas: Fix build against Capstone v6
  cpu: Free queued CPU work
  Revert "qemu-char: do not operate on sources from finalize callbacks"
  i386/sev: Don't allow automatic fallback to legacy KVM_SEV*_INIT

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2024-07-17 15:40:28 +10:00
commit 58ee924b97
18 changed files with 535 additions and 365 deletions

@@ -87,16 +87,12 @@ static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
static void io_watch_poll_finalize(GSource *source)
{
/*
* Due to a glib bug, removing the last reference to a source
* inside a finalize callback causes recursive locking (and a
* deadlock). This is not a problem inside other callbacks,
* including dispatch callbacks, so we call io_remove_watch_poll
* to remove this source. At this point, iwp->src must
* be NULL, or we would leak it.
*/
IOWatchPoll *iwp = io_watch_poll_from_source(source);
assert(iwp->src == NULL);
if (iwp->src) {
g_source_destroy(iwp->src);
g_source_unref(iwp->src);
iwp->src = NULL;
}
}
static GSourceFuncs io_watch_poll_funcs = {
@@ -139,11 +135,6 @@ static void io_remove_watch_poll(GSource *source)
IOWatchPoll *iwp;
iwp = io_watch_poll_from_source(source);
if (iwp->src) {
g_source_destroy(iwp->src);
g_source_unref(iwp->src);
iwp->src = NULL;
}
g_source_destroy(&iwp->parent);
}

@@ -331,6 +331,17 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
queue_work_on_cpu(cpu, wi);
}
void free_queued_cpu_work(CPUState *cpu)
{
while (!QSIMPLEQ_EMPTY(&cpu->work_list)) {
struct qemu_work_item *wi = QSIMPLEQ_FIRST(&cpu->work_list);
QSIMPLEQ_REMOVE_HEAD(&cpu->work_list, node);
if (wi->free) {
g_free(wi);
}
}
}
void process_queued_cpu_work(CPUState *cpu)
{
struct qemu_work_item *wi;

@@ -281,6 +281,7 @@ static void cpu_common_finalize(Object *obj)
g_free(cpu->plugin_state);
}
#endif
free_queued_cpu_work(cpu);
g_array_free(cpu->gdb_regs, TRUE);
qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
qemu_mutex_destroy(&cpu->work_mutex);

@@ -83,7 +83,7 @@ GlobalProperty pc_compat_9_0[] = {
{ TYPE_X86_CPU, "x-amd-topoext-features-only", "false" },
{ TYPE_X86_CPU, "x-l1-cache-per-thread", "false" },
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
{ "sev-guest", "legacy-vm-type", "true" },
{ "sev-guest", "legacy-vm-type", "on" },
{ TYPE_X86_CPU, "legacy-multi-node", "on" },
};
const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);

@@ -188,7 +188,7 @@ static const char *names[] = {
#define LSI_TAG_VALID (1 << 16)
/* Maximum instructions to process. */
#define LSI_MAX_INSN 100
#define LSI_MAX_INSN 500
typedef struct lsi_request {
SCSIRequest *req;

@@ -384,6 +384,7 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
DeviceState *dev;
SCSIDevice *s;
DriveInfo *dinfo;
Error *local_err = NULL;
if (blk_is_sg(blk)) {
driver = "scsi-generic";
@@ -403,6 +404,14 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
s = SCSI_DEVICE(dev);
s->conf = *conf;
check_boot_index(conf->bootindex, &local_err);
if (local_err) {
object_unparent(OBJECT(dev));
error_propagate(errp, local_err);
return NULL;
}
add_boot_device_path(conf->bootindex, dev, NULL);
qdev_prop_set_uint32(dev, "scsi-id", unit);
if (object_property_find(OBJECT(dev), "removable")) {
qdev_prop_set_bit(dev, "removable", removable);

@@ -548,10 +548,16 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
* FIXME: Clamp period to reasonable min value?
* Clamp period to reasonable max value
*/
new_val &= (timer->config & HPET_TN_32BIT ? ~0u : ~0ull) >> 1;
if (timer->config & HPET_TN_32BIT) {
new_val = MIN(new_val, ~0u >> 1);
}
timer->period =
(timer->period & 0xffffffff00000000ULL) | new_val;
}
/*
* FIXME: on a 64-bit write, HPET_TN_SETVAL should apply to the
* high bits part as well.
*/
timer->config &= ~HPET_TN_SETVAL;
if (hpet_enabled(s)) {
hpet_set_timer(timer);
@@ -562,20 +568,21 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
if (!timer_is_periodic(timer)
|| (timer->config & HPET_TN_SETVAL)) {
timer->cmp = (timer->cmp & 0xffffffffULL) | new_val << 32;
} else {
}
if (timer_is_periodic(timer)) {
/*
* FIXME: Clamp period to reasonable min value?
* Clamp period to reasonable max value
*/
new_val &= (timer->config & HPET_TN_32BIT ? ~0u : ~0ull) >> 1;
new_val = MIN(new_val, ~0u >> 1);
timer->period =
(timer->period & 0xffffffffULL) | new_val << 32;
}
timer->config &= ~HPET_TN_SETVAL;
if (hpet_enabled(s)) {
hpet_set_timer(timer);
}
break;
}
timer->config &= ~HPET_TN_SETVAL;
if (hpet_enabled(s)) {
hpet_set_timer(timer);
}
break;
case HPET_TN_ROUTE:
timer->fsb = (timer->fsb & 0xffffffff00000000ULL) | new_val;
break;

@@ -3,6 +3,7 @@
#ifdef CONFIG_CAPSTONE
#define CAPSTONE_AARCH64_COMPAT_HEADER
#include <capstone.h>
#else

@@ -1008,6 +1008,12 @@ void cpu_resume(CPUState *cpu);
*/
void cpu_remove_sync(CPUState *cpu);
/**
* free_queued_cpu_work() - free all items on CPU work queue
* @cpu: The CPU which work queue to free.
*/
void free_queued_cpu_work(CPUState *cpu);
/**
* process_queued_cpu_work() - process all items on CPU work queue
* @cpu: The CPU which work queue to process.

@@ -1016,6 +1016,15 @@ static inline int64_t cpu_get_host_ticks(void)
return val;
}
#elif defined(__loongarch64)
static inline int64_t cpu_get_host_ticks(void)
{
uint64_t val;
asm volatile("rdtime.d %0, $zero" : "=r"(val));
return val;
}
#else
/* The host CPU doesn't have an easily accessible cycle counter.
Just return a monotonically increasing value. This will be

@@ -924,12 +924,16 @@
# @handle: SEV firmware handle (default: 0)
#
# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
# The newer KVM_SEV_INIT2 interface syncs additional vCPU
# state when initializing the VMSA structures, which will
# result in a different guest measurement. Set this to
# maintain compatibility with older QEMU or kernel versions
# that rely on legacy KVM_SEV_INIT behavior.
# (default: false) (since 9.1)
# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs
# additional vCPU state when initializing the VMSA structures,
# which will result in a different guest measurement. Set
# this to 'on' to force compatibility with older QEMU or kernel
# versions that rely on legacy KVM_SEV_INIT behavior. 'auto'
# will behave identically to 'on', but will automatically
# switch to using KVM_SEV_INIT2 if the user specifies any
# additional options that require it. If set to 'off', QEMU
# will require KVM_SEV_INIT2 unconditionally.
# (default: off) (since 9.1)
#
# Since: 2.12
##
@@ -939,7 +943,7 @@
'*session-file': 'str',
'*policy': 'uint32',
'*handle': 'uint32',
'*legacy-vm-type': 'bool' } }
'*legacy-vm-type': 'OnOffAuto' } }
##
# @SevSnpGuestProperties:

@@ -5024,8 +5024,11 @@ SRST
in combination with -runas.
``user=username`` or ``user=uid:gid`` can be used to drop root privileges
by switching to the specified user (via username) or user and group
(via uid:gid) immediately before starting guest execution.
before starting guest execution. QEMU will use the ``setuid`` and ``setgid``
system calls to switch to the specified identity. Note that the
``user=username`` syntax will also apply the full set of supplementary
groups for the user, whereas the ``user=uid:gid`` will use only the
``gid`` group.
ERST
#endif

@@ -8122,18 +8122,39 @@ static bool x86_cpu_has_work(CPUState *cs)
return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0;
}
static int x86_cpu_mmu_index(CPUState *cs, bool ifetch)
int x86_mmu_index_pl(CPUX86State *env, unsigned pl)
{
CPUX86State *env = cpu_env(cs);
int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1;
int mmu_index_base =
(env->hflags & HF_CPL_MASK) == 3 ? MMU_USER64_IDX :
pl == 3 ? MMU_USER64_IDX :
!(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
(env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX;
return mmu_index_base + mmu_index_32;
}
static int x86_cpu_mmu_index(CPUState *cs, bool ifetch)
{
CPUX86State *env = cpu_env(cs);
return x86_mmu_index_pl(env, env->hflags & HF_CPL_MASK);
}
static int x86_mmu_index_kernel_pl(CPUX86State *env, unsigned pl)
{
int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1;
int mmu_index_base =
!(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
(pl < 3 && (env->eflags & AC_MASK)
? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX);
return mmu_index_base + mmu_index_32;
}
int cpu_mmu_index_kernel(CPUX86State *env)
{
return x86_mmu_index_kernel_pl(env, env->hflags & HF_CPL_MASK);
}
static void x86_disas_set_info(CPUState *cs, disassemble_info *info)
{
X86CPU *cpu = X86_CPU(cs);

@@ -2445,15 +2445,8 @@ static inline bool is_mmu_index_32(int mmu_index)
return mmu_index & 1;
}
static inline int cpu_mmu_index_kernel(CPUX86State *env)
{
int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1;
int mmu_index_base =
!(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX :
((env->hflags & HF_CPL_MASK) < 3 && (env->eflags & AC_MASK)) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX;
return mmu_index_base + mmu_index_32;
}
int x86_mmu_index_pl(CPUX86State *env, unsigned pl);
int cpu_mmu_index_kernel(CPUX86State *env);
#define CC_DST (env->cc_dst)
#define CC_SRC (env->cc_src)

@@ -144,7 +144,7 @@ struct SevGuestState {
uint32_t policy;
char *dh_cert_file;
char *session_file;
bool legacy_vm_type;
OnOffAuto legacy_vm_type;
};
struct SevSnpGuestState {
@@ -1369,6 +1369,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
}
}
/*
* This helper is to examine sev-guest properties and determine if any options
* have been set which rely on the newer KVM_SEV_INIT2 interface and associated
* KVM VM types.
*/
static bool sev_init2_required(SevGuestState *sev_guest)
{
/* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */
return false;
}
static int sev_kvm_type(X86ConfidentialGuest *cg)
{
SevCommonState *sev_common = SEV_COMMON(cg);
@@ -1379,14 +1390,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg)
goto out;
}
kvm_type = (sev_guest->policy & SEV_POLICY_ES) ?
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) {
sev_common->kvm_type = kvm_type;
} else {
/* These are the only cases where legacy VM types can be used. */
if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON ||
(sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO &&
!sev_init2_required(sev_guest))) {
sev_common->kvm_type = KVM_X86_DEFAULT_VM;
goto out;
}
/*
* Newer VM types are required, either explicitly via legacy-vm-type=on, or
* implicitly via legacy-vm-type=auto along with additional sev-guest
* properties that require the newer VM types.
*/
kvm_type = (sev_guest->policy & SEV_POLICY_ES) ?
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
if (!kvm_is_vm_type_supported(kvm_type)) {
if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) {
error_report("SEV: host kernel does not support requested %s VM type, which is required "
"for the set of options specified. To allow use of the legacy "
"KVM_X86_DEFAULT_VM VM type, please disable any options that are not "
"compatible with the legacy VM type, or upgrade your kernel.",
kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
} else {
error_report("SEV: host kernel does not support requested %s VM type. To allow use of "
"the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument "
"must be set to 'on' or 'auto' for the sev-guest object.",
kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
}
return -1;
}
sev_common->kvm_type = kvm_type;
out:
return sev_common->kvm_type;
}
@@ -1477,14 +1513,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
trace_kvm_sev_init();
if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) {
case KVM_X86_DEFAULT_VM:
cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
} else {
break;
case KVM_X86_SEV_VM:
case KVM_X86_SEV_ES_VM:
case KVM_X86_SNP_VM: {
struct kvm_sev_init args = { 0 };
ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error);
break;
}
default:
error_setg(errp, "%s: host kernel does not support the requested SEV configuration.",
__func__);
return -1;
}
if (ret) {
@@ -2074,14 +2120,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp)
SEV_GUEST(obj)->session_file = g_strdup(value);
}
static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp)
static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
return SEV_GUEST(obj)->legacy_vm_type;
SevGuestState *sev_guest = SEV_GUEST(obj);
OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type;
visit_type_OnOffAuto(v, name, &legacy_vm_type, errp);
}
static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
SEV_GUEST(obj)->legacy_vm_type = value;
SevGuestState *sev_guest = SEV_GUEST(obj);
visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp);
}
static void
@@ -2107,9 +2162,9 @@ sev_guest_class_init(ObjectClass *oc, void *data)
sev_guest_set_session_file);
object_class_property_set_description(oc, "session-file",
"guest owners session parameters (encoded with base64)");
object_class_property_add_bool(oc, "legacy-vm-type",
sev_guest_get_legacy_vm_type,
sev_guest_set_legacy_vm_type);
object_class_property_add(oc, "legacy-vm-type", "OnOffAuto",
sev_guest_get_legacy_vm_type,
sev_guest_set_legacy_vm_type, NULL, NULL);
object_class_property_set_description(oc, "legacy-vm-type",
"use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions.");
}
@@ -2125,6 +2180,8 @@ sev_guest_instance_init(Object *obj)
object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy,
OBJ_PROP_FLAG_READWRITE);
object_apply_compat_props(obj);
sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO;
}
/* guest info specific sev/sev-es */

@@ -1717,7 +1717,7 @@ static const X86OpEntry opcodes_root[256] = {
[0x8C] = X86_OP_ENTRYwr(MOV, E,v, S,w, op0_Mw),
[0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, nolea),
[0x8E] = X86_OP_ENTRYwr(MOV, S,w, E,w),
[0x8F] = X86_OP_GROUPw(group1A, E,v),
[0x8F] = X86_OP_GROUPw(group1A, E,d64),
[0x98] = X86_OP_ENTRY1(CBW, 0,v), /* rAX */
[0x99] = X86_OP_ENTRYwr(CWD, 2,v, 0,v), /* rDX, rAX */

@@ -2788,6 +2788,7 @@ static void gen_POP(DisasContext *s, X86DecodedInsn *decode)
X86DecodedOp *op = &decode->op[0];
MemOp ot = gen_pop_T0(s);
assert(ot >= op->ot);
if (op->has_ea || op->unit == X86_OP_SEG) {
/* NOTE: order is important for MMU exceptions */
gen_writeback(s, decode, 0, s->T0);

File diff suppressed because it is too large Load Diff