diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index c5983c79be..2bd0de82b4 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -184,19 +184,24 @@ static void kvm_apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC_COMMON(dev); - memory_region_init_io(&s->io_memory, NULL, &kvm_apic_io_ops, s, "kvm-apic-msi", - APIC_SPACE_SIZE); + memory_region_init_io(&s->io_memory, OBJECT(s), &kvm_apic_io_ops, s, + "kvm-apic-msi", APIC_SPACE_SIZE); if (kvm_has_gsi_routing()) { msi_nonbroken = true; } } +static void kvm_apic_unrealize(DeviceState *dev, Error **errp) +{ +} + static void kvm_apic_class_init(ObjectClass *klass, void *data) { APICCommonClass *k = APIC_COMMON_CLASS(klass); k->realize = kvm_apic_realize; + k->unrealize = kvm_apic_unrealize; k->reset = kvm_apic_reset; k->set_base = kvm_apic_set_base; k->set_tpr = kvm_apic_set_tpr; diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 719884ff88..ac7a4d5a47 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -471,9 +471,6 @@ void pc_cmos_init(PCMachineState *pcms, rtc_set_memory(s, 0x5c, val >> 8); rtc_set_memory(s, 0x5d, val >> 16); - /* set the number of CPU */ - rtc_set_memory(s, 0x5f, smp_cpus - 1); - object_property_add_link(OBJECT(pcms), "rtc_state", TYPE_ISA_DEVICE, (Object **)&pcms->rtc, @@ -1090,6 +1087,17 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } +static int pc_present_cpus_count(PCMachineState *pcms) +{ + int i, boot_cpus = 0; + for (i = 0; i < pcms->possible_cpus->len; i++) { + if (pcms->possible_cpus->cpus[i].cpu) { + boot_cpus++; + } + } + return boot_cpus; +} + static X86CPU *pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) { @@ -1122,18 +1130,6 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) return; } - if (cpu_exists(apic_id)) { - error_setg(errp, "Unable to add CPU: %" PRIi64 - ", it already exists", id); - return; - } - - if (id >= max_cpus) { - error_setg(errp, "Unable to add CPU: %" PRIi64 - ", max allowed: %d", id, max_cpus - 1); 
- return; - } - if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { error_setg(errp, "Unable to add CPU: %" PRIi64 ", resulting APIC ID (%" PRIi64 ") is too large", @@ -1208,7 +1204,6 @@ void pc_cpus_init(PCMachineState *pcms) if (i < smp_cpus) { cpu = pc_new_cpu(typename, x86_cpu_apic_id_from_index(i), &error_fatal); - pcms->possible_cpus->cpus[i].cpu = CPU(cpu); object_unref(OBJECT(cpu)); } } @@ -1252,6 +1247,9 @@ void pc_machine_done(Notifier *notifier, void *data) PCMachineState, machine_done); PCIBus *bus = pcms->bus; + /* set the number of CPUs */ + rtc_set_memory(pcms->rtc, 0x5f, pc_present_cpus_count(pcms) - 1); + if (bus) { int extra_hosts = 0; @@ -1756,39 +1754,48 @@ static int pc_apic_cmp(const void *a, const void *b) return apic_a->arch_id - apic_b->arch_id; } +/* returns pointer to CPUArchId descriptor that matches CPU's apic_id + * in pcms->possible_cpus->cpus, if pcms->possible_cpus->cpus has no + * entry correponding to CPU's apic_id returns NULL. + */ +static CPUArchId *pc_find_cpu_slot(PCMachineState *pcms, CPUState *cpu, + int *idx) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + CPUArchId apic_id, *found_cpu; + + apic_id.arch_id = cc->get_arch_id(CPU(cpu)); + found_cpu = bsearch(&apic_id, pcms->possible_cpus->cpus, + pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus), + pc_apic_cmp); + if (found_cpu && idx) { + *idx = found_cpu - pcms->possible_cpus->cpus; + } + return found_cpu; +} + static void pc_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - CPUClass *cc = CPU_GET_CLASS(dev); - CPUArchId apic_id, *found_cpu; + CPUArchId *found_cpu; HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); - if (!dev->hotplugged) { - goto out; + if (pcms->acpi_dev) { + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + if (local_err) { + goto out; + } } - if (!pcms->acpi_dev) { - error_setg(&local_err, - "cpu hotplug is not enabled: 
missing acpi device"); - goto out; + if (dev->hotplugged) { + /* increment the number of CPUs */ + rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1); } - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - if (local_err) { - goto out; - } - - /* increment the number of CPUs */ - rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1); - - apic_id.arch_id = cc->get_arch_id(CPU(dev)); - found_cpu = bsearch(&apic_id, pcms->possible_cpus->cpus, - pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus), - pc_apic_cmp); - assert(found_cpu); + found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL); found_cpu->cpu = CPU(dev); out: error_propagate(errp, local_err); @@ -1796,10 +1803,35 @@ out: static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + int idx = -1; HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); + pc_find_cpu_slot(pcms, CPU(dev), &idx); + assert(idx != -1); + if (idx == 0) { + error_setg(&local_err, "Boot CPU is unpluggable"); + goto out; + } + + if (idx < pcms->possible_cpus->len - 1 && + pcms->possible_cpus->cpus[idx + 1].cpu != NULL) { + X86CPU *cpu; + + for (idx = pcms->possible_cpus->len - 1; + pcms->possible_cpus->cpus[idx].cpu == NULL; idx--) { + ;; + } + + cpu = X86_CPU(pcms->possible_cpus->cpus[idx].cpu); + error_setg(&local_err, "CPU [socket-id: %u, core-id: %u," + " thread-id: %u] should be removed first", + cpu->socket_id, cpu->core_id, cpu->thread_id); + goto out; + + } + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); @@ -1815,6 +1847,7 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + CPUArchId *found_cpu; HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState 
*pcms = PC_MACHINE(hotplug_dev); @@ -1826,17 +1859,129 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, goto out; } - /* - * TODO: enable unplug once generic CPU remove bits land - * for now guest will be able to eject CPU ACPI wise but - * it will come back again on machine reset. - */ - /* object_unparent(OBJECT(dev)); */ + found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL); + found_cpu->cpu = NULL; + object_unparent(OBJECT(dev)); + rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) - 1); out: error_propagate(errp, local_err); } +static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + int idx; + CPUArchId *cpu_slot; + X86CPUTopoInfo topo; + X86CPU *cpu = X86_CPU(dev); + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + /* if APIC ID is not set, set it based on socket/core/thread properties */ + if (cpu->apic_id == UNASSIGNED_APIC_ID) { + int max_socket = (max_cpus - 1) / smp_threads / smp_cores; + + if (cpu->socket_id < 0) { + error_setg(errp, "CPU socket-id is not set"); + return; + } else if (cpu->socket_id > max_socket) { + error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", + cpu->socket_id, max_socket); + return; + } + if (cpu->core_id < 0) { + error_setg(errp, "CPU core-id is not set"); + return; + } else if (cpu->core_id > (smp_cores - 1)) { + error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", + cpu->core_id, smp_cores - 1); + return; + } + if (cpu->thread_id < 0) { + error_setg(errp, "CPU thread-id is not set"); + return; + } else if (cpu->thread_id > (smp_threads - 1)) { + error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", + cpu->thread_id, smp_threads - 1); + return; + } + + topo.pkg_id = cpu->socket_id; + topo.core_id = cpu->core_id; + topo.smt_id = cpu->thread_id; + cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo); + } + + cpu_slot = pc_find_cpu_slot(pcms, CPU(dev), &idx); + if (!cpu_slot) { + 
x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo); + error_setg(errp, "Invalid CPU [socket: %u, core: %u, thread: %u] with" + " APIC ID %" PRIu32 ", valid index range 0:%d", + topo.pkg_id, topo.core_id, topo.smt_id, cpu->apic_id, + pcms->possible_cpus->len - 1); + return; + } + + if (cpu_slot->cpu) { + error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", + idx, cpu->apic_id); + return; + } + + if (idx != 0 && pcms->possible_cpus->cpus[idx - 1].cpu == NULL) { + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + + for (idx = 1; pcms->possible_cpus->cpus[idx].cpu != NULL; idx++) { + ;; + } + + x86_topo_ids_from_apicid(pcms->possible_cpus->cpus[idx].arch_id, + smp_cores, smp_threads, &topo); + + if (!pcmc->legacy_cpu_hotplug) { + error_setg(errp, "CPU [socket: %u, core: %u, thread: %u] should be" + " added first", topo.pkg_id, topo.core_id, topo.smt_id); + return; + } + } + + /* if 'address' properties socket-id/core-id/thread-id are not set, set them + * so that query_hotpluggable_cpus would show correct values + */ + /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() + * once -smp refactoring is complete and there will be CPU private + * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ + x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo); + if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { + error_setg(errp, "property socket-id: %u doesn't match set apic-id:" + " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, topo.pkg_id); + return; + } + cpu->socket_id = topo.pkg_id; + + if (cpu->core_id != -1 && cpu->core_id != topo.core_id) { + error_setg(errp, "property core-id: %u doesn't match set apic-id:" + " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, topo.core_id); + return; + } + cpu->core_id = topo.core_id; + + if (cpu->thread_id != -1 && cpu->thread_id != topo.smt_id) { + error_setg(errp, "property thread-id: %u doesn't match set apic-id:" + " 0x%x 
(thread-id: %u)", cpu->thread_id, cpu->apic_id, topo.smt_id); + return; + } + cpu->thread_id = topo.smt_id; +} + +static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + pc_cpu_pre_plug(hotplug_dev, dev, errp); + } +} + static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2090,6 +2235,50 @@ static CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *machine) return list; } +static HotpluggableCPUList *pc_query_hotpluggable_cpus(MachineState *machine) +{ + int i; + CPUState *cpu; + HotpluggableCPUList *head = NULL; + PCMachineState *pcms = PC_MACHINE(machine); + const char *cpu_type; + + cpu = pcms->possible_cpus->cpus[0].cpu; + assert(cpu); /* BSP is always present */ + cpu_type = object_class_get_name(OBJECT_CLASS(CPU_GET_CLASS(cpu))); + + for (i = 0; i < pcms->possible_cpus->len; i++) { + X86CPUTopoInfo topo; + HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1); + HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); + CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1); + const uint32_t apic_id = pcms->possible_cpus->cpus[i].arch_id; + + x86_topo_ids_from_apicid(apic_id, smp_cores, smp_threads, &topo); + + cpu_item->type = g_strdup(cpu_type); + cpu_item->vcpus_count = 1; + cpu_props->has_socket_id = true; + cpu_props->socket_id = topo.pkg_id; + cpu_props->has_core_id = true; + cpu_props->core_id = topo.core_id; + cpu_props->has_thread_id = true; + cpu_props->thread_id = topo.smt_id; + cpu_item->props = cpu_props; + + cpu = pcms->possible_cpus->cpus[i].cpu; + if (cpu) { + cpu_item->has_qom_path = true; + cpu_item->qom_path = object_get_canonical_path(OBJECT(cpu)); + } + + list_item->value = cpu_item; + list_item->next = head; + head = list_item; + } + return head; +} + static void x86_nmi(NMIState *n, int cpu_index, Error **errp) { /* cpu index isn't used */ @@ -2130,10 +2319,12 
@@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; + mc->query_hotpluggable_cpus = pc_query_hotpluggable_cpus; mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; mc->max_cpus = 255; mc->reset = pc_machine_reset; + hc->pre_plug = pc_machine_device_pre_plug_cb; hc->plug = pc_machine_device_plug_cb; hc->unplug_request = pc_machine_device_unplug_request_cb; hc->unplug = pc_machine_device_unplug_cb; diff --git a/hw/intc/apic.c b/hw/intc/apic.c index e1ab9354c6..45887d99c0 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -28,7 +28,9 @@ #include "trace.h" #include "hw/i386/pc.h" #include "hw/i386/apic-msidef.h" +#include "qapi/error.h" +#define MAX_APICS 255 #define MAX_APIC_WORDS 8 #define SYNC_FROM_VAPIC 0x1 @@ -419,7 +421,7 @@ static int apic_find_dest(uint8_t dest) int i; if (apic && apic->id == dest) - return dest; /* shortcut in case apic->id == apic->idx */ + return dest; /* shortcut in case apic->id == local_apics[dest]->id */ for (i = 0; i < MAX_APICS; i++) { apic = local_apics[i]; @@ -502,14 +504,14 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, break; case 1: memset(deliver_bitmask, 0x00, sizeof(deliver_bitmask)); - apic_set_bit(deliver_bitmask, s->idx); + apic_set_bit(deliver_bitmask, s->id); break; case 2: memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); break; case 3: memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); - apic_reset_bit(deliver_bitmask, s->idx); + apic_reset_bit(deliver_bitmask, s->id); break; } @@ -870,20 +872,36 @@ static void apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC_COMMON(dev); + if (s->id >= MAX_APICS) { + error_setg(errp, "%s initialization failed. 
APIC ID %d is invalid", + object_get_typename(OBJECT(dev)), s->id); + return; + } + memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", APIC_SPACE_SIZE); s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); - local_apics[s->idx] = s; + local_apics[s->id] = s; msi_nonbroken = true; } +static void apic_unrealize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + timer_del(s->timer); + timer_free(s->timer); + local_apics[s->id] = NULL; +} + static void apic_class_init(ObjectClass *klass, void *data) { APICCommonClass *k = APIC_COMMON_CLASS(klass); k->realize = apic_realize; + k->unrealize = apic_unrealize; k->set_base = apic_set_base; k->set_tpr = apic_set_tpr; k->get_tpr = apic_get_tpr; diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index e6eb694de0..14ac43c186 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -294,19 +294,14 @@ static int apic_load_old(QEMUFile *f, void *opaque, int version_id) return 0; } +static const VMStateDescription vmstate_apic_common; + static void apic_common_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info; static DeviceState *vapic; - static int apic_no; - - if (apic_no >= MAX_APICS) { - error_setg(errp, "%s initialization failed.", - object_get_typename(OBJECT(dev))); - return; - } - s->idx = apic_no++; + int instance_id = s->id; info = APIC_COMMON_GET_CLASS(s); info->realize(dev, errp); @@ -321,6 +316,24 @@ static void apic_common_realize(DeviceState *dev, Error **errp) info->enable_tpr_reporting(s, true); } + if (s->legacy_instance_id) { + instance_id = -1; + } + vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, + s, -1, 0); +} + +static void apic_common_unrealize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info = APIC_COMMON_GET_CLASS(s); + + vmstate_unregister(NULL, &vmstate_apic_common, s); + info->unrealize(dev, errp); 
+ + if (apic_report_tpr_access && info->enable_tpr_reporting) { + info->enable_tpr_reporting(s, false); + } } static int apic_pre_load(void *opaque) @@ -418,6 +431,8 @@ static Property apic_properties_common[] = { DEFINE_PROP_UINT8("version", APICCommonState, version, 0x14), DEFINE_PROP_BIT("vapic", APICCommonState, vapic_control, VAPIC_ENABLE_BIT, true), + DEFINE_PROP_BOOL("legacy-instance-id", APICCommonState, legacy_instance_id, + false), DEFINE_PROP_END_OF_LIST(), }; @@ -425,10 +440,10 @@ static void apic_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - dc->vmsd = &vmstate_apic_common; dc->reset = apic_reset_common; dc->props = apic_properties_common; dc->realize = apic_common_realize; + dc->unrealize = apic_common_unrealize; /* * Reason: APIC and CPU need to be wired up by * x86_cpu_apic_create() diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 73ce71674f..06c4e9f6f9 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -121,8 +121,6 @@ #define VAPIC_ENABLE_BIT 0 #define VAPIC_ENABLE_MASK (1 << VAPIC_ENABLE_BIT) -#define MAX_APICS 255 - typedef struct APICCommonState APICCommonState; #define TYPE_APIC_COMMON "apic-common" @@ -138,6 +136,7 @@ typedef struct APICCommonClass DeviceClass parent_class; DeviceRealize realize; + DeviceUnrealize unrealize; void (*set_base)(APICCommonState *s, uint64_t val); void (*set_tpr)(APICCommonState *s, uint8_t val); uint8_t (*get_tpr)(APICCommonState *s); @@ -176,7 +175,6 @@ struct APICCommonState { uint32_t initial_count; int64_t initial_count_load_time; int64_t next_time; - int idx; QEMUTimer *timer; int64_t timer_expiry; int sipi_vector; @@ -185,6 +183,7 @@ struct APICCommonState { uint32_t vapic_control; DeviceState *vapic; hwaddr vapic_paddr; /* note: persistence via kvmvapic */ + bool legacy_instance_id; }; typedef struct VAPICState { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 
e38c95a4da..bc937b989e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -377,6 +377,16 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "vmxnet3",\ .property = "romfile",\ .value = "",\ + },\ + {\ + .driver = TYPE_X86_CPU,\ + .property = "fill-mtrr-mask",\ + .value = "off",\ + },\ + {\ + .driver = "apic",\ + .property = "legacy-instance-id",\ + .value = "on",\ }, #define PC_COMPAT_2_5 \ diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h index fc95572394..1ebaee0f76 100644 --- a/include/hw/i386/topology.h +++ b/include/hw/i386/topology.h @@ -117,6 +117,21 @@ static inline void x86_topo_ids_from_idx(unsigned nr_cores, topo->pkg_id = core_index / nr_cores; } +/* Calculate thread/core/package IDs for a specific topology, + * based on APIC ID + */ +static inline void x86_topo_ids_from_apicid(apic_id_t apicid, + unsigned nr_cores, + unsigned nr_threads, + X86CPUTopoInfo *topo) +{ + topo->smt_id = apicid & + ~(0xFFFFFFFFUL << apicid_smt_width(nr_cores, nr_threads)); + topo->core_id = (apicid >> apicid_core_offset(nr_cores, nr_threads)) & + ~(0xFFFFFFFFUL << apicid_core_width(nr_cores, nr_threads)); + topo->pkg_id = apicid >> apicid_pkg_offset(nr_cores, nr_threads); +} + /* Make APIC ID for the CPU 'cpu_index' * * 'cpu_index' is a sequential, contiguous ID for the CPU. 
diff --git a/qmp-commands.hx b/qmp-commands.hx index 496d73c275..c8d360ad36 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -5026,3 +5026,18 @@ Example for pseries machine type started with { "props": { "core-id": 0 }, "type": "POWER8-spapr-cpu-core", "vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"} ]}' + +Example for pc machine type started with +-smp 1,maxcpus=2: + -> { "execute": "query-hotpluggable-cpus" } + <- {"return": [ + { + "type": "qemu64-x86_64-cpu", "vcpus-count": 1, + "props": {"core-id": 0, "socket-id": 1, "thread-id": 0} + }, + { + "qom-path": "/machine/unattached/device[0]", + "type": "qemu64-x86_64-cpu", "vcpus-count": 1, + "props": {"core-id": 0, "socket-id": 0, "thread-id": 0} + } + ]} diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 6e49e4ca82..6a1afab595 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -305,12 +305,12 @@ static const char *cpuid_7_0_ebx_feature_name[] = { }; static const char *cpuid_7_0_ecx_feature_name[] = { - NULL, NULL, NULL, "pku", + NULL, NULL, "umip", "pku", "ospke", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, "rdpid", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; @@ -1893,50 +1893,6 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, const char *name, cpu->env.tsc_khz = cpu->env.user_tsc_khz = value / 1000; } -static void x86_cpuid_get_apic_id(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - X86CPU *cpu = X86_CPU(obj); - int64_t value = cpu->apic_id; - - visit_type_int(v, name, &value, errp); -} - -static void x86_cpuid_set_apic_id(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - X86CPU *cpu = X86_CPU(obj); - DeviceState *dev = DEVICE(obj); - const int64_t min = 0; - const int64_t max = UINT32_MAX; - Error *error = NULL; - int64_t value; - - if (dev->realized) { - error_setg(errp, "Attempt to set property '%s' on '%s' 
after " - "it was realized", name, object_get_typename(obj)); - return; - } - - visit_type_int(v, name, &value, &error); - if (error) { - error_propagate(errp, error); - return; - } - if (value < min || value > max) { - error_setg(errp, "Property %s.%s doesn't take value %" PRId64 - " (minimum: %" PRId64 ", maximum: %" PRId64 ")" , - object_get_typename(obj), name, value, min, max); - return; - } - - if ((value != cpu->apic_id) && cpu_exists(value)) { - error_setg(errp, "CPU with APIC ID %" PRIi64 " exists", value); - return; - } - cpu->apic_id = value; -} - /* Generic getter for "feature-words" and "filtered-features" properties */ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, const char *name, void *opaque, @@ -2641,17 +2597,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x80000008: /* virtual & phys address size in low 2 bytes. */ -/* XXX: This value must match the one used in the MMU code. */ if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { - /* 64 bit processor */ -/* XXX: The physical address space is limited to 42 bits in exec.c. */ - *eax = 0x00003028; /* 48 bits virtual, 40 bits physical */ + /* 64 bit processor, 48 bits virtual, configurable + * physical bits. 
+ */ + *eax = 0x00003000 + cpu->phys_bits; } else { - if (env->features[FEAT_1_EDX] & CPUID_PSE36) { - *eax = 0x00000024; /* 36 bits physical */ - } else { - *eax = 0x00000020; /* 32 bits physical */ - } + *eax = cpu->phys_bits; } *ebx = 0; *ecx = 0; @@ -2874,8 +2826,10 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) cpu->apic_state = DEVICE(object_new(apic_type)); - object_property_add_child(OBJECT(cpu), "apic", - OBJECT(cpu->apic_state), NULL); + object_property_add_child(OBJECT(cpu), "lapic", + OBJECT(cpu->apic_state), &error_abort); + object_unref(OBJECT(cpu->apic_state)); + qdev_prop_set_uint8(cpu->apic_state, "id", cpu->apic_id); /* TODO: convert to link<> */ apic = APIC_COMMON(cpu->apic_state); @@ -2926,6 +2880,31 @@ static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) } #endif +/* Note: Only safe for use on x86(-64) hosts */ +static uint32_t x86_host_phys_bits(void) +{ + uint32_t eax; + uint32_t host_phys_bits; + + host_cpuid(0x80000000, 0, &eax, NULL, NULL, NULL); + if (eax >= 0x80000008) { + host_cpuid(0x80000008, 0, &eax, NULL, NULL, NULL); + /* Note: According to AMD doc 25481 rev 2.34 they have a field + * at 23:16 that can specify a maximum physical address bits for + * the guest that can override this value; but I've not seen + * anything with that set. + */ + host_phys_bits = eax & 0xff; + } else { + /* It's an odd 64 bit machine that doesn't have the leaf for + * physical address bits; fall back to 36 that's most older + * Intel. 
+ */ + host_phys_bits = 36; + } + + return host_phys_bits; +} #define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \ (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \ @@ -2950,7 +2929,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) goto out; } - if (cpu->apic_id < 0) { + if (cpu->apic_id == UNASSIGNED_APIC_ID) { error_setg(errp, "apic-id property was not initialized properly"); return; } @@ -2993,7 +2972,70 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) & CPUID_EXT2_AMD_ALIASES); } + /* For 64bit systems think about the number of physical bits to present. + * ideally this should be the same as the host; anything other than matching + * the host can cause incorrect guest behaviour. + * QEMU used to pick the magic value of 40 bits that corresponds to + * consumer AMD devices but nothing else. + */ + if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + if (kvm_enabled()) { + uint32_t host_phys_bits = x86_host_phys_bits(); + static bool warned; + if (cpu->host_phys_bits) { + /* The user asked for us to use the host physical bits */ + cpu->phys_bits = host_phys_bits; + } + + /* Print a warning if the user set it to a value that's not the + * host value. + */ + if (cpu->phys_bits != host_phys_bits && cpu->phys_bits != 0 && + !warned) { + error_report("Warning: Host physical bits (%u)" + " does not match phys-bits property (%u)", + host_phys_bits, cpu->phys_bits); + warned = true; + } + + if (cpu->phys_bits && + (cpu->phys_bits > TARGET_PHYS_ADDR_SPACE_BITS || + cpu->phys_bits < 32)) { + error_setg(errp, "phys-bits should be between 32 and %u " + " (but is %u)", + TARGET_PHYS_ADDR_SPACE_BITS, cpu->phys_bits); + return; + } + } else { + if (cpu->phys_bits && cpu->phys_bits != TCG_PHYS_ADDR_BITS) { + error_setg(errp, "TCG only supports phys-bits=%u", + TCG_PHYS_ADDR_BITS); + return; + } + } + /* 0 means it was not explicitly set by the user (or by machine + * compat_props or by the host code above). 
In this case, the default + * is the value used by TCG (40). + */ + if (cpu->phys_bits == 0) { + cpu->phys_bits = TCG_PHYS_ADDR_BITS; + } + } else { + /* For 32 bit systems don't use the user set value, but keep + * phys_bits consistent with what we tell the guest. + */ + if (cpu->phys_bits != 0) { + error_setg(errp, "phys-bits is not user-configurable in 32 bit"); + return; + } + + if (env->features[FEAT_1_EDX] & CPUID_PSE36) { + cpu->phys_bits = 36; + } else { + cpu->phys_bits = 32; + } + } cpu_exec_init(cs, &error_abort); if (tcg_enabled()) { @@ -3072,6 +3114,21 @@ out: } } +static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) +{ + X86CPU *cpu = X86_CPU(dev); + +#ifndef CONFIG_USER_ONLY + cpu_remove_sync(CPU(dev)); + qemu_unregister_reset(x86_cpu_machine_reset_cb, dev); +#endif + + if (cpu->apic_state) { + object_unparent(OBJECT(cpu->apic_state)); + cpu->apic_state = NULL; + } +} + typedef struct BitProperty { uint32_t *ptr; uint32_t mask; @@ -3207,9 +3264,6 @@ static void x86_cpu_initfn(Object *obj) object_property_add(obj, "tsc-frequency", "int", x86_cpuid_get_tsc_freq, x86_cpuid_set_tsc_freq, NULL, NULL, NULL); - object_property_add(obj, "apic-id", "int", - x86_cpuid_get_apic_id, - x86_cpuid_set_apic_id, NULL, NULL, NULL); object_property_add(obj, "feature-words", "X86CPUFeatureWordInfo", x86_cpu_get_feature_words, NULL, NULL, (void *)env->features, NULL); @@ -3219,11 +3273,6 @@ static void x86_cpu_initfn(Object *obj) cpu->hyperv_spinlock_attempts = HYPERV_SPINLOCK_NEVER_RETRY; -#ifndef CONFIG_USER_ONLY - /* Any code creating new X86CPU objects have to set apic-id explicitly */ - cpu->apic_id = -1; -#endif - for (w = 0; w < FEATURE_WORDS; w++) { int bitnr; @@ -3280,6 +3329,18 @@ static bool x86_cpu_has_work(CPUState *cs) } static Property x86_cpu_properties[] = { +#ifdef CONFIG_USER_ONLY + /* apic_id = 0 by default for *-user, see commit 9886e834 */ + DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, 0), + DEFINE_PROP_INT32("thread-id", X86CPU, 
thread_id, 0), + DEFINE_PROP_INT32("core-id", X86CPU, core_id, 0), + DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, 0), +#else + DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, UNASSIGNED_APIC_ID), + DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, -1), + DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), + DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), +#endif DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false), { .name = "hv-spinlocks", .info = &qdev_prop_spinlocks }, DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false), @@ -3294,6 +3355,9 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0), DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0), DEFINE_PROP_UINT32("xlevel2", X86CPU, env.cpuid_xlevel2, 0), @@ -3311,6 +3375,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) xcc->parent_realize = dc->realize; dc->realize = x86_cpu_realizefn; + dc->unrealize = x86_cpu_unrealizefn; dc->props = x86_cpu_properties; xcc->parent_reset = cc->reset; @@ -3347,6 +3412,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) cc->cpu_exec_enter = x86_cpu_exec_enter; cc->cpu_exec_exit = x86_cpu_exec_exit; + dc->cannot_instantiate_with_device_add_yet = false; /* * Reason: x86_cpu_initfn() calls cpu_exec_init(), which saves the * object in cpus -> dangling pointer after final object_unref(). 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 5b14a72baa..65615c0f3b 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -616,8 +616,10 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ +#define CPUID_7_0_ECX_UMIP (1U << 2) #define CPUID_7_0_ECX_PKU (1U << 3) #define CPUID_7_0_ECX_OSPKE (1U << 4) +#define CPUID_7_0_ECX_RDPID (1U << 22) #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) @@ -845,6 +847,11 @@ typedef struct { #define NB_OPMASK_REGS 8 +/* CPU can't have 0xFFFFFFFF APIC ID, use that value to distinguish + * that APIC ID hasn't been set yet + */ +#define UNASSIGNED_APIC_ID 0xFFFFFFFF + typedef union X86LegacyXSaveArea { struct { uint16_t fcw; @@ -1174,7 +1181,7 @@ struct X86CPU { bool expose_kvm; bool migratable; bool host_features; - int64_t apic_id; + uint32_t apic_id; /* if true the CPUID code directly forward host cache leaves to the guest */ bool cache_info_passthrough; @@ -1198,6 +1205,15 @@ struct X86CPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* if true fill the top bits of the MTRR_PHYSMASKn variable range */ + bool fill_mtrr_mask; + + /* if true override the phys_bits value with a value read from the host */ + bool host_phys_bits; + + /* Number of physical address bits supported */ + uint32_t phys_bits; + /* in order to simplify APIC support, we leave this pointer to the user */ struct DeviceState *apic_state; @@ -1205,6 +1221,10 @@ struct X86CPU { Notifier machine_done; struct kvm_msrs *kvm_msr_buf; + + int32_t socket_id; + int32_t core_id; + int32_t thread_id; }; static inline X86CPU *x86_env_get_cpu(CPUX86State *env) @@ -1419,11 +1439,13 @@ uint64_t cpu_get_tsc(CPUX86State *env); /* XXX: This value should match the one returned by CPUID * and in exec.c */ # if defined(TARGET_X86_64) -# define PHYS_ADDR_MASK 
0xffffffffffLL +# define TCG_PHYS_ADDR_BITS 40 # else -# define PHYS_ADDR_MASK 0xfffffffffLL +# define TCG_PHYS_ADDR_BITS 36 # endif +#define PHYS_ADDR_MASK MAKE_64BIT_MASK(0, TCG_PHYS_ADDR_BITS) + #define cpu_init(cpu_model) CPU(cpu_x86_init(cpu_model)) #define cpu_signal_handler cpu_x86_signal_handler diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 93275231ec..df28dd254a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1716,6 +1716,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level) } } if (has_msr_mtrr) { + uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits); + kvm_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype); kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, env->mtrr_fixed[0]); kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, env->mtrr_fixed[1]); @@ -1729,10 +1731,15 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]); kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]); for (i = 0; i < MSR_MTRRcap_VCNT; i++) { + /* The CPU GPs if we write to a bit above the physical limit of + * the host CPU (and KVM emulates that) + */ + uint64_t mask = env->mtrr_var[i].mask; + mask &= phys_mask; + kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), env->mtrr_var[i].base); - kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), - env->mtrr_var[i].mask); + kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask); } } @@ -1970,6 +1977,7 @@ static int kvm_get_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; + uint64_t mtrr_top_bits; kvm_msr_buf_reset(cpu); @@ -2122,6 +2130,30 @@ static int kvm_get_msrs(X86CPU *cpu) } assert(ret == cpu->kvm_msr_buf->nmsrs); + /* + * MTRR masks: Each mask consists of 5 parts + * a 10..0: must be zero + * b 11 : valid bit + * c n-1.12: actual mask bits + * d 51..n: reserved must be zero + * e 63.52: reserved must be zero + * + * 'n' is the number of physical bits supported by the CPU and is + * apparently 
always <= 52. We know our 'n' but don't know what + * the destination's 'n' is; it might be smaller, in which case + * it masks (c) on loading. It might be larger, in which case + * we fill 'd' so that d..c is consistent irrespective of the 'n' + * we're migrating to. + */ + + if (cpu->fill_mtrr_mask) { + QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 52); + assert(cpu->phys_bits <= TARGET_PHYS_ADDR_SPACE_BITS); + mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits); + } else { + mtrr_top_bits = 0; + } + for (i = 0; i < ret; i++) { uint32_t index = msrs[i].index; switch (index) { @@ -2320,7 +2352,8 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1): if (index & 1) { - env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data; + env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data | + mtrr_top_bits; } else { env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data; }