From d38d201f0ed092b8c7f7738a5db8428e12cb04b6 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 15 Jul 2019 09:28:44 +0800 Subject: [PATCH 01/29] i386/kvm: support guest access CORE cstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow guest reads CORE cstate when exposing host CPU power management capabilities to the guest. PKG cstate is restricted to avoid a guest to get the whole package information in multi-tenant scenario. Cc: Eduardo Habkost Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Message-Id: <1563154124-18579-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- linux-headers/linux/kvm.h | 4 +++- target/i386/kvm.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index c8423e760c..18892d6541 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -696,9 +696,11 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE) + KVM_X86_DISABLE_EXITS_PAUSE | \ + KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 8023c679ea..3435fc4345 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2076,7 +2076,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) if (disable_exits) { disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT | KVM_X86_DISABLE_EXITS_HLT | - KVM_X86_DISABLE_EXITS_PAUSE); + KVM_X86_DISABLE_EXITS_PAUSE | + KVM_X86_DISABLE_EXITS_CSTATE); } ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0, From 56b15076805a29673c1a90ea9c3ebef25bfcc912 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:50 +0800 Subject: [PATCH 02/29] exec.c: replace hwaddr with uint64_t for better understanding Function phys_page_set() and phys_page_set_level() 's argument *nb* stands for number of pages to set instead of hardware address. This would be more proper to use uint64_t instead of hwaddr for its type. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-2-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exec.c b/exec.c index 235d6bc883..8e0c400de8 100644 --- a/exec.c +++ b/exec.c @@ -255,7 +255,7 @@ static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf) } static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, - hwaddr *index, hwaddr *nb, uint16_t leaf, + hwaddr *index, uint64_t *nb, uint16_t leaf, int level) { PhysPageEntry *p; @@ -281,7 +281,7 @@ static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, } static void phys_page_set(AddressSpaceDispatch *d, - hwaddr index, hwaddr nb, + hwaddr index, uint64_t nb, uint16_t leaf) { /* Wildly overreserve - it doesn't matter much. */ From c95cfd040078db8017f74fd3a4d6f798385d960c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:52 +0800 Subject: [PATCH 03/29] exec.c: get nodes_nb_alloc with one MAX calculation The purpose of these two MAX here is to get the maximum of these three variables: A: map->nodes_nb + nodes B: map->nodes_nb_alloc C: alloc_hint We can write it like MAX(A, B, C). Since the if condition says A > B, this means MAX(A, B, C) = MAX(A, C). This patch just simplify the calculation a bit. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-4-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/exec.c b/exec.c index 8e0c400de8..ff3cb3e25b 100644 --- a/exec.c +++ b/exec.c @@ -227,8 +227,7 @@ static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes) { static unsigned alloc_hint = 16; if (map->nodes_nb + nodes > map->nodes_nb_alloc) { - map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint); - map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes); + map->nodes_nb_alloc = MAX(alloc_hint, map->nodes_nb + nodes); map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc); alloc_hint = map->nodes_nb_alloc; } From b797ab1a15ba8d2b2fc4ec3e1f24d755f6855d05 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:53 +0800 Subject: [PATCH 04/29] exec.c: subpage->sub_section is already initialized to 0 In subpage_init(), we will set subpage->sub_section to PHYS_SECTION_UNASSIGNED by subpage_register. Since PHYS_SECTION_UNASSIGNED is defined to be 0, and we allocate subpage with g_malloc0, this means subpage->sub_section is already initialized to 0. This patch removes the redundant setup for a new subpage and also fix the code style. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-5-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exec.c b/exec.c index ff3cb3e25b..d9827ef840 100644 --- a/exec.c +++ b/exec.c @@ -1491,8 +1491,8 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu, #if !defined(CONFIG_USER_ONLY) -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section); +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section); static subpage_t *subpage_init(FlatView *fv, hwaddr base); static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared) = @@ -2913,8 +2913,8 @@ static const MemoryRegionOps subpage_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, - uint16_t section) +static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, + uint16_t section) { int idx, eidx; @@ -2937,6 +2937,7 @@ static subpage_t *subpage_init(FlatView *fv, hwaddr base) { subpage_t *mmio; + /* mmio->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */ mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t)); mmio->fv = fv; mmio->base = base; @@ -2947,7 +2948,6 @@ static subpage_t *subpage_init(FlatView *fv, hwaddr base) printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__, mmio, base, TARGET_PAGE_SIZE); #endif - subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED); return mmio; } From 26ca2075babd7775e246b9eb7da75d6de77eb658 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:54 +0800 Subject: [PATCH 05/29] exec.c: correct the maximum skip value during compact skip is defined with 6 bits. So the maximum value should be (1 << 6). Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-6-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exec.c b/exec.c index d9827ef840..d1969bb894 100644 --- a/exec.c +++ b/exec.c @@ -324,7 +324,7 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes) assert(valid_ptr < P_L2_SIZE); /* Don't compress if it won't fit in the # of bits we have. */ - if (lp->skip + p[valid_ptr].skip >= (1 << 3)) { + if (lp->skip + p[valid_ptr].skip >= (1 << 6)) { return; } From 526ca2360ea1cd947f74c8c6c38b91b9d6fcfdb5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 21 Mar 2019 16:25:55 +0800 Subject: [PATCH 06/29] exec.c: add a check between constants to see whether we could skip The maximum level is defined as P_L2_LEVELS and skip is defined with 6 bits, which means if P_L2_LEVELS < (1 << 6), skip never exceeds the boundary. Since this check is between two constants, which leverages compiler to optimize the code based on different configuration. Signed-off-by: Wei Yang Message-Id: <20190321082555.21118-7-richardw.yang@linux.intel.com> Signed-off-by: Paolo Bonzini --- exec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exec.c b/exec.c index d1969bb894..b9511be096 100644 --- a/exec.c +++ b/exec.c @@ -324,7 +324,8 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes) assert(valid_ptr < P_L2_SIZE); /* Don't compress if it won't fit in the # of bits we have. */ - if (lp->skip + p[valid_ptr].skip >= (1 << 6)) { + if (P_L2_LEVELS >= (1 << 6) && + lp->skip + p[valid_ptr].skip >= (1 << 6)) { return; } From f64f598ad96977d2e511be0355d4ae48ecb4f694 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Sep 2019 15:08:32 +0200 Subject: [PATCH 07/29] win32: fix README file in NSIS installer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust after the rST conversion and consequent renaming. Fixes: 336a7451e8803c21a2da6e7d1eca8cfb8e8b219a Reviewed-by: Daniel P. Berrangé Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- qemu.nsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qemu.nsi b/qemu.nsi index d0df0f4e3a..0c29ba359c 100644 --- a/qemu.nsi +++ b/qemu.nsi @@ -119,7 +119,7 @@ Section "${PRODUCT} (required)" File "${SRCDIR}\Changelog" File "${SRCDIR}\COPYING" File "${SRCDIR}\COPYING.LIB" - File "${SRCDIR}\README" + File "${SRCDIR}\README.rst" File "${SRCDIR}\VERSION" File "${BINDIR}\*.bmp" @@ -211,7 +211,7 @@ Section "Uninstall" Delete "$INSTDIR\Changelog" Delete "$INSTDIR\COPYING" Delete "$INSTDIR\COPYING.LIB" - Delete "$INSTDIR\README" + Delete "$INSTDIR\README.rst" Delete "$INSTDIR\VERSION" Delete "$INSTDIR\*.bmp" Delete "$INSTDIR\*.bin" From 0c9956f8213ad1074474f2f938574043e4ee5dfd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Sep 2019 15:06:42 +0200 Subject: [PATCH 08/29] test-char: fix AddressSanitizer failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CharSocketServerTestConfig and CharSocketClientTestConfig objects escape after they are passed to g_test_add_data_func, but they cease existing after the scope that defines them is closed. Make them static to fix this issue. Fixes: e7b6ba4186f243f149b0d8cddc129fe681ba3912 Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- tests/test-char.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test-char.c b/tests/test-char.c index f3ebdffd87..d62de1b088 100644 --- a/tests/test-char.c +++ b/tests/test-char.c @@ -1355,6 +1355,18 @@ static void char_hotswap_test(void) g_free(chr_args); } +static SocketAddress tcpaddr = { + .type = SOCKET_ADDRESS_TYPE_INET, + .u.inet.host = (char *)"127.0.0.1", + .u.inet.port = (char *)"0", +}; +#ifndef WIN32 +static SocketAddress unixaddr = { + .type = SOCKET_ADDRESS_TYPE_UNIX, + .u.q_unix.path = (char *)"test-char.sock", +}; +#endif + int main(int argc, char **argv) { bool has_ipv4, has_ipv6; @@ -1390,26 +1402,14 @@ int main(int argc, char **argv) g_test_add_func("/char/file-fifo", char_file_fifo_test); #endif - SocketAddress tcpaddr = { - .type = SOCKET_ADDRESS_TYPE_INET, - .u.inet.host = (char *)"127.0.0.1", - .u.inet.port = (char *)"0", - }; -#ifndef WIN32 - SocketAddress unixaddr = { - .type = SOCKET_ADDRESS_TYPE_UNIX, - .u.q_unix.path = (char *)"test-char.sock", - }; -#endif - #define SOCKET_SERVER_TEST(name, addr) \ - CharSocketServerTestConfig server1 ## name = \ + static CharSocketServerTestConfig server1 ## name = \ { addr, false, false }; \ - CharSocketServerTestConfig server2 ## name = \ + static CharSocketServerTestConfig server2 ## name = \ { addr, true, false }; \ - CharSocketServerTestConfig server3 ## name = \ + static CharSocketServerTestConfig server3 ## name = \ { addr, false, true }; \ - CharSocketServerTestConfig server4 ## name = \ + static CharSocketServerTestConfig server4 ## name = \ { addr, true, true }; \ g_test_add_data_func("/char/socket/server/mainloop/" # name, \ &server1 ##name, char_socket_server_test); \ @@ -1421,17 +1421,17 @@ int main(int argc, char **argv) &server4 ##name, char_socket_server_test) #define SOCKET_CLIENT_TEST(name, addr) \ - CharSocketClientTestConfig client1 ## name = \ + static CharSocketClientTestConfig client1 ## name = \ { addr, NULL, false, false }; \ - CharSocketClientTestConfig client2 ## name = \ + static CharSocketClientTestConfig client2 ## name = \ { addr, NULL, true, false }; \ - CharSocketClientTestConfig client3 ## name = \ + static CharSocketClientTestConfig client3 ## name = \ { addr, ",reconnect=1", false }; \ - CharSocketClientTestConfig client4 ## name = \ + static CharSocketClientTestConfig client4 ## name = \ { addr, ",reconnect=1", true }; \ - CharSocketClientTestConfig client5 ## name = \ + static CharSocketClientTestConfig client5 ## name = \ { addr, NULL, false, true }; \ - CharSocketClientTestConfig client6 ## name = \ + static CharSocketClientTestConfig client6 ## name = \ { addr, NULL, true, true }; \ g_test_add_data_func("/char/socket/client/mainloop/" # name, \ &client1 ##name, char_socket_client_test); \ From 709ebb905491258676776c78ca38a61563e59aac Mon Sep 17 00:00:00 2001 From: Cole Robinson Date: Mon, 9 Sep 2019 13:34:10 -0400 Subject: [PATCH 09/29] hw/i386: Move CONFIG_ACPI_PCI to CONFIG_PC CONFIG_ACPI_PCI is a hard requirement of acpi-build.c, which is built unconditionally for x86 target. Putting it in default-configs/ suggests that it can be easily disabled, which isn't true. Relocate the symbol with the other acpi-build.c requirements, under 'config PC'. This is similar to what is done for the arm 'virt' machine type and CONFIG_ACPI_PCI Signed-off-by: Cole Robinson Message-Id: Signed-off-by: Paolo Bonzini --- default-configs/i386-softmmu.mak | 1 - hw/i386/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index cd5ea391e8..ba3fb3ff50 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -25,4 +25,3 @@ CONFIG_ISAPC=y CONFIG_I440FX=y CONFIG_Q35=y -CONFIG_ACPI_PCI=y diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 6350438036..c7a9d6315c 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -29,6 +29,7 @@ config PC select MC146818RTC # For ACPI builder: select SERIAL_ISA + select ACPI_PCI select ACPI_VMGENID select VIRTIO_PMEM_SUPPORTED From 41a2635124117d1fac7e45b3cafa2c1ac68417fd Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 10 Sep 2019 16:22:23 +0200 Subject: [PATCH 10/29] elf-ops.h: fix int overflow in load_elf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a possible integer overflow when we calculate the total size of ELF segments loaded. Reported-by: Coverity (CID 1405299) Reviewed-by: Alex Bennée Signed-off-by: Stefano Garzarella Message-Id: <20190910124828.39794-1-sgarzare@redhat.com> Signed-off-by: Paolo Bonzini --- hw/core/loader.c | 2 ++ include/hw/elf_ops.h | 5 +++++ include/hw/loader.h | 1 + 3 files changed, 8 insertions(+) diff --git a/hw/core/loader.c b/hw/core/loader.c index 32f7cc7c33..75eb56ddbb 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -338,6 +338,8 @@ const char *load_elf_strerror(int error) return "The image is from incompatible architecture"; case ELF_LOAD_WRONG_ENDIAN: return "The image has incorrect endianness"; + case ELF_LOAD_TOO_BIG: + return "The image segments are too big to load"; default: return "Unknown error"; } diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 1496d7e753..e07d276df7 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -485,6 +485,11 @@ static int glue(load_elf, SZ)(const char *name, int fd, } } + if (mem_size > INT_MAX - total_size) { + ret = ELF_LOAD_TOO_BIG; + goto fail; + } + /* address_offset is hack for kernel images that are linked at the wrong physical address. */ if (translate_fn) { diff --git a/include/hw/loader.h b/include/hw/loader.h index 07fd9286e7..48a96cd559 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -89,6 +89,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_NOT_ELF -2 #define ELF_LOAD_WRONG_ARCH -3 #define ELF_LOAD_WRONG_ENDIAN -4 +#define ELF_LOAD_TOO_BIG -5 const char *load_elf_strerror(int error); /** load_elf_ram_sym: From 72d41eb4b8f923de91e8f06dc20aa86b0a9155fb Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 30 Aug 2019 10:30:56 +0100 Subject: [PATCH 11/29] memory: fetch pmem size in get_file_size() Neither stat(2) nor lseek(2) report the size of Linux devdax pmem character device nodes. Commit 314aec4a6e06844937f1677f6cba21981005f389 ("hostmem-file: reject invalid pmem file sizes") added code to hostmem-file.c to fetch the size from sysfs and compare against the user-provided size=NUM parameter: if (backend->size > size) { error_setg(errp, "size property %" PRIu64 " is larger than " "pmem file \"%s\" size %" PRIu64, backend->size, fb->mem_path, size); return; } It turns out that exec.c:qemu_ram_alloc_from_fd() already has an equivalent size check but it skips devdax pmem character devices because lseek(2) returns 0: if (file_size > 0 && file_size < size) { error_setg(errp, "backing store %s size 0x%" PRIx64 " does not match 'size' option 0x" RAM_ADDR_FMT, mem_path, file_size, size); return NULL; } This patch moves the devdax pmem file size code into get_file_size() so that we check the memory size in a single place: qemu_ram_alloc_from_fd(). This simplifies the code and makes it more general. This also fixes the problem that hostmem-file only checks the devdax pmem file size when the pmem=on parameter is given. An unchecked size=NUM parameter can lead to SIGBUS in QEMU so we must always fetch the file size for Linux devdax pmem character device nodes. Signed-off-by: Stefan Hajnoczi Message-Id: <20190830093056.12572-1-stefanha@redhat.com> Reviewed-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- backends/hostmem-file.c | 22 ----------------- exec.c | 34 +++++++++++++++++++++++++- include/qemu/osdep.h | 13 ---------- util/oslib-posix.c | 54 ----------------------------------------- util/oslib-win32.c | 6 ----- 5 files changed, 33 insertions(+), 96 deletions(-) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index ecc15e3eb0..be64020746 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -58,28 +58,6 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) return; } - /* - * Verify pmem file size since starting a guest with an incorrect size - * leads to confusing failures inside the guest. - */ - if (fb->is_pmem) { - Error *local_err = NULL; - uint64_t size; - - size = qemu_get_pmem_size(fb->mem_path, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - if (size && backend->size > size) { - error_setg(errp, "size property %" PRIu64 " is larger than " - "pmem file \"%s\" size %" PRIu64, backend->size, - fb->mem_path, size); - return; - } - } - backend->force_prealloc = mem_prealloc; name = host_memory_backend_get_name(backend); memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), diff --git a/exec.c b/exec.c index b9511be096..8b998974f8 100644 --- a/exec.c +++ b/exec.c @@ -1791,7 +1791,39 @@ long qemu_maxrampagesize(void) #ifdef CONFIG_POSIX static int64_t get_file_size(int fd) { - int64_t size = lseek(fd, 0, SEEK_END); + int64_t size; +#if defined(__linux__) + struct stat st; + + if (fstat(fd, &st) < 0) { + return -errno; + } + + /* Special handling for devdax character devices */ + if (S_ISCHR(st.st_mode)) { + g_autofree char *subsystem_path = NULL; + g_autofree char *subsystem = NULL; + + subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", + major(st.st_rdev), minor(st.st_rdev)); + subsystem = g_file_read_link(subsystem_path, NULL); + + if (subsystem && g_str_has_suffix(subsystem, "/dax")) { + g_autofree char *size_path = NULL; + g_autofree char *size_str = NULL; + + size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", + major(st.st_rdev), minor(st.st_rdev)); + + if (g_file_get_contents(size_path, &size_str, NULL, NULL)) { + return g_ascii_strtoll(size_str, NULL, 0); + } + } + } +#endif /* defined(__linux__) */ + + /* st.st_size may be zero for special files yet lseek(2) works */ + size = lseek(fd, 0, SEEK_END); if (size < 0) { return -errno; } diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index af2b91f0b8..c7d242f476 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -570,19 +570,6 @@ void qemu_set_tty_echo(int fd, bool echo); void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, Error **errp); -/** - * qemu_get_pmem_size: - * @filename: path to a pmem file - * @errp: pointer to a NULL-initialized error object - * - * Determine the size of a persistent memory file. Besides supporting files on - * DAX file systems, this function also supports Linux devdax character - * devices. - * - * Returns: the size or 0 on failure - */ -uint64_t qemu_get_pmem_size(const char *filename, Error **errp); - /** * qemu_get_pid_name: * @pid: pid of a process diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 5fda67dedf..f8693384fc 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -514,60 +514,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, } } -uint64_t qemu_get_pmem_size(const char *filename, Error **errp) -{ - struct stat st; - - if (stat(filename, &st) < 0) { - error_setg(errp, "unable to stat pmem file \"%s\"", filename); - return 0; - } - -#if defined(__linux__) - /* Special handling for devdax character devices */ - if (S_ISCHR(st.st_mode)) { - char *subsystem_path = NULL; - char *subsystem = NULL; - char *size_path = NULL; - char *size_str = NULL; - uint64_t ret = 0; - - subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", - major(st.st_rdev), minor(st.st_rdev)); - subsystem = g_file_read_link(subsystem_path, NULL); - if (!subsystem) { - error_setg(errp, "unable to read subsystem for pmem file \"%s\"", - filename); - goto devdax_err; - } - - if (!g_str_has_suffix(subsystem, "/dax")) { - error_setg(errp, "pmem file \"%s\" is not a dax device", filename); - goto devdax_err; - } - - size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", - major(st.st_rdev), minor(st.st_rdev)); - if (!g_file_get_contents(size_path, &size_str, NULL, NULL)) { - error_setg(errp, "unable to read size for pmem file \"%s\"", - size_path); - goto devdax_err; - } - - ret = g_ascii_strtoull(size_str, NULL, 0); - -devdax_err: - g_free(size_str); - g_free(size_path); - g_free(subsystem); - g_free(subsystem_path); - return ret; - } -#endif /* defined(__linux__) */ - - return st.st_size; -} - char *qemu_get_pid_name(pid_t pid) { char *name = NULL; diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 9583fb4ca4..c62cd4328c 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -562,12 +562,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, } } -uint64_t qemu_get_pmem_size(const char *filename, Error **errp) -{ - error_setg(errp, "pmem support not available"); - return 0; -} - char *qemu_get_pid_name(pid_t pid) { /* XXX Implement me */ From 7a3df11c2a647cf889f6ede8b7d5f81438bb5cc9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Sep 2019 16:02:42 +0200 Subject: [PATCH 12/29] memory: inline and optimize devend_memop devend_memop can rely on the fact that the result is always either 0 or MO_BSWAP, corresponding respectively to host endianness and the opposite. Native (target) endianness in turn can be either the host endianness, in which case MO_BSWAP is only returned for host-opposite endianness, or the opposite, in which case 0 is only returned for host endianness. With this in mind, devend_memop can be compiled as a setcond+shift for every target. Do this and, while at it, move it to include/exec/memory.h since !NEED_CPU_H files do not (and should not) need it. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 19 ++++++++++++++++++- memory.c | 18 ------------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 2dd810259d..a30245c25a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2201,8 +2201,25 @@ address_space_write_cached(MemoryRegionCache *cache, hwaddr addr, } } +#ifdef NEED_CPU_H /* enum device_endian to MemOp. */ -MemOp devend_memop(enum device_endian end); +static inline MemOp devend_memop(enum device_endian end) +{ + QEMU_BUILD_BUG_ON(DEVICE_HOST_ENDIAN != DEVICE_LITTLE_ENDIAN && + DEVICE_HOST_ENDIAN != DEVICE_BIG_ENDIAN); + +#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN) + /* Swap if non-host endianness or native (target) endianness */ + return (end == DEVICE_HOST_ENDIAN) ? 0 : MO_BSWAP; +#else + const int non_host_endianness = + DEVICE_LITTLE_ENDIAN ^ DEVICE_BIG_ENDIAN ^ DEVICE_HOST_ENDIAN; + + /* In this case, native (target) endianness needs no swap. */ + return (end == non_host_endianness) ? MO_BSWAP : 0; +#endif +} +#endif #endif diff --git a/memory.c b/memory.c index 61a254c3f9..b9dd6b94ca 100644 --- a/memory.c +++ b/memory.c @@ -3267,21 +3267,3 @@ static void memory_register_types(void) } type_init(memory_register_types) - -MemOp devend_memop(enum device_endian end) -{ - static MemOp conv[] = { - [DEVICE_LITTLE_ENDIAN] = MO_LE, - [DEVICE_BIG_ENDIAN] = MO_BE, - [DEVICE_NATIVE_ENDIAN] = MO_TE, - [DEVICE_HOST_ENDIAN] = 0, - }; - switch (end) { - case DEVICE_LITTLE_ENDIAN: - case DEVICE_BIG_ENDIAN: - case DEVICE_NATIVE_ENDIAN: - return conv[end]; - default: - g_assert_not_reached(); - } -} From 3dcc9c6ec4ea60fd1464b35aa82199483f24ba75 Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Mon, 9 Sep 2019 16:13:33 +0300 Subject: [PATCH 13/29] qemu-thread: Add qemu_cond_timedwait The new function is needed to implement conditional sleep for CPU throttling. It's possible to reuse qemu_sem_timedwait, but it's more difficult than just add qemu_cond_timedwait. Also moved compute_abs_deadline function up the code to reuse it in qemu_cond_timedwait_impl win32. Signed-off-by: Yury Kotov Message-Id: <20190909131335.16848-2-yury-kotov@yandex-team.ru> Signed-off-by: Paolo Bonzini --- include/qemu/thread.h | 19 +++++++++++++++++++ util/qemu-thread-posix.c | 41 ++++++++++++++++++++++++++++------------ util/qemu-thread-win32.c | 17 +++++++++++++++++ util/qsp.c | 20 ++++++++++++++++++++ 4 files changed, 85 insertions(+), 12 deletions(-) diff --git a/include/qemu/thread.h b/include/qemu/thread.h index 55d83a907c..047db0307e 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -34,6 +34,8 @@ typedef void (*QemuRecMutexLockFunc)(QemuRecMutex *m, const char *f, int l); typedef int (*QemuRecMutexTrylockFunc)(QemuRecMutex *m, const char *f, int l); typedef void (*QemuCondWaitFunc)(QemuCond *c, QemuMutex *m, const char *f, int l); +typedef bool (*QemuCondTimedWaitFunc)(QemuCond *c, QemuMutex *m, int ms, + const char *f, int l); extern QemuMutexLockFunc qemu_bql_mutex_lock_func; extern QemuMutexLockFunc qemu_mutex_lock_func; @@ -41,6 +43,7 @@ extern QemuMutexTrylockFunc qemu_mutex_trylock_func; extern QemuRecMutexLockFunc qemu_rec_mutex_lock_func; extern QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func; extern QemuCondWaitFunc qemu_cond_wait_func; +extern QemuCondTimedWaitFunc qemu_cond_timedwait_func; /* convenience macros to bypass the profiler */ #define qemu_mutex_lock__raw(m) \ @@ -63,6 +66,8 @@ extern QemuCondWaitFunc qemu_cond_wait_func; qemu_rec_mutex_trylock_impl(m, __FILE__, __LINE__); #define qemu_cond_wait(c, m) \ qemu_cond_wait_impl(c, m, __FILE__, __LINE__); +#define qemu_cond_timedwait(c, m, ms) \ + qemu_cond_wait_impl(c, m, ms, __FILE__, __LINE__); #else #define qemu_mutex_lock(m) ({ \ QemuMutexLockFunc _f = atomic_read(&qemu_mutex_lock_func); \ @@ -89,6 +94,11 @@ extern QemuCondWaitFunc qemu_cond_wait_func; QemuCondWaitFunc _f = atomic_read(&qemu_cond_wait_func); \ _f(c, m, __FILE__, __LINE__); \ }) + +#define qemu_cond_timedwait(c, m, ms) ({ \ + QemuCondTimedWaitFunc _f = atomic_read(&qemu_cond_timedwait_func); \ + _f(c, m, ms, __FILE__, __LINE__); \ + }) #endif #define qemu_mutex_unlock(mutex) \ @@ -134,12 +144,21 @@ void qemu_cond_signal(QemuCond *cond); void qemu_cond_broadcast(QemuCond *cond); void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line); +bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, + const char *file, const int line); static inline void (qemu_cond_wait)(QemuCond *cond, QemuMutex *mutex) { qemu_cond_wait(cond, mutex); } +/* Returns true if timeout has not expired, and false otherwise */ +static inline bool (qemu_cond_timedwait)(QemuCond *cond, QemuMutex *mutex, + int ms) +{ + return qemu_cond_timedwait(cond, mutex, ms); +} + void qemu_sem_init(QemuSemaphore *sem, int init); void qemu_sem_post(QemuSemaphore *sem); void qemu_sem_wait(QemuSemaphore *sem); diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 1bf5e65dea..838980aaa5 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -36,6 +36,18 @@ static void error_exit(int err, const char *msg) abort(); } +static void compute_abs_deadline(struct timespec *ts, int ms) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000; + ts->tv_sec = tv.tv_sec + ms / 1000; + if (ts->tv_nsec >= 1000000000) { + ts->tv_sec++; + ts->tv_nsec -= 1000000000; + } +} + void qemu_mutex_init(QemuMutex *mutex) { int err; @@ -164,6 +176,23 @@ void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, con error_exit(err, __func__); } +bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, + const char *file, const int line) +{ + int err; + struct timespec ts; + + assert(cond->initialized); + trace_qemu_mutex_unlock(mutex, file, line); + compute_abs_deadline(&ts, ms); + err = pthread_cond_timedwait(&cond->cond, &mutex->lock, &ts); + trace_qemu_mutex_locked(mutex, file, line); + if (err && err != ETIMEDOUT) { + error_exit(err, __func__); + } + return err != ETIMEDOUT; +} + void qemu_sem_init(QemuSemaphore *sem, int init) { int rc; @@ -238,18 +267,6 @@ void qemu_sem_post(QemuSemaphore *sem) #endif } -static void compute_abs_deadline(struct timespec *ts, int ms) -{ - struct timeval tv; - gettimeofday(&tv, NULL); - ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000; - ts->tv_sec = tv.tv_sec + ms / 1000; - if (ts->tv_nsec >= 1000000000) { - ts->tv_sec++; - ts->tv_nsec -= 1000000000; - } -} - int qemu_sem_timedwait(QemuSemaphore *sem, int ms) { int rc; diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index 572f88535d..56a83333da 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -145,6 +145,23 @@ void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, con qemu_mutex_post_lock(mutex, file, line); } +bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, + const char *file, const int line) +{ + int rc = 0; + + assert(cond->initialized); + trace_qemu_mutex_unlock(mutex, file, line); + if (!SleepConditionVariableSRW(&cond->var, &mutex->lock, ms, 0)) { + rc = GetLastError(); + } + trace_qemu_mutex_locked(mutex, file, line); + if (rc && rc != ERROR_TIMEOUT) { + error_exit(rc, __func__); + } + return rc != ERROR_TIMEOUT; +} + void qemu_sem_init(QemuSemaphore *sem, int init) { /* Manual reset. */ diff --git a/util/qsp.c b/util/qsp.c index 5264c97342..62265417fd 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -131,6 +131,7 @@ QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl; QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func = qemu_rec_mutex_trylock_impl; QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl; +QemuCondTimedWaitFunc qemu_cond_timedwait_func = qemu_cond_timedwait_impl; /* * It pays off to _not_ hash callsite->file; hashing a string is slow, and @@ -412,6 +413,23 @@ qsp_cond_wait(QemuCond *cond, QemuMutex *mutex, const char *file, int line) qsp_entry_record(e, t1 - t0); } +static bool +qsp_cond_timedwait(QemuCond *cond, QemuMutex *mutex, int ms, + const char *file, int line) +{ + QSPEntry *e; + int64_t t0, t1; + bool ret; + + t0 = get_clock(); + ret = qemu_cond_timedwait_impl(cond, mutex, ms, file, line); + t1 = get_clock(); + + e = qsp_entry_get(cond, file, line, QSP_CONDVAR); + qsp_entry_record(e, t1 - t0); + return ret; +} + bool qsp_is_enabled(void) { return atomic_read(&qemu_mutex_lock_func) == qsp_mutex_lock; @@ -425,6 +443,7 @@ void qsp_enable(void) atomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock); atomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock); atomic_set(&qemu_cond_wait_func, qsp_cond_wait); + atomic_set(&qemu_cond_timedwait_func, qsp_cond_timedwait); } void qsp_disable(void) @@ -435,6 +454,7 @@ void qsp_disable(void) atomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl); atomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl); atomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl); + atomic_set(&qemu_cond_timedwait_func, qemu_cond_timedwait_impl); } static gint qsp_tree_cmp(gconstpointer ap, gconstpointer bp, gpointer up) From bd1f7ff4b27444740786274c41caf3e9b9c35bba Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Mon, 9 Sep 2019 16:13:34 +0300 Subject: [PATCH 14/29] cpus: Fix throttling during vm_stop Throttling thread sleeps in VCPU thread. For high throttle percentage this sleep is more than 10ms. E.g. for 60% - 15ms, for 99% - 990ms. vm_stop() kicks all VCPUs and waits for them. It's called at the end of migration and because of the long sleep the migration downtime might be more than 100ms even for downtime-limit 1ms. Use qemu_cond_timedwait for high percentage to wake up during vm_stop. Signed-off-by: Yury Kotov Reviewed-by: Eric Blake Message-Id: <20190909131335.16848-3-yury-kotov@yandex-team.ru> Signed-off-by: Paolo Bonzini --- cpus.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/cpus.c b/cpus.c index 85cd451a86..d2c61ff155 100644 --- a/cpus.c +++ b/cpus.c @@ -77,6 +77,8 @@ #endif /* CONFIG_LINUX */ +static QemuMutex qemu_global_mutex; + int64_t max_delay; int64_t max_advance; @@ -782,7 +784,7 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) { double pct; double throttle_ratio; - long sleeptime_ns; + int64_t sleeptime_ns, endtime_ns; if (!cpu_throttle_get_percentage()) { return; @@ -790,11 +792,20 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) pct = (double)cpu_throttle_get_percentage()/100; throttle_ratio = pct / (1 - pct); - sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS); - - qemu_mutex_unlock_iothread(); - g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */ - qemu_mutex_lock_iothread(); + /* Add 1ns to fix double's rounding error (like 0.9999999...) */ + sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); + endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns; + while (sleeptime_ns > 0 && !cpu->stop) { + if (sleeptime_ns > SCALE_MS) { + qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex, + sleeptime_ns / SCALE_MS); + } else { + qemu_mutex_unlock_iothread(); + g_usleep(sleeptime_ns / SCALE_US); + qemu_mutex_lock_iothread(); + } + sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + } atomic_set(&cpu->throttle_thread_scheduled, 0); } @@ -1172,8 +1183,6 @@ static void qemu_init_sigbus(void) } #endif /* !CONFIG_LINUX */ -static QemuMutex qemu_global_mutex; - static QemuThread io_thread; /* cpu creation */ From 42d400acfc2dabc56653ee5396df14a8d8cb46c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:00 +0200 Subject: [PATCH 15/29] hw/i386/pc: Use e820_get_num_entries() to access e820_entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to extract the e820* code out of this file (in the next patch), access e820_entries with its correct helper. Reviewed-by: Li Qiang Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-2-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index bad866fe44..31e9c1f16b 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1029,7 +1029,7 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, &e820_reserve, sizeof(e820_reserve)); fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, - sizeof(struct e820_entry) * e820_entries); + sizeof(struct e820_entry) * e820_get_num_entries()); fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); /* allocate memory for the NUMA channel: one (64bit) word for the number From d6d059ca07ae907b8945f88c382fb54d43f9f03a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:01 +0200 Subject: [PATCH 16/29] hw/i386/pc: Extract e820 memory layout code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Samuel Ortiz Reviewed-by: Li Qiang Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-3-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/Makefile.objs | 2 +- hw/i386/e820_memory_layout.c | 59 ++++++++++++++++++++++++++++++++++ hw/i386/e820_memory_layout.h | 42 +++++++++++++++++++++++++ hw/i386/pc.c | 61 +----------------------------------- include/hw/i386/pc.h | 11 ------- target/i386/kvm.c | 1 + 6 files changed, 104 insertions(+), 72 deletions(-) create mode 100644 hw/i386/e820_memory_layout.c create mode 100644 hw/i386/e820_memory_layout.h diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 5d9c9efd5f..d3374e0831 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -1,5 +1,5 @@ obj-$(CONFIG_KVM) += kvm/ -obj-y += multiboot.o +obj-y += e820_memory_layout.o multiboot.o obj-y += pc.o obj-$(CONFIG_I440FX) += pc_piix.o obj-$(CONFIG_Q35) += pc_q35.o diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c new file mode 100644 index 0000000000..bcf9eaf837 --- /dev/null +++ b/hw/i386/e820_memory_layout.c @@ -0,0 +1,59 @@ +/* + * QEMU BIOS e820 routines + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * SPDX-License-Identifier: MIT + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "e820_memory_layout.h" + +static size_t e820_entries; +struct e820_table e820_reserve; +struct e820_entry *e820_table; + +int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) +{ + int index = le32_to_cpu(e820_reserve.count); + struct e820_entry *entry; + + if (type != E820_RAM) { + /* old FW_CFG_E820_TABLE entry -- reservations only */ + if (index >= E820_NR_ENTRIES) { + return -EBUSY; + } + entry = &e820_reserve.entry[index++]; + + entry->address = cpu_to_le64(address); + entry->length = cpu_to_le64(length); + entry->type = cpu_to_le32(type); + + e820_reserve.count = cpu_to_le32(index); + } + + /* new "etc/e820" file -- include ram too */ + e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); + e820_table[e820_entries].address = cpu_to_le64(address); + e820_table[e820_entries].length = cpu_to_le64(length); + e820_table[e820_entries].type = cpu_to_le32(type); + e820_entries++; + + return e820_entries; +} + +int e820_get_num_entries(void) +{ + return e820_entries; +} + +bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length) +{ + if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) { + *address = le64_to_cpu(e820_table[idx].address); + *length = le64_to_cpu(e820_table[idx].length); + return true; + } + return false; +} diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h new file mode 100644 index 0000000000..2a0ceb8b9c --- /dev/null +++ b/hw/i386/e820_memory_layout.h @@ -0,0 +1,42 @@ +/* + * QEMU BIOS e820 routines + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * SPDX-License-Identifier: MIT + */ + +#ifndef HW_I386_E820_H +#define HW_I386_E820_H + +/* e820 types */ +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 +#define E820_NVS 4 +#define E820_UNUSABLE 5 + +#define E820_NR_ENTRIES 16 + +struct e820_entry { + uint64_t address; + uint64_t length; + uint32_t type; +} QEMU_PACKED __attribute((__aligned__(4))); + +struct e820_table { + uint32_t count; + struct e820_entry entry[E820_NR_ENTRIES]; +} QEMU_PACKED __attribute((__aligned__(4))); + +extern struct e820_table e820_reserve; +extern struct e820_entry *e820_table; + +int e820_add_entry(uint64_t address, uint64_t length, uint32_t type); +int e820_get_num_entries(void); +bool e820_get_entry(int index, uint32_t type, + uint64_t *address, uint64_t *length); + + + +#endif diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 31e9c1f16b..81e5a59d82 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -87,6 +87,7 @@ #include "sysemu/replay.h" #include "qapi/qmp/qerror.h" #include "config-devices.h" +#include "e820_memory_layout.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -98,22 +99,6 @@ #define DPRINTF(fmt, ...) #endif -#define E820_NR_ENTRIES 16 - -struct e820_entry { - uint64_t address; - uint64_t length; - uint32_t type; -} QEMU_PACKED __attribute((__aligned__(4))); - -struct e820_table { - uint32_t count; - struct e820_entry entry[E820_NR_ENTRIES]; -} QEMU_PACKED __attribute((__aligned__(4))); - -static struct e820_table e820_reserve; -static struct e820_entry *e820_table; -static unsigned e820_entries; struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; /* Physical Address of PVH entry point read from kernel ELF NOTE */ @@ -880,50 +865,6 @@ static void handle_a20_line_change(void *opaque, int irq, int level) x86_cpu_set_a20(cpu, level); } -int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) -{ - int index = le32_to_cpu(e820_reserve.count); - struct e820_entry *entry; - - if (type != E820_RAM) { - /* old FW_CFG_E820_TABLE entry -- reservations only */ - if (index >= E820_NR_ENTRIES) { - return -EBUSY; - } - entry = &e820_reserve.entry[index++]; - - entry->address = cpu_to_le64(address); - entry->length = cpu_to_le64(length); - entry->type = cpu_to_le32(type); - - e820_reserve.count = cpu_to_le32(index); - } - - /* new "etc/e820" file -- include ram too */ - e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); - e820_table[e820_entries].address = cpu_to_le64(address); - e820_table[e820_entries].length = cpu_to_le64(length); - e820_table[e820_entries].type = cpu_to_le32(type); - e820_entries++; - - return e820_entries; -} - -int e820_get_num_entries(void) -{ - return e820_entries; -} - -bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length) -{ - if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) { - *address = le64_to_cpu(e820_table[idx].address); - *length = le64_to_cpu(e820_table[idx].length); - return true; - } - return false; -} - /* Calculates initial APIC ID for a specific CPU index * * Currently we need to be able to calculate the APIC ID from the CPU index diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 19a837889d..062feeb69e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -291,17 +291,6 @@ void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, const CPUArchIdList *apic_ids, GArray *entry); -/* e820 types */ -#define E820_RAM 1 -#define E820_RESERVED 2 -#define E820_ACPI 3 -#define E820_NVS 4 -#define E820_UNUSABLE 5 - -int e820_add_entry(uint64_t, uint64_t, uint32_t); -int e820_get_num_entries(void); -bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); - extern GlobalProperty pc_compat_4_1[]; extern const size_t pc_compat_4_1_len; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 3435fc4345..92069099ab 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -41,6 +41,7 @@ #include "hw/i386/apic-msidef.h" #include "hw/i386/intel_iommu.h" #include "hw/i386/x86-iommu.h" +#include "hw/i386/e820_memory_layout.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" From 0f432b3064a227bfce34bede1ba40eb7f0cb3262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:02 +0200 Subject: [PATCH 17/29] hw/i386/pc: Use address_space_memory in place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The address_space_memory variable is used once. Use it in place and remove the argument. Suggested-by: Samuel Ortiz Reviewed-by: Li Qiang Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-4-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 81e5a59d82..dd5ec2e7c7 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -936,7 +936,7 @@ static void pc_build_smbios(PCMachineState *pcms) } } -static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) +static FWCfgState *bochs_bios_init(PCMachineState *pcms) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; @@ -946,7 +946,8 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) MachineState *ms = MACHINE(pcms); int nb_numa_nodes = ms->numa_state->num_nodes; - fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); + fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, + &address_space_memory); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: @@ -1868,7 +1869,7 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr, 1); - fw_cfg = bochs_bios_init(&address_space_memory, pcms); + fw_cfg = bochs_bios_init(pcms); rom_set_fw(fw_cfg); From 5888e01cfdf5044d6f43bb5c6be9e3d80bcb6ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:03 +0200 Subject: [PATCH 18/29] hw/i386/pc: Rename bochs_bios_init as more generic fw_cfg_arch_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bochs_bios_init() function is not restricted to the Bochs BIOS and is useful to other BIOS. Since it is not specific to the PC machine, and can be reused by other machines of the X86 architecture, rename it as fw_cfg_arch_create(). Suggested-by: Samuel Ortiz Reviewed-by: Li Qiang Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-5-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index dd5ec2e7c7..b15af77a9f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -936,7 +936,7 @@ static void pc_build_smbios(PCMachineState *pcms) } } -static FWCfgState *bochs_bios_init(PCMachineState *pcms) +static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; @@ -1614,7 +1614,7 @@ void pc_cpus_init(PCMachineState *pcms) * Limit for the APIC ID value, so that all * CPU APIC IDs are < pcms->apic_id_limit. * - * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). + * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). */ pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, ms->smp.max_cpus - 1) + 1; @@ -1869,7 +1869,7 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr, 1); - fw_cfg = bochs_bios_init(pcms); + fw_cfg = fw_cfg_arch_create(pcms); rom_set_fw(fw_cfg); From 524acbe97dcaf7e397d75e2d6eee3b0aaabd0b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:04 +0200 Subject: [PATCH 19/29] hw/i386/pc: Pass the boot_cpus value by argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The boot_cpus is used once. Pass it by argument, this will allow us to remove the PCMachineState argument later. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-6-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index b15af77a9f..dc2ae6d5cf 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -936,7 +936,8 @@ static void pc_build_smbios(PCMachineState *pcms) } } -static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms) +static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms, + uint16_t boot_cpus) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; @@ -948,7 +949,7 @@ static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms) fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, &address_space_memory); - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, boot_cpus); /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: * @@ -1869,7 +1870,7 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr, 1); - fw_cfg = fw_cfg_arch_create(pcms); + fw_cfg = fw_cfg_arch_create(pcms, pcms->boot_cpus); rom_set_fw(fw_cfg); From 57e23e78372d8b8426b650cde0ab92adc0fe1865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:05 +0200 Subject: [PATCH 20/29] hw/i386/pc: Pass the apic_id_limit value by argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the apic_id_limit value by argument, this will allow us to remove the PCMachineState argument later. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-7-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index dc2ae6d5cf..c0e9d83154 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -937,7 +937,8 @@ static void pc_build_smbios(PCMachineState *pcms) } static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms, - uint16_t boot_cpus) + uint16_t boot_cpus, + uint16_t apic_id_limit) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; @@ -1870,7 +1871,7 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr, 1); - fw_cfg = fw_cfg_arch_create(pcms, pcms->boot_cpus); + fw_cfg = fw_cfg_arch_create(pcms, pcms->boot_cpus, pcms->apic_id_limit); rom_set_fw(fw_cfg); From 264b485776062bdc858ea5e6994a61bb68a3eec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:06 +0200 Subject: [PATCH 21/29] hw/i386/pc: Pass the CPUArchIdList array by argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the CPUArchIdList array by argument, this will allow us to remove the PCMachineState argument later. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-8-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index c0e9d83154..5b3f615406 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -937,14 +937,13 @@ static void pc_build_smbios(PCMachineState *pcms) } static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms, + const CPUArchIdList *cpus, uint16_t boot_cpus, uint16_t apic_id_limit) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; int i; - const CPUArchIdList *cpus; - MachineClass *mc = MACHINE_GET_CLASS(pcms); MachineState *ms = MACHINE(pcms); int nb_numa_nodes = ms->numa_state->num_nodes; @@ -964,7 +963,7 @@ static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms, * So for compatibility reasons with old BIOSes we are stuck with * "etc/max-cpus" actually being apic_id_limit */ - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, apic_id_limit); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, acpi_tables, acpi_tables_len); @@ -980,20 +979,19 @@ static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms, * of nodes, one word for each VCPU->node and one word for each node to * hold the amount of memory. */ - numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); + numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); - cpus = mc->possible_cpu_arch_ids(MACHINE(pcms)); for (i = 0; i < cpus->len; i++) { unsigned int apic_id = cpus->cpus[i].arch_id; - assert(apic_id < pcms->apic_id_limit); + assert(apic_id < apic_id_limit); numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); } for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[pcms->apic_id_limit + 1 + i] = + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(ms->numa_state->nodes[i].node_mem); } fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + pcms->apic_id_limit + nb_numa_nodes) * + (1 + apic_id_limit + nb_numa_nodes) * sizeof(*numa_fw_cfg)); return fw_cfg; @@ -1775,6 +1773,7 @@ void pc_memory_init(PCMachineState *pcms, MemoryRegion *ram_below_4g, *ram_above_4g; FWCfgState *fw_cfg; MachineState *machine = MACHINE(pcms); + MachineClass *mc = MACHINE_GET_CLASS(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); assert(machine->ram_size == pcms->below_4g_mem_size + @@ -1808,7 +1807,6 @@ void pc_memory_init(PCMachineState *pcms, if (!pcmc->has_reserved_memory && (machine->ram_slots || (machine->maxram_size > machine->ram_size))) { - MachineClass *mc = MACHINE_GET_CLASS(machine); error_report("\"-memory 'slots|maxmem'\" is not supported by: %s", mc->name); @@ -1871,7 +1869,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr, 1); - fw_cfg = fw_cfg_arch_create(pcms, pcms->boot_cpus, pcms->apic_id_limit); + fw_cfg = fw_cfg_arch_create(pcms, mc->possible_cpu_arch_ids(machine), + pcms->boot_cpus, pcms->apic_id_limit); rom_set_fw(fw_cfg); From bd802bd98148e7022491fedbf6583e57f8ef1a78 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 16 Sep 2019 12:42:42 +0200 Subject: [PATCH 22/29] hw/i386/pc: Replace PCMachineState argument with MachineState in fw_cfg_arch_create In the previous commit we removed the last access to PCMachineState. Replace it with a generic MachineState argument and use it to retrieve the CPUArchIdList. Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 5b3f615406..8d2e600eaa 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -936,15 +936,15 @@ static void pc_build_smbios(PCMachineState *pcms) } } -static FWCfgState *fw_cfg_arch_create(PCMachineState *pcms, - const CPUArchIdList *cpus, +static FWCfgState *fw_cfg_arch_create(MachineState *ms, uint16_t boot_cpus, uint16_t apic_id_limit) { FWCfgState *fw_cfg; uint64_t *numa_fw_cfg; int i; - MachineState *ms = MACHINE(pcms); + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *cpus = mc->possible_cpu_arch_ids(ms); int nb_numa_nodes = ms->numa_state->num_nodes; fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, @@ -1869,7 +1869,7 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr, 1); - fw_cfg = fw_cfg_arch_create(pcms, mc->possible_cpu_arch_ids(machine), + fw_cfg = fw_cfg_arch_create(machine, pcms->boot_cpus, pcms->apic_id_limit); rom_set_fw(fw_cfg); From 14fe28075772b882b7be4ec27997b3375ba5dfb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:08 +0200 Subject: [PATCH 23/29] hw/i386/pc: Let pc_build_smbios() take a FWCfgState argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the FWCfgState object by argument, this will allow us to remove the PCMachineState argument later. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-10-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8d2e600eaa..612d6768d6 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -894,7 +894,7 @@ static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, } } -static void pc_build_smbios(PCMachineState *pcms) +static void pc_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) { uint8_t *smbios_tables, *smbios_anchor; size_t smbios_tables_len, smbios_anchor_len; @@ -908,7 +908,7 @@ static void pc_build_smbios(PCMachineState *pcms) smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); if (smbios_tables) { - fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, smbios_tables, smbios_tables_len); } @@ -929,9 +929,9 @@ static void pc_build_smbios(PCMachineState *pcms) g_free(mem_array); if (smbios_anchor) { - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-tables", + fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables", smbios_tables, smbios_tables_len); - fw_cfg_add_file(pcms->fw_cfg, "etc/smbios/smbios-anchor", + fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor", smbios_anchor, smbios_anchor_len); } } @@ -1695,7 +1695,7 @@ void pc_machine_done(Notifier *notifier, void *data) acpi_setup(); if (pcms->fw_cfg) { - pc_build_smbios(pcms); + pc_build_smbios(pcms, pcms->fw_cfg); pc_build_feature_control_file(pcms); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); From f16f6f05a0c5a59c27a9427b38f615b39bc89f8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:09 +0200 Subject: [PATCH 24/29] hw/i386/pc: Let pc_build_smbios() take a generic MachineState argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let the pc_build_smbios() function take a generic MachineState argument. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-11-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 612d6768d6..14ad611b53 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -894,13 +894,12 @@ static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, } } -static void pc_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) +static void pc_build_smbios(MachineState *ms, FWCfgState *fw_cfg) { uint8_t *smbios_tables, *smbios_anchor; size_t smbios_tables_len, smbios_anchor_len; struct smbios_phys_mem_area *mem_array; unsigned i, array_count; - MachineState *ms = MACHINE(pcms); X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); /* tell smbios about cpuid version and features */ @@ -1695,7 +1694,7 @@ void pc_machine_done(Notifier *notifier, void *data) acpi_setup(); if (pcms->fw_cfg) { - pc_build_smbios(pcms, pcms->fw_cfg); + pc_build_smbios(MACHINE(pcms), pcms->fw_cfg); pc_build_feature_control_file(pcms); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); From e0e949d7d86b4498cf7e1c8dad15d0c055c60697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:10 +0200 Subject: [PATCH 25/29] hw/i386/pc: Rename pc_build_smbios() as generic fw_cfg_build_smbios() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the pc_build_smbios() function has been refactored to not depend of PC specific types, rename it to a more generic name. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-12-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 14ad611b53..8f611cb5dc 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -894,7 +894,7 @@ static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, } } -static void pc_build_smbios(MachineState *ms, FWCfgState *fw_cfg) +static void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) { uint8_t *smbios_tables, *smbios_anchor; size_t smbios_tables_len, smbios_anchor_len; @@ -1694,7 +1694,7 @@ void pc_machine_done(Notifier *notifier, void *data) acpi_setup(); if (pcms->fw_cfg) { - pc_build_smbios(MACHINE(pcms), pcms->fw_cfg); + fw_cfg_build_smbios(MACHINE(pcms), pcms->fw_cfg); pc_build_feature_control_file(pcms); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); From eb6449fbd83278ae1376f1c6343b858af479cedc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:11 +0200 Subject: [PATCH 26/29] hw/i386/pc: Let pc_build_feature_control() take a FWCfgState argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the FWCfgState object by argument, this will allow us to remove the PCMachineState argument later. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-13-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8f611cb5dc..7213853ad6 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1623,7 +1623,8 @@ void pc_cpus_init(PCMachineState *pcms) } } -static void pc_build_feature_control_file(PCMachineState *pcms) +static void pc_build_feature_control_file(PCMachineState *pcms, + FWCfgState *fw_cfg) { MachineState *ms = MACHINE(pcms); X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); @@ -1649,7 +1650,7 @@ static void pc_build_feature_control_file(PCMachineState *pcms) val = g_malloc(sizeof(*val)); *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED); - fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); + fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); } static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) @@ -1695,7 +1696,7 @@ void pc_machine_done(Notifier *notifier, void *data) acpi_setup(); if (pcms->fw_cfg) { fw_cfg_build_smbios(MACHINE(pcms), pcms->fw_cfg); - pc_build_feature_control_file(pcms); + pc_build_feature_control_file(pcms, pcms->fw_cfg); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); } From 4e91c7f32e11ac532501d9fbd7c5de5d4a2e3e6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:12 +0200 Subject: [PATCH 27/29] hw/i386/pc: Let pc_build_feature_control() take a MachineState argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let the pc_build_feature_control_file() function take a generic MachineState argument. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-14-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7213853ad6..438c48506f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1623,10 +1623,9 @@ void pc_cpus_init(PCMachineState *pcms) } } -static void pc_build_feature_control_file(PCMachineState *pcms, +static void pc_build_feature_control_file(MachineState *ms, FWCfgState *fw_cfg) { - MachineState *ms = MACHINE(pcms); X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); CPUX86State *env = &cpu->env; uint32_t unused, ecx, edx; @@ -1696,7 +1695,7 @@ void pc_machine_done(Notifier *notifier, void *data) acpi_setup(); if (pcms->fw_cfg) { fw_cfg_build_smbios(MACHINE(pcms), pcms->fw_cfg); - pc_build_feature_control_file(pcms, pcms->fw_cfg); + pc_build_feature_control_file(MACHINE(pcms), pcms->fw_cfg); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); } From 367607f9132e97614e7da0e6a45576c51e9db68c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 19 Aug 2019 00:54:13 +0200 Subject: [PATCH 28/29] hw/i386/pc: Rename pc_build_feature_control() as generic fw_cfg_build_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the pc_build_feature_control_file() function has been refactored to not depend of PC specific types, rename it to a more generic name. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-15-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 438c48506f..9446596a52 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1623,8 +1623,8 @@ void pc_cpus_init(PCMachineState *pcms) } } -static void pc_build_feature_control_file(MachineState *ms, - FWCfgState *fw_cfg) +static void fw_cfg_build_feature_control(MachineState *ms, + FWCfgState *fw_cfg) { X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); CPUX86State *env = &cpu->env; @@ -1695,7 +1695,7 @@ void pc_machine_done(Notifier *notifier, void *data) acpi_setup(); if (pcms->fw_cfg) { fw_cfg_build_smbios(MACHINE(pcms), pcms->fw_cfg); - pc_build_feature_control_file(MACHINE(pcms), pcms->fw_cfg); + fw_cfg_build_feature_control(MACHINE(pcms), pcms->fw_cfg); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); } From 149c50cabcc4ea46a460d35fc876346ed441304c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 16 Sep 2019 12:49:28 +0200 Subject: [PATCH 29/29] hw/i386/pc: Extract the x86 generic fw_cfg code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract all the functions that are not PC-machine specific into the (arch-specific) fw_cfg.c file. This will allow other X86-machine to reuse these functions. Suggested-by: Samuel Ortiz Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20190818225414.22590-16-philmd@redhat.com> Signed-off-by: Paolo Bonzini --- hw/i386/fw_cfg.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++ hw/i386/fw_cfg.h | 7 +++ hw/i386/pc.c | 132 +-------------------------------------------- 3 files changed, 145 insertions(+), 131 deletions(-) diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index 380a819230..39b6bc6052 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -13,8 +13,15 @@ */ #include "qemu/osdep.h" +#include "sysemu/numa.h" +#include "hw/acpi/acpi.h" +#include "hw/firmware/smbios.h" +#include "hw/i386/pc.h" #include "hw/i386/fw_cfg.h" +#include "hw/timer/hpet.h" #include "hw/nvram/fw_cfg.h" +#include "e820_memory_layout.h" +#include "kvm_i386.h" const char *fw_cfg_arch_key_name(uint16_t key) { @@ -36,3 +43,133 @@ const char *fw_cfg_arch_key_name(uint16_t key) } return NULL; } + +void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) +{ + uint8_t *smbios_tables, *smbios_anchor; + size_t smbios_tables_len, smbios_anchor_len; + struct smbios_phys_mem_area *mem_array; + unsigned i, array_count; + X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); + + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + + smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); + if (smbios_tables) { + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + } + + /* build the array of physical mem area from e820 table */ + mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries()); + for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) { + uint64_t addr, len; + + if (e820_get_entry(i, E820_RAM, &addr, &len)) { + mem_array[array_count].address = addr; + mem_array[array_count].length = len; + array_count++; + } + } + smbios_get_tables(ms, mem_array, array_count, + &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len); + g_free(mem_array); + + if (smbios_anchor) { + fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables", + smbios_tables, smbios_tables_len); + fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor", + smbios_anchor, smbios_anchor_len); + } +} + +FWCfgState *fw_cfg_arch_create(MachineState *ms, + uint16_t boot_cpus, + uint16_t apic_id_limit) +{ + FWCfgState *fw_cfg; + uint64_t *numa_fw_cfg; + int i; + MachineClass *mc = MACHINE_GET_CLASS(ms); + const CPUArchIdList *cpus = mc->possible_cpu_arch_ids(ms); + int nb_numa_nodes = ms->numa_state->num_nodes; + + fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, + &address_space_memory); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, boot_cpus); + + /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: + * + * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS for + * building MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT table, + * that tables are based on xAPIC ID and QEMU<->SeaBIOS interface + * for CPU hotplug also uses APIC ID and not "CPU index". + * This means that FW_CFG_MAX_CPUS is not the "maximum number of CPUs", + * but the "limit to the APIC ID values SeaBIOS may see". + * + * So for compatibility reasons with old BIOSes we are stuck with + * "etc/max-cpus" actually being apic_id_limit + */ + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, apic_id_limit); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, + acpi_tables, acpi_tables_len); + fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override()); + + fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, + &e820_reserve, sizeof(e820_reserve)); + fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, + sizeof(struct e820_entry) * e820_get_num_entries()); + + fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); + /* allocate memory for the NUMA channel: one (64bit) word for the number + * of nodes, one word for each VCPU->node and one word for each node to + * hold the amount of memory. + */ + numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); + numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); + for (i = 0; i < cpus->len; i++) { + unsigned int apic_id = cpus->cpus[i].arch_id; + assert(apic_id < apic_id_limit); + numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); + } + for (i = 0; i < nb_numa_nodes; i++) { + numa_fw_cfg[apic_id_limit + 1 + i] = + cpu_to_le64(ms->numa_state->nodes[i].node_mem); + } + fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, + (1 + apic_id_limit + nb_numa_nodes) * + sizeof(*numa_fw_cfg)); + + return fw_cfg; +} + +void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) +{ + X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); + CPUX86State *env = &cpu->env; + uint32_t unused, ecx, edx; + uint64_t feature_control_bits = 0; + uint64_t *val; + + cpu_x86_cpuid(env, 1, 0, &unused, &unused, &ecx, &edx); + if (ecx & CPUID_EXT_VMX) { + feature_control_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + } + + if ((edx & (CPUID_EXT2_MCE | CPUID_EXT2_MCA)) == + (CPUID_EXT2_MCE | CPUID_EXT2_MCA) && + (env->mcg_cap & MCG_LMCE_P)) { + feature_control_bits |= FEATURE_CONTROL_LMCE; + } + + if (!feature_control_bits) { + return; + } + + val = g_malloc(sizeof(*val)); + *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED); + fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); +} diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h index 17a4bc32f2..e0856a3769 100644 --- a/hw/i386/fw_cfg.h +++ b/hw/i386/fw_cfg.h @@ -9,6 +9,7 @@ #ifndef HW_I386_FW_CFG_H #define HW_I386_FW_CFG_H +#include "hw/boards.h" #include "hw/nvram/fw_cfg.h" #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) @@ -17,4 +18,10 @@ #define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) #define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) +FWCfgState *fw_cfg_arch_create(MachineState *ms, + uint16_t boot_cpus, + uint16_t apic_id_limit); +void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg); +void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); + #endif diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 9446596a52..e5b59ff3c5 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -88,6 +88,7 @@ #include "qapi/qmp/qerror.h" #include "config-devices.h" #include "e820_memory_layout.h" +#include "fw_cfg.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -894,108 +895,6 @@ static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, } } -static void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) -{ - uint8_t *smbios_tables, *smbios_anchor; - size_t smbios_tables_len, smbios_anchor_len; - struct smbios_phys_mem_area *mem_array; - unsigned i, array_count; - X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); - - /* tell smbios about cpuid version and features */ - smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - - smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); - if (smbios_tables) { - fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, - smbios_tables, smbios_tables_len); - } - - /* build the array of physical mem area from e820 table */ - mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries()); - for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) { - uint64_t addr, len; - - if (e820_get_entry(i, E820_RAM, &addr, &len)) { - mem_array[array_count].address = addr; - mem_array[array_count].length = len; - array_count++; - } - } - smbios_get_tables(ms, mem_array, array_count, - &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); - g_free(mem_array); - - if (smbios_anchor) { - fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables", - smbios_tables, smbios_tables_len); - fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor", - smbios_anchor, smbios_anchor_len); - } -} - -static FWCfgState *fw_cfg_arch_create(MachineState *ms, - uint16_t boot_cpus, - uint16_t apic_id_limit) -{ - FWCfgState *fw_cfg; - uint64_t *numa_fw_cfg; - int i; - MachineClass *mc = MACHINE_GET_CLASS(ms); - const CPUArchIdList *cpus = mc->possible_cpu_arch_ids(ms); - int nb_numa_nodes = ms->numa_state->num_nodes; - - fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, - &address_space_memory); - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, boot_cpus); - - /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: - * - * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS for - * building MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT table, - * that tables are based on xAPIC ID and QEMU<->SeaBIOS interface - * for CPU hotplug also uses APIC ID and not "CPU index". - * This means that FW_CFG_MAX_CPUS is not the "maximum number of CPUs", - * but the "limit to the APIC ID values SeaBIOS may see". - * - * So for compatibility reasons with old BIOSes we are stuck with - * "etc/max-cpus" actually being apic_id_limit - */ - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, apic_id_limit); - fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, - acpi_tables, acpi_tables_len); - fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override()); - - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); - fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, - sizeof(struct e820_entry) * e820_get_num_entries()); - - fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); - /* allocate memory for the NUMA channel: one (64bit) word for the number - * of nodes, one word for each VCPU->node and one word for each node to - * hold the amount of memory. - */ - numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); - numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); - for (i = 0; i < cpus->len; i++) { - unsigned int apic_id = cpus->cpus[i].arch_id; - assert(apic_id < apic_id_limit); - numa_fw_cfg[apic_id + 1] = cpu_to_le64(cpus->cpus[i].props.node_id); - } - for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[apic_id_limit + 1 + i] = - cpu_to_le64(ms->numa_state->nodes[i].node_mem); - } - fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + apic_id_limit + nb_numa_nodes) * - sizeof(*numa_fw_cfg)); - - return fw_cfg; -} - static long get_file_size(FILE *f) { long where, size; @@ -1623,35 +1522,6 @@ void pc_cpus_init(PCMachineState *pcms) } } -static void fw_cfg_build_feature_control(MachineState *ms, - FWCfgState *fw_cfg) -{ - X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); - CPUX86State *env = &cpu->env; - uint32_t unused, ecx, edx; - uint64_t feature_control_bits = 0; - uint64_t *val; - - cpu_x86_cpuid(env, 1, 0, &unused, &unused, &ecx, &edx); - if (ecx & CPUID_EXT_VMX) { - feature_control_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - } - - if ((edx & (CPUID_EXT2_MCE | CPUID_EXT2_MCA)) == - (CPUID_EXT2_MCE | CPUID_EXT2_MCA) && - (env->mcg_cap & MCG_LMCE_P)) { - feature_control_bits |= FEATURE_CONTROL_LMCE; - } - - if (!feature_control_bits) { - return; - } - - val = g_malloc(sizeof(*val)); - *val = cpu_to_le64(feature_control_bits | FEATURE_CONTROL_LOCKED); - fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); -} - static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) { if (cpus_count > 0xff) {