Hi,
"Host Memory Backends" and "Memory devices" queue ("mem"): - Fix NVDIMM error message - Add ThreadContext user-creatable object and wire it up for NUMA-aware hostmem preallocation -----BEGIN PGP SIGNATURE----- iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmNbpHARHGRhdmlkQHJl ZGhhdC5jb20ACgkQTd4Q9wD/g1pDpw//bG9cyIlzTzDnU5pbQiXyLm0nF9tW/tli npGPSbFFYz/72XD9VJSVLhbNHoQSmFcMK5m/DA4WAMdOc5zF7lP3XdZcj72pDyxu 31hJRvuRhxNb09jhEdWRfX5+Jg9UyYXuIvtKXHSWgrtaYDtHBdTXq/ojZlvlo/rr 36v0jaVaTNRs7dKQL2oaN+DSMiPXHxBzA6FABqYmJNNwuMJT0kkX8pfz0OFwkRn+ iqf9uRhM6b/fNNB0+ReA7FfGL+hzU6Uv8AvAL3orXUqjwPMRe9Fz2gE7HpFnE6DD dOP4Xk2iSSJ5XQA8HwtvrQfrGPh4gPYE80ziK/+8boy3alVeGYbYbvWVtdsNju41 Cq9kM1wDyjZf6SSUIAbjOrNPdbhwyK4GviVBR1zh+/gA3uF5MhrDtZh4h3mWX2if ijmT9mfte4NwF3K1MvckAl7IHRb8nxmr7wjjhJ26JwpD+76lfAcmXC2YOlFGHCMi 028mjvThf3HW7BD2LjlQSX4UkHmM2vUBrgMGQKyeMham1VmMfSK32wzvUNfF7xSz o9k0loBh7unGcUsv3EbqUGswV5F6AgjK3vWRkDql8dNrdIoapDfaejPCd58kVM98 5N/aEoha4bAeJ6NGIKzD+4saiMxUqJ0y2NjSrE8iO4HszXgZW5e1Gbkn4Ae6d37D QSSqyfasVHY= =bLuc -----END PGP SIGNATURE----- Merge tag 'mem-2022-10-28' of https://github.com/davidhildenbrand/qemu into staging Hi, "Host Memory Backends" and "Memory devices" queue ("mem"): - Fix NVDIMM error message - Add ThreadContext user-creatable object and wire it up for NUMA-aware hostmem preallocation # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmNbpHARHGRhdmlkQHJl # ZGhhdC5jb20ACgkQTd4Q9wD/g1pDpw//bG9cyIlzTzDnU5pbQiXyLm0nF9tW/tli # npGPSbFFYz/72XD9VJSVLhbNHoQSmFcMK5m/DA4WAMdOc5zF7lP3XdZcj72pDyxu # 31hJRvuRhxNb09jhEdWRfX5+Jg9UyYXuIvtKXHSWgrtaYDtHBdTXq/ojZlvlo/rr # 36v0jaVaTNRs7dKQL2oaN+DSMiPXHxBzA6FABqYmJNNwuMJT0kkX8pfz0OFwkRn+ # iqf9uRhM6b/fNNB0+ReA7FfGL+hzU6Uv8AvAL3orXUqjwPMRe9Fz2gE7HpFnE6DD # dOP4Xk2iSSJ5XQA8HwtvrQfrGPh4gPYE80ziK/+8boy3alVeGYbYbvWVtdsNju41 # Cq9kM1wDyjZf6SSUIAbjOrNPdbhwyK4GviVBR1zh+/gA3uF5MhrDtZh4h3mWX2if # ijmT9mfte4NwF3K1MvckAl7IHRb8nxmr7wjjhJ26JwpD+76lfAcmXC2YOlFGHCMi # 
028mjvThf3HW7BD2LjlQSX4UkHmM2vUBrgMGQKyeMham1VmMfSK32wzvUNfF7xSz # o9k0loBh7unGcUsv3EbqUGswV5F6AgjK3vWRkDql8dNrdIoapDfaejPCd58kVM98 # 5N/aEoha4bAeJ6NGIKzD+4saiMxUqJ0y2NjSrE8iO4HszXgZW5e1Gbkn4Ae6d37D # QSSqyfasVHY= # =bLuc # -----END PGP SIGNATURE----- # gpg: Signature made Fri 28 Oct 2022 05:44:16 EDT # gpg: using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A # gpg: issuer "david@redhat.com" # gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown] # gpg: aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full] # gpg: aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown] # gpg: WARNING: The key's User ID is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D FCCA 4DDE 10F7 00FF 835A * tag 'mem-2022-10-28' of https://github.com/davidhildenbrand/qemu: vl: Allow ThreadContext objects to be created before the sandbox option hostmem: Allow for specifying a ThreadContext for preallocation util: Make qemu_prealloc_mem() optionally consume a ThreadContext util: Add write-only "node-affinity" property for ThreadContext util: Introduce ThreadContext user-creatable object util: Introduce qemu_thread_set_affinity() and qemu_thread_get_affinity() util: Cleanup and rename os_mem_prealloc() hw/mem/nvdimm: fix error message for 'unarmed' flag Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
7208429223
@ -232,7 +232,8 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
|
||||
void *ptr = memory_region_get_ram_ptr(&backend->mr);
|
||||
uint64_t sz = memory_region_size(&backend->mr);
|
||||
|
||||
os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
|
||||
qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
|
||||
backend->prealloc_context, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
return;
|
||||
@ -383,8 +384,9 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
|
||||
* specified NUMA policy in place.
|
||||
*/
|
||||
if (backend->prealloc) {
|
||||
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
|
||||
backend->prealloc_threads, &local_err);
|
||||
qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
|
||||
backend->prealloc_threads,
|
||||
backend->prealloc_context, &local_err);
|
||||
if (local_err) {
|
||||
goto out;
|
||||
}
|
||||
@ -492,6 +494,11 @@ host_memory_backend_class_init(ObjectClass *oc, void *data)
|
||||
NULL, NULL);
|
||||
object_class_property_set_description(oc, "prealloc-threads",
|
||||
"Number of CPU threads to use for prealloc");
|
||||
object_class_property_add_link(oc, "prealloc-context",
|
||||
TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
|
||||
object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
|
||||
object_class_property_set_description(oc, "prealloc-context",
|
||||
"Context to use for creating CPU threads for preallocation");
|
||||
object_class_property_add(oc, "size", "int",
|
||||
host_memory_backend_get_size,
|
||||
host_memory_backend_set_size,
|
||||
|
@ -149,7 +149,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
|
||||
if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
|
||||
HostMemoryBackend *hostmem = dimm->hostmem;
|
||||
|
||||
error_setg(errp, "'unarmed' property must be off since memdev %s "
|
||||
error_setg(errp, "'unarmed' property must be 'on' since memdev %s "
|
||||
"is read-only",
|
||||
object_get_canonical_path_component(OBJECT(hostmem)));
|
||||
return;
|
||||
|
@ -467,7 +467,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
|
||||
int fd = memory_region_get_fd(&vmem->memdev->mr);
|
||||
Error *local_err = NULL;
|
||||
|
||||
os_mem_prealloc(fd, area, size, 1, &local_err);
|
||||
qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
|
||||
if (local_err) {
|
||||
static bool warned;
|
||||
|
||||
|
@ -576,8 +576,23 @@ unsigned long qemu_getauxval(unsigned long type);
|
||||
|
||||
void qemu_set_tty_echo(int fd, bool echo);
|
||||
|
||||
void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus,
|
||||
Error **errp);
|
||||
typedef struct ThreadContext ThreadContext;
|
||||
|
||||
/**
|
||||
* qemu_prealloc_mem:
|
||||
* @fd: the fd mapped into the area, -1 for anonymous memory
|
||||
* @area: start address of the area to preallocate
|
||||
* @sz: the size of the area to preallocate
|
||||
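* @max_threads: maximum number of threads to use
|
||||
* @tc: optional thread context used to create the preallocation threads; NULL to create them directly from the calling thread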
|
||||
* @errp: returns an error if this function fails
|
||||
*
|
||||
* Preallocate memory (populate/prefault page tables writable) for the virtual
|
||||
* memory area starting at @area with the size of @sz. After a successful call,
|
||||
* each page in the area was faulted in writable at least once, for example,
|
||||
* after allocating file blocks for mapped files.
|
||||
*/
|
||||
void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
|
||||
ThreadContext *tc, Error **errp);
|
||||
|
||||
/**
|
||||
* qemu_get_pid_name:
|
||||
|
57
include/qemu/thread-context.h
Normal file
57
include/qemu/thread-context.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* QEMU Thread Context
|
||||
*
|
||||
* Copyright Red Hat Inc., 2022
|
||||
*
|
||||
* Authors:
|
||||
* David Hildenbrand <david@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef SYSEMU_THREAD_CONTEXT_H
|
||||
#define SYSEMU_THREAD_CONTEXT_H
|
||||
|
||||
#include "qapi/qapi-types-machine.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "qom/object.h"
|
||||
|
||||
#define TYPE_THREAD_CONTEXT "thread-context"
|
||||
OBJECT_DECLARE_TYPE(ThreadContext, ThreadContextClass,
|
||||
THREAD_CONTEXT)
|
||||
|
||||
/* Class structure of the "thread-context" type; it adds no members to ObjectClass. */
struct ThreadContextClass {
|
||||
ObjectClass parent_class;
|
||||
};
|
||||
|
||||
/*
 * Instance state of a "thread-context" object: one persistent context thread
 * plus the handshake state used to ask that thread to spawn new threads.
 */
struct ThreadContext {
|
||||
/* private */
|
||||
Object parent;
|
||||
|
||||
/*
 * private
 *
 * thread_id is initialized to -1 and overwritten by the context thread with
 * its own ID once it runs; -1 therefore means "context thread not started".
 */
|
||||
unsigned int thread_id;
|
||||
QemuThread thread;
|
||||
|
||||
/*
 * Posted by the context thread after it started up and after it handled a
 * command; requesters wait on it.
 */
|
||||
QemuSemaphore sem;
|
||||
/* Posted by requesters to wake the context thread; the context thread waits on it. */
|
||||
QemuSemaphore sem_thread;
|
||||
/* Held by thread_context_create_thread() for the duration of one request. */
|
||||
QemuMutex mutex;
|
||||
|
||||
/*
 * Pending command for the context thread (a TC_CMD_* value) and its
 * argument; the context thread resets the command to TC_CMD_NONE when done.
 */
|
||||
int thread_cmd;
|
||||
void *thread_cmd_data;
|
||||
|
||||
/*
 * CPU affinity requested before the context thread existed. It is applied
 * and freed when the object is completed; init_cpu_nbits is the bitmap size
 * in bits.
 */
|
||||
unsigned long *init_cpu_bitmap;
|
||||
int init_cpu_nbits;
|
||||
};
|
||||
|
||||
/**
 * thread_context_create_thread:
 * @tc: the thread context whose context thread creates the new thread
 * @thread: storage for the new thread, forwarded to qemu_thread_create()
 * @name: name of the new thread, forwarded to qemu_thread_create()
 * @start_routine: entry point of the new thread
 * @arg: argument passed to @start_routine
 * @mode: thread mode, forwarded to qemu_thread_create()
 *
 * Ask the context thread of @tc to call qemu_thread_create() with the given
 * arguments and wait until it has done so.
 */
void thread_context_create_thread(ThreadContext *tc, QemuThread *thread,
|
||||
const char *name,
|
||||
void *(*start_routine)(void *), void *arg,
|
||||
int mode);
|
||||
|
||||
#endif /* SYSEMU_THREAD_CONTEXT_H */
|
@ -185,6 +185,10 @@ void qemu_event_destroy(QemuEvent *ev);
|
||||
void qemu_thread_create(QemuThread *thread, const char *name,
|
||||
void *(*start_routine)(void *),
|
||||
void *arg, int mode);
|
||||
int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
|
||||
unsigned long nbits);
|
||||
int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
|
||||
unsigned long *nbits);
|
||||
void *qemu_thread_join(QemuThread *thread);
|
||||
void qemu_thread_get_self(QemuThread *thread);
|
||||
bool qemu_thread_is_self(QemuThread *thread);
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "qom/object.h"
|
||||
#include "exec/memory.h"
|
||||
#include "qemu/bitmap.h"
|
||||
#include "qemu/thread-context.h"
|
||||
|
||||
#define TYPE_MEMORY_BACKEND "memory-backend"
|
||||
OBJECT_DECLARE_TYPE(HostMemoryBackend, HostMemoryBackendClass,
|
||||
@ -66,6 +67,7 @@ struct HostMemoryBackend {
|
||||
bool merge, dump, use_canonical_path;
|
||||
bool prealloc, is_mapped, share, reserve;
|
||||
uint32_t prealloc_threads;
|
||||
ThreadContext *prealloc_context;
|
||||
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
|
||||
HostMemPolicy policy;
|
||||
|
||||
|
16
meson.build
16
meson.build
@ -2130,7 +2130,23 @@ config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_pre
|
||||
pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
|
||||
return 0;
|
||||
}''', dependencies: threads))
|
||||
config_host_data.set('CONFIG_PTHREAD_AFFINITY_NP', cc.links(gnu_source_prefix + '''
|
||||
#include <pthread.h>
|
||||
|
||||
static void *f(void *p) { return NULL; }
|
||||
int main(void)
|
||||
{
|
||||
int setsize = CPU_ALLOC_SIZE(64);
|
||||
pthread_t thread;
|
||||
cpu_set_t *cpuset;
|
||||
pthread_create(&thread, 0, f, 0);
|
||||
cpuset = CPU_ALLOC(64);
|
||||
CPU_ZERO_S(setsize, cpuset);
|
||||
pthread_setaffinity_np(thread, setsize, cpuset);
|
||||
pthread_getaffinity_np(thread, setsize, cpuset);
|
||||
CPU_FREE(cpuset);
|
||||
return 0;
|
||||
}''', dependencies: threads))
|
||||
config_host_data.set('CONFIG_SIGNALFD', cc.links(gnu_source_prefix + '''
|
||||
#include <sys/signalfd.h>
|
||||
#include <stddef.h>
|
||||
|
@ -578,6 +578,9 @@
|
||||
#
|
||||
# @prealloc-threads: number of CPU threads to use for prealloc (default: 1)
|
||||
#
|
||||
# @prealloc-context: thread context to use for creation of preallocation threads
|
||||
# (default: none) (since 7.2)
|
||||
#
|
||||
# @share: if false, the memory is private to QEMU; if true, it is shared
|
||||
# (default: false)
|
||||
#
|
||||
@ -608,6 +611,7 @@
|
||||
'*policy': 'HostMemPolicy',
|
||||
'*prealloc': 'bool',
|
||||
'*prealloc-threads': 'uint32',
|
||||
'*prealloc-context': 'str',
|
||||
'*share': 'bool',
|
||||
'*reserve': 'bool',
|
||||
'size': 'size',
|
||||
@ -830,6 +834,28 @@
|
||||
'reduced-phys-bits': 'uint32',
|
||||
'*kernel-hashes': 'bool' } }
|
||||
|
||||
##
|
||||
# @ThreadContextProperties:
|
||||
#
|
||||
# Properties for thread context objects.
|
||||
#
|
||||
# @cpu-affinity: the list of host CPU numbers used as CPU affinity for all
|
||||
# threads created in the thread context (default: QEMU main
|
||||
# thread CPU affinity)
|
||||
#
|
||||
# @node-affinity: the list of host node numbers that will be resolved to a
|
||||
# list of host CPU numbers used as CPU affinity. This is a
|
||||
# shortcut for specifying the list of host CPU numbers
|
||||
# belonging to the host nodes manually by setting
|
||||
# @cpu-affinity. (default: QEMU main thread affinity)
|
||||
#
|
||||
# Since: 7.2
|
||||
##
|
||||
{ 'struct': 'ThreadContextProperties',
|
||||
'data': { '*cpu-affinity': ['uint16'],
|
||||
'*node-affinity': ['uint16'] } }
|
||||
|
||||
|
||||
##
|
||||
# @ObjectType:
|
||||
#
|
||||
@ -882,6 +908,7 @@
|
||||
{ 'name': 'secret_keyring',
|
||||
'if': 'CONFIG_SECRET_KEYRING' },
|
||||
'sev-guest',
|
||||
'thread-context',
|
||||
's390-pv-guest',
|
||||
'throttle-group',
|
||||
'tls-creds-anon',
|
||||
@ -948,6 +975,7 @@
|
||||
'secret_keyring': { 'type': 'SecretKeyringProperties',
|
||||
'if': 'CONFIG_SECRET_KEYRING' },
|
||||
'sev-guest': 'SevGuestProperties',
|
||||
'thread-context': 'ThreadContextProperties',
|
||||
'throttle-group': 'ThrottleGroupProperties',
|
||||
'tls-creds-anon': 'TlsCredsAnonProperties',
|
||||
'tls-creds-psk': 'TlsCredsPskProperties',
|
||||
|
@ -354,7 +354,7 @@ static void qemu_init_sigbus(void)
|
||||
|
||||
/*
|
||||
* ALERT: when modifying this, take care that SIGBUS forwarding in
|
||||
* os_mem_prealloc() will continue working as expected.
|
||||
* qemu_prealloc_mem() will continue working as expected.
|
||||
*/
|
||||
memset(&action, 0, sizeof(action));
|
||||
action.sa_flags = SA_SIGINFO;
|
||||
|
36
softmmu/vl.c
36
softmmu/vl.c
@ -1759,6 +1759,27 @@ static void object_option_parse(const char *optarg)
|
||||
visit_free(v);
|
||||
}
|
||||
|
||||
/*
 * Very early object creation, before the sandbox options have been activated.
 *
 * Returns true when objects of @type have to be created at that stage.
 */
static bool object_create_pre_sandbox(const char *type)
{
    /*
     * Objects should in general not get initialized "too early" without
     * a reason. If you add one, state the reason in a comment!
     *
     * "thread-context": -sandbox on,resourcecontrol=deny disallows setting
     * CPU affinity of threads.
     */
    return g_str_equal(type, "thread-context");
}
|
||||
|
||||
/*
|
||||
* Initial object creation happens before all other
|
||||
* QEMU data types are created. The majority of objects
|
||||
@ -1773,6 +1794,11 @@ static bool object_create_early(const char *type)
|
||||
* add one, state the reason in a comment!
|
||||
*/
|
||||
|
||||
/* Reason: already created. */
|
||||
if (object_create_pre_sandbox(type)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Reason: property "chardev" */
|
||||
if (g_str_equal(type, "rng-egd") ||
|
||||
g_str_equal(type, "qtest")) {
|
||||
@ -1895,7 +1921,7 @@ static void qemu_create_early_backends(void)
|
||||
*/
|
||||
static bool object_create_late(const char *type)
|
||||
{
|
||||
return !object_create_early(type);
|
||||
return !object_create_early(type) && !object_create_pre_sandbox(type);
|
||||
}
|
||||
|
||||
static void qemu_create_late_backends(void)
|
||||
@ -2351,6 +2377,11 @@ static int process_runstate_actions(void *opaque, QemuOpts *opts, Error **errp)
|
||||
|
||||
static void qemu_process_early_options(void)
|
||||
{
|
||||
qemu_opts_foreach(qemu_find_opts("name"),
|
||||
parse_name, NULL, &error_fatal);
|
||||
|
||||
object_option_foreach_add(object_create_pre_sandbox);
|
||||
|
||||
#ifdef CONFIG_SECCOMP
|
||||
QemuOptsList *olist = qemu_find_opts_err("sandbox", NULL);
|
||||
if (olist) {
|
||||
@ -2358,9 +2389,6 @@ static void qemu_process_early_options(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
qemu_opts_foreach(qemu_find_opts("name"),
|
||||
parse_name, NULL, &error_fatal);
|
||||
|
||||
if (qemu_opts_foreach(qemu_find_opts("action"),
|
||||
process_runstate_actions, NULL, &error_fatal)) {
|
||||
exit(1);
|
||||
|
@ -1,4 +1,5 @@
|
||||
util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
|
||||
util_ss.add(files('thread-context.c'), numa)
|
||||
if not config_host_data.get('CONFIG_ATOMIC64')
|
||||
util_ss.add(files('atomic64.c'))
|
||||
endif
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include "qemu/cutils.h"
|
||||
#include "qemu/compiler.h"
|
||||
#include "qemu/units.h"
|
||||
#include "qemu/thread-context.h"
|
||||
|
||||
#ifdef CONFIG_LINUX
|
||||
#include <sys/syscall.h>
|
||||
@ -329,7 +330,7 @@ static void sigbus_handler(int signal)
|
||||
return;
|
||||
}
|
||||
#endif /* CONFIG_LINUX */
|
||||
warn_report("os_mem_prealloc: unrelated SIGBUS detected and ignored");
|
||||
warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
|
||||
}
|
||||
|
||||
static void *do_touch_pages(void *arg)
|
||||
@ -399,13 +400,13 @@ static void *do_madv_populate_write_pages(void *arg)
|
||||
}
|
||||
|
||||
static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
|
||||
int smp_cpus)
|
||||
int max_threads)
|
||||
{
|
||||
long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
int ret = 1;
|
||||
|
||||
if (host_procs > 0) {
|
||||
ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
|
||||
ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
|
||||
}
|
||||
|
||||
/* Especially with gigantic pages, don't create more threads than pages. */
|
||||
@ -418,11 +419,12 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
|
||||
}
|
||||
|
||||
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
|
||||
int smp_cpus, bool use_madv_populate_write)
|
||||
int max_threads, ThreadContext *tc,
|
||||
bool use_madv_populate_write)
|
||||
{
|
||||
static gsize initialized = 0;
|
||||
MemsetContext context = {
|
||||
.num_threads = get_memset_num_threads(hpagesize, numpages, smp_cpus),
|
||||
.num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
|
||||
};
|
||||
size_t numpages_per_thread, leftover;
|
||||
void *(*touch_fn)(void *);
|
||||
@ -457,9 +459,16 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
|
||||
context.threads[i].numpages = numpages_per_thread + (i < leftover);
|
||||
context.threads[i].hpagesize = hpagesize;
|
||||
context.threads[i].context = &context;
|
||||
qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
|
||||
touch_fn, &context.threads[i],
|
||||
QEMU_THREAD_JOINABLE);
|
||||
if (tc) {
|
||||
thread_context_create_thread(tc, &context.threads[i].pgthread,
|
||||
"touch_pages",
|
||||
touch_fn, &context.threads[i],
|
||||
QEMU_THREAD_JOINABLE);
|
||||
} else {
|
||||
qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
|
||||
touch_fn, &context.threads[i],
|
||||
QEMU_THREAD_JOINABLE);
|
||||
}
|
||||
addr += context.threads[i].numpages * hpagesize;
|
||||
}
|
||||
|
||||
@ -494,13 +503,13 @@ static bool madv_populate_write_possible(char *area, size_t pagesize)
|
||||
errno != EINVAL;
|
||||
}
|
||||
|
||||
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
|
||||
Error **errp)
|
||||
void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
|
||||
ThreadContext *tc, Error **errp)
|
||||
{
|
||||
static gsize initialized;
|
||||
int ret;
|
||||
size_t hpagesize = qemu_fd_getpagesize(fd);
|
||||
size_t numpages = DIV_ROUND_UP(memory, hpagesize);
|
||||
size_t numpages = DIV_ROUND_UP(sz, hpagesize);
|
||||
bool use_madv_populate_write;
|
||||
struct sigaction act;
|
||||
|
||||
@ -530,24 +539,24 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
|
||||
if (ret) {
|
||||
qemu_mutex_unlock(&sigbus_mutex);
|
||||
error_setg_errno(errp, errno,
|
||||
"os_mem_prealloc: failed to install signal handler");
|
||||
"qemu_prealloc_mem: failed to install signal handler");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* touch pages simultaneously */
|
||||
ret = touch_all_pages(area, hpagesize, numpages, smp_cpus,
|
||||
ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
|
||||
use_madv_populate_write);
|
||||
if (ret) {
|
||||
error_setg_errno(errp, -ret,
|
||||
"os_mem_prealloc: preallocating memory failed");
|
||||
"qemu_prealloc_mem: preallocating memory failed");
|
||||
}
|
||||
|
||||
if (!use_madv_populate_write) {
|
||||
ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
|
||||
if (ret) {
|
||||
/* Terminate QEMU since it can't recover from error */
|
||||
perror("os_mem_prealloc: failed to reinstall signal handler");
|
||||
perror("qemu_prealloc_mem: failed to reinstall signal handler");
|
||||
exit(1);
|
||||
}
|
||||
qemu_mutex_unlock(&sigbus_mutex);
|
||||
|
@ -268,14 +268,14 @@ int getpagesize(void)
|
||||
return system_info.dwPageSize;
|
||||
}
|
||||
|
||||
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
|
||||
Error **errp)
|
||||
void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
|
||||
ThreadContext *tc, Error **errp)
|
||||
{
|
||||
int i;
|
||||
size_t pagesize = qemu_real_host_page_size();
|
||||
|
||||
memory = (memory + pagesize - 1) & -pagesize;
|
||||
for (i = 0; i < memory / pagesize; i++) {
|
||||
sz = (sz + pagesize - 1) & -pagesize;
|
||||
for (i = 0; i < sz / pagesize; i++) {
|
||||
memset(area + pagesize * i, 0, 1);
|
||||
}
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "qemu/notify.h"
|
||||
#include "qemu-thread-common.h"
|
||||
#include "qemu/tsan.h"
|
||||
#include "qemu/bitmap.h"
|
||||
|
||||
static bool name_threads;
|
||||
|
||||
@ -552,6 +553,75 @@ void qemu_thread_create(QemuThread *thread, const char *name,
|
||||
pthread_attr_destroy(&attr);
|
||||
}
|
||||
|
||||
/*
 * Restrict @thread to the host CPUs whose bits are set in @host_cpus, a
 * bitmap of @nbits bits.
 *
 * Returns the result of pthread_setaffinity_np(), or -ENOSYS when built
 * without CONFIG_PTHREAD_AFFINITY_NP.
 */
int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
                             unsigned long nbits)
{
#if defined(CONFIG_PTHREAD_AFFINITY_NP)
    const size_t mask_bytes = CPU_ALLOC_SIZE(nbits);
    cpu_set_t *mask = CPU_ALLOC(nbits);
    unsigned long cpu;
    int rc;

    g_assert(mask);
    CPU_ZERO_S(mask_bytes, mask);

    /* Mirror every set bit of the bitmap into the cpu_set_t. */
    for (cpu = find_first_bit(host_cpus, nbits);
         cpu < nbits;
         cpu = find_next_bit(host_cpus, nbits, cpu + 1)) {
        CPU_SET_S(cpu, mask_bytes, mask);
    }

    rc = pthread_setaffinity_np(thread->thread, mask_bytes, mask);
    CPU_FREE(mask);
    return rc;
#else
    return -ENOSYS;
#endif
}
|
||||
|
||||
/*
 * Query the CPU affinity of @thread.
 *
 * On success, returns 0, stores a newly allocated bitmap in *@host_cpus and
 * its size in bits in *@nbits; the caller releases the bitmap with g_free().
 * On failure, returns the error number reported by pthread_getaffinity_np(),
 * or -ENOSYS when built without CONFIG_PTHREAD_AFFINITY_NP.
 */
int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
                             unsigned long *nbits)
{
#if defined(CONFIG_PTHREAD_AFFINITY_NP)
    unsigned long tmpbits, i;
    cpu_set_t *cpuset;
    size_t setsize;
    int err;

    tmpbits = CPU_SETSIZE;
    while (true) {
        setsize = CPU_ALLOC_SIZE(tmpbits);
        cpuset = CPU_ALLOC(tmpbits);
        g_assert(cpuset);

        err = pthread_getaffinity_np(thread->thread, setsize, cpuset);
        if (!err) {
            break;
        }

        CPU_FREE(cpuset);
        /*
         * pthread functions return positive error numbers. EINVAL means the
         * supplied set is too small for the kernel's affinity mask, so retry
         * with a set twice as large; anything else is a real failure.
         */
        if (err != EINVAL) {
            return err;
        }
        tmpbits *= 2;
    }

    /* Convert the result into a proper bitmap. */
    *nbits = tmpbits;
    *host_cpus = bitmap_new(tmpbits);
    for (i = 0; i < tmpbits; i++) {
        /*
         * The set was allocated dynamically and may hold more than
         * CPU_SETSIZE bits, so the size-aware accessor is required.
         */
        if (CPU_ISSET_S(i, setsize, cpuset)) {
            set_bit(i, *host_cpus);
        }
    }
    CPU_FREE(cpuset);
    return 0;
#else
    return -ENOSYS;
#endif
}
|
||||
|
||||
void qemu_thread_get_self(QemuThread *thread)
|
||||
{
|
||||
thread->thread = pthread_self();
|
||||
|
@ -477,6 +477,18 @@ void qemu_thread_create(QemuThread *thread, const char *name,
|
||||
thread->data = data;
|
||||
}
|
||||
|
||||
/* Setting a thread's CPU affinity is not implemented here: always returns -ENOSYS. */
int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus,
|
||||
unsigned long nbits)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
/*
 * Querying a thread's CPU affinity is not implemented here: always returns
 * -ENOSYS and leaves *host_cpus and *nbits untouched.
 */
int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus,
|
||||
unsigned long *nbits)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
void qemu_thread_get_self(QemuThread *thread)
|
||||
{
|
||||
thread->data = qemu_thread_data;
|
||||
|
362
util/thread-context.c
Normal file
362
util/thread-context.c
Normal file
@ -0,0 +1,362 @@
|
||||
/*
|
||||
* QEMU Thread Context
|
||||
*
|
||||
* Copyright Red Hat Inc., 2022
|
||||
*
|
||||
* Authors:
|
||||
* David Hildenbrand <david@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/thread-context.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qapi-builtin-visit.h"
|
||||
#include "qapi/visitor.h"
|
||||
#include "qemu/config-file.h"
|
||||
#include "qapi/qapi-builtin-visit.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "qemu/module.h"
|
||||
#include "qemu/bitmap.h"
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
#include <numa.h>
|
||||
#endif
|
||||
|
||||
/* Commands a requester can hand to the context thread via ThreadContext.thread_cmd. */
enum {
|
||||
/* Nothing pending; the context thread just waits for the next wakeup. */
TC_CMD_NONE = 0,
|
||||
/* Ask the context thread to exit. */
TC_CMD_STOP,
|
||||
/* Ask the context thread to create a thread described by a ThreadContextCmdNew. */
TC_CMD_NEW,
|
||||
};
|
||||
|
||||
/*
 * Argument of TC_CMD_NEW: the parameters the context thread forwards
 * unchanged to qemu_thread_create().
 */
typedef struct ThreadContextCmdNew {
|
||||
QemuThread *thread;
|
||||
const char *name;
|
||||
void *(*start_routine)(void *);
|
||||
void *arg;
|
||||
int mode;
|
||||
} ThreadContextCmdNew;
|
||||
|
||||
static void *thread_context_run(void *opaque)
|
||||
{
|
||||
ThreadContext *tc = opaque;
|
||||
|
||||
tc->thread_id = qemu_get_thread_id();
|
||||
qemu_sem_post(&tc->sem);
|
||||
|
||||
while (true) {
|
||||
/*
|
||||
* Threads inherit the CPU affinity of the creating thread. For this
|
||||
* reason, we create new (especially short-lived) threads from our
|
||||
* persistent context thread.
|
||||
*
|
||||
* Especially when QEMU is not allowed to set the affinity itself,
|
||||
* management tools can simply set the affinity of the context thread
|
||||
* after creating the context, to have new threads created via
|
||||
* the context inherit the CPU affinity automatically.
|
||||
*/
|
||||
switch (tc->thread_cmd) {
|
||||
case TC_CMD_NONE:
|
||||
break;
|
||||
case TC_CMD_STOP:
|
||||
tc->thread_cmd = TC_CMD_NONE;
|
||||
qemu_sem_post(&tc->sem);
|
||||
return NULL;
|
||||
case TC_CMD_NEW: {
|
||||
ThreadContextCmdNew *cmd_new = tc->thread_cmd_data;
|
||||
|
||||
qemu_thread_create(cmd_new->thread, cmd_new->name,
|
||||
cmd_new->start_routine, cmd_new->arg,
|
||||
cmd_new->mode);
|
||||
tc->thread_cmd = TC_CMD_NONE;
|
||||
tc->thread_cmd_data = NULL;
|
||||
qemu_sem_post(&tc->sem);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
qemu_sem_wait(&tc->sem_thread);
|
||||
}
|
||||
}
|
||||
|
||||
static void thread_context_set_cpu_affinity(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
{
|
||||
ThreadContext *tc = THREAD_CONTEXT(obj);
|
||||
uint16List *l, *host_cpus = NULL;
|
||||
unsigned long *bitmap = NULL;
|
||||
int nbits = 0, ret;
|
||||
Error *err = NULL;
|
||||
|
||||
if (tc->init_cpu_bitmap) {
|
||||
error_setg(errp, "Mixing CPU and node affinity not supported");
|
||||
return;
|
||||
}
|
||||
|
||||
visit_type_uint16List(v, name, &host_cpus, &err);
|
||||
if (err) {
|
||||
error_propagate(errp, err);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!host_cpus) {
|
||||
error_setg(errp, "CPU list is empty");
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (l = host_cpus; l; l = l->next) {
|
||||
nbits = MAX(nbits, l->value + 1);
|
||||
}
|
||||
bitmap = bitmap_new(nbits);
|
||||
for (l = host_cpus; l; l = l->next) {
|
||||
set_bit(l->value, bitmap);
|
||||
}
|
||||
|
||||
if (tc->thread_id != -1) {
|
||||
/*
|
||||
* Note: we won't be adjusting the affinity of any thread that is still
|
||||
* around, but only the affinity of the context thread.
|
||||
*/
|
||||
ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
|
||||
if (ret) {
|
||||
error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
|
||||
}
|
||||
} else {
|
||||
tc->init_cpu_bitmap = bitmap;
|
||||
bitmap = NULL;
|
||||
tc->init_cpu_nbits = nbits;
|
||||
}
|
||||
out:
|
||||
g_free(bitmap);
|
||||
qapi_free_uint16List(host_cpus);
|
||||
}
|
||||
|
||||
static void thread_context_get_cpu_affinity(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
{
|
||||
unsigned long *bitmap, nbits, value;
|
||||
ThreadContext *tc = THREAD_CONTEXT(obj);
|
||||
uint16List *host_cpus = NULL;
|
||||
uint16List **tail = &host_cpus;
|
||||
int ret;
|
||||
|
||||
if (tc->thread_id == -1) {
|
||||
error_setg(errp, "Object not initialized yet");
|
||||
return;
|
||||
}
|
||||
|
||||
ret = qemu_thread_get_affinity(&tc->thread, &bitmap, &nbits);
|
||||
if (ret) {
|
||||
error_setg(errp, "Getting CPU affinity failed: %s", strerror(ret));
|
||||
return;
|
||||
}
|
||||
|
||||
value = find_first_bit(bitmap, nbits);
|
||||
while (value < nbits) {
|
||||
QAPI_LIST_APPEND(tail, value);
|
||||
|
||||
value = find_next_bit(bitmap, nbits, value + 1);
|
||||
}
|
||||
g_free(bitmap);
|
||||
|
||||
visit_type_uint16List(v, name, &host_cpus, errp);
|
||||
qapi_free_uint16List(host_cpus);
|
||||
}
|
||||
|
||||
static void thread_context_set_node_affinity(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
{
|
||||
#ifdef CONFIG_NUMA
|
||||
const int nbits = numa_num_possible_cpus();
|
||||
ThreadContext *tc = THREAD_CONTEXT(obj);
|
||||
uint16List *l, *host_nodes = NULL;
|
||||
unsigned long *bitmap = NULL;
|
||||
struct bitmask *tmp_cpus;
|
||||
Error *err = NULL;
|
||||
int ret, i;
|
||||
|
||||
if (tc->init_cpu_bitmap) {
|
||||
error_setg(errp, "Mixing CPU and node affinity not supported");
|
||||
return;
|
||||
}
|
||||
|
||||
visit_type_uint16List(v, name, &host_nodes, &err);
|
||||
if (err) {
|
||||
error_propagate(errp, err);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!host_nodes) {
|
||||
error_setg(errp, "Node list is empty");
|
||||
goto out;
|
||||
}
|
||||
|
||||
bitmap = bitmap_new(nbits);
|
||||
tmp_cpus = numa_allocate_cpumask();
|
||||
for (l = host_nodes; l; l = l->next) {
|
||||
numa_bitmask_clearall(tmp_cpus);
|
||||
ret = numa_node_to_cpus(l->value, tmp_cpus);
|
||||
if (ret) {
|
||||
/* We ignore any errors, such as impossible nodes. */
|
||||
continue;
|
||||
}
|
||||
for (i = 0; i < nbits; i++) {
|
||||
if (numa_bitmask_isbitset(tmp_cpus, i)) {
|
||||
set_bit(i, bitmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
numa_free_cpumask(tmp_cpus);
|
||||
|
||||
if (bitmap_empty(bitmap, nbits)) {
|
||||
error_setg(errp, "The nodes select no CPUs");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (tc->thread_id != -1) {
|
||||
/*
|
||||
* Note: we won't be adjusting the affinity of any thread that is still
|
||||
* around for now, but only the affinity of the context thread.
|
||||
*/
|
||||
ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
|
||||
if (ret) {
|
||||
error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
|
||||
}
|
||||
} else {
|
||||
tc->init_cpu_bitmap = bitmap;
|
||||
bitmap = NULL;
|
||||
tc->init_cpu_nbits = nbits;
|
||||
}
|
||||
out:
|
||||
g_free(bitmap);
|
||||
qapi_free_uint16List(host_nodes);
|
||||
#else
|
||||
error_setg(errp, "NUMA node affinity is not supported by this QEMU");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void thread_context_get_thread_id(Object *obj, Visitor *v,
|
||||
const char *name, void *opaque,
|
||||
Error **errp)
|
||||
{
|
||||
ThreadContext *tc = THREAD_CONTEXT(obj);
|
||||
uint64_t value = tc->thread_id;
|
||||
|
||||
visit_type_uint64(v, name, &value, errp);
|
||||
}
|
||||
|
||||
/*
 * UserCreatable completion callback.
 *
 * Starts the context thread, waits until it has published its thread ID and
 * then applies a CPU affinity that was configured before the thread existed.
 */
static void thread_context_instance_complete(UserCreatable *uc, Error **errp)
{
    ThreadContext *tc = THREAD_CONTEXT(uc);
    char *thread_name;
    int ret;

    thread_name = g_strdup_printf("TC %s",
                               object_get_canonical_path_component(OBJECT(uc)));
    qemu_thread_create(&tc->thread, thread_name, thread_context_run, tc,
                       QEMU_THREAD_JOINABLE);
    g_free(thread_name);

    /* Wait until initialization of the thread is done. */
    while (tc->thread_id == -1) {
        qemu_sem_wait(&tc->sem);
    }

    if (tc->init_cpu_bitmap) {
        ret = qemu_thread_set_affinity(&tc->thread, tc->init_cpu_bitmap,
                                       tc->init_cpu_nbits);
        if (ret) {
            /*
             * The helper reports either a positive error number or -ENOSYS;
             * strerror() needs the positive value.
             */
            error_setg(errp, "Setting CPU affinity failed: %s",
                       strerror(ret < 0 ? -ret : ret));
        }
        /* The stashed bitmap is only needed once. */
        g_free(tc->init_cpu_bitmap);
        tc->init_cpu_bitmap = NULL;
    }
}
|
||||
|
||||
/*
 * Class initializer: installs the UserCreatable completion callback and
 * registers the "thread-id", "cpu-affinity" and "node-affinity" properties.
 */
static void thread_context_class_init(ObjectClass *oc, void *data)
{
    USER_CREATABLE_CLASS(oc)->complete = thread_context_instance_complete;

    /* Read-only: getter without setter. */
    object_class_property_add(oc, "thread-id", "int",
                              thread_context_get_thread_id,
                              NULL,
                              NULL, NULL);

    /* Read-write. */
    object_class_property_add(oc, "cpu-affinity", "int",
                              thread_context_get_cpu_affinity,
                              thread_context_set_cpu_affinity,
                              NULL, NULL);

    /* Write-only: setter without getter. */
    object_class_property_add(oc, "node-affinity", "int",
                              NULL,
                              thread_context_set_node_affinity,
                              NULL, NULL);
}
|
||||
|
||||
/*
 * Instance initializer: marks the context thread as not started and sets up
 * the synchronization primitives.
 */
static void thread_context_instance_init(Object *obj)
{
    ThreadContext *ctx = THREAD_CONTEXT(obj);

    qemu_mutex_init(&ctx->mutex);
    /* Both semaphores start out with nothing to consume. */
    qemu_sem_init(&ctx->sem_thread, 0);
    qemu_sem_init(&ctx->sem, 0);

    /* -1 means that the context thread was not started yet. */
    ctx->thread_id = -1;
}
|
||||
|
||||
/*
 * Instance finalizer: stops and joins the context thread if it was started,
 * then tears down the synchronization primitives.
 */
static void thread_context_instance_finalize(Object *obj)
{
    ThreadContext *ctx = THREAD_CONTEXT(obj);
    const bool thread_started = ctx->thread_id != -1;

    if (thread_started) {
        /* Hand over the stop command, wake the thread and wait for its exit. */
        ctx->thread_cmd = TC_CMD_STOP;
        qemu_sem_post(&ctx->sem_thread);
        qemu_thread_join(&ctx->thread);
    }

    qemu_sem_destroy(&ctx->sem);
    qemu_sem_destroy(&ctx->sem_thread);
    qemu_mutex_destroy(&ctx->mutex);
}
|
||||
|
||||
static const TypeInfo thread_context_info = {
|
||||
.name = TYPE_THREAD_CONTEXT,
|
||||
.parent = TYPE_OBJECT,
|
||||
.class_init = thread_context_class_init,
|
||||
.instance_size = sizeof(ThreadContext),
|
||||
.instance_init = thread_context_instance_init,
|
||||
.instance_finalize = thread_context_instance_finalize,
|
||||
.interfaces = (InterfaceInfo[]) {
|
||||
{ TYPE_USER_CREATABLE },
|
||||
{ }
|
||||
}
|
||||
};
|
||||
|
||||
/* Registers the "thread-context" type; hooked up through type_init() below. */
static void thread_context_register_types(void)
|
||||
{
|
||||
type_register_static(&thread_context_info);
|
||||
}
|
||||
type_init(thread_context_register_types)
|
||||
|
||||
/*
 * Create a new thread from within the context thread of @tc.
 *
 * The remaining arguments are forwarded unchanged to qemu_thread_create(),
 * which is called by the context thread. Requests are serialized by the
 * context's mutex, and the function returns only after the context thread
 * has marked the request as handled.
 */
void thread_context_create_thread(ThreadContext *tc, QemuThread *thread,
                                  const char *name,
                                  void *(*start_routine)(void *), void *arg,
                                  int mode)
{
    /* Lives on our stack; valid because we wait until the request is handled. */
    ThreadContextCmdNew request;

    request.thread = thread;
    request.name = name;
    request.start_routine = start_routine;
    request.arg = arg;
    request.mode = mode;

    qemu_mutex_lock(&tc->mutex);

    /* Publish the request, then wake the context thread. */
    tc->thread_cmd = TC_CMD_NEW;
    tc->thread_cmd_data = &request;
    qemu_sem_post(&tc->sem_thread);

    /* The context thread resets the command once the thread was created. */
    for (;;) {
        if (tc->thread_cmd == TC_CMD_NONE) {
            break;
        }
        qemu_sem_wait(&tc->sem);
    }

    qemu_mutex_unlock(&tc->mutex);
}
|
Loading…
Reference in New Issue
Block a user