ramblock-notifier: new

This adds a notifier interface for RAM block additions and removals.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Author: Paolo Bonzini
Date:   2016-12-21 00:31:36 +08:00
Parent: fb5e19d2e1
Commit: 0987d735a3

6 changed files with 112 additions and 49 deletions

exec.c

@@ -1687,6 +1687,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
         /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
         qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
+        ram_block_notify_add(new_block->host, new_block->max_length);
     }
 }
@@ -1817,6 +1818,10 @@ void qemu_ram_free(RAMBlock *block)
         return;
     }
 
+    if (block->host) {
+        ram_block_notify_remove(block->host, block->max_length);
+    }
+
     qemu_mutex_lock_ramlist();
     QLIST_REMOVE_RCU(block, next);
     ram_list.mru_block = NULL;

include/exec/memory.h

@@ -16,16 +16,12 @@
 #ifndef CONFIG_USER_ONLY
-#define DIRTY_MEMORY_VGA       0
-#define DIRTY_MEMORY_CODE      1
-#define DIRTY_MEMORY_MIGRATION 2
-#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
 #include "exec/cpu-common.h"
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
 #endif
 #include "exec/memattrs.h"
+#include "exec/ramlist.h"
 #include "qemu/queue.h"
 #include "qemu/int128.h"
 #include "qemu/notify.h"

include/exec/ram_addr.h

@@ -21,6 +21,7 @@
 #ifndef CONFIG_USER_ONLY
 #include "hw/xen/xen.h"
+#include "exec/ramlist.h"
 
 struct RAMBlock {
     struct rcu_head rcu;
@@ -35,6 +36,7 @@ struct RAMBlock {
     char idstr[256];
     /* RCU-enabled, writes protected by the ramlist lock */
     QLIST_ENTRY(RAMBlock) next;
+    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
     int fd;
     size_t page_size;
 };
@@ -50,51 +52,7 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
     return (char *)block->host + offset;
 }
 
-/* The dirty memory bitmap is split into fixed-size blocks to allow growth
- * under RCU. The bitmap for a block can be accessed as follows:
- *
- *    rcu_read_lock();
- *
- *    DirtyMemoryBlocks *blocks =
- *        atomic_rcu_read(&ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]);
- *
- *    ram_addr_t idx = (addr >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
- *    unsigned long *block = blocks->blocks[idx];
- *    ...access block bitmap...
- *
- *    rcu_read_unlock();
- *
- * Remember to check for the end of the block when accessing a range of
- * addresses. Move on to the next block if you reach the end.
- *
- * Organization into blocks allows dirty memory to grow (but not shrink) under
- * RCU. When adding new RAMBlocks requires the dirty memory to grow, a new
- * DirtyMemoryBlocks array is allocated with pointers to existing blocks kept
- * the same. Other threads can safely access existing blocks while dirty
- * memory is being grown. When no threads are using the old DirtyMemoryBlocks
- * anymore it is freed by RCU (but the underlying blocks stay because they are
- * pointed to from the new DirtyMemoryBlocks).
- */
-#define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
-
-typedef struct {
-    struct rcu_head rcu;
-    unsigned long *blocks[];
-} DirtyMemoryBlocks;
-
-typedef struct RAMList {
-    QemuMutex mutex;
-    RAMBlock *mru_block;
-    /* RCU-enabled, writes protected by the ramlist lock. */
-    QLIST_HEAD(, RAMBlock) blocks;
-    DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
-    uint32_t version;
-} RAMList;
-extern RAMList ram_list;
-
 ram_addr_t last_ram_offset(void);
-void qemu_mutex_lock_ramlist(void);
-void qemu_mutex_unlock_ramlist(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    bool share, const char *mem_path,
                                    Error **errp);
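(For scale: assuming 4 KiB target pages, the DIRTY_MEMORY_BLOCK_SIZE of 256 * 1024 * 8 bits makes each blocks[] entry a 256 KiB bitmap covering 8 GiB of guest RAM; larger guests simply get more entries in the DirtyMemoryBlocks array.)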

include/exec/ramlist.h (new file)

@@ -0,0 +1,72 @@
+#ifndef RAMLIST_H
+#define RAMLIST_H
+
+#include "qemu/queue.h"
+#include "qemu/thread.h"
+#include "qemu/rcu.h"
+
+typedef struct RAMBlockNotifier RAMBlockNotifier;
+
+#define DIRTY_MEMORY_VGA       0
+#define DIRTY_MEMORY_CODE      1
+#define DIRTY_MEMORY_MIGRATION 2
+#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
+
+/* The dirty memory bitmap is split into fixed-size blocks to allow growth
+ * under RCU. The bitmap for a block can be accessed as follows:
+ *
+ *    rcu_read_lock();
+ *
+ *    DirtyMemoryBlocks *blocks =
+ *        atomic_rcu_read(&ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]);
+ *
+ *    ram_addr_t idx = (addr >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
+ *    unsigned long *block = blocks->blocks[idx];
+ *    ...access block bitmap...
+ *
+ *    rcu_read_unlock();
+ *
+ * Remember to check for the end of the block when accessing a range of
+ * addresses. Move on to the next block if you reach the end.
+ *
+ * Organization into blocks allows dirty memory to grow (but not shrink) under
+ * RCU. When adding new RAMBlocks requires the dirty memory to grow, a new
+ * DirtyMemoryBlocks array is allocated with pointers to existing blocks kept
+ * the same. Other threads can safely access existing blocks while dirty
+ * memory is being grown. When no threads are using the old DirtyMemoryBlocks
+ * anymore it is freed by RCU (but the underlying blocks stay because they are
+ * pointed to from the new DirtyMemoryBlocks).
+ */
+#define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
+
+typedef struct {
+    struct rcu_head rcu;
+    unsigned long *blocks[];
+} DirtyMemoryBlocks;
+
+typedef struct RAMList {
+    QemuMutex mutex;
+    RAMBlock *mru_block;
+    /* RCU-enabled, writes protected by the ramlist lock. */
+    QLIST_HEAD(, RAMBlock) blocks;
+    DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
+    uint32_t version;
+    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
+} RAMList;
+extern RAMList ram_list;
+
+void qemu_mutex_lock_ramlist(void);
+void qemu_mutex_unlock_ramlist(void);
+
+struct RAMBlockNotifier {
+    void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size);
+    void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size);
+    QLIST_ENTRY(RAMBlockNotifier) next;
+};
+
+void ram_block_notifier_add(RAMBlockNotifier *n);
+void ram_block_notifier_remove(RAMBlockNotifier *n);
+void ram_block_notify_add(void *host, size_t size);
+void ram_block_notify_remove(void *host, size_t size);
+
+#endif /* RAMLIST_H */
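
As an illustration of the new interface, here is a minimal hypothetical consumer (none of these names are part of the commit): it registers a RAMBlockNotifier whose callbacks simply log each RAM block addition and removal.

#include "qemu/osdep.h"
#include "exec/ramlist.h"

/* Hypothetical example: log every RAM block addition/removal. */
static void log_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
{
    fprintf(stderr, "ram block added:   host=%p size=%zu\n", host, size);
}

static void log_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size)
{
    fprintf(stderr, "ram block removed: host=%p size=%zu\n", host, size);
}

static RAMBlockNotifier log_notifier = {
    .ram_block_added   = log_ram_block_added,
    .ram_block_removed = log_ram_block_removed,
};

/* Call once at startup, before RAM blocks are created, so that no
 * addition is missed (the interface does not replay existing blocks). */
void log_notifier_register(void)
{
    ram_block_notifier_add(&log_notifier);
}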

numa.c

@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "sysemu/numa.h"
 #include "exec/cpu-common.h"
+#include "exec/ramlist.h"
 #include "qemu/bitmap.h"
 #include "qom/cpu.h"
 #include "qemu/error-report.h"
@@ -572,3 +573,31 @@ int numa_get_node_for_cpu(int idx)
     }
     return i;
 }
+
+void ram_block_notifier_add(RAMBlockNotifier *n)
+{
+    QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);
+}
+
+void ram_block_notifier_remove(RAMBlockNotifier *n)
+{
+    QLIST_REMOVE(n, next);
+}
+
+void ram_block_notify_add(void *host, size_t size)
+{
+    RAMBlockNotifier *notifier;
+
+    QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+        notifier->ram_block_added(notifier, host, size);
+    }
+}
+
+void ram_block_notify_remove(void *host, size_t size)
+{
+    RAMBlockNotifier *notifier;
+
+    QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
+        notifier->ram_block_removed(notifier, host, size);
+    }
+}
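
(Note: ram_block_notifier_add() inserts at the head of the list, so the most recently registered notifier sees each event first, and neither registration nor dispatch takes the ramlist mutex; presumably notifiers are registered once at startup, before RAM blocks start being added and removed concurrently.)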

xen-mapcache.c

@@ -163,6 +163,7 @@ static void xen_remap_bucket(MapCacheEntry *entry,
     err = g_malloc0(nb_pfn * sizeof (int));
 
     if (entry->vaddr_base != NULL) {
+        ram_block_notify_remove(entry->vaddr_base, entry->size);
         if (munmap(entry->vaddr_base, entry->size) != 0) {
             perror("unmap fails");
             exit(-1);
@@ -188,6 +189,7 @@ static void xen_remap_bucket(MapCacheEntry *entry,
     entry->valid_mapping = (unsigned long *) g_malloc0(sizeof(unsigned long) *
             BITS_TO_LONGS(size >> XC_PAGE_SHIFT));
 
+    ram_block_notify_add(entry->vaddr_base, entry->size);
     bitmap_zero(entry->valid_mapping, nb_pfn);
     for (i = 0; i < nb_pfn; i++) {
         if (!err[i]) {
@@ -397,6 +399,7 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
         }
         pentry->next = entry->next;
+        ram_block_notify_remove(entry->vaddr_base, entry->size);
         if (munmap(entry->vaddr_base, entry->size) != 0) {
             perror("unmap fails");
             exit(-1);