qemu/memory.c

2601 lines
81 KiB
C
Raw Normal View History

/*
* Physical memory management
*
* Copyright 2011 Red Hat, Inc. and/or its affiliates
*
* Authors:
* Avi Kivity <avi@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include "qemu/osdep.h"
2016-03-14 11:01:28 +03:00
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/ioport.h"
#include "qapi/visitor.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "qom/object.h"
#include "trace.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
//#define DEBUG_UNASSIGNED
static unsigned memory_region_transaction_depth;
static bool memory_region_update_pending;
static bool ioeventfd_update_pending;
static bool global_dirty_log = false;
static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
= QTAILQ_HEAD_INITIALIZER(memory_listeners);
static QTAILQ_HEAD(, AddressSpace) address_spaces
= QTAILQ_HEAD_INITIALIZER(address_spaces);
typedef struct AddrRange AddrRange;
/*
* Note that signed integers are needed for negative offsetting in aliases
* (large MemoryRegion::alias_offset).
*/
struct AddrRange {
Int128 start;
Int128 size;
};
static AddrRange addrrange_make(Int128 start, Int128 size)
{
return (AddrRange) { start, size };
}
static bool addrrange_equal(AddrRange r1, AddrRange r2)
{
return int128_eq(r1.start, r2.start) && int128_eq(r1.size, r2.size);
}
static Int128 addrrange_end(AddrRange r)
{
return int128_add(r.start, r.size);
}
static AddrRange addrrange_shift(AddrRange range, Int128 delta)
{
int128_addto(&range.start, delta);
return range;
}
static bool addrrange_contains(AddrRange range, Int128 addr)
{
return int128_ge(addr, range.start)
&& int128_lt(addr, addrrange_end(range));
}
static bool addrrange_intersects(AddrRange r1, AddrRange r2)
{
return addrrange_contains(r1, r2.start)
|| addrrange_contains(r2, r1.start);
}
static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2)
{
Int128 start = int128_max(r1.start, r2.start);
Int128 end = int128_min(addrrange_end(r1), addrrange_end(r2));
return addrrange_make(start, int128_sub(end, start));
}
enum ListenerDirection { Forward, Reverse };
#define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...) \
do { \
MemoryListener *_listener; \
\
switch (_direction) { \
case Forward: \
QTAILQ_FOREACH(_listener, &memory_listeners, link) { \
if (_listener->_callback) { \
_listener->_callback(_listener, ##_args); \
} \
} \
break; \
case Reverse: \
QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners, \
memory_listeners, link) { \
if (_listener->_callback) { \
_listener->_callback(_listener, ##_args); \
} \
} \
break; \
default: \
abort(); \
} \
} while (0)
#define MEMORY_LISTENER_CALL(_as, _callback, _direction, _section, _args...) \
do { \
MemoryListener *_listener; \
struct memory_listeners_as *list = &(_as)->listeners; \
\
switch (_direction) { \
case Forward: \
QTAILQ_FOREACH(_listener, list, link_as) { \
if (_listener->_callback) { \
_listener->_callback(_listener, _section, ##_args); \
} \
} \
break; \
case Reverse: \
QTAILQ_FOREACH_REVERSE(_listener, list, memory_listeners_as, \
link_as) { \
if (_listener->_callback) { \
_listener->_callback(_listener, _section, ##_args); \
} \
} \
break; \
default: \
abort(); \
} \
} while (0)
/* No need to ref/unref .mr, the FlatRange keeps it alive. */
#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \
do { \
MemoryRegionSection mrs = section_from_flat_range(fr, as); \
MEMORY_LISTENER_CALL(as, callback, dir, &mrs, ##_args); \
} while(0)
struct CoalescedMemoryRange {
AddrRange addr;
QTAILQ_ENTRY(CoalescedMemoryRange) link;
};
struct MemoryRegionIoeventfd {
AddrRange addr;
bool match_data;
uint64_t data;
EventNotifier *e;
};
static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a,
MemoryRegionIoeventfd b)
{
if (int128_lt(a.addr.start, b.addr.start)) {
return true;
} else if (int128_gt(a.addr.start, b.addr.start)) {
return false;
} else if (int128_lt(a.addr.size, b.addr.size)) {
return true;
} else if (int128_gt(a.addr.size, b.addr.size)) {
return false;
} else if (a.match_data < b.match_data) {
return true;
} else if (a.match_data > b.match_data) {
return false;
} else if (a.match_data) {
if (a.data < b.data) {
return true;
} else if (a.data > b.data) {
return false;
}
}
if (a.e < b.e) {
return true;
} else if (a.e > b.e) {
return false;
}
return false;
}
static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd a,
MemoryRegionIoeventfd b)
{
return !memory_region_ioeventfd_before(a, b)
&& !memory_region_ioeventfd_before(b, a);
}
typedef struct FlatRange FlatRange;
typedef struct FlatView FlatView;
/* Range of memory in the global map. Addresses are absolute. */
struct FlatRange {
MemoryRegion *mr;
hwaddr offset_in_region;
AddrRange addr;
uint8_t dirty_log_mask;
bool romd_mode;
bool readonly;
};
/* Flattened global view of current active memory hierarchy. Kept in sorted
* order.
*/
struct FlatView {
struct rcu_head rcu;
unsigned ref;
FlatRange *ranges;
unsigned nr;
unsigned nr_allocated;
};
typedef struct AddressSpaceOps AddressSpaceOps;
#define FOR_EACH_FLAT_RANGE(var, view) \
for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var)
static inline MemoryRegionSection
section_from_flat_range(FlatRange *fr, AddressSpace *as)
{
return (MemoryRegionSection) {
.mr = fr->mr,
.address_space = as,
.offset_within_region = fr->offset_in_region,
.size = fr->addr.size,
.offset_within_address_space = int128_get64(fr->addr.start),
.readonly = fr->readonly,
};
}
static bool flatrange_equal(FlatRange *a, FlatRange *b)
{
return a->mr == b->mr
&& addrrange_equal(a->addr, b->addr)
&& a->offset_in_region == b->offset_in_region
&& a->romd_mode == b->romd_mode
&& a->readonly == b->readonly;
}
static void flatview_init(FlatView *view)
{
view->ref = 1;
view->ranges = NULL;
view->nr = 0;
view->nr_allocated = 0;
}
/* Insert a range into a given position. Caller is responsible for maintaining
* sorting order.
*/
static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
{
if (view->nr == view->nr_allocated) {
view->nr_allocated = MAX(2 * view->nr, 10);
view->ranges = g_realloc(view->ranges,
view->nr_allocated * sizeof(*view->ranges));
}
memmove(view->ranges + pos + 1, view->ranges + pos,
(view->nr - pos) * sizeof(FlatRange));
view->ranges[pos] = *range;
memory_region_ref(range->mr);
++view->nr;
}
static void flatview_destroy(FlatView *view)
{
int i;
for (i = 0; i < view->nr; i++) {
memory_region_unref(view->ranges[i].mr);
}
g_free(view->ranges);
g_free(view);
}
static void flatview_ref(FlatView *view)
{
atomic_inc(&view->ref);
}
static void flatview_unref(FlatView *view)
{
if (atomic_fetch_dec(&view->ref) == 1) {
flatview_destroy(view);
}
}
static bool can_merge(FlatRange *r1, FlatRange *r2)
{
return int128_eq(addrrange_end(r1->addr), r2->addr.start)
&& r1->mr == r2->mr
&& int128_eq(int128_add(int128_make64(r1->offset_in_region),
r1->addr.size),
int128_make64(r2->offset_in_region))
&& r1->dirty_log_mask == r2->dirty_log_mask
&& r1->romd_mode == r2->romd_mode
&& r1->readonly == r2->readonly;
}
/* Attempt to simplify a view by merging adjacent ranges */
static void flatview_simplify(FlatView *view)
{
unsigned i, j;
i = 0;
while (i < view->nr) {
j = i + 1;
while (j < view->nr
&& can_merge(&view->ranges[j-1], &view->ranges[j])) {
int128_addto(&view->ranges[i].addr.size, view->ranges[j].addr.size);
++j;
}
++i;
memmove(&view->ranges[i], &view->ranges[j],
(view->nr - j) * sizeof(view->ranges[j]));
view->nr -= j - i;
}
}
static bool memory_region_big_endian(MemoryRegion *mr)
{
#ifdef TARGET_WORDS_BIGENDIAN
return mr->ops->endianness != DEVICE_LITTLE_ENDIAN;
#else
return mr->ops->endianness == DEVICE_BIG_ENDIAN;
#endif
}
static bool memory_region_wrong_endianness(MemoryRegion *mr)
{
#ifdef TARGET_WORDS_BIGENDIAN
return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
#else
return mr->ops->endianness == DEVICE_BIG_ENDIAN;
#endif
}
static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
{
if (memory_region_wrong_endianness(mr)) {
switch (size) {
case 1:
break;
case 2:
*data = bswap16(*data);
break;
case 4:
*data = bswap32(*data);
break;
case 8:
*data = bswap64(*data);
break;
default:
abort();
}
}
}
static hwaddr memory_region_to_absolute_addr(MemoryRegion *mr, hwaddr offset)
{
MemoryRegion *root;
hwaddr abs_addr = offset;
abs_addr += mr->addr;
for (root = mr; root->container; ) {
root = root->container;
abs_addr += root->addr;
}
return abs_addr;
}
static int get_cpu_index(void)
{
if (current_cpu) {
return current_cpu->cpu_index;
}
return -1;
}
static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
unsigned size,
unsigned shift,
uint64_t mask,
MemTxAttrs attrs)
{
uint64_t tmp;
tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
if (mr->subpage) {
trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
} else if (mr == &io_mem_notdirty) {
/* Accesses to code which has previously been translated into a TB show
* up in the MMIO path, as accesses to the io_mem_notdirty
* MemoryRegion. */
trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
}
*value |= (tmp & mask) << shift;
return MEMTX_OK;
}
static MemTxResult memory_region_read_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
unsigned size,
unsigned shift,
uint64_t mask,
MemTxAttrs attrs)
{
uint64_t tmp;
tmp = mr->ops->read(mr->opaque, addr, size);
if (mr->subpage) {
trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
} else if (mr == &io_mem_notdirty) {
/* Accesses to code which has previously been translated into a TB show
* up in the MMIO path, as accesses to the io_mem_notdirty
* MemoryRegion. */
trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
}
*value |= (tmp & mask) << shift;
return MEMTX_OK;
}
static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
unsigned size,
unsigned shift,
uint64_t mask,
MemTxAttrs attrs)
{
uint64_t tmp = 0;
MemTxResult r;
r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs);
if (mr->subpage) {
trace_memory_region_subpage_read(get_cpu_index(), mr, addr, tmp, size);
} else if (mr == &io_mem_notdirty) {
/* Accesses to code which has previously been translated into a TB show
* up in the MMIO path, as accesses to the io_mem_notdirty
* MemoryRegion. */
trace_memory_region_tb_read(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
trace_memory_region_ops_read(get_cpu_index(), mr, abs_addr, tmp, size);
}
*value |= (tmp & mask) << shift;
return r;
}
static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
unsigned size,
unsigned shift,
uint64_t mask,
MemTxAttrs attrs)
{
uint64_t tmp;
tmp = (*value >> shift) & mask;
if (mr->subpage) {
trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
} else if (mr == &io_mem_notdirty) {
/* Accesses to code which has previously been translated into a TB show
* up in the MMIO path, as accesses to the io_mem_notdirty
* MemoryRegion. */
trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size);
}
mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
return MEMTX_OK;
}
static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
unsigned size,
unsigned shift,
uint64_t mask,
MemTxAttrs attrs)
{
uint64_t tmp;
tmp = (*value >> shift) & mask;
if (mr->subpage) {
trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
} else if (mr == &io_mem_notdirty) {
/* Accesses to code which has previously been translated into a TB show
* up in the MMIO path, as accesses to the io_mem_notdirty
* MemoryRegion. */
trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size);
}
mr->ops->write(mr->opaque, addr, tmp, size);
return MEMTX_OK;
}
static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
unsigned size,
unsigned shift,
uint64_t mask,
MemTxAttrs attrs)
{
uint64_t tmp;
tmp = (*value >> shift) & mask;
if (mr->subpage) {
trace_memory_region_subpage_write(get_cpu_index(), mr, addr, tmp, size);
} else if (mr == &io_mem_notdirty) {
/* Accesses to code which has previously been translated into a TB show
* up in the MMIO path, as accesses to the io_mem_notdirty
* MemoryRegion. */
trace_memory_region_tb_write(get_cpu_index(), addr, tmp, size);
} else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
trace_memory_region_ops_write(get_cpu_index(), mr, abs_addr, tmp, size);
}
return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs);
}
static MemTxResult access_with_adjusted_size(hwaddr addr,
uint64_t *value,
unsigned size,
unsigned access_size_min,
unsigned access_size_max,
MemTxResult (*access)(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
unsigned size,
unsigned shift,
uint64_t mask,
MemTxAttrs attrs),
MemoryRegion *mr,
MemTxAttrs attrs)
{
uint64_t access_mask;
unsigned access_size;
unsigned i;
MemTxResult r = MEMTX_OK;
if (!access_size_min) {
access_size_min = 1;
}
if (!access_size_max) {
access_size_max = 4;
}
/* FIXME: support unaligned access? */
access_size = MAX(MIN(size, access_size_max), access_size_min);
access_mask = -1ULL >> (64 - access_size * 8);
if (memory_region_big_endian(mr)) {
for (i = 0; i < size; i += access_size) {
r |= access(mr, addr + i, value, access_size,
(size - access_size - i) * 8, access_mask, attrs);
}
} else {
for (i = 0; i < size; i += access_size) {
r |= access(mr, addr + i, value, access_size, i * 8,
access_mask, attrs);
}
}
return r;
}
static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
{
AddressSpace *as;
while (mr->container) {
mr = mr->container;
}
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
if (mr == as->root) {
return as;
}
}
return NULL;
}
/* Render a memory region into the global view. Ranges in @view obscure
* ranges in @mr.
*/
static void render_memory_region(FlatView *view,
MemoryRegion *mr,
Int128 base,
AddrRange clip,
bool readonly)
{
MemoryRegion *subregion;
unsigned i;
hwaddr offset_in_region;
Int128 remain;
Int128 now;
FlatRange fr;
AddrRange tmp;
if (!mr->enabled) {
return;
}
int128_addto(&base, int128_make64(mr->addr));
readonly |= mr->readonly;
tmp = addrrange_make(base, mr->size);
if (!addrrange_intersects(tmp, clip)) {
return;
}
clip = addrrange_intersection(tmp, clip);
if (mr->alias) {
int128_subfrom(&base, int128_make64(mr->alias->addr));
int128_subfrom(&base, int128_make64(mr->alias_offset));
render_memory_region(view, mr->alias, base, clip, readonly);
return;
}
/* Render subregions in priority order. */
QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
render_memory_region(view, subregion, base, clip, readonly);
}
if (!mr->terminates) {
return;
}
offset_in_region = int128_get64(int128_sub(clip.start, base));
base = clip.start;
remain = clip.size;
fr.mr = mr;
fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
fr.romd_mode = mr->romd_mode;
fr.readonly = readonly;
/* Render the region itself into any gaps left by the current view. */
for (i = 0; i < view->nr && int128_nz(remain); ++i) {
if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
continue;
}
if (int128_lt(base, view->ranges[i].addr.start)) {
now = int128_min(remain,
int128_sub(view->ranges[i].addr.start, base));
fr.offset_in_region = offset_in_region;
fr.addr = addrrange_make(base, now);
flatview_insert(view, i, &fr);
++i;
int128_addto(&base, now);
offset_in_region += int128_get64(now);
int128_subfrom(&remain, now);
}
now = int128_sub(int128_min(int128_add(base, remain),
addrrange_end(view->ranges[i].addr)),
base);
int128_addto(&base, now);
offset_in_region += int128_get64(now);
int128_subfrom(&remain, now);
}
if (int128_nz(remain)) {
fr.offset_in_region = offset_in_region;
fr.addr = addrrange_make(base, remain);
flatview_insert(view, i, &fr);
}
}
/* Render a memory topology into a list of disjoint absolute ranges. */
static FlatView *generate_memory_topology(MemoryRegion *mr)
{
FlatView *view;
view = g_new(FlatView, 1);
flatview_init(view);
if (mr) {
render_memory_region(view, mr, int128_zero(),
addrrange_make(int128_zero(), int128_2_64()), false);
}
flatview_simplify(view);
return view;
}
static void address_space_add_del_ioeventfds(AddressSpace *as,
MemoryRegionIoeventfd *fds_new,
unsigned fds_new_nb,
MemoryRegionIoeventfd *fds_old,
unsigned fds_old_nb)
{
unsigned iold, inew;
MemoryRegionIoeventfd *fd;
MemoryRegionSection section;
/* Generate a symmetric difference of the old and new fd sets, adding
* and deleting as necessary.
*/
iold = inew = 0;
while (iold < fds_old_nb || inew < fds_new_nb) {
if (iold < fds_old_nb
&& (inew == fds_new_nb
|| memory_region_ioeventfd_before(fds_old[iold],
fds_new[inew]))) {
fd = &fds_old[iold];
section = (MemoryRegionSection) {
.address_space = as,
.offset_within_address_space = int128_get64(fd->addr.start),
.size = fd->addr.size,
};
MEMORY_LISTENER_CALL(as, eventfd_del, Forward, &section,
fd->match_data, fd->data, fd->e);
++iold;
} else if (inew < fds_new_nb
&& (iold == fds_old_nb
|| memory_region_ioeventfd_before(fds_new[inew],
fds_old[iold]))) {
fd = &fds_new[inew];
section = (MemoryRegionSection) {
.address_space = as,
.offset_within_address_space = int128_get64(fd->addr.start),
.size = fd->addr.size,
};
MEMORY_LISTENER_CALL(as, eventfd_add, Reverse, &section,
fd->match_data, fd->data, fd->e);
++inew;
} else {
++iold;
++inew;
}
}
}
static FlatView *address_space_get_flatview(AddressSpace *as)
{
FlatView *view;
rcu_read_lock();
view = atomic_rcu_read(&as->current_map);
flatview_ref(view);
rcu_read_unlock();
return view;
}
static void address_space_update_ioeventfds(AddressSpace *as)
{
FlatView *view;
FlatRange *fr;
unsigned ioeventfd_nb = 0;
MemoryRegionIoeventfd *ioeventfds = NULL;
AddrRange tmp;
unsigned i;
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
int128_sub(fr->addr.start,
int128_make64(fr->offset_in_region)));
if (addrrange_intersects(fr->addr, tmp)) {
++ioeventfd_nb;
ioeventfds = g_realloc(ioeventfds,
ioeventfd_nb * sizeof(*ioeventfds));
ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i];
ioeventfds[ioeventfd_nb-1].addr = tmp;
}
}
}
address_space_add_del_ioeventfds(as, ioeventfds, ioeventfd_nb,
as->ioeventfds, as->ioeventfd_nb);
g_free(as->ioeventfds);
as->ioeventfds = ioeventfds;
as->ioeventfd_nb = ioeventfd_nb;
flatview_unref(view);
}
static void address_space_update_topology_pass(AddressSpace *as,
const FlatView *old_view,
const FlatView *new_view,
bool adding)
{
unsigned iold, inew;
FlatRange *frold, *frnew;
/* Generate a symmetric difference of the old and new memory maps.
* Kill ranges in the old map, and instantiate ranges in the new map.
*/
iold = inew = 0;
while (iold < old_view->nr || inew < new_view->nr) {
if (iold < old_view->nr) {
frold = &old_view->ranges[iold];
} else {
frold = NULL;
}
if (inew < new_view->nr) {
frnew = &new_view->ranges[inew];
} else {
frnew = NULL;
}
if (frold
&& (!frnew
|| int128_lt(frold->addr.start, frnew->addr.start)
|| (int128_eq(frold->addr.start, frnew->addr.start)
&& !flatrange_equal(frold, frnew)))) {
/* In old but not in new, or in both but attributes changed. */
if (!adding) {
MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
}
++iold;
} else if (frold && frnew && flatrange_equal(frold, frnew)) {
/* In both and unchanged (except logging may have changed) */
if (adding) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
if (frnew->dirty_log_mask & ~frold->dirty_log_mask) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start,
frold->dirty_log_mask,
frnew->dirty_log_mask);
}
if (frold->dirty_log_mask & ~frnew->dirty_log_mask) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop,
frold->dirty_log_mask,
frnew->dirty_log_mask);
}
}
++iold;
++inew;
} else {
/* In new */
if (adding) {
MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add);
}
++inew;
}
}
}
static void address_space_update_topology(AddressSpace *as)
{
FlatView *old_view = address_space_get_flatview(as);
FlatView *new_view = generate_memory_topology(as->root);
address_space_update_topology_pass(as, old_view, new_view, false);
address_space_update_topology_pass(as, old_view, new_view, true);
/* Writes are protected by the BQL. */
atomic_rcu_set(&as->current_map, new_view);
call_rcu(old_view, flatview_unref, rcu);
/* Note that all the old MemoryRegions are still alive up to this
* point. This relieves most MemoryListeners from the need to
* ref/unref the MemoryRegions they get---unless they use them
* outside the iothread mutex, in which case precise reference
* counting is necessary.
*/
flatview_unref(old_view);
address_space_update_ioeventfds(as);
}
void memory_region_transaction_begin(void)
{
qemu_flush_coalesced_mmio_buffer();
++memory_region_transaction_depth;
}
static void memory_region_clear_pending(void)
{
memory_region_update_pending = false;
ioeventfd_update_pending = false;
}
void memory_region_transaction_commit(void)
{
AddressSpace *as;
assert(memory_region_transaction_depth);
--memory_region_transaction_depth;
if (!memory_region_transaction_depth) {
if (memory_region_update_pending) {
MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
address_space_update_topology(as);
}
MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
} else if (ioeventfd_update_pending) {
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
address_space_update_ioeventfds(as);
}
}
memory_region_clear_pending();
}
}
static void memory_region_destructor_none(MemoryRegion *mr)
{
}
static void memory_region_destructor_ram(MemoryRegion *mr)
{
qemu_ram_free(mr->ram_block);
}
static bool memory_region_need_escape(char c)
{
return c == '/' || c == '[' || c == '\\' || c == ']';
}
static char *memory_region_escape_name(const char *name)
{
const char *p;
char *escaped, *q;
uint8_t c;
size_t bytes = 0;
for (p = name; *p; p++) {
bytes += memory_region_need_escape(*p) ? 4 : 1;
}
if (bytes == p - name) {
return g_memdup(name, bytes + 1);
}
escaped = g_malloc(bytes + 1);
for (p = name, q = escaped; *p; p++) {
c = *p;
if (unlikely(memory_region_need_escape(c))) {
*q++ = '\\';
*q++ = 'x';
*q++ = "0123456789abcdef"[c >> 4];
c = "0123456789abcdef"[c & 15];
}
*q++ = c;
}
*q = 0;
return escaped;
}
void memory_region_init(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size)
{
object_initialize(mr, sizeof(*mr), TYPE_MEMORY_REGION);
mr->size = int128_make64(size);
if (size == UINT64_MAX) {
mr->size = int128_2_64();
}
mr->name = g_strdup(name);
mr->owner = owner;
mr->ram_block = NULL;
if (name) {
char *escaped_name = memory_region_escape_name(name);
char *name_array = g_strdup_printf("%s[*]", escaped_name);
if (!owner) {
owner = container_get(qdev_get_machine(), "/unattached");
}
object_property_add_child(owner, name_array, OBJECT(mr), &error_abort);
object_unref(OBJECT(mr));
g_free(name_array);
g_free(escaped_name);
}
}
static void memory_region_get_addr(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
MemoryRegion *mr = MEMORY_REGION(obj);
uint64_t value = mr->addr;
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:54 +03:00
visit_type_uint64(v, name, &value, errp);
}
static void memory_region_get_container(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
MemoryRegion *mr = MEMORY_REGION(obj);
gchar *path = (gchar *)"";
if (mr->container) {
path = object_get_canonical_path(OBJECT(mr->container));
}
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:54 +03:00
visit_type_str(v, name, &path, errp);
if (mr->container) {
g_free(path);
}
}
static Object *memory_region_resolve_container(Object *obj, void *opaque,
const char *part)
{
MemoryRegion *mr = MEMORY_REGION(obj);
return OBJECT(mr->container);
}
static void memory_region_get_priority(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
MemoryRegion *mr = MEMORY_REGION(obj);
int32_t value = mr->priority;
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:54 +03:00
visit_type_int32(v, name, &value, errp);
}
static void memory_region_get_size(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
MemoryRegion *mr = MEMORY_REGION(obj);
uint64_t value = memory_region_size(mr);
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:54 +03:00
visit_type_uint64(v, name, &value, errp);
}
static void memory_region_initfn(Object *obj)
{
MemoryRegion *mr = MEMORY_REGION(obj);
ObjectProperty *op;
mr->ops = &unassigned_mem_ops;
mr->enabled = true;
mr->romd_mode = true;
mr->global_locking = true;
mr->destructor = memory_region_destructor_none;
QTAILQ_INIT(&mr->subregions);
QTAILQ_INIT(&mr->coalesced);
op = object_property_add(OBJECT(mr), "container",
"link<" TYPE_MEMORY_REGION ">",
memory_region_get_container,
NULL, /* memory_region_set_container */
NULL, NULL, &error_abort);
op->resolve = memory_region_resolve_container;
object_property_add(OBJECT(mr), "addr", "uint64",
memory_region_get_addr,
NULL, /* memory_region_set_addr */
NULL, NULL, &error_abort);
object_property_add(OBJECT(mr), "priority", "uint32",
memory_region_get_priority,
NULL, /* memory_region_set_priority */
NULL, NULL, &error_abort);
object_property_add(OBJECT(mr), "size", "uint64",
memory_region_get_size,
NULL, /* memory_region_set_size, */
NULL, NULL, &error_abort);
}
static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
unsigned size)
{
#ifdef DEBUG_UNASSIGNED
printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
if (current_cpu != NULL) {
cpu_unassigned_access(current_cpu, addr, false, false, 0, size);
}
return 0;
}
static void unassigned_mem_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
if (current_cpu != NULL) {
cpu_unassigned_access(current_cpu, addr, true, false, 0, size);
}
}
static bool unassigned_mem_accepts(void *opaque, hwaddr addr,
unsigned size, bool is_write)
{
return false;
}
const MemoryRegionOps unassigned_mem_ops = {
.valid.accepts = unassigned_mem_accepts,
.endianness = DEVICE_NATIVE_ENDIAN,
};
memory: Don't use memcpy for ram_device regions With a vfio assigned device we lay down a base MemoryRegion registered as an IO region, giving us read & write accessors. If the region supports mmap, we lay down a higher priority sub-region MemoryRegion on top of the base layer initialized as a RAM device pointer to the mmap. Finally, if we have any quirks for the device (ie. address ranges that need additional virtualization support), we put another IO sub-region on top of the mmap MemoryRegion. When this is flattened, we now potentially have sub-page mmap MemoryRegions exposed which cannot be directly mapped through KVM. This is as expected, but a subtle detail of this is that we end up with two different access mechanisms through QEMU. If we disable the mmap MemoryRegion, we make use of the IO MemoryRegion and service accesses using pread and pwrite to the vfio device file descriptor. If the mmap MemoryRegion is enabled and results in one of these sub-page gaps, QEMU handles the access as RAM, using memcpy to the mmap. Using either pread/pwrite or the mmap directly should be correct, but using memcpy causes us problems. I expect that not only does memcpy not necessarily honor the original width and alignment in performing a copy, but it potentially also uses processor instructions not intended for MMIO spaces. It turns out that this has been a problem for Realtek NIC assignment, which has such a quirk that creates a sub-page mmap MemoryRegion access. To resolve this, we disable memory_access_is_direct() for ram_device regions since QEMU assumes that it can use memcpy for those regions. Instead we access through MemoryRegionOps, which replaces the memcpy with simple de-references of standard sizes to the host memory. With this patch we attempt to provide unrestricted access to the RAM device, allowing byte through qword access as well as unaligned access. The assumption here is that accesses initiated by the VM are driven by a device specific driver, which knows the device capabilities. If unaligned accesses are not supported by the device, we don't want them to work in a VM by performing multiple aligned accesses to compose the unaligned access. A down-side of this philosophy is that the xp command from the monitor attempts to use the largest available access weidth, unaware of the underlying device. Using memcpy had this same restriction, but at least now an operator can dump individual registers, even if blocks of device memory may result in access widths beyond the capabilities of a given device (RTL NICs only support up to dword). Reported-by: Thorsten Kohfeldt <thorsten.kohfeldt@gmx.de> Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-10-31 18:53:03 +03:00
static uint64_t memory_region_ram_device_read(void *opaque,
hwaddr addr, unsigned size)
{
MemoryRegion *mr = opaque;
uint64_t data = (uint64_t)~0;
switch (size) {
case 1:
data = *(uint8_t *)(mr->ram_block->host + addr);
break;
case 2:
data = *(uint16_t *)(mr->ram_block->host + addr);
break;
case 4:
data = *(uint32_t *)(mr->ram_block->host + addr);
break;
case 8:
data = *(uint64_t *)(mr->ram_block->host + addr);
break;
}
trace_memory_region_ram_device_read(get_cpu_index(), mr, addr, data, size);
return data;
}
static void memory_region_ram_device_write(void *opaque, hwaddr addr,
uint64_t data, unsigned size)
{
MemoryRegion *mr = opaque;
trace_memory_region_ram_device_write(get_cpu_index(), mr, addr, data, size);
switch (size) {
case 1:
*(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data;
break;
case 2:
*(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data;
break;
case 4:
*(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data;
break;
case 8:
*(uint64_t *)(mr->ram_block->host + addr) = data;
break;
}
}
static const MemoryRegionOps ram_device_mem_ops = {
.read = memory_region_ram_device_read,
.write = memory_region_ram_device_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.valid = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = true,
},
.impl = {
.min_access_size = 1,
.max_access_size = 8,
.unaligned = true,
},
};
bool memory_region_access_valid(MemoryRegion *mr,
hwaddr addr,
unsigned size,
bool is_write)
{
int access_size_min, access_size_max;
int access_size, i;
if (!mr->ops->valid.unaligned && (addr & (size - 1))) {
return false;
}
if (!mr->ops->valid.accepts) {
return true;
}
access_size_min = mr->ops->valid.min_access_size;
if (!mr->ops->valid.min_access_size) {
access_size_min = 1;
}
access_size_max = mr->ops->valid.max_access_size;
if (!mr->ops->valid.max_access_size) {
access_size_max = 4;
}
access_size = MAX(MIN(size, access_size_max), access_size_min);
for (i = 0; i < size; i += access_size) {
if (!mr->ops->valid.accepts(mr->opaque, addr + i, access_size,
is_write)) {
return false;
}
}
return true;
}
static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
hwaddr addr,
uint64_t *pval,
unsigned size,
MemTxAttrs attrs)
{
*pval = 0;
if (mr->ops->read) {
return access_with_adjusted_size(addr, pval, size,
mr->ops->impl.min_access_size,
mr->ops->impl.max_access_size,
memory_region_read_accessor,
mr, attrs);
} else if (mr->ops->read_with_attrs) {
return access_with_adjusted_size(addr, pval, size,
mr->ops->impl.min_access_size,
mr->ops->impl.max_access_size,
memory_region_read_with_attrs_accessor,
mr, attrs);
} else {
return access_with_adjusted_size(addr, pval, size, 1, 4,
memory_region_oldmmio_read_accessor,
mr, attrs);
}
}
MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
hwaddr addr,
uint64_t *pval,
unsigned size,
MemTxAttrs attrs)
{
MemTxResult r;
if (!memory_region_access_valid(mr, addr, size, false)) {
*pval = unassigned_mem_read(mr, addr, size);
return MEMTX_DECODE_ERROR;
}
r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
adjust_endianness(mr, pval, size);
return r;
}
/* Return true if an eventfd was signalled */
static bool memory_region_dispatch_write_eventfds(MemoryRegion *mr,
hwaddr addr,
uint64_t data,
unsigned size,
MemTxAttrs attrs)
{
MemoryRegionIoeventfd ioeventfd = {
.addr = addrrange_make(int128_make64(addr), int128_make64(size)),
.data = data,
};
unsigned i;
for (i = 0; i < mr->ioeventfd_nb; i++) {
ioeventfd.match_data = mr->ioeventfds[i].match_data;
ioeventfd.e = mr->ioeventfds[i].e;
if (memory_region_ioeventfd_equal(ioeventfd, mr->ioeventfds[i])) {
event_notifier_set(ioeventfd.e);
return true;
}
}
return false;
}
MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
hwaddr addr,
uint64_t data,
unsigned size,
MemTxAttrs attrs)
{
if (!memory_region_access_valid(mr, addr, size, true)) {
unassigned_mem_write(mr, addr, data, size);
return MEMTX_DECODE_ERROR;
}
adjust_endianness(mr, &data, size);
if ((!kvm_eventfds_enabled()) &&
memory_region_dispatch_write_eventfds(mr, addr, data, size, attrs)) {
return MEMTX_OK;
}
if (mr->ops->write) {
return access_with_adjusted_size(addr, &data, size,
mr->ops->impl.min_access_size,
mr->ops->impl.max_access_size,
memory_region_write_accessor, mr,
attrs);
} else if (mr->ops->write_with_attrs) {
return
access_with_adjusted_size(addr, &data, size,
mr->ops->impl.min_access_size,
mr->ops->impl.max_access_size,
memory_region_write_with_attrs_accessor,
mr, attrs);
} else {
return access_with_adjusted_size(addr, &data, size, 1, 4,
memory_region_oldmmio_write_accessor,
mr, attrs);
}
}
void memory_region_init_io(MemoryRegion *mr,
Object *owner,
const MemoryRegionOps *ops,
void *opaque,
const char *name,
uint64_t size)
{
memory_region_init(mr, owner, name, size);
mr->ops = ops ? ops : &unassigned_mem_ops;
mr->opaque = opaque;
mr->terminates = true;
}
void memory_region_init_ram(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
Error **errp)
{
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->ram_block = qemu_ram_alloc(size, mr, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
void memory_region_init_resizeable_ram(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
uint64_t max_size,
void (*resized)(const char*,
uint64_t length,
void *host),
Error **errp)
{
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->ram_block = qemu_ram_alloc_resizeable(size, max_size, resized,
mr, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
#ifdef __linux__
void memory_region_init_ram_from_file(MemoryRegion *mr,
struct Object *owner,
const char *name,
uint64_t size,
bool share,
const char *path,
Error **errp)
{
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
#endif
void memory_region_init_ram_ptr(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
void *ptr)
{
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
/* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. */
assert(ptr != NULL);
mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal);
}
void memory_region_init_ram_device_ptr(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
void *ptr)
{
memory_region_init_ram_ptr(mr, owner, name, size, ptr);
mr->ram_device = true;
memory: Don't use memcpy for ram_device regions With a vfio assigned device we lay down a base MemoryRegion registered as an IO region, giving us read & write accessors. If the region supports mmap, we lay down a higher priority sub-region MemoryRegion on top of the base layer initialized as a RAM device pointer to the mmap. Finally, if we have any quirks for the device (ie. address ranges that need additional virtualization support), we put another IO sub-region on top of the mmap MemoryRegion. When this is flattened, we now potentially have sub-page mmap MemoryRegions exposed which cannot be directly mapped through KVM. This is as expected, but a subtle detail of this is that we end up with two different access mechanisms through QEMU. If we disable the mmap MemoryRegion, we make use of the IO MemoryRegion and service accesses using pread and pwrite to the vfio device file descriptor. If the mmap MemoryRegion is enabled and results in one of these sub-page gaps, QEMU handles the access as RAM, using memcpy to the mmap. Using either pread/pwrite or the mmap directly should be correct, but using memcpy causes us problems. I expect that not only does memcpy not necessarily honor the original width and alignment in performing a copy, but it potentially also uses processor instructions not intended for MMIO spaces. It turns out that this has been a problem for Realtek NIC assignment, which has such a quirk that creates a sub-page mmap MemoryRegion access. To resolve this, we disable memory_access_is_direct() for ram_device regions since QEMU assumes that it can use memcpy for those regions. Instead we access through MemoryRegionOps, which replaces the memcpy with simple de-references of standard sizes to the host memory. With this patch we attempt to provide unrestricted access to the RAM device, allowing byte through qword access as well as unaligned access. The assumption here is that accesses initiated by the VM are driven by a device specific driver, which knows the device capabilities. If unaligned accesses are not supported by the device, we don't want them to work in a VM by performing multiple aligned accesses to compose the unaligned access. A down-side of this philosophy is that the xp command from the monitor attempts to use the largest available access weidth, unaware of the underlying device. Using memcpy had this same restriction, but at least now an operator can dump individual registers, even if blocks of device memory may result in access widths beyond the capabilities of a given device (RTL NICs only support up to dword). Reported-by: Thorsten Kohfeldt <thorsten.kohfeldt@gmx.de> Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
2016-10-31 18:53:03 +03:00
mr->ops = &ram_device_mem_ops;
mr->opaque = mr;
}
void memory_region_init_alias(MemoryRegion *mr,
Object *owner,
const char *name,
MemoryRegion *orig,
hwaddr offset,
uint64_t size)
{
memory_region_init(mr, owner, name, size);
mr->alias = orig;
mr->alias_offset = offset;
}
void memory_region_init_rom(MemoryRegion *mr,
struct Object *owner,
const char *name,
uint64_t size,
Error **errp)
{
memory_region_init(mr, owner, name, size);
mr->ram = true;
mr->readonly = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->ram_block = qemu_ram_alloc(size, mr, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
void memory_region_init_rom_device(MemoryRegion *mr,
Object *owner,
const MemoryRegionOps *ops,
void *opaque,
const char *name,
uint64_t size,
Error **errp)
{
assert(ops);
memory_region_init(mr, owner, name, size);
mr->ops = ops;
mr->opaque = opaque;
mr->terminates = true;
mr->rom_device = true;
mr->destructor = memory_region_destructor_ram;
mr->ram_block = qemu_ram_alloc(size, mr, errp);
}
void memory_region_init_iommu(MemoryRegion *mr,
Object *owner,
const MemoryRegionIOMMUOps *ops,
const char *name,
uint64_t size)
{
memory_region_init(mr, owner, name, size);
mr->iommu_ops = ops,
mr->terminates = true; /* then re-forwards */
QLIST_INIT(&mr->iommu_notify);
mr->iommu_notify_flags = IOMMU_NOTIFIER_NONE;
}
static void memory_region_finalize(Object *obj)
{
MemoryRegion *mr = MEMORY_REGION(obj);
assert(!mr->container);
/* We know the region is not visible in any address space (it
* does not have a container and cannot be a root either because
* it has no references, so we can blindly clear mr->enabled.
* memory_region_set_enabled instead could trigger a transaction
* and cause an infinite loop.
*/
mr->enabled = false;
memory_region_transaction_begin();
while (!QTAILQ_EMPTY(&mr->subregions)) {
MemoryRegion *subregion = QTAILQ_FIRST(&mr->subregions);
memory_region_del_subregion(mr, subregion);
}
memory_region_transaction_commit();
mr->destructor(mr);
memory_region_clear_coalescing(mr);
g_free((char *)mr->name);
g_free(mr->ioeventfds);
}
Object *memory_region_owner(MemoryRegion *mr)
{
Object *obj = OBJECT(mr);
return obj->parent;
}
void memory_region_ref(MemoryRegion *mr)
{
/* MMIO callbacks most likely will access data that belongs
* to the owner, hence the need to ref/unref the owner whenever
* the memory region is in use.
*
* The memory region is a child of its owner. As long as the
* owner doesn't call unparent itself on the memory region,
* ref-ing the owner will also keep the memory region alive.
* Memory regions without an owner are supposed to never go away;
* we do not ref/unref them because it slows down DMA sensibly.
*/
if (mr && mr->owner) {
object_ref(mr->owner);
}
}
void memory_region_unref(MemoryRegion *mr)
{
if (mr && mr->owner) {
object_unref(mr->owner);
}
}
uint64_t memory_region_size(MemoryRegion *mr)
{
if (int128_eq(mr->size, int128_2_64())) {
return UINT64_MAX;
}
return int128_get64(mr->size);
}
const char *memory_region_name(const MemoryRegion *mr)
{
if (!mr->name) {
((MemoryRegion *)mr)->name =
object_get_canonical_path_component(OBJECT(mr));
}
return mr->name;
}
bool memory_region_is_ram_device(MemoryRegion *mr)
{
return mr->ram_device;
}
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
{
uint8_t mask = mr->dirty_log_mask;
if (global_dirty_log && mr->ram_block) {
mask |= (1 << DIRTY_MEMORY_MIGRATION);
}
return mask;
}
bool memory_region_is_logging(MemoryRegion *mr, uint8_t client)
{
return memory_region_get_dirty_log_mask(mr) & (1 << client);
}
static void memory_region_update_iommu_notify_flags(MemoryRegion *mr)
{
IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE;
IOMMUNotifier *iommu_notifier;
QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
flags |= iommu_notifier->notifier_flags;
}
if (flags != mr->iommu_notify_flags &&
mr->iommu_ops->notify_flag_changed) {
mr->iommu_ops->notify_flag_changed(mr, mr->iommu_notify_flags,
flags);
}
mr->iommu_notify_flags = flags;
}
void memory_region_register_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n)
{
if (mr->alias) {
memory_region_register_iommu_notifier(mr->alias, n);
return;
}
/* We need to register for at least one bitfield */
assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
memory_region_update_iommu_notify_flags(mr);
}
uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr)
memory: Allow replay of IOMMU mapping notifications When we have guest visible IOMMUs, we allow notifiers to be registered which will be informed of all changes to IOMMU mappings. This is used by vfio to keep the host IOMMU mappings in sync with guest IOMMU mappings. However, unlike with a memory region listener, an iommu notifier won't be told about any mappings which already exist in the (guest) IOMMU at the time it is registered. This can cause problems if hotplugging a VFIO device onto a guest bus which had existing guest IOMMU mappings, but didn't previously have an VFIO devices (and hence no host IOMMU mappings). This adds a memory_region_iommu_replay() function to handle this case. It replays any existing mappings in an IOMMU memory region to a specified notifier. Because the IOMMU memory region doesn't internally remember the granularity of the guest IOMMU it has a small hack where the caller must specify a granularity at which to replay mappings. If there are finer mappings in the guest IOMMU these will be reported in the iotlb structures passed to the notifier which it must handle (probably causing it to flag an error). This isn't new - the VFIO iommu notifier must already handle notifications about guest IOMMU mappings too short for it to represent in the host IOMMU. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Laurent Vivier <lvivier@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2015-09-30 05:13:55 +03:00
{
assert(memory_region_is_iommu(mr));
if (mr->iommu_ops && mr->iommu_ops->get_min_page_size) {
return mr->iommu_ops->get_min_page_size(mr);
}
return TARGET_PAGE_SIZE;
}
void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
bool is_write)
{
hwaddr addr, granularity;
memory: Allow replay of IOMMU mapping notifications When we have guest visible IOMMUs, we allow notifiers to be registered which will be informed of all changes to IOMMU mappings. This is used by vfio to keep the host IOMMU mappings in sync with guest IOMMU mappings. However, unlike with a memory region listener, an iommu notifier won't be told about any mappings which already exist in the (guest) IOMMU at the time it is registered. This can cause problems if hotplugging a VFIO device onto a guest bus which had existing guest IOMMU mappings, but didn't previously have an VFIO devices (and hence no host IOMMU mappings). This adds a memory_region_iommu_replay() function to handle this case. It replays any existing mappings in an IOMMU memory region to a specified notifier. Because the IOMMU memory region doesn't internally remember the granularity of the guest IOMMU it has a small hack where the caller must specify a granularity at which to replay mappings. If there are finer mappings in the guest IOMMU these will be reported in the iotlb structures passed to the notifier which it must handle (probably causing it to flag an error). This isn't new - the VFIO iommu notifier must already handle notifications about guest IOMMU mappings too short for it to represent in the host IOMMU. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Laurent Vivier <lvivier@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2015-09-30 05:13:55 +03:00
IOMMUTLBEntry iotlb;
granularity = memory_region_iommu_get_min_page_size(mr);
memory: Allow replay of IOMMU mapping notifications When we have guest visible IOMMUs, we allow notifiers to be registered which will be informed of all changes to IOMMU mappings. This is used by vfio to keep the host IOMMU mappings in sync with guest IOMMU mappings. However, unlike with a memory region listener, an iommu notifier won't be told about any mappings which already exist in the (guest) IOMMU at the time it is registered. This can cause problems if hotplugging a VFIO device onto a guest bus which had existing guest IOMMU mappings, but didn't previously have an VFIO devices (and hence no host IOMMU mappings). This adds a memory_region_iommu_replay() function to handle this case. It replays any existing mappings in an IOMMU memory region to a specified notifier. Because the IOMMU memory region doesn't internally remember the granularity of the guest IOMMU it has a small hack where the caller must specify a granularity at which to replay mappings. If there are finer mappings in the guest IOMMU these will be reported in the iotlb structures passed to the notifier which it must handle (probably causing it to flag an error). This isn't new - the VFIO iommu notifier must already handle notifications about guest IOMMU mappings too short for it to represent in the host IOMMU. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Laurent Vivier <lvivier@redhat.com> Acked-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2015-09-30 05:13:55 +03:00
for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
iotlb = mr->iommu_ops->translate(mr, addr, is_write);
if (iotlb.perm != IOMMU_NONE) {
n->notify(n, &iotlb);
}
/* if (2^64 - MR size) < granularity, it's possible to get an
* infinite loop here. This should catch such a wraparound */
if ((addr + granularity) < addr) {
break;
}
}
}
void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n)
{
if (mr->alias) {
memory_region_unregister_iommu_notifier(mr->alias, n);
return;
}
QLIST_REMOVE(n, node);
memory_region_update_iommu_notify_flags(mr);
}
void memory_region_notify_iommu(MemoryRegion *mr,
IOMMUTLBEntry entry)
{
IOMMUNotifier *iommu_notifier;
IOMMUNotifierFlag request_flags;
assert(memory_region_is_iommu(mr));
if (entry.perm & IOMMU_RW) {
request_flags = IOMMU_NOTIFIER_MAP;
} else {
request_flags = IOMMU_NOTIFIER_UNMAP;
}
QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
if (iommu_notifier->notifier_flags & request_flags) {
iommu_notifier->notify(iommu_notifier, &entry);
}
}
}
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
{
uint8_t mask = 1 << client;
uint8_t old_logging;
assert(client == DIRTY_MEMORY_VGA);
old_logging = mr->vga_logging_count;
mr->vga_logging_count += log ? 1 : -1;
if (!!old_logging == !!mr->vga_logging_count) {
return;
}
memory_region_transaction_begin();
mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask);
memory_region_update_pending |= mr->enabled;
memory_region_transaction_commit();
}
bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr size, unsigned client)
{
assert(mr->ram_block);
return cpu_physical_memory_get_dirty(memory_region_get_ram_addr(mr) + addr,
size, client);
}
void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr size)
{
assert(mr->ram_block);
cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
size,
memory_region_get_dirty_log_mask(mr));
}
bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr size, unsigned client)
{
assert(mr->ram_block);
return cpu_physical_memory_test_and_clear_dirty(
memory_region_get_ram_addr(mr) + addr, size, client);
}
void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
{
MemoryListener *listener;
AddressSpace *as;
FlatView *view;
FlatRange *fr;
/* If the same address space has multiple log_sync listeners, we
* visit that address space's FlatView multiple times. But because
* log_sync listeners are rare, it's still cheaper than walking each
* address space once.
*/
QTAILQ_FOREACH(listener, &memory_listeners, link) {
if (!listener->log_sync) {
continue;
}
as = listener->address_space;
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->mr == mr) {
MemoryRegionSection mrs = section_from_flat_range(fr, as);
listener->log_sync(listener, &mrs);
}
}
flatview_unref(view);
}
}
void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
{
if (mr->readonly != readonly) {
memory_region_transaction_begin();
mr->readonly = readonly;
memory_region_update_pending |= mr->enabled;
memory_region_transaction_commit();
}
}
void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
{
if (mr->romd_mode != romd_mode) {
memory_region_transaction_begin();
mr->romd_mode = romd_mode;
memory_region_update_pending |= mr->enabled;
memory_region_transaction_commit();
}
}
void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr size, unsigned client)
{
assert(mr->ram_block);
cpu_physical_memory_test_and_clear_dirty(
memory_region_get_ram_addr(mr) + addr, size, client);
}
int memory_region_get_fd(MemoryRegion *mr)
{
int fd;
rcu_read_lock();
while (mr->alias) {
mr = mr->alias;
}
fd = mr->ram_block->fd;
rcu_read_unlock();
return fd;
}
void memory_region_set_fd(MemoryRegion *mr, int fd)
{
rcu_read_lock();
while (mr->alias) {
mr = mr->alias;
}
mr->ram_block->fd = fd;
rcu_read_unlock();
}
void *memory_region_get_ram_ptr(MemoryRegion *mr)
{
void *ptr;
uint64_t offset = 0;
rcu_read_lock();
while (mr->alias) {
offset += mr->alias_offset;
mr = mr->alias;
}
assert(mr->ram_block);
ptr = qemu_map_ram_ptr(mr->ram_block, offset);
rcu_read_unlock();
return ptr;
}
MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset)
{
RAMBlock *block;
block = qemu_ram_block_from_host(ptr, false, offset);
if (!block) {
return NULL;
}
return block->mr;
}
ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
{
return mr->ram_block ? mr->ram_block->offset : RAM_ADDR_INVALID;
}
void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp)
{
assert(mr->ram_block);
qemu_ram_resize(mr->ram_block, newsize, errp);
}
static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpace *as)
{
FlatView *view;
FlatRange *fr;
CoalescedMemoryRange *cmr;
AddrRange tmp;
MemoryRegionSection section;
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->mr == mr) {
section = (MemoryRegionSection) {
.address_space = as,
.offset_within_address_space = int128_get64(fr->addr.start),
.size = fr->addr.size,
};
MEMORY_LISTENER_CALL(as, coalesced_mmio_del, Reverse, &section,
int128_get64(fr->addr.start),
int128_get64(fr->addr.size));
QTAILQ_FOREACH(cmr, &mr->coalesced, link) {
tmp = addrrange_shift(cmr->addr,
int128_sub(fr->addr.start,
int128_make64(fr->offset_in_region)));
if (!addrrange_intersects(tmp, fr->addr)) {
continue;
}
tmp = addrrange_intersection(tmp, fr->addr);
MEMORY_LISTENER_CALL(as, coalesced_mmio_add, Forward, &section,
int128_get64(tmp.start),
int128_get64(tmp.size));
}
}
}
flatview_unref(view);
}
static void memory_region_update_coalesced_range(MemoryRegion *mr)
{
AddressSpace *as;
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
memory_region_update_coalesced_range_as(mr, as);
}
}
void memory_region_set_coalescing(MemoryRegion *mr)
{
memory_region_clear_coalescing(mr);
memory_region_add_coalescing(mr, 0, int128_get64(mr->size));
}
void memory_region_add_coalescing(MemoryRegion *mr,
hwaddr offset,
uint64_t size)
{
CoalescedMemoryRange *cmr = g_malloc(sizeof(*cmr));
cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size));
QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link);
memory_region_update_coalesced_range(mr);
memory_region_set_flush_coalesced(mr);
}
void memory_region_clear_coalescing(MemoryRegion *mr)
{
CoalescedMemoryRange *cmr;
bool updated = false;
qemu_flush_coalesced_mmio_buffer();
mr->flush_coalesced_mmio = false;
while (!QTAILQ_EMPTY(&mr->coalesced)) {
cmr = QTAILQ_FIRST(&mr->coalesced);
QTAILQ_REMOVE(&mr->coalesced, cmr, link);
g_free(cmr);
updated = true;
}
if (updated) {
memory_region_update_coalesced_range(mr);
}
}
void memory_region_set_flush_coalesced(MemoryRegion *mr)
{
mr->flush_coalesced_mmio = true;
}
void memory_region_clear_flush_coalesced(MemoryRegion *mr)
{
qemu_flush_coalesced_mmio_buffer();
if (QTAILQ_EMPTY(&mr->coalesced)) {
mr->flush_coalesced_mmio = false;
}
}
void memory_region_set_global_locking(MemoryRegion *mr)
{
mr->global_locking = true;
}
void memory_region_clear_global_locking(MemoryRegion *mr)
{
mr->global_locking = false;
}
static bool userspace_eventfd_warning;
void memory_region_add_eventfd(MemoryRegion *mr,
hwaddr addr,
unsigned size,
bool match_data,
uint64_t data,
EventNotifier *e)
{
MemoryRegionIoeventfd mrfd = {
.addr.start = int128_make64(addr),
.addr.size = int128_make64(size),
.match_data = match_data,
.data = data,
.e = e,
};
unsigned i;
if (kvm_enabled() && (!(kvm_eventfds_enabled() ||
userspace_eventfd_warning))) {
userspace_eventfd_warning = true;
error_report("Using eventfd without MMIO binding in KVM. "
"Suboptimal performance expected");
}
if (size) {
adjust_endianness(mr, &mrfd.data, size);
}
memory_region_transaction_begin();
for (i = 0; i < mr->ioeventfd_nb; ++i) {
if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
break;
}
}
++mr->ioeventfd_nb;
mr->ioeventfds = g_realloc(mr->ioeventfds,
sizeof(*mr->ioeventfds) * mr->ioeventfd_nb);
memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i],
sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
mr->ioeventfds[i] = mrfd;
ioeventfd_update_pending |= mr->enabled;
memory_region_transaction_commit();
}
void memory_region_del_eventfd(MemoryRegion *mr,
hwaddr addr,
unsigned size,
bool match_data,
uint64_t data,
EventNotifier *e)
{
MemoryRegionIoeventfd mrfd = {
.addr.start = int128_make64(addr),
.addr.size = int128_make64(size),
.match_data = match_data,
.data = data,
.e = e,
};
unsigned i;
if (size) {
adjust_endianness(mr, &mrfd.data, size);
}
memory_region_transaction_begin();
for (i = 0; i < mr->ioeventfd_nb; ++i) {
if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
break;
}
}
assert(i != mr->ioeventfd_nb);
memmove(&mr->ioeventfds[i], &mr->ioeventfds[i+1],
sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb - (i+1)));
--mr->ioeventfd_nb;
mr->ioeventfds = g_realloc(mr->ioeventfds,
sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
ioeventfd_update_pending |= mr->enabled;
memory_region_transaction_commit();
}
static void memory_region_update_container_subregions(MemoryRegion *subregion)
{
MemoryRegion *mr = subregion->container;
MemoryRegion *other;
memory_region_transaction_begin();
memory_region_ref(subregion);
QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
if (subregion->priority >= other->priority) {
QTAILQ_INSERT_BEFORE(other, subregion, subregions_link);
goto done;
}
}
QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
done:
memory_region_update_pending |= mr->enabled && subregion->enabled;
memory_region_transaction_commit();
}
static void memory_region_add_subregion_common(MemoryRegion *mr,
hwaddr offset,
MemoryRegion *subregion)
{
assert(!subregion->container);
subregion->container = mr;
subregion->addr = offset;
memory_region_update_container_subregions(subregion);
}
void memory_region_add_subregion(MemoryRegion *mr,
hwaddr offset,
MemoryRegion *subregion)
{
subregion->priority = 0;
memory_region_add_subregion_common(mr, offset, subregion);
}
void memory_region_add_subregion_overlap(MemoryRegion *mr,
hwaddr offset,
MemoryRegion *subregion,
int priority)
{
subregion->priority = priority;
memory_region_add_subregion_common(mr, offset, subregion);
}
void memory_region_del_subregion(MemoryRegion *mr,
MemoryRegion *subregion)
{
memory_region_transaction_begin();
assert(subregion->container == mr);
subregion->container = NULL;
QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
memory_region_unref(subregion);
memory_region_update_pending |= mr->enabled && subregion->enabled;
memory_region_transaction_commit();
}
void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
{
if (enabled == mr->enabled) {
return;
}
memory_region_transaction_begin();
mr->enabled = enabled;
memory_region_update_pending = true;
memory_region_transaction_commit();
}
void memory_region_set_size(MemoryRegion *mr, uint64_t size)
{
Int128 s = int128_make64(size);
if (size == UINT64_MAX) {
s = int128_2_64();
}
if (int128_eq(s, mr->size)) {
return;
}
memory_region_transaction_begin();
mr->size = s;
memory_region_update_pending = true;
memory_region_transaction_commit();
}
static void memory_region_readd_subregion(MemoryRegion *mr)
{
MemoryRegion *container = mr->container;
if (container) {
memory_region_transaction_begin();
memory_region_ref(mr);
memory_region_del_subregion(container, mr);
mr->container = container;
memory_region_update_container_subregions(mr);
memory_region_unref(mr);
memory_region_transaction_commit();
}
}
void memory_region_set_address(MemoryRegion *mr, hwaddr addr)
{
if (addr != mr->addr) {
mr->addr = addr;
memory_region_readd_subregion(mr);
}
}
void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset)
{
assert(mr->alias);
if (offset == mr->alias_offset) {
return;
}
memory_region_transaction_begin();
mr->alias_offset = offset;
memory_region_update_pending |= mr->enabled;
memory_region_transaction_commit();
}
uint64_t memory_region_get_alignment(const MemoryRegion *mr)
{
return mr->align;
}
static int cmp_flatrange_addr(const void *addr_, const void *fr_)
{
const AddrRange *addr = addr_;
const FlatRange *fr = fr_;
if (int128_le(addrrange_end(*addr), fr->addr.start)) {
return -1;
} else if (int128_ge(addr->start, addrrange_end(fr->addr))) {
return 1;
}
return 0;
}
static FlatRange *flatview_lookup(FlatView *view, AddrRange addr)
{
return bsearch(&addr, view->ranges, view->nr,
sizeof(FlatRange), cmp_flatrange_addr);
}
bool memory_region_is_mapped(MemoryRegion *mr)
{
return mr->container ? true : false;
}
/* Same as memory_region_find, but it does not add a reference to the
* returned region. It must be called from an RCU critical section.
*/
static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
hwaddr addr, uint64_t size)
{
MemoryRegionSection ret = { .mr = NULL };
MemoryRegion *root;
AddressSpace *as;
AddrRange range;
FlatView *view;
FlatRange *fr;
addr += mr->addr;
for (root = mr; root->container; ) {
root = root->container;
addr += root->addr;
}
as = memory_region_to_address_space(root);
if (!as) {
return ret;
}
range = addrrange_make(int128_make64(addr), int128_make64(size));
view = atomic_rcu_read(&as->current_map);
fr = flatview_lookup(view, range);
if (!fr) {
return ret;
}
while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) {
--fr;
}
ret.mr = fr->mr;
ret.address_space = as;
range = addrrange_intersection(range, fr->addr);
ret.offset_within_region = fr->offset_in_region;
ret.offset_within_region += int128_get64(int128_sub(range.start,
fr->addr.start));
ret.size = range.size;
ret.offset_within_address_space = int128_get64(range.start);
ret.readonly = fr->readonly;
return ret;
}
MemoryRegionSection memory_region_find(MemoryRegion *mr,
hwaddr addr, uint64_t size)
{
MemoryRegionSection ret;
rcu_read_lock();
ret = memory_region_find_rcu(mr, addr, size);
if (ret.mr) {
memory_region_ref(ret.mr);
}
rcu_read_unlock();
return ret;
}
bool memory_region_present(MemoryRegion *container, hwaddr addr)
{
MemoryRegion *mr;
rcu_read_lock();
mr = memory_region_find_rcu(container, addr, 1).mr;
rcu_read_unlock();
return mr && mr != container;
}
void memory_global_dirty_log_sync(void)
{
MemoryListener *listener;
AddressSpace *as;
FlatView *view;
FlatRange *fr;
QTAILQ_FOREACH(listener, &memory_listeners, link) {
if (!listener->log_sync) {
continue;
}
as = listener->address_space;
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
if (fr->dirty_log_mask) {
MemoryRegionSection mrs = section_from_flat_range(fr, as);
listener->log_sync(listener, &mrs);
}
}
flatview_unref(view);
}
}
void memory_global_dirty_log_start(void)
{
global_dirty_log = true;
MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
/* Refresh DIRTY_LOG_MIGRATION bit. */
memory_region_transaction_begin();
memory_region_update_pending = true;
memory_region_transaction_commit();
}
void memory_global_dirty_log_stop(void)
{
global_dirty_log = false;
/* Refresh DIRTY_LOG_MIGRATION bit. */
memory_region_transaction_begin();
memory_region_update_pending = true;
memory_region_transaction_commit();
MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse);
}
static void listener_add_address_space(MemoryListener *listener,
AddressSpace *as)
{
FlatView *view;
FlatRange *fr;
if (listener->begin) {
listener->begin(listener);
}
if (global_dirty_log) {
if (listener->log_global_start) {
listener->log_global_start(listener);
}
}
view = address_space_get_flatview(as);
FOR_EACH_FLAT_RANGE(fr, view) {
MemoryRegionSection section = {
.mr = fr->mr,
.address_space = as,
.offset_within_region = fr->offset_in_region,
.size = fr->addr.size,
.offset_within_address_space = int128_get64(fr->addr.start),
.readonly = fr->readonly,
};
if (fr->dirty_log_mask && listener->log_start) {
listener->log_start(listener, &section, 0, fr->dirty_log_mask);
}
if (listener->region_add) {
listener->region_add(listener, &section);
}
}
if (listener->commit) {
listener->commit(listener);
}
flatview_unref(view);
}
void memory_listener_register(MemoryListener *listener, AddressSpace *as)
{
MemoryListener *other = NULL;
listener->address_space = as;
if (QTAILQ_EMPTY(&memory_listeners)
|| listener->priority >= QTAILQ_LAST(&memory_listeners,
memory_listeners)->priority) {
QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
} else {
QTAILQ_FOREACH(other, &memory_listeners, link) {
if (listener->priority < other->priority) {
break;
}
}
QTAILQ_INSERT_BEFORE(other, listener, link);
}
if (QTAILQ_EMPTY(&as->listeners)
|| listener->priority >= QTAILQ_LAST(&as->listeners,
memory_listeners)->priority) {
QTAILQ_INSERT_TAIL(&as->listeners, listener, link_as);
} else {
QTAILQ_FOREACH(other, &as->listeners, link_as) {
if (listener->priority < other->priority) {
break;
}
}
QTAILQ_INSERT_BEFORE(other, listener, link_as);
}
listener_add_address_space(listener, as);
}
void memory_listener_unregister(MemoryListener *listener)
{
QTAILQ_REMOVE(&memory_listeners, listener, link);
QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
}
void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
{
memory_region_ref(root);
memory_region_transaction_begin();
as->ref_count = 1;
as->root = root;
as->malloced = false;
as->current_map = g_new(FlatView, 1);
flatview_init(as->current_map);
as->ioeventfd_nb = 0;
as->ioeventfds = NULL;
QTAILQ_INIT(&as->listeners);
QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
as->name = g_strdup(name ? name : "anonymous");
address_space_init_dispatch(as);
memory_region_update_pending |= root->enabled;
memory_region_transaction_commit();
}
static void do_address_space_destroy(AddressSpace *as)
{
bool do_free = as->malloced;
address_space_destroy_dispatch(as);
assert(QTAILQ_EMPTY(&as->listeners));
flatview_unref(as->current_map);
g_free(as->name);
g_free(as->ioeventfds);
memory_region_unref(as->root);
if (do_free) {
g_free(as);
}
}
AddressSpace *address_space_init_shareable(MemoryRegion *root, const char *name)
{
AddressSpace *as;
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
if (root == as->root && as->malloced) {
as->ref_count++;
return as;
}
}
as = g_malloc0(sizeof *as);
address_space_init(as, root, name);
as->malloced = true;
return as;
}
void address_space_destroy(AddressSpace *as)
{
MemoryRegion *root = as->root;
as->ref_count--;
if (as->ref_count) {
return;
}
/* Flush out anything from MemoryListeners listening in on this */
memory_region_transaction_begin();
as->root = NULL;
memory_region_transaction_commit();
QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
address_space_unregister(as);
/* At this point, as->dispatch and as->current_map are dummy
* entries that the guest should never use. Wait for the old
* values to expire before freeing the data.
*/
as->root = root;
call_rcu(as, do_address_space_destroy, rcu);
}
typedef struct MemoryRegionList MemoryRegionList;
struct MemoryRegionList {
const MemoryRegion *mr;
QTAILQ_ENTRY(MemoryRegionList) queue;
};
typedef QTAILQ_HEAD(queue, MemoryRegionList) MemoryRegionListHead;
static void mtree_print_mr(fprintf_function mon_printf, void *f,
const MemoryRegion *mr, unsigned int level,
hwaddr base,
MemoryRegionListHead *alias_print_queue)
{
MemoryRegionList *new_ml, *ml, *next_ml;
MemoryRegionListHead submr_print_queue;
const MemoryRegion *submr;
unsigned int i;
if (!mr) {
return;
}
for (i = 0; i < level; i++) {
mon_printf(f, " ");
}
if (mr->alias) {
MemoryRegionList *ml;
bool found = false;
/* check if the alias is already in the queue */
QTAILQ_FOREACH(ml, alias_print_queue, queue) {
if (ml->mr == mr->alias) {
found = true;
}
}
if (!found) {
ml = g_new(MemoryRegionList, 1);
ml->mr = mr->alias;
QTAILQ_INSERT_TAIL(alias_print_queue, ml, queue);
}
mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
" (prio %d, %c%c): alias %s @%s " TARGET_FMT_plx
"-" TARGET_FMT_plx "%s\n",
base + mr->addr,
base + mr->addr
+ (int128_nz(mr->size) ?
(hwaddr)int128_get64(int128_sub(mr->size,
int128_one())) : 0),
mr->priority,
mr->romd_mode ? 'R' : '-',
!mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
: '-',
memory_region_name(mr),
memory_region_name(mr->alias),
mr->alias_offset,
mr->alias_offset
+ (int128_nz(mr->size) ?
(hwaddr)int128_get64(int128_sub(mr->size,
int128_one())) : 0),
mr->enabled ? "" : " [disabled]");
} else {
mon_printf(f,
TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s%s\n",
base + mr->addr,
base + mr->addr
+ (int128_nz(mr->size) ?
(hwaddr)int128_get64(int128_sub(mr->size,
int128_one())) : 0),
mr->priority,
mr->romd_mode ? 'R' : '-',
!mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
: '-',
memory_region_name(mr),
mr->enabled ? "" : " [disabled]");
}
QTAILQ_INIT(&submr_print_queue);
QTAILQ_FOREACH(submr, &mr->subregions, subregions_link) {
new_ml = g_new(MemoryRegionList, 1);
new_ml->mr = submr;
QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
if (new_ml->mr->addr < ml->mr->addr ||
(new_ml->mr->addr == ml->mr->addr &&
new_ml->mr->priority > ml->mr->priority)) {
QTAILQ_INSERT_BEFORE(ml, new_ml, queue);
new_ml = NULL;
break;
}
}
if (new_ml) {
QTAILQ_INSERT_TAIL(&submr_print_queue, new_ml, queue);
}
}
QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr,
alias_print_queue);
}
QTAILQ_FOREACH_SAFE(ml, &submr_print_queue, queue, next_ml) {
g_free(ml);
}
}
void mtree_info(fprintf_function mon_printf, void *f)
{
MemoryRegionListHead ml_head;
MemoryRegionList *ml, *ml2;
AddressSpace *as;
QTAILQ_INIT(&ml_head);
QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
mon_printf(f, "address-space: %s\n", as->name);
mtree_print_mr(mon_printf, f, as->root, 1, 0, &ml_head);
mon_printf(f, "\n");
}
/* print aliased regions */
QTAILQ_FOREACH(ml, &ml_head, queue) {
mon_printf(f, "memory-region: %s\n", memory_region_name(ml->mr));
mtree_print_mr(mon_printf, f, ml->mr, 1, 0, &ml_head);
mon_printf(f, "\n");
}
QTAILQ_FOREACH_SAFE(ml, &ml_head, queue, ml2) {
g_free(ml);
}
}
static const TypeInfo memory_region_info = {
.parent = TYPE_OBJECT,
.name = TYPE_MEMORY_REGION,
.instance_size = sizeof(MemoryRegion),
.instance_init = memory_region_initfn,
.instance_finalize = memory_region_finalize,
};
static void memory_register_types(void)
{
type_register_static(&memory_region_info);
}
type_init(memory_register_types)