2003-05-13 04:25:15 +04:00
|
|
|
/*
|
2020-10-06 10:05:29 +03:00
|
|
|
* RAM allocation and memory access
|
2007-09-17 01:08:06 +04:00
|
|
|
*
|
2003-05-13 04:25:15 +04:00
|
|
|
* Copyright (c) 2003 Fabrice Bellard
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
2020-10-23 15:44:24 +03:00
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2003-05-13 04:25:15 +04:00
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
2009-07-17 00:47:01 +04:00
|
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
2003-05-13 04:25:15 +04:00
|
|
|
*/
|
2019-05-23 17:35:05 +03:00
|
|
|
|
2016-01-26 21:16:56 +03:00
|
|
|
#include "qemu/osdep.h"
|
2022-03-23 18:57:34 +03:00
|
|
|
#include "exec/page-vary.h"
|
include/qemu/osdep.h: Don't include qapi/error.h
Commit 57cb38b included qapi/error.h into qemu/osdep.h to get the
Error typedef. Since then, we've moved to include qemu/osdep.h
everywhere. Its file comment explains: "To avoid getting into
possible circular include dependencies, this file should not include
any other QEMU headers, with the exceptions of config-host.h,
compiler.h, os-posix.h and os-win32.h, all of which are doing a
similar job to this file and are under similar constraints."
qapi/error.h doesn't do a similar job, and it doesn't adhere to
similar constraints: it includes qapi-types.h. That's in excess of
100KiB of crap most .c files don't actually need.
Add the typedef to qemu/typedefs.h, and include that instead of
qapi/error.h. Include qapi/error.h in .c files that need it and don't
get it now. Include qapi-types.h in qom/object.h for uint16List.
Update scripts/clean-includes accordingly. Update it further to match
reality: replace config.h by config-target.h, add sysemu/os-posix.h,
sysemu/os-win32.h. Update the list of includes in the qemu/osdep.h
comment quoted above similarly.
This reduces the number of objects depending on qapi/error.h from "all
of them" to less than a third. Unfortunately, the number depending on
qapi-types.h shrinks only a little. More work is needed for that one.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[Fix compilation without the spice devel packages. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-14 11:01:28 +03:00
|
|
|
#include "qapi/error.h"
|
2003-05-13 04:25:15 +04:00
|
|
|
|
2016-03-20 20:16:19 +03:00
|
|
|
#include "qemu/cutils.h"
|
2020-12-14 17:02:33 +03:00
|
|
|
#include "qemu/cacheflush.h"
|
2022-12-21 16:35:49 +03:00
|
|
|
#include "qemu/hbitmap.h"
|
2022-02-08 23:08:52 +03:00
|
|
|
#include "qemu/madvise.h"
|
2024-05-07 15:12:46 +03:00
|
|
|
#include "qemu/lockable.h"
|
2021-02-04 19:39:23 +03:00
|
|
|
|
|
|
|
#ifdef CONFIG_TCG
|
|
|
|
#include "hw/core/tcg-cpu-ops.h"
|
|
|
|
#endif /* CONFIG_TCG */
|
|
|
|
|
2016-03-15 15:18:37 +03:00
|
|
|
#include "exec/exec-all.h"
|
2023-12-06 22:27:32 +03:00
|
|
|
#include "exec/page-protection.h"
|
2017-04-24 21:50:19 +03:00
|
|
|
#include "exec/target_page.h"
|
2014-06-27 10:40:04 +04:00
|
|
|
#include "hw/qdev-core.h"
|
2017-07-14 05:15:08 +03:00
|
|
|
#include "hw/qdev-properties.h"
|
2015-02-04 18:43:54 +03:00
|
|
|
#include "hw/boards.h"
|
2023-11-11 00:37:20 +03:00
|
|
|
#include "sysemu/xen.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/kvm.h"
|
2019-05-23 17:35:05 +03:00
|
|
|
#include "sysemu/tcg.h"
|
2020-02-20 07:11:09 +03:00
|
|
|
#include "sysemu/qtest.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/timer.h"
|
|
|
|
#include "qemu/config-file.h"
|
2013-09-02 18:57:02 +04:00
|
|
|
#include "qemu/error-report.h"
|
2019-04-17 22:17:56 +03:00
|
|
|
#include "qemu/qemu-print.h"
|
2021-12-15 21:24:21 +03:00
|
|
|
#include "qemu/log.h"
|
2022-02-26 21:07:23 +03:00
|
|
|
#include "qemu/memalign.h"
|
2014-06-27 10:40:04 +04:00
|
|
|
#include "exec/memory.h"
|
2016-03-16 12:24:54 +03:00
|
|
|
#include "exec/ioport.h"
|
2014-06-27 10:40:04 +04:00
|
|
|
#include "sysemu/dma.h"
|
2019-08-12 08:23:55 +03:00
|
|
|
#include "sysemu/hostmem.h"
|
2017-03-07 17:19:08 +03:00
|
|
|
#include "sysemu/hw_accel.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/xen-mapcache.h"
|
2020-02-04 14:20:10 +03:00
|
|
|
#include "trace/trace-root.h"
|
2017-02-24 21:28:32 +03:00
|
|
|
|
2017-02-24 21:28:33 +03:00
|
|
|
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
|
|
|
|
#include <linux/falloc.h>
|
|
|
|
#endif
|
|
|
|
|
2013-09-05 22:41:35 +04:00
|
|
|
#include "qemu/rcu_queue.h"
|
2015-06-18 19:47:22 +03:00
|
|
|
#include "qemu/main-loop.h"
|
2020-12-16 15:27:58 +03:00
|
|
|
#include "exec/translate-all.h"
|
2015-09-17 19:25:07 +03:00
|
|
|
#include "sysemu/replay.h"
|
2012-04-09 20:50:52 +04:00
|
|
|
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/memory-internal.h"
|
2013-10-14 19:13:59 +04:00
|
|
|
#include "exec/ram_addr.h"
|
2011-12-15 17:25:22 +04:00
|
|
|
|
2019-11-21 03:08:41 +03:00
|
|
|
#include "qemu/pmem.h"
|
|
|
|
|
2016-05-12 06:48:12 +03:00
|
|
|
#include "migration/vmstate.h"
|
|
|
|
|
2013-11-11 19:52:07 +04:00
|
|
|
#include "qemu/range.h"
|
2015-09-24 14:41:17 +03:00
|
|
|
#ifndef _WIN32
|
|
|
|
#include "qemu/mmap-alloc.h"
|
|
|
|
#endif
|
2013-11-11 19:52:07 +04:00
|
|
|
|
2017-05-12 07:17:41 +03:00
|
|
|
#include "monitor/monitor.h"
|
|
|
|
|
2020-04-29 11:50:09 +03:00
|
|
|
#ifdef CONFIG_LIBDAXCTL
|
|
|
|
#include <daxctl/libdaxctl.h>
|
|
|
|
#endif
|
|
|
|
|
2007-05-26 21:36:03 +04:00
|
|
|
//#define DEBUG_SUBPAGE
|
2007-03-17 18:17:58 +03:00
|
|
|
|
2013-09-05 22:41:35 +04:00
|
|
|
/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
|
|
|
|
* are protected by the ramlist lock.
|
|
|
|
*/
|
2015-01-21 15:45:24 +03:00
|
|
|
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
|
2011-07-26 15:26:14 +04:00
|
|
|
|
|
|
|
static MemoryRegion *system_memory;
|
2011-08-08 17:09:03 +04:00
|
|
|
static MemoryRegion *system_io;
|
2011-07-26 15:26:14 +04:00
|
|
|
|
2012-10-02 22:13:51 +04:00
|
|
|
AddressSpace address_space_io;
|
|
|
|
AddressSpace address_space_memory;
|
2012-10-02 20:49:28 +04:00
|
|
|
|
2013-05-26 23:55:37 +04:00
|
|
|
static MemoryRegion io_mem_unassigned;
|
2012-02-10 19:00:01 +04:00
|
|
|
|
2013-05-21 14:07:21 +04:00
|
|
|
typedef struct PhysPageEntry PhysPageEntry;

/* One slot of the physical page radix tree: a 6-bit skip and a 26-bit ptr. */
struct PhysPageEntry {
    /*
     * Number of levels to step down to reach the next node (the lookup
     * loop subtracts it from its level counter).  0 for a leaf.
     */
    uint32_t skip : 6;
    /* Index into phys_sections when skip == 0, into phys_map_nodes otherwise. */
    uint32_t ptr : 26;
};

/* All-ones value of the 26-bit ptr field, used to mean "no node here". */
#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Number of address bits covered by the tree. */
#define ADDR_SPACE_BITS 64

/* Every L2 (and L3, etc) page table resolves P_L2_BITS bits of page index. */
#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

/* Levels required to resolve ADDR_SPACE_BITS - TARGET_PAGE_BITS index bits. */
#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

/* A tree node is a table of P_L2_SIZE entries. */
typedef PhysPageEntry Node[P_L2_SIZE];
|
2013-05-29 14:28:21 +04:00
|
|
|
|
2013-12-01 16:02:23 +04:00
|
|
|
typedef struct PhysPageMap {
|
2015-01-21 14:09:14 +03:00
|
|
|
struct rcu_head rcu;
|
|
|
|
|
2013-12-01 16:02:23 +04:00
|
|
|
unsigned sections_nb;
|
|
|
|
unsigned sections_nb_alloc;
|
|
|
|
unsigned nodes_nb;
|
|
|
|
unsigned nodes_nb_alloc;
|
|
|
|
Node *nodes;
|
|
|
|
MemoryRegionSection *sections;
|
|
|
|
} PhysPageMap;
|
|
|
|
|
2013-05-21 14:07:21 +04:00
|
|
|
struct AddressSpaceDispatch {
|
2016-03-01 09:18:24 +03:00
|
|
|
MemoryRegionSection *mru_section;
|
2013-05-21 14:07:21 +04:00
|
|
|
/* This is a multi-level map on the physical address space.
|
|
|
|
* The bottom level has pointers to MemoryRegionSections.
|
|
|
|
*/
|
|
|
|
PhysPageEntry phys_map;
|
2013-12-01 16:02:23 +04:00
|
|
|
PhysPageMap map;
|
2013-05-21 14:07:21 +04:00
|
|
|
};
|
|
|
|
|
2013-05-26 23:46:51 +04:00
|
|
|
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
|
|
|
|
typedef struct subpage_t {
|
|
|
|
MemoryRegion iomem;
|
2017-09-21 11:50:58 +03:00
|
|
|
FlatView *fv;
|
2013-05-26 23:46:51 +04:00
|
|
|
hwaddr base;
|
2016-10-24 18:26:49 +03:00
|
|
|
uint16_t sub_section[];
|
2013-05-26 23:46:51 +04:00
|
|
|
} subpage_t;
|
|
|
|
|
2013-05-29 13:09:17 +04:00
|
|
|
#define PHYS_SECTION_UNASSIGNED 0
|
2012-02-12 20:32:55 +04:00
|
|
|
|
2008-06-08 05:09:01 +04:00
|
|
|
static void io_mem_init(void);
|
2011-07-26 15:26:14 +04:00
|
|
|
static void memory_map_init(void);
|
2018-02-06 20:37:39 +03:00
|
|
|
static void tcg_log_global_after_sync(MemoryListener *listener);
|
2013-12-17 07:06:51 +04:00
|
|
|
static void tcg_commit(MemoryListener *listener);
|
2008-06-08 05:09:01 +04:00
|
|
|
|
2015-10-01 17:29:50 +03:00
|
|
|
/**
|
|
|
|
* CPUAddressSpace: all the information a CPU needs about an AddressSpace
|
|
|
|
* @cpu: the CPU whose AddressSpace this is
|
|
|
|
* @as: the AddressSpace itself
|
|
|
|
* @memory_dispatch: its dispatch pointer (cached, RCU protected)
|
|
|
|
* @tcg_as_listener: listener for tracking changes to the AddressSpace
|
|
|
|
*/
|
2024-05-02 18:14:42 +03:00
|
|
|
typedef struct CPUAddressSpace {
|
2015-10-01 17:29:50 +03:00
|
|
|
CPUState *cpu;
|
|
|
|
AddressSpace *as;
|
|
|
|
struct AddressSpaceDispatch *memory_dispatch;
|
|
|
|
MemoryListener tcg_as_listener;
|
2024-05-02 18:14:42 +03:00
|
|
|
} CPUAddressSpace;
|
2015-10-01 17:29:50 +03:00
|
|
|
|
2017-04-21 12:16:25 +03:00
|
|
|
struct DirtyBitmapSnapshot {
|
|
|
|
ram_addr_t start;
|
|
|
|
ram_addr_t end;
|
|
|
|
unsigned long dirty[];
|
|
|
|
};
|
|
|
|
|
2013-12-01 16:02:23 +04:00
|
|
|
/*
 * Make sure @map can hand out @nodes more nodes without reallocating.
 *
 * The node array grows to at least the size reached by the previous
 * growth (tracked in a function-local static that starts at 16).
 */
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    static unsigned alloc_hint = 16;
    unsigned wanted = map->nodes_nb + nodes;

    if (wanted <= map->nodes_nb_alloc) {
        return;
    }

    map->nodes_nb_alloc = MAX(alloc_hint, wanted);
    map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    alloc_hint = map->nodes_nb_alloc;
}
|
|
|
|
|
2015-05-21 16:12:29 +03:00
|
|
|
/*
 * Take the next free node from @map and initialise all of its entries.
 *
 * @leaf: true to fill the node with leaf entries that refer to
 *        PHYS_SECTION_UNASSIGNED, false to fill it with non-leaf entries
 *        whose child is PHYS_MAP_NODE_NIL.
 *
 * The caller must have reserved room beforehand; running past the
 * allocated node count trips an assertion.
 *
 * Returns the index of the new node in map->nodes.
 */
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    uint32_t node_idx = map->nodes_nb++;
    PhysPageEntry *entries = map->nodes[node_idx];
    PhysPageEntry init;
    unsigned slot;

    assert(node_idx != PHYS_MAP_NODE_NIL);
    assert(node_idx != map->nodes_nb_alloc);

    if (leaf) {
        init.skip = 0;
        init.ptr = PHYS_SECTION_UNASSIGNED;
    } else {
        init.skip = 1;
        init.ptr = PHYS_MAP_NODE_NIL;
    }

    for (slot = 0; slot < P_L2_SIZE; slot++) {
        memcpy(&entries[slot], &init, sizeof(init));
    }
    return node_idx;
}
|
|
|
|
|
2013-12-01 16:02:23 +04:00
|
|
|
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
|
2019-03-21 11:25:50 +03:00
|
|
|
hwaddr *index, uint64_t *nb, uint16_t leaf,
|
2012-02-13 22:21:20 +04:00
|
|
|
int level)
|
2012-02-13 22:12:05 +04:00
|
|
|
{
|
|
|
|
PhysPageEntry *p;
|
2013-11-07 20:14:36 +04:00
|
|
|
hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
|
2005-07-24 16:55:09 +04:00
|
|
|
|
2013-11-11 16:42:43 +04:00
|
|
|
if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
|
2015-05-21 16:12:29 +03:00
|
|
|
lp->ptr = phys_map_node_alloc(map, level == 0);
|
2004-05-21 18:52:29 +04:00
|
|
|
}
|
2015-05-21 16:12:29 +03:00
|
|
|
p = map->nodes[lp->ptr];
|
2013-11-07 20:14:36 +04:00
|
|
|
lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
|
2012-02-13 22:12:05 +04:00
|
|
|
|
2013-11-07 20:14:36 +04:00
|
|
|
while (*nb && lp < &p[P_L2_SIZE]) {
|
2012-02-13 22:45:32 +04:00
|
|
|
if ((*index & (step - 1)) == 0 && *nb >= step) {
|
2013-11-11 16:42:43 +04:00
|
|
|
lp->skip = 0;
|
2012-02-13 22:25:31 +04:00
|
|
|
lp->ptr = leaf;
|
2012-02-13 22:45:32 +04:00
|
|
|
*index += step;
|
|
|
|
*nb -= step;
|
2012-02-13 22:21:20 +04:00
|
|
|
} else {
|
2013-12-01 16:02:23 +04:00
|
|
|
phys_page_set_level(map, lp, index, nb, leaf, level - 1);
|
2012-02-13 22:21:20 +04:00
|
|
|
}
|
|
|
|
++lp;
|
2012-02-13 22:12:05 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-03 18:22:53 +04:00
|
|
|
static void phys_page_set(AddressSpaceDispatch *d,
|
2019-03-21 11:25:50 +03:00
|
|
|
hwaddr index, uint64_t nb,
|
2012-02-13 22:21:20 +04:00
|
|
|
uint16_t leaf)
|
2012-02-13 22:12:05 +04:00
|
|
|
{
|
2012-02-13 22:21:20 +04:00
|
|
|
/* Wildly overreserve - it doesn't matter much. */
|
2013-12-01 16:02:23 +04:00
|
|
|
phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
|
2010-03-11 02:53:37 +03:00
|
|
|
|
2013-12-01 16:02:23 +04:00
|
|
|
phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
|
2004-05-21 18:52:29 +04:00
|
|
|
}
|
|
|
|
|
2013-11-11 19:52:07 +04:00
|
|
|
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
|
|
|
|
* and update our entry so we can skip it and go directly to the destination.
|
|
|
|
*/
|
2016-09-28 15:37:20 +03:00
|
|
|
static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
|
2013-11-11 19:52:07 +04:00
|
|
|
{
|
|
|
|
unsigned valid_ptr = P_L2_SIZE;
|
|
|
|
int valid = 0;
|
|
|
|
PhysPageEntry *p;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (lp->ptr == PHYS_MAP_NODE_NIL) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
p = nodes[lp->ptr];
|
|
|
|
for (i = 0; i < P_L2_SIZE; i++) {
|
|
|
|
if (p[i].ptr == PHYS_MAP_NODE_NIL) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
valid_ptr = i;
|
|
|
|
valid++;
|
|
|
|
if (p[i].skip) {
|
2016-09-28 15:37:20 +03:00
|
|
|
phys_page_compact(&p[i], nodes);
|
2013-11-11 19:52:07 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We can only compress if there's only one child. */
|
|
|
|
if (valid != 1) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(valid_ptr < P_L2_SIZE);
|
|
|
|
|
|
|
|
/* Don't compress if it won't fit in the # of bits we have. */
|
2019-03-21 11:25:55 +03:00
|
|
|
if (P_L2_LEVELS >= (1 << 6) &&
|
|
|
|
lp->skip + p[valid_ptr].skip >= (1 << 6)) {
|
2013-11-11 19:52:07 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
lp->ptr = p[valid_ptr].ptr;
|
|
|
|
if (!p[valid_ptr].skip) {
|
|
|
|
/* If our only child is a leaf, make this a leaf. */
|
|
|
|
/* By design, we should have made this node a leaf to begin with so we
|
|
|
|
* should never reach here.
|
|
|
|
* But since it's so simple to handle this, let's do it just in case we
|
|
|
|
* change this rule.
|
|
|
|
*/
|
|
|
|
lp->skip = 0;
|
|
|
|
} else {
|
|
|
|
lp->skip += p[valid_ptr].skip;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-21 11:51:00 +03:00
|
|
|
/* Compact the radix tree of @d, starting from its root entry. */
void address_space_dispatch_compact(AddressSpaceDispatch *d)
{
    /* A root that is already a leaf has nothing to compact. */
    if (!d->phys_map.skip) {
        return;
    }
    phys_page_compact(&d->phys_map, d->map.nodes);
}
|
|
|
|
|
2016-03-01 09:18:23 +03:00
|
|
|
static inline bool section_covers_addr(const MemoryRegionSection *section,
|
|
|
|
hwaddr addr)
|
|
|
|
{
|
|
|
|
/* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
|
|
|
|
* the section must cover the entire address space.
|
|
|
|
*/
|
2016-06-30 01:48:03 +03:00
|
|
|
return int128_gethi(section->size) ||
|
2016-03-01 09:18:23 +03:00
|
|
|
range_covers_byte(section->offset_within_address_space,
|
2016-06-30 01:48:03 +03:00
|
|
|
int128_getlo(section->size), addr);
|
2016-03-01 09:18:23 +03:00
|
|
|
}
|
|
|
|
|
2017-05-15 11:50:57 +03:00
|
|
|
/*
 * Walk the radix tree of @d and return the section that maps @addr.
 *
 * Returns &sections[PHYS_SECTION_UNASSIGNED] when the walk reaches an
 * unallocated node, or when the section the walk ends on does not
 * actually cover @addr.
 */
static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr)
{
    PhysPageEntry lp = d->phys_map, *p;
    Node *nodes = d->map.nodes;
    MemoryRegionSection *sections = d->map.sections;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    /* Each step drops lp.skip levels; stop at a leaf (skip == 0). */
    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    /* The entry reached may still miss @addr, so check its range. */
    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
|
|
|
|
|
2015-01-21 14:09:14 +03:00
|
|
|
/* Called from RCU critical section */
|
2013-06-02 17:27:39 +04:00
|
|
|
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
|
2013-05-26 23:46:51 +04:00
|
|
|
hwaddr addr,
|
|
|
|
bool resolve_subpage)
|
2013-05-06 18:48:02 +04:00
|
|
|
{
|
2020-09-23 13:56:46 +03:00
|
|
|
MemoryRegionSection *section = qatomic_read(&d->mru_section);
|
2013-05-26 23:46:51 +04:00
|
|
|
subpage_t *subpage;
|
|
|
|
|
2017-11-15 17:11:03 +03:00
|
|
|
if (!section || section == &d->map.sections[PHYS_SECTION_UNASSIGNED] ||
|
|
|
|
!section_covers_addr(section, addr)) {
|
2017-05-15 11:50:57 +03:00
|
|
|
section = phys_page_find(d, addr);
|
2020-09-23 13:56:46 +03:00
|
|
|
qatomic_set(&d->mru_section, section);
|
2016-03-01 09:18:24 +03:00
|
|
|
}
|
2013-05-26 23:46:51 +04:00
|
|
|
if (resolve_subpage && section->mr->subpage) {
|
|
|
|
subpage = container_of(section->mr, subpage_t, iomem);
|
2013-12-01 16:02:23 +04:00
|
|
|
section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
|
2013-05-26 23:46:51 +04:00
|
|
|
}
|
|
|
|
return section;
|
2013-05-06 18:48:02 +04:00
|
|
|
}
|
|
|
|
|
2015-01-21 14:09:14 +03:00
|
|
|
/* Called from RCU critical section */
|
2013-05-26 23:46:51 +04:00
|
|
|
static MemoryRegionSection *
|
2013-06-02 17:27:39 +04:00
|
|
|
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
|
2013-05-26 23:46:51 +04:00
|
|
|
hwaddr *plen, bool resolve_subpage)
|
2013-05-24 14:59:37 +04:00
|
|
|
{
|
|
|
|
MemoryRegionSection *section;
|
2015-06-17 11:40:27 +03:00
|
|
|
MemoryRegion *mr;
|
2014-02-07 18:47:46 +04:00
|
|
|
Int128 diff;
|
2013-05-24 14:59:37 +04:00
|
|
|
|
2013-06-02 17:27:39 +04:00
|
|
|
section = address_space_lookup_region(d, addr, resolve_subpage);
|
2013-05-24 14:59:37 +04:00
|
|
|
/* Compute offset within MemoryRegionSection */
|
|
|
|
addr -= section->offset_within_address_space;
|
|
|
|
|
|
|
|
/* Compute offset within MemoryRegion */
|
|
|
|
*xlat = addr + section->offset_within_region;
|
|
|
|
|
2015-06-17 11:40:27 +03:00
|
|
|
mr = section->mr;
|
exec: skip MMIO regions correctly in cpu_physical_memory_write_rom_internal
Loading the BIOS in the mac99 machine is interesting, because there is a
PROM in the middle of the BIOS region (from 16K to 32K). Before memory
region accesses were clamped, when QEMU was asked to load a BIOS from
0xfff00000 to 0xffffffff it would put even those 16K from the BIOS file
into the region. This is weird because those 16K were not actually
visible between 0xfff04000 and 0xfff07fff. However, it worked.
After clamping was added, this also worked. In this case, the
cpu_physical_memory_write_rom_internal function split the write in
three parts: the first 16K were copied, the PROM area (second 16K) were
ignored, then the rest was copied.
Problems then started with commit 965eb2f (exec: do not clamp accesses
to MMIO regions, 2015-06-17). Clamping accesses is not done for MMIO
regions because they can overlap wildly, and MMIO registers can be
expected to perform full-width accesses based only on their address
(with no respect for adjacent registers that could decode to completely
different MemoryRegions). However, this lack of clamping also applied
to the PROM area! cpu_physical_memory_write_rom_internal thus failed
to copy the third range above, i.e. only copied the first 16K of the BIOS.
In effect, address_space_translate is expecting _something else_ to do
the clamping for MMIO regions if the incoming length is large. This
"something else" is memory_access_size in the case of address_space_rw,
so use the same logic in cpu_physical_memory_write_rom_internal.
Reported-by: Alexander Graf <agraf@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Fixes: 965eb2f
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-07-04 01:24:51 +03:00
|
|
|
|
|
|
|
/* MMIO registers can be expected to perform full-width accesses based only
|
|
|
|
* on their address, without considering adjacent registers that could
|
|
|
|
* decode to completely different MemoryRegions. When such registers
|
|
|
|
* exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
|
|
|
|
* regions overlap wildly. For this reason we cannot clamp the accesses
|
|
|
|
* here.
|
|
|
|
*
|
|
|
|
* If the length is small (as is the case for address_space_ldl/stl),
|
|
|
|
* everything works fine. If the incoming length is large, however,
|
|
|
|
* the caller really has to do the clamping through memory_access_size.
|
|
|
|
*/
|
2015-06-17 11:40:27 +03:00
|
|
|
if (memory_region_is_ram(mr)) {
|
2015-06-17 11:36:54 +03:00
|
|
|
diff = int128_sub(section->size, int128_make64(addr));
|
2015-06-17 11:40:27 +03:00
|
|
|
*plen = int128_get64(int128_min(diff, int128_make64(*plen)));
|
|
|
|
}
|
2013-05-24 14:59:37 +04:00
|
|
|
return section;
|
|
|
|
}
|
2013-05-26 23:46:51 +04:00
|
|
|
|
2018-03-03 19:24:04 +03:00
|
|
|
/**
|
|
|
|
* address_space_translate_iommu - translate an address through an IOMMU
|
|
|
|
* memory region and then through the target address space.
|
|
|
|
*
|
|
|
|
* @iommu_mr: the IOMMU memory region that we start the translation from
|
|
|
|
* @addr: the address to be translated through the MMU
|
|
|
|
* @xlat: the translated address offset within the destination memory region.
|
|
|
|
* It cannot be %NULL.
|
|
|
|
* @plen_out: valid read/write length of the translated address. It
|
|
|
|
* cannot be %NULL.
|
|
|
|
* @page_mask_out: page mask for the translated address. This
|
|
|
|
* should only be meaningful for IOMMU translated
|
|
|
|
* addresses, since there may be huge pages that this bit
|
|
|
|
* would tell. It can be %NULL if we don't care about it.
|
|
|
|
* @is_write: whether the translation operation is for write
|
|
|
|
* @is_mmio: whether this can be MMIO, set true if it can
|
|
|
|
* @target_as: the address space targeted by the IOMMU
|
2018-05-31 16:50:53 +03:00
|
|
|
* @attrs: transaction attributes
|
2018-03-03 19:24:04 +03:00
|
|
|
*
|
|
|
|
* This function is called from RCU critical section. It is the common
|
|
|
|
* part of flatview_do_translate and address_space_translate_cached.
|
|
|
|
*/
|
|
|
|
static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iommu_mr,
|
|
|
|
hwaddr *xlat,
|
|
|
|
hwaddr *plen_out,
|
|
|
|
hwaddr *page_mask_out,
|
|
|
|
bool is_write,
|
|
|
|
bool is_mmio,
|
2018-05-31 16:50:53 +03:00
|
|
|
AddressSpace **target_as,
|
|
|
|
MemTxAttrs attrs)
|
2018-03-03 19:24:04 +03:00
|
|
|
{
|
|
|
|
MemoryRegionSection *section;
|
|
|
|
hwaddr page_mask = (hwaddr)-1;
|
|
|
|
|
|
|
|
do {
|
|
|
|
hwaddr addr = *xlat;
|
|
|
|
IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
|
2018-06-15 16:57:16 +03:00
|
|
|
int iommu_idx = 0;
|
|
|
|
IOMMUTLBEntry iotlb;
|
|
|
|
|
|
|
|
if (imrc->attrs_to_index) {
|
|
|
|
iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
|
|
|
|
}
|
|
|
|
|
|
|
|
iotlb = imrc->translate(iommu_mr, addr, is_write ?
|
|
|
|
IOMMU_WO : IOMMU_RO, iommu_idx);
|
2018-03-03 19:24:04 +03:00
|
|
|
|
|
|
|
if (!(iotlb.perm & (1 << is_write))) {
|
|
|
|
goto unassigned;
|
|
|
|
}
|
|
|
|
|
|
|
|
addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
|
|
|
|
| (addr & iotlb.addr_mask));
|
|
|
|
page_mask &= iotlb.addr_mask;
|
|
|
|
*plen_out = MIN(*plen_out, (addr | iotlb.addr_mask) - addr + 1);
|
|
|
|
*target_as = iotlb.target_as;
|
|
|
|
|
|
|
|
section = address_space_translate_internal(
|
|
|
|
address_space_to_dispatch(iotlb.target_as), addr, xlat,
|
|
|
|
plen_out, is_mmio);
|
|
|
|
|
|
|
|
iommu_mr = memory_region_get_iommu(section->mr);
|
|
|
|
} while (unlikely(iommu_mr));
|
|
|
|
|
|
|
|
if (page_mask_out) {
|
|
|
|
*page_mask_out = page_mask;
|
|
|
|
}
|
|
|
|
return *section;
|
|
|
|
|
|
|
|
unassigned:
|
|
|
|
return (MemoryRegionSection) { .mr = &io_mem_unassigned };
|
|
|
|
}
|
|
|
|
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
/**
|
|
|
|
* flatview_do_translate - translate an address in FlatView
|
|
|
|
*
|
|
|
|
* @fv: the flat view that we want to translate on
|
|
|
|
* @addr: the address to be translated in above address space
|
|
|
|
* @xlat: the translated address offset within memory region. It
|
|
|
|
* cannot be @NULL.
|
|
|
|
* @plen_out: valid read/write length of the translated address. It
|
|
|
|
* can be @NULL when we don't care about it.
|
|
|
|
* @page_mask_out: page mask for the translated address. This
|
|
|
|
* should only be meaningful for IOMMU translated
|
|
|
|
* addresses, since there may be huge pages that this bit
|
|
|
|
* would tell. It can be @NULL if we don't care about it.
|
|
|
|
* @is_write: whether the translation operation is for write
|
|
|
|
* @is_mmio: whether this can be MMIO, set true if it can
|
2018-04-17 12:39:35 +03:00
|
|
|
* @target_as: the address space targeted by the IOMMU
|
2018-05-31 16:50:53 +03:00
|
|
|
* @attrs: memory transaction attributes
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
*
|
|
|
|
* This function is called from RCU critical section
|
|
|
|
*/
|
2017-09-21 11:50:58 +03:00
|
|
|
static MemoryRegionSection flatview_do_translate(FlatView *fv,
|
|
|
|
hwaddr addr,
|
|
|
|
hwaddr *xlat,
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
hwaddr *plen_out,
|
|
|
|
hwaddr *page_mask_out,
|
2017-09-21 11:50:58 +03:00
|
|
|
bool is_write,
|
|
|
|
bool is_mmio,
|
2018-05-31 16:50:53 +03:00
|
|
|
AddressSpace **target_as,
|
|
|
|
MemTxAttrs attrs)
|
2016-12-30 13:09:13 +03:00
|
|
|
{
|
|
|
|
MemoryRegionSection *section;
|
2017-07-11 06:56:19 +03:00
|
|
|
IOMMUMemoryRegion *iommu_mr;
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
hwaddr plen = (hwaddr)(-1);
|
|
|
|
|
2018-04-17 12:39:35 +03:00
|
|
|
if (!plen_out) {
|
|
|
|
plen_out = &plen;
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
}
|
2016-12-30 13:09:13 +03:00
|
|
|
|
2018-03-03 19:24:04 +03:00
|
|
|
section = address_space_translate_internal(
|
|
|
|
flatview_to_dispatch(fv), addr, xlat,
|
|
|
|
plen_out, is_mmio);
|
2016-12-30 13:09:13 +03:00
|
|
|
|
2018-03-03 19:24:04 +03:00
|
|
|
iommu_mr = memory_region_get_iommu(section->mr);
|
|
|
|
if (unlikely(iommu_mr)) {
|
|
|
|
return address_space_translate_iommu(iommu_mr, xlat,
|
|
|
|
plen_out, page_mask_out,
|
|
|
|
is_write, is_mmio,
|
2018-05-31 16:50:53 +03:00
|
|
|
target_as, attrs);
|
2016-12-30 13:09:13 +03:00
|
|
|
}
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
if (page_mask_out) {
|
2018-03-03 19:24:04 +03:00
|
|
|
/* Not behind an IOMMU, use default page size. */
|
|
|
|
*page_mask_out = ~TARGET_PAGE_MASK;
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
}
|
|
|
|
|
2017-05-17 11:57:42 +03:00
|
|
|
return *section;
|
2016-12-30 13:09:13 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Called from RCU critical section */
|
2017-05-17 11:57:42 +03:00
|
|
|
IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
|
2018-05-31 16:50:53 +03:00
|
|
|
bool is_write, MemTxAttrs attrs)
|
2013-05-26 23:46:51 +04:00
|
|
|
{
|
2017-05-17 11:57:42 +03:00
|
|
|
MemoryRegionSection section;
|
2017-10-10 12:42:46 +03:00
|
|
|
hwaddr xlat, page_mask;
|
2012-10-30 15:47:46 +04:00
|
|
|
|
2017-10-10 12:42:46 +03:00
|
|
|
/*
|
|
|
|
* This can never be MMIO, and we don't really care about plen,
|
|
|
|
* but page mask.
|
|
|
|
*/
|
|
|
|
section = flatview_do_translate(address_space_to_flatview(as), addr, &xlat,
|
2018-05-31 16:50:53 +03:00
|
|
|
NULL, &page_mask, is_write, false, &as,
|
|
|
|
attrs);
|
2012-10-30 15:47:46 +04:00
|
|
|
|
2017-05-17 11:57:42 +03:00
|
|
|
/* Illegal translation */
|
|
|
|
if (section.mr == &io_mem_unassigned) {
|
|
|
|
goto iotlb_fail;
|
|
|
|
}
|
2012-10-30 15:47:46 +04:00
|
|
|
|
2017-05-17 11:57:42 +03:00
|
|
|
/* Convert memory region offset into address space offset */
|
|
|
|
xlat += section.offset_within_address_space -
|
|
|
|
section.offset_within_region;
|
|
|
|
|
|
|
|
return (IOMMUTLBEntry) {
|
2017-09-21 11:50:53 +03:00
|
|
|
.target_as = as,
|
2017-10-10 12:42:46 +03:00
|
|
|
.iova = addr & ~page_mask,
|
|
|
|
.translated_addr = xlat & ~page_mask,
|
|
|
|
.addr_mask = page_mask,
|
2017-05-17 11:57:42 +03:00
|
|
|
/* IOTLBs are for DMAs, and DMA only allows on RAMs. */
|
|
|
|
.perm = IOMMU_RW,
|
|
|
|
};
|
|
|
|
|
|
|
|
iotlb_fail:
|
|
|
|
return (IOMMUTLBEntry) {0};
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Called from RCU critical section */
|
2017-09-21 11:50:58 +03:00
|
|
|
MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
|
2018-05-31 16:50:52 +03:00
|
|
|
hwaddr *plen, bool is_write,
|
|
|
|
MemTxAttrs attrs)
|
2017-05-17 11:57:42 +03:00
|
|
|
{
|
|
|
|
MemoryRegion *mr;
|
|
|
|
MemoryRegionSection section;
|
2017-09-21 11:50:58 +03:00
|
|
|
AddressSpace *as = NULL;
|
2017-05-17 11:57:42 +03:00
|
|
|
|
|
|
|
/* This can be MMIO, so setup MMIO bit. */
|
exec: add page_mask for flatview_do_translate
The function is originally used for flatview_space_translate() and what
we care about most is (xlat, plen) range. However for iotlb requests, we
don't really care about "plen", but the size of the page that "xlat" is
located on. While, plen cannot really contain this information.
A simple example to show why "plen" is not good for IOTLB translations:
E.g., for huge pages, it is possible that guest mapped 1G huge page on
device side that used this GPA range:
0x100000000 - 0x13fffffff
Then let's say we want to translate one IOVA that finally mapped to GPA
0x13ffffe00 (which is located on this 1G huge page). Then here we'll
get:
(xlat, plen) = (0x13fffe00, 0x200)
So the IOTLB would be only covering a very small range since from
"plen" (which is 0x200 bytes) we cannot tell the size of the page.
Actually we can really know that this is a huge page - we just throw the
information away in flatview_do_translate().
This patch introduced "page_mask" optional parameter to capture that
page mask info. Also, I made "plen" an optional parameter as well, with
some comments for the whole function.
No functional change yet.
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Message-Id: <20171010094247.10173-2-maxime.coquelin@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-10-10 12:42:45 +03:00
|
|
|
section = flatview_do_translate(fv, addr, xlat, plen, NULL,
|
2018-05-31 16:50:53 +03:00
|
|
|
is_write, true, &as, attrs);
|
2017-05-17 11:57:42 +03:00
|
|
|
mr = section.mr;
|
|
|
|
|
2014-05-07 17:40:39 +04:00
|
|
|
if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
|
2014-02-07 18:47:46 +04:00
|
|
|
hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
|
2015-03-17 08:35:54 +03:00
|
|
|
*plen = MIN(page, *plen);
|
2014-02-07 18:47:46 +04:00
|
|
|
}
|
|
|
|
|
2012-10-30 15:47:46 +04:00
|
|
|
return mr;
|
2013-05-26 23:46:51 +04:00
|
|
|
}
|
|
|
|
|
2018-06-15 16:57:16 +03:00
|
|
|
typedef struct TCGIOMMUNotifier {
|
|
|
|
IOMMUNotifier n;
|
|
|
|
MemoryRegion *mr;
|
|
|
|
CPUState *cpu;
|
|
|
|
int iommu_idx;
|
|
|
|
bool active;
|
|
|
|
} TCGIOMMUNotifier;
|
|
|
|
|
|
|
|
/*
 * IOMMU unmap callback: flush the owning CPU's TLB once, then go dormant
 * until the notifier is marked active again.  @iotlb is not inspected.
 */
static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    TCGIOMMUNotifier *self = container_of(n, TCGIOMMUNotifier, n);

    if (self->active) {
        tlb_flush(self->cpu);
        /*
         * The notifier struct stays on the list so it need not be
         * reallocated later; the number of IOMMUs a CPU deals with is
         * generally small.  In any case an iommu notifier cannot be
         * unregistered from within a notify callback.
         */
        self->active = false;
    }
}
|
|
|
|
|
|
|
|
static void tcg_register_iommu_notifier(CPUState *cpu,
|
|
|
|
IOMMUMemoryRegion *iommu_mr,
|
|
|
|
int iommu_idx)
|
|
|
|
{
|
|
|
|
/* Make sure this CPU has an IOMMU notifier registered for this
|
|
|
|
* IOMMU/IOMMU index combination, so that we can flush its TLB
|
|
|
|
* when the IOMMU tells us the mappings we've cached have changed.
|
|
|
|
*/
|
|
|
|
MemoryRegion *mr = MEMORY_REGION(iommu_mr);
|
2021-01-17 20:04:11 +03:00
|
|
|
TCGIOMMUNotifier *notifier = NULL;
|
2020-07-22 11:40:48 +03:00
|
|
|
int i;
|
2018-06-15 16:57:16 +03:00
|
|
|
|
|
|
|
for (i = 0; i < cpu->iommu_notifiers->len; i++) {
|
2019-02-01 17:55:45 +03:00
|
|
|
notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
|
2018-06-15 16:57:16 +03:00
|
|
|
if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i == cpu->iommu_notifiers->len) {
|
|
|
|
/* Not found, add a new entry at the end of the array */
|
|
|
|
cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1);
|
2019-02-01 17:55:45 +03:00
|
|
|
notifier = g_new0(TCGIOMMUNotifier, 1);
|
|
|
|
g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i) = notifier;
|
2018-06-15 16:57:16 +03:00
|
|
|
|
|
|
|
notifier->mr = mr;
|
|
|
|
notifier->iommu_idx = iommu_idx;
|
|
|
|
notifier->cpu = cpu;
|
|
|
|
/* Rather than trying to register interest in the specific part
|
|
|
|
* of the iommu's address space that we've accessed and then
|
|
|
|
* expand it later as subsequent accesses touch more of it, we
|
|
|
|
* just register interest in the whole thing, on the assumption
|
|
|
|
* that iommu reconfiguration will be rare.
|
|
|
|
*/
|
|
|
|
iommu_notifier_init(¬ifier->n,
|
|
|
|
tcg_iommu_unmap_notify,
|
|
|
|
IOMMU_NOTIFIER_UNMAP,
|
|
|
|
0,
|
|
|
|
HWADDR_MAX,
|
|
|
|
iommu_idx);
|
2020-07-22 11:40:48 +03:00
|
|
|
memory_region_register_iommu_notifier(notifier->mr, ¬ifier->n,
|
|
|
|
&error_fatal);
|
2018-06-15 16:57:16 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!notifier->active) {
|
|
|
|
notifier->active = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-06 10:05:29 +03:00
|
|
|
/*
 * Destroy @cpu's IOMMU notifier list: unregister every notifier from its
 * memory region, free each TCGIOMMUNotifier, then free the array itself.
 */
void tcg_iommu_free_notifier_list(CPUState *cpu)
{
    int i;
    TCGIOMMUNotifier *notifier;

    for (i = 0; i < cpu->iommu_notifiers->len; i++) {
        notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i);
        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);
        g_free(notifier);
    }
    /* Second argument true: release the element storage as well. */
    g_array_free(cpu->iommu_notifiers, true);
}
|
|
|
|
|
2020-10-06 10:05:29 +03:00
|
|
|
/*
 * Create the (initially empty) array of TCGIOMMUNotifier pointers for @cpu.
 */
void tcg_iommu_init_notifier_list(CPUState *cpu)
{
    /* Not zero-terminated; newly added elements are cleared to 0. */
    GArray *list = g_array_new(false, true, sizeof(TCGIOMMUNotifier *));

    cpu->iommu_notifiers = list;
}
|
|
|
|
|
2015-01-21 14:09:14 +03:00
|
|
|
/* Called from RCU critical section */
|
2013-05-26 23:46:51 +04:00
|
|
|
MemoryRegionSection *
|
2022-06-21 18:38:29 +03:00
|
|
|
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
|
2018-06-15 16:57:16 +03:00
|
|
|
hwaddr *xlat, hwaddr *plen,
|
|
|
|
MemTxAttrs attrs, int *prot)
|
2013-05-26 23:46:51 +04:00
|
|
|
{
|
2012-10-30 15:47:46 +04:00
|
|
|
MemoryRegionSection *section;
|
2018-06-15 16:57:16 +03:00
|
|
|
IOMMUMemoryRegion *iommu_mr;
|
|
|
|
IOMMUMemoryRegionClass *imrc;
|
|
|
|
IOMMUTLBEntry iotlb;
|
|
|
|
int iommu_idx;
|
2022-06-21 18:38:29 +03:00
|
|
|
hwaddr addr = orig_addr;
|
2023-08-26 02:13:17 +03:00
|
|
|
AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
|
2016-01-21 17:15:05 +03:00
|
|
|
|
2018-06-15 16:57:16 +03:00
|
|
|
for (;;) {
|
|
|
|
section = address_space_translate_internal(d, addr, &addr, plen, false);
|
|
|
|
|
|
|
|
iommu_mr = memory_region_get_iommu(section->mr);
|
|
|
|
if (!iommu_mr) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
|
|
|
|
|
|
|
|
iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
|
|
|
|
tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
|
|
|
|
/* We need all the permissions, so pass IOMMU_NONE so the IOMMU
|
|
|
|
* doesn't short-cut its translation table walk.
|
|
|
|
*/
|
|
|
|
iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
|
|
|
|
addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
|
|
|
|
| (addr & iotlb.addr_mask));
|
|
|
|
/* Update the caller's prot bits to remove permissions the IOMMU
|
|
|
|
* is giving us a failure response for. If we get down to no
|
|
|
|
* permissions left at all we can give up now.
|
|
|
|
*/
|
|
|
|
if (!(iotlb.perm & IOMMU_RO)) {
|
|
|
|
*prot &= ~(PAGE_READ | PAGE_EXEC);
|
|
|
|
}
|
|
|
|
if (!(iotlb.perm & IOMMU_WO)) {
|
|
|
|
*prot &= ~PAGE_WRITE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!*prot) {
|
|
|
|
goto translate_fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
|
|
|
|
}
|
2012-10-30 15:47:46 +04:00
|
|
|
|
2017-07-11 06:56:19 +03:00
|
|
|
assert(!memory_region_is_iommu(section->mr));
|
2018-06-15 16:57:16 +03:00
|
|
|
*xlat = addr;
|
2012-10-30 15:47:46 +04:00
|
|
|
return section;
|
2018-06-15 16:57:16 +03:00
|
|
|
|
|
|
|
translate_fail:
|
2022-06-21 18:38:29 +03:00
|
|
|
/*
|
|
|
|
* We should be given a page-aligned address -- certainly
|
|
|
|
* tlb_set_page_with_attrs() does so. The page offset of xlat
|
|
|
|
* is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0.
|
|
|
|
* The page portion of xlat will be logged by memory_region_access_valid()
|
|
|
|
* when this memory access is rejected, so use the original untranslated
|
|
|
|
* physical address.
|
|
|
|
*/
|
|
|
|
assert((orig_addr & ~TARGET_PAGE_MASK) == 0);
|
|
|
|
*xlat = orig_addr;
|
2018-06-15 16:57:16 +03:00
|
|
|
return &d->map.sections[PHYS_SECTION_UNASSIGNED];
|
2013-05-26 23:46:51 +04:00
|
|
|
}
|
2013-06-17 06:09:11 +04:00
|
|
|
|
2017-11-23 12:23:32 +03:00
|
|
|
/*
 * Create address space number @asidx of @cpu, rooted at @mr.
 *
 * The AddressSpace is named "<prefix>-<cpu_index>".  The cpu_ases array is
 * allocated on first use with cpu->num_ases entries, and index 0 is also
 * published as cpu->as.  When TCG is enabled a memory listener is
 * registered on the new address space.
 */
void cpu_address_space_init(CPUState *cpu, int asidx,
                            const char *prefix, MemoryRegion *mr)
{
    AddressSpace *space = g_new0(AddressSpace, 1);
    CPUAddressSpace *slot;
    char *name;

    assert(mr);
    name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index);
    address_space_init(space, mr, name);
    g_free(name);

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = space;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    slot = &cpu->cpu_ases[asidx];
    slot->cpu = cpu;
    slot->as = space;

    if (!tcg_enabled()) {
        return;
    }

    /* Hook the TCG listener callbacks up to this address space. */
    slot->tcg_as_listener.log_global_after_sync = tcg_log_global_after_sync;
    slot->tcg_as_listener.commit = tcg_commit;
    slot->tcg_as_listener.name = "tcg";
    memory_listener_register(&slot->tcg_as_listener, space);
}
|
2016-01-21 17:15:05 +03:00
|
|
|
|
|
|
|
/*
 * Return the AddressSpace stored at index @asidx of @cpu's cpu_ases array.
 * No bounds checking is done on @asidx.
 */
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    CPUAddressSpace *slot = &cpu->cpu_ases[asidx];

    return slot->as;
}
|
2018-05-30 12:58:36 +03:00
|
|
|
|
2013-09-05 22:41:35 +04:00
|
|
|
/* Called from RCU critical section */
|
2013-09-09 19:49:45 +04:00
|
|
|
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
|
|
|
|
{
|
|
|
|
RAMBlock *block;
|
|
|
|
|
2020-09-23 13:56:46 +03:00
|
|
|
block = qatomic_rcu_read(&ram_list.mru_block);
|
2014-12-15 23:55:32 +03:00
|
|
|
if (block && addr - block->offset < block->max_length) {
|
2015-10-22 14:51:30 +03:00
|
|
|
return block;
|
2013-09-09 19:49:45 +04:00
|
|
|
}
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
2014-12-15 23:55:32 +03:00
|
|
|
if (addr - block->offset < block->max_length) {
|
2013-09-09 19:49:45 +04:00
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
|
|
|
|
abort();
|
|
|
|
|
|
|
|
found:
|
2024-01-02 18:35:28 +03:00
|
|
|
/* It is safe to write mru_block outside the BQL. This
|
2013-09-09 19:58:40 +04:00
|
|
|
* is what happens:
|
|
|
|
*
|
|
|
|
* mru_block = xxx
|
|
|
|
* rcu_read_unlock()
|
|
|
|
* xxx removed from list
|
|
|
|
* rcu_read_lock()
|
|
|
|
* read mru_block
|
|
|
|
* mru_block = NULL;
|
|
|
|
* call_rcu(reclaim_ramblock, xxx);
|
|
|
|
* rcu_read_unlock()
|
|
|
|
*
|
2020-09-23 13:56:46 +03:00
|
|
|
* qatomic_rcu_set is not needed here. The block was already published
|
2013-09-09 19:58:40 +04:00
|
|
|
* when it was placed into the list. Here we're just making an extra
|
|
|
|
* copy of the pointer.
|
|
|
|
*/
|
2013-09-09 19:49:45 +04:00
|
|
|
ram_list.mru_block = block;
|
|
|
|
return block;
|
|
|
|
}
|
|
|
|
|
2024-03-12 23:14:56 +03:00
|
|
|
/*
 * Call tlb_reset_dirty() on every CPU for the host-address range backing
 * guest RAM [@start, @start + @length).
 *
 * The range is widened to whole target pages and must lie within a single
 * RAMBlock (asserted).  Only valid when TCG is enabled (asserted).
 */
void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    assert(tcg_enabled());
    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    RCU_READ_LOCK_GUARD();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        /*
         * Use the page-aligned span: @start was rounded down above, so
         * passing the caller's @length unchanged would stop short of @end
         * whenever the original @start was not page aligned.
         */
        tlb_reset_dirty(cpu, start1, end - start);
    }
}
|
|
|
|
|
2009-04-11 18:47:08 +04:00
|
|
|
/* Note: start and end must be within the same ram block. */
|
2014-12-02 14:23:18 +03:00
|
|
|
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
|
|
|
|
ram_addr_t length,
|
|
|
|
unsigned client)
|
2004-02-06 22:46:14 +03:00
|
|
|
{
|
2016-01-25 16:33:20 +03:00
|
|
|
DirtyMemoryBlocks *blocks;
|
2020-02-18 13:19:10 +03:00
|
|
|
unsigned long end, page, start_page;
|
2016-01-25 16:33:20 +03:00
|
|
|
bool dirty = false;
|
2019-06-03 09:50:51 +03:00
|
|
|
RAMBlock *ramblock;
|
|
|
|
uint64_t mr_offset, mr_size;
|
2014-12-02 14:23:18 +03:00
|
|
|
|
|
|
|
if (length == 0) {
|
|
|
|
return false;
|
|
|
|
}
|
2005-08-21 23:12:28 +04:00
|
|
|
|
2014-12-02 14:23:18 +03:00
|
|
|
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
|
2020-02-18 13:19:10 +03:00
|
|
|
start_page = start >> TARGET_PAGE_BITS;
|
|
|
|
page = start_page;
|
2016-01-25 16:33:20 +03:00
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
WITH_RCU_READ_LOCK_GUARD() {
|
2020-09-23 13:56:46 +03:00
|
|
|
blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
|
2019-10-07 17:36:41 +03:00
|
|
|
ramblock = qemu_get_ram_block(start);
|
|
|
|
/* Range sanity check on the ramblock */
|
|
|
|
assert(start >= ramblock->offset &&
|
|
|
|
start + length <= ramblock->offset + ramblock->used_length);
|
2016-01-25 16:33:20 +03:00
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
while (page < end) {
|
|
|
|
unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
|
|
|
|
unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
|
|
|
|
unsigned long num = MIN(end - page,
|
|
|
|
DIRTY_MEMORY_BLOCK_SIZE - offset);
|
2016-01-25 16:33:20 +03:00
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
|
|
|
|
offset, num);
|
|
|
|
page += num;
|
|
|
|
}
|
2016-01-25 16:33:20 +03:00
|
|
|
|
2020-02-18 13:19:10 +03:00
|
|
|
mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset;
|
|
|
|
mr_size = (end - start_page) << TARGET_PAGE_BITS;
|
2019-10-07 17:36:41 +03:00
|
|
|
memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
|
2016-01-25 16:33:20 +03:00
|
|
|
}
|
|
|
|
|
2024-03-12 23:14:57 +03:00
|
|
|
if (dirty) {
|
|
|
|
cpu_physical_memory_dirty_bits_cleared(start, length);
|
2009-04-11 18:47:08 +04:00
|
|
|
}
|
2014-12-02 14:23:18 +03:00
|
|
|
|
|
|
|
return dirty;
|
2004-02-06 22:46:14 +03:00
|
|
|
}
|
|
|
|
|
2017-04-21 12:16:25 +03:00
|
|
|
DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
|
2019-06-03 09:50:50 +03:00
|
|
|
(MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client)
|
2017-04-21 12:16:25 +03:00
|
|
|
{
|
|
|
|
DirtyMemoryBlocks *blocks;
|
2019-06-03 09:50:50 +03:00
|
|
|
ram_addr_t start = memory_region_get_ram_addr(mr) + offset;
|
2017-04-21 12:16:25 +03:00
|
|
|
unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
|
|
|
|
ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
|
|
|
|
ram_addr_t last = QEMU_ALIGN_UP(start + length, align);
|
|
|
|
DirtyBitmapSnapshot *snap;
|
|
|
|
unsigned long page, end, dest;
|
|
|
|
|
|
|
|
snap = g_malloc0(sizeof(*snap) +
|
|
|
|
((last - first) >> (TARGET_PAGE_BITS + 3)));
|
|
|
|
snap->start = first;
|
|
|
|
snap->end = last;
|
|
|
|
|
|
|
|
page = first >> TARGET_PAGE_BITS;
|
|
|
|
end = last >> TARGET_PAGE_BITS;
|
|
|
|
dest = 0;
|
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
WITH_RCU_READ_LOCK_GUARD() {
|
2020-09-23 13:56:46 +03:00
|
|
|
blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
|
2017-04-21 12:16:25 +03:00
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
while (page < end) {
|
|
|
|
unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
|
2023-09-04 19:12:34 +03:00
|
|
|
unsigned long ofs = page % DIRTY_MEMORY_BLOCK_SIZE;
|
2019-10-07 17:36:41 +03:00
|
|
|
unsigned long num = MIN(end - page,
|
2023-09-04 19:12:34 +03:00
|
|
|
DIRTY_MEMORY_BLOCK_SIZE - ofs);
|
2017-04-21 12:16:25 +03:00
|
|
|
|
2023-09-04 19:12:34 +03:00
|
|
|
assert(QEMU_IS_ALIGNED(ofs, (1 << BITS_PER_LEVEL)));
|
2019-10-07 17:36:41 +03:00
|
|
|
assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
|
2023-09-04 19:12:34 +03:00
|
|
|
ofs >>= BITS_PER_LEVEL;
|
2017-04-21 12:16:25 +03:00
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
bitmap_copy_and_clear_atomic(snap->dirty + dest,
|
2023-09-04 19:12:34 +03:00
|
|
|
blocks->blocks[idx] + ofs,
|
2019-10-07 17:36:41 +03:00
|
|
|
num);
|
|
|
|
page += num;
|
|
|
|
dest += num >> BITS_PER_LEVEL;
|
|
|
|
}
|
2017-04-21 12:16:25 +03:00
|
|
|
}
|
|
|
|
|
2024-03-12 23:14:57 +03:00
|
|
|
cpu_physical_memory_dirty_bits_cleared(start, length);
|
2017-04-21 12:16:25 +03:00
|
|
|
|
2019-06-03 09:50:51 +03:00
|
|
|
memory_region_clear_dirty_bitmap(mr, offset, length);
|
|
|
|
|
2017-04-21 12:16:25 +03:00
|
|
|
return snap;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
|
|
|
|
ram_addr_t start,
|
|
|
|
ram_addr_t length)
|
|
|
|
{
|
|
|
|
unsigned long page, end;
|
|
|
|
|
|
|
|
assert(start >= snap->start);
|
|
|
|
assert(start + length <= snap->end);
|
|
|
|
|
|
|
|
end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
|
|
|
|
page = (start - snap->start) >> TARGET_PAGE_BITS;
|
|
|
|
|
|
|
|
while (page < end) {
|
|
|
|
if (test_bit(page, snap->dirty)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
page++;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-01-21 14:09:14 +03:00
|
|
|
/* Called from RCU critical section */
|
2013-09-03 15:32:01 +04:00
|
|
|
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
|
2019-09-20 07:09:58 +03:00
|
|
|
MemoryRegionSection *section)
|
2012-04-21 17:08:33 +04:00
|
|
|
{
|
2019-09-20 07:09:58 +03:00
|
|
|
AddressSpaceDispatch *d = flatview_to_dispatch(section->fv);
|
|
|
|
return section - d->map.sections;
|
2012-04-21 17:08:33 +04:00
|
|
|
}
|
2008-12-01 21:59:50 +03:00
|
|
|
|
2019-03-21 11:25:53 +03:00
|
|
|
static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
|
|
|
|
uint16_t section);
|
2017-09-21 11:50:58 +03:00
|
|
|
static subpage_t *subpage_init(FlatView *fv, hwaddr base);
|
2012-02-09 19:34:32 +04:00
|
|
|
|
2013-12-01 16:02:23 +04:00
|
|
|
static uint16_t phys_section_add(PhysPageMap *map,
|
|
|
|
MemoryRegionSection *section)
|
2012-02-12 20:32:55 +04:00
|
|
|
{
|
2013-05-07 13:30:23 +04:00
|
|
|
/* The physical section number is ORed with a page-aligned
|
|
|
|
* pointer to produce the iotlb entries. Thus it should
|
|
|
|
* never overflow into the page-aligned value.
|
|
|
|
*/
|
2013-12-01 16:02:23 +04:00
|
|
|
assert(map->sections_nb < TARGET_PAGE_SIZE);
|
2013-05-07 13:30:23 +04:00
|
|
|
|
2013-12-01 16:02:23 +04:00
|
|
|
if (map->sections_nb == map->sections_nb_alloc) {
|
|
|
|
map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
|
|
|
|
map->sections = g_renew(MemoryRegionSection, map->sections,
|
|
|
|
map->sections_nb_alloc);
|
2012-02-12 20:32:55 +04:00
|
|
|
}
|
2013-12-01 16:02:23 +04:00
|
|
|
map->sections[map->sections_nb] = *section;
|
2013-05-06 12:46:11 +04:00
|
|
|
memory_region_ref(section->mr);
|
2013-12-01 16:02:23 +04:00
|
|
|
return map->sections_nb++;
|
2012-02-12 20:32:55 +04:00
|
|
|
}
|
|
|
|
|
2013-06-25 11:30:48 +04:00
|
|
|
/*
 * Drop the reference held on @mr; if @mr is a subpage container, also
 * release and free the enclosing subpage_t.
 */
static void phys_section_destroy(MemoryRegion *mr)
{
    /* Read the flag before the unref below. */
    bool is_subpage = mr->subpage;
    subpage_t *sp;

    memory_region_unref(mr);

    if (!is_subpage) {
        return;
    }

    sp = container_of(mr, subpage_t, iomem);
    object_unref(OBJECT(&sp->iomem));
    g_free(sp);
}
|
|
|
|
|
2013-05-29 14:30:26 +04:00
|
|
|
/*
 * Destroy every section recorded in @map, last to first, then free the
 * sections and nodes arrays.
 */
static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        map->sections_nb--;
        phys_section_destroy(map->sections[map->sections_nb].mr);
    }

    g_free(map->sections);
    g_free(map->nodes);
}
|
|
|
|
|
2017-09-21 11:50:59 +03:00
|
|
|
/*
 * Register @section, which covers only part of a target page, in @fv.
 *
 * The page containing the section must currently map either to a subpage
 * container or to io_mem_unassigned (asserted).  In the latter case a new
 * subpage container is created and installed for that page first.  The
 * section is then registered inside the container at its in-page offsets.
 */
static void register_subpage(FlatView *fv, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = flatview_to_dispatch(fv);
    hwaddr page_base = section->offset_within_address_space
                       & TARGET_PAGE_MASK;
    MemoryRegionSection *current = phys_page_find(d, page_base);
    hwaddr first, last;
    subpage_t *sp;

    assert(current->mr->subpage || current->mr == &io_mem_unassigned);

    if (current->mr->subpage) {
        /* A container already exists for this page: reuse it. */
        sp = container_of(current->mr, subpage_t, iomem);
    } else {
        /* Whole-page section that routes accesses to the new container. */
        MemoryRegionSection container = {
            .offset_within_address_space = page_base,
            .size = int128_make64(TARGET_PAGE_SIZE),
        };

        sp = subpage_init(fv, page_base);
        container.fv = fv;
        container.mr = &sp->iomem;
        phys_page_set(d, page_base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &container));
    }

    /* Inclusive byte offsets of the section within its page. */
    first = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    last = first + int128_get64(section->size) - 1;
    subpage_register(sp, first, last,
                     phys_section_add(&d->map, section));
}
|
|
|
|
|
|
|
|
|
2017-09-21 11:50:59 +03:00
|
|
|
static void register_multipage(FlatView *fv,
|
2013-05-27 12:08:27 +04:00
|
|
|
MemoryRegionSection *section)
|
2003-08-11 01:47:01 +04:00
|
|
|
{
|
2017-09-21 11:50:59 +03:00
|
|
|
AddressSpaceDispatch *d = flatview_to_dispatch(fv);
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr start_addr = section->offset_within_address_space;
|
2013-12-01 16:02:23 +04:00
|
|
|
uint16_t section_index = phys_section_add(&d->map, section);
|
2013-05-27 12:08:27 +04:00
|
|
|
uint64_t num_pages = int128_get64(int128_rshift(section->size,
|
|
|
|
TARGET_PAGE_BITS));
|
2012-01-02 14:17:03 +04:00
|
|
|
|
2013-05-27 12:47:10 +04:00
|
|
|
assert(num_pages);
|
|
|
|
phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
|
2003-08-11 01:47:01 +04:00
|
|
|
}
|
|
|
|
|
2019-03-11 08:42:52 +03:00
|
|
|
/*
|
|
|
|
* The range in *section* may look like this:
|
|
|
|
*
|
|
|
|
* |s|PPPPPPP|s|
|
|
|
|
*
|
|
|
|
* where s stands for subpage and P for page.
|
|
|
|
*/
|
2017-09-21 11:51:00 +03:00
|
|
|
void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section)
|
2012-02-13 19:14:32 +04:00
|
|
|
{
|
2019-03-11 08:42:52 +03:00
|
|
|
MemoryRegionSection remain = *section;
|
2013-05-27 12:08:27 +04:00
|
|
|
Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
|
2012-02-13 19:14:32 +04:00
|
|
|
|
2019-03-11 08:42:52 +03:00
|
|
|
/* register first subpage */
|
|
|
|
if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
|
|
|
|
uint64_t left = TARGET_PAGE_ALIGN(remain.offset_within_address_space)
|
|
|
|
- remain.offset_within_address_space;
|
2013-05-27 12:47:10 +04:00
|
|
|
|
2019-03-11 08:42:52 +03:00
|
|
|
MemoryRegionSection now = remain;
|
2013-05-27 12:08:27 +04:00
|
|
|
now.size = int128_min(int128_make64(left), now.size);
|
2017-09-21 11:50:59 +03:00
|
|
|
register_subpage(fv, &now);
|
2019-03-11 08:42:52 +03:00
|
|
|
if (int128_eq(remain.size, now.size)) {
|
|
|
|
return;
|
|
|
|
}
|
2013-05-27 12:08:27 +04:00
|
|
|
remain.size = int128_sub(remain.size, now.size);
|
|
|
|
remain.offset_within_address_space += int128_get64(now.size);
|
|
|
|
remain.offset_within_region += int128_get64(now.size);
|
2019-03-11 08:42:52 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* register whole pages */
|
|
|
|
if (int128_ge(remain.size, page_size)) {
|
|
|
|
MemoryRegionSection now = remain;
|
|
|
|
now.size = int128_and(now.size, int128_neg(page_size));
|
|
|
|
register_multipage(fv, &now);
|
|
|
|
if (int128_eq(remain.size, now.size)) {
|
|
|
|
return;
|
2012-07-26 02:45:04 +04:00
|
|
|
}
|
2019-03-11 08:42:52 +03:00
|
|
|
remain.size = int128_sub(remain.size, now.size);
|
|
|
|
remain.offset_within_address_space += int128_get64(now.size);
|
|
|
|
remain.offset_within_region += int128_get64(now.size);
|
2012-02-13 19:14:32 +04:00
|
|
|
}
|
2019-03-11 08:42:52 +03:00
|
|
|
|
|
|
|
/* register last subpage */
|
|
|
|
register_subpage(fv, &remain);
|
2012-02-13 19:14:32 +04:00
|
|
|
}
|
|
|
|
|
2010-01-26 14:21:16 +03:00
|
|
|
/*
 * Flush the coalesced MMIO buffer.  Only KVM is handled here: when KVM is
 * not enabled this function does nothing.
 */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
|
|
|
|
|
2011-08-17 11:01:33 +04:00
|
|
|
void qemu_mutex_lock_ramlist(void)
|
|
|
|
{
|
|
|
|
qemu_mutex_lock(&ram_list.mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
void qemu_mutex_unlock_ramlist(void)
|
|
|
|
{
|
|
|
|
qemu_mutex_unlock(&ram_list.mutex);
|
|
|
|
}
|
|
|
|
|
2021-09-08 12:35:43 +03:00
|
|
|
GString *ram_block_format(void)
|
2017-05-12 07:17:41 +03:00
|
|
|
{
|
|
|
|
RAMBlock *block;
|
|
|
|
char *psize;
|
2021-09-08 12:35:43 +03:00
|
|
|
GString *buf = g_string_new("");
|
2017-05-12 07:17:41 +03:00
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2022-12-05 15:07:12 +03:00
|
|
|
g_string_append_printf(buf, "%24s %8s %18s %18s %18s %18s %3s\n",
|
|
|
|
"Block Name", "PSize", "Offset", "Used", "Total",
|
|
|
|
"HVA", "RO");
|
|
|
|
|
2017-05-12 07:17:41 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
|
|
|
psize = size_to_str(block->page_size);
|
2021-09-08 12:35:43 +03:00
|
|
|
g_string_append_printf(buf, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64
|
2022-12-05 15:07:12 +03:00
|
|
|
" 0x%016" PRIx64 " 0x%016" PRIx64 " %3s\n",
|
|
|
|
block->idstr, psize,
|
2021-09-08 12:35:43 +03:00
|
|
|
(uint64_t)block->offset,
|
|
|
|
(uint64_t)block->used_length,
|
2022-12-05 15:07:12 +03:00
|
|
|
(uint64_t)block->max_length,
|
|
|
|
(uint64_t)(uintptr_t)block->host,
|
|
|
|
block->mr->readonly ? "ro" : "rw");
|
|
|
|
|
2017-05-12 07:17:41 +03:00
|
|
|
g_free(psize);
|
|
|
|
}
|
2021-09-08 12:35:43 +03:00
|
|
|
|
|
|
|
return buf;
|
2017-05-12 07:17:41 +03:00
|
|
|
}
|
|
|
|
|
2019-04-17 14:31:43 +03:00
|
|
|
/*
 * object_child_foreach() callback: lower the running minimum in *@opaque
 * (a long) to the page size of @obj if @obj is a memory backend that is
 * currently mapped.  Objects of any other type are ignored.
 *
 * Always returns 0.
 */
static int find_min_backend_pagesize(Object *obj, void *opaque)
{
    long *smallest = opaque;
    HostMemoryBackend *backend;
    long hpsize;

    if (!object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        return 0;
    }

    backend = MEMORY_BACKEND(obj);
    hpsize = host_memory_backend_pagesize(backend);
    if (host_memory_backend_is_mapped(backend) && hpsize < *smallest) {
        *smallest = hpsize;
    }
    return 0;
}
|
|
|
|
|
2019-04-17 14:31:43 +03:00
|
|
|
/*
 * object_child_foreach() callback: raise the running maximum in *@opaque
 * (a long) to the page size of @obj if @obj is a memory backend that is
 * currently mapped.  Objects of any other type are ignored.
 *
 * Always returns 0.
 */
static int find_max_backend_pagesize(Object *obj, void *opaque)
{
    long *largest = opaque;
    HostMemoryBackend *backend;
    long hpsize;

    if (!object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        return 0;
    }

    backend = MEMORY_BACKEND(obj);
    hpsize = host_memory_backend_pagesize(backend);
    if (host_memory_backend_is_mapped(backend) && hpsize > *largest) {
        *largest = hpsize;
    }
    return 0;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TODO: We assume right now that all mapped host memory backends are
|
|
|
|
* used as RAM, however some might be used for different purposes.
|
|
|
|
*/
|
|
|
|
long qemu_minrampagesize(void)
|
2017-03-02 05:36:11 +03:00
|
|
|
{
|
|
|
|
long hpsize = LONG_MAX;
|
2020-02-19 19:09:47 +03:00
|
|
|
Object *memdev_root = object_resolve_path("/objects", NULL);
|
2017-03-02 05:36:11 +03:00
|
|
|
|
2020-02-19 19:09:47 +03:00
|
|
|
object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize);
|
2017-03-02 05:36:11 +03:00
|
|
|
return hpsize;
|
|
|
|
}
|
2019-04-17 14:31:43 +03:00
|
|
|
|
|
|
|
long qemu_maxrampagesize(void)
|
|
|
|
{
|
2020-02-19 19:09:47 +03:00
|
|
|
long pagesize = 0;
|
2019-04-17 14:31:43 +03:00
|
|
|
Object *memdev_root = object_resolve_path("/objects", NULL);
|
|
|
|
|
2020-02-19 19:09:47 +03:00
|
|
|
object_child_foreach(memdev_root, find_max_backend_pagesize, &pagesize);
|
2019-04-17 14:31:43 +03:00
|
|
|
return pagesize;
|
|
|
|
}
|
2017-03-02 05:36:11 +03:00
|
|
|
|
2018-09-24 15:32:05 +03:00
|
|
|
#ifdef CONFIG_POSIX
|
2016-10-27 07:22:58 +03:00
|
|
|
static int64_t get_file_size(int fd)
|
|
|
|
{
|
2019-08-30 12:30:56 +03:00
|
|
|
int64_t size;
|
|
|
|
#if defined(__linux__)
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (fstat(fd, &st) < 0) {
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Special handling for devdax character devices */
|
|
|
|
if (S_ISCHR(st.st_mode)) {
|
|
|
|
g_autofree char *subsystem_path = NULL;
|
|
|
|
g_autofree char *subsystem = NULL;
|
|
|
|
|
|
|
|
subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem",
|
|
|
|
major(st.st_rdev), minor(st.st_rdev));
|
|
|
|
subsystem = g_file_read_link(subsystem_path, NULL);
|
|
|
|
|
|
|
|
if (subsystem && g_str_has_suffix(subsystem, "/dax")) {
|
|
|
|
g_autofree char *size_path = NULL;
|
|
|
|
g_autofree char *size_str = NULL;
|
|
|
|
|
|
|
|
size_path = g_strdup_printf("/sys/dev/char/%d:%d/size",
|
|
|
|
major(st.st_rdev), minor(st.st_rdev));
|
|
|
|
|
|
|
|
if (g_file_get_contents(size_path, &size_str, NULL, NULL)) {
|
|
|
|
return g_ascii_strtoll(size_str, NULL, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* defined(__linux__) */
|
|
|
|
|
|
|
|
/* st.st_size may be zero for special files yet lseek(2) works */
|
|
|
|
size = lseek(fd, 0, SEEK_END);
|
2016-10-27 07:22:58 +03:00
|
|
|
if (size < 0) {
|
|
|
|
return -errno;
|
|
|
|
}
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2020-04-29 11:50:09 +03:00
|
|
|
/*
 * Determine the alignment required by the file behind @fd.
 *
 * Only Linux builds with libdaxctl do any work: for a character device,
 * the daxctl regions are scanned and the alignment of the first region
 * whose path occurs in the device's resolved sysfs path is returned.
 *
 * Returns the alignment if one was found, -1 if none was found (or the
 * daxctl context could not be created), or a negative errno value if
 * fstat() or realpath() fails.
 */
static int64_t get_file_align(int fd)
{
    int64_t align = -1;
#if defined(__linux__) && defined(CONFIG_LIBDAXCTL)
    struct stat st;

    if (fstat(fd, &st) < 0) {
        return -errno;
    }

    /* Special handling for devdax character devices */
    if (S_ISCHR(st.st_mode)) {
        g_autofree char *path =
            g_strdup_printf("/sys/dev/char/%d:%d",
                            major(st.st_rdev), minor(st.st_rdev));
        g_autofree char *rpath = realpath(path, NULL);
        struct daxctl_ctx *ctx;
        struct daxctl_region *region;

        if (!rpath) {
            return -errno;
        }

        if (daxctl_new(&ctx)) {
            return -1;
        }

        daxctl_region_foreach(ctx, region) {
            if (strstr(rpath, daxctl_region_get_path(region))) {
                align = daxctl_region_get_align(region);
                break;
            }
        }
        daxctl_unref(ctx);
    }
#endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */

    return align;
}
|
|
|
|
|
2017-06-02 17:12:22 +03:00
|
|
|
/*
 * Open, or create, the file that backs a RAM region.
 *
 * @path:        an existing file, a file to be created, or a directory
 * @region_name: used to build the temporary file name when @path is a
 *               directory; any '/' in it is replaced by '_'
 * @readonly:    open an existing file with O_RDONLY; never create anything
 * @created:     set to true only when a new file was created at @path
 *
 * Behaviour by case:
 *  - @path is an existing file: it is opened and used as is.
 *  - @path does not exist: with @readonly the call fails with -ENOENT,
 *    otherwise the file is created exclusively (O_CREAT | O_EXCL).
 *  - @path is a directory: with @readonly the call fails with -EISDIR,
 *    otherwise an unlinked temporary file is created inside it with
 *    mkstemp(); *@created stays false in this case.
 *
 * EINTR and EEXIST (something else created the file between our two
 * open() calls) cause the whole sequence to be retried.
 *
 * Returns a file descriptor (>= 0) on success, a negative errno value on
 * failure.
 */
static int file_ram_open(const char *path,
                         const char *region_name,
                         bool readonly,
                         bool *created)
{
    char *filename;
    char *sanitized_name;
    char *c;
    int fd = -1;
    int saved_errno;

    *created = false;
    for (;;) {
        fd = open(path, readonly ? O_RDONLY : O_RDWR);
        if (fd >= 0) {
            /*
             * open(O_RDONLY) won't fail with EISDIR. Check manually if we
             * opened a directory and fail similarly to how we fail ENOENT
             * in readonly mode. Note that mkstemp() would imply O_RDWR.
             */
            if (readonly) {
                struct stat file_stat;

                if (fstat(fd, &file_stat)) {
                    /*
                     * close() may overwrite errno, so remember why fstat()
                     * failed before releasing the descriptor.
                     */
                    saved_errno = errno;
                    close(fd);
                    if (saved_errno == EINTR) {
                        continue;
                    }
                    return -saved_errno;
                } else if (S_ISDIR(file_stat.st_mode)) {
                    close(fd);
                    return -EISDIR;
                }
            }
            /* @path names an existing file, use it */
            break;
        }
        if (errno == ENOENT) {
            if (readonly) {
                /* Refuse to create new, readonly files. */
                return -ENOENT;
            }
            /* @path names a file that doesn't exist, create it */
            fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
            if (fd >= 0) {
                *created = true;
                break;
            }
        } else if (errno == EISDIR) {
            /* @path names a directory, create a file there */
            /* Make name safe to use with mkstemp by replacing '/' with '_'. */
            sanitized_name = g_strdup(region_name);
            for (c = sanitized_name; *c != '\0'; c++) {
                if (*c == '/') {
                    *c = '_';
                }
            }

            filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                       sanitized_name);
            g_free(sanitized_name);

            fd = mkstemp(filename);
            if (fd >= 0) {
                unlink(filename);
                g_free(filename);
                break;
            }
            /*
             * Keep mkstemp()'s errno intact for the check below; freeing
             * memory is not guaranteed to preserve it on every libc.
             */
            saved_errno = errno;
            g_free(filename);
            errno = saved_errno;
        }
        if (errno != EEXIST && errno != EINTR) {
            return -errno;
        }
        /*
         * Try again on EINTR and EEXIST.  The latter happens when
         * something else creates the file between our two open().
         */
    }

    return fd;
}
|
|
|
|
|
|
|
|
static void *file_ram_alloc(RAMBlock *block,
|
|
|
|
ram_addr_t memory,
|
|
|
|
int fd,
|
|
|
|
bool truncate,
|
2021-01-29 19:46:04 +03:00
|
|
|
off_t offset,
|
2017-06-02 17:12:22 +03:00
|
|
|
Error **errp)
|
|
|
|
{
|
2021-05-10 14:43:20 +03:00
|
|
|
uint32_t qemu_map_flags;
|
2017-06-02 17:12:22 +03:00
|
|
|
void *area;
|
|
|
|
|
2016-09-29 22:09:37 +03:00
|
|
|
block->page_size = qemu_fd_getpagesize(fd);
|
hostmem-file: add "align" option
When mmap(2) the backend files, QEMU uses the host page size
(getpagesize(2)) by default as the alignment of mapping address.
However, some backends may require alignments different than the page
size. For example, mmap a device DAX (e.g., /dev/dax0.0) on Linux
kernel 4.13 to an address, which is 4K-aligned but not 2M-aligned,
fails with a kernel message like
[617494.969768] dax dax0.0: qemu-system-x86: dax_mmap: fail, unaligned vma (0x7fa37c579000 - 0x7fa43c579000, 0x1fffff)
Because there is no common approach to get such alignment requirement,
we add the 'align' option to 'memory-backend-file', so that users or
management utils, which have enough knowledge about the backend, can
specify a proper alignment via this option.
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Message-Id: <20171211072806.2812-2-haozhong.zhang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[ehabkost: fixed typo, fixed error_setg() format string]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-12-11 10:28:04 +03:00
|
|
|
if (block->mr->align % block->page_size) {
|
|
|
|
error_setg(errp, "alignment 0x%" PRIx64
|
|
|
|
" must be multiples of page size 0x%zx",
|
|
|
|
block->mr->align, block->page_size);
|
|
|
|
return NULL;
|
2018-06-07 18:47:05 +03:00
|
|
|
} else if (block->mr->align && !is_power_of_2(block->mr->align)) {
|
|
|
|
error_setg(errp, "alignment 0x%" PRIx64
|
|
|
|
" must be a power of two", block->mr->align);
|
|
|
|
return NULL;
|
2023-04-04 01:14:21 +03:00
|
|
|
} else if (offset % block->page_size) {
|
|
|
|
error_setg(errp, "offset 0x%" PRIx64
|
|
|
|
" must be multiples of page size 0x%zx",
|
|
|
|
offset, block->page_size);
|
|
|
|
return NULL;
|
hostmem-file: add "align" option
When mmap(2) the backend files, QEMU uses the host page size
(getpagesize(2)) by default as the alignment of mapping address.
However, some backends may require alignments different than the page
size. For example, mmap a device DAX (e.g., /dev/dax0.0) on Linux
kernel 4.13 to an address, which is 4K-aligned but not 2M-aligned,
fails with a kernel message like
[617494.969768] dax dax0.0: qemu-system-x86: dax_mmap: fail, unaligned vma (0x7fa37c579000 - 0x7fa43c579000, 0x1fffff)
Because there is no common approach to get such alignment requirement,
we add the 'align' option to 'memory-backend-file', so that users or
management utils, which have enough knowledge about the backend, can
specify a proper alignment via this option.
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Message-Id: <20171211072806.2812-2-haozhong.zhang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
[ehabkost: fixed typo, fixed error_setg() format string]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-12-11 10:28:04 +03:00
|
|
|
}
|
|
|
|
block->mr->align = MAX(block->page_size, block->mr->align);
|
exec.c: workaround regression caused by alignment change in d2f39ad
Commit d2f39ad "exec.c: Ensure right alignment also for file backed ram"
added an additional alignment requirement on the size of backend file
besides the previous page size. On x86, the alignment is changed from
4KB in QEMU 2.6 to 2MB in QEMU 2.7.
This change breaks certain usages in QEMU 2.7 on x86, e.g.
-object memory-backend-file,id=mem1,mem-path=/tmp/,size=$SZ
-device pc-dimm,id=dimm1,memdev=mem1
where $SZ is multiple of 4KB but not 2MB (e.g. 1023M). QEMU 2.7
reports the following error message and aborts:
qemu-system-x86_64: -device pc-dimm,memdev=mem1,id=nv1: backend memory size must be multiple of 0x200000
The same regression may also happen in other platforms as indicated by
Igor Mammedov. This change is however necessary for s390 according to
the commit message of d2f39ad, so we workaround the regression by taking
the change only on s390.
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Reported-by: "Xu, Anthony" <anthony.xu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-10-24 15:49:37 +03:00
|
|
|
#if defined(__s390x__)
|
|
|
|
if (kvm_enabled()) {
|
|
|
|
block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
|
|
|
|
}
|
|
|
|
#endif
|
exec: Fix memory allocation when memory path names new file
Commit 8d31d6b extended file_ram_alloc() to accept file names in
addition to directory names. Even though it passes O_CREAT to open(),
it actually works only for existing files. Reproducer adapted from
the commit's qemu-doc.texi update:
$ qemu-system-x86_64 -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1
qemu-system-x86_64: -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1: failed to get page size of file /dev/hugepages/my-shmem-file: No such file or directory
This is because we first get the page size for @path, then open the
actual file. Unwise even before the flawed commit, because the
directory could change in between, invalidating the page size.
Unlikely to bite in practice.
Rearrange the code to create the file (if necessary) before getting
its page size. Carefully avoid TOCTTOU conditions with a method
suggested by Paolo Bonzini.
While there, replace "hugepages" by "guest RAM" in error messages,
because host memory backends can be used for purposes other than huge
pages, e.g. /dev/shm/ shared memory. Help text of -mem-path agrees.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1457378754-21649-2-git-send-email-armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 22:25:13 +03:00
|
|
|
|
2016-09-29 22:09:37 +03:00
|
|
|
if (memory < block->page_size) {
|
exec: Fix memory allocation when memory path names new file
Commit 8d31d6b extended file_ram_alloc() to accept file names in
addition to directory names. Even though it passes O_CREAT to open(),
it actually works only for existing files. Reproducer adapted from
the commit's qemu-doc.texi update:
$ qemu-system-x86_64 -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1
qemu-system-x86_64: -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1: failed to get page size of file /dev/hugepages/my-shmem-file: No such file or directory
This is because we first get the page size for @path, then open the
actual file. Unwise even before the flawed commit, because the
directory could change in between, invalidating the page size.
Unlikely to bite in practice.
Rearrange the code to create the file (if necessary) before getting
its page size. Carefully avoid TOCTTOU conditions with a method
suggested by Paolo Bonzini.
While there, replace "hugepages" by "guest RAM" in error messages,
because host memory backends can be used for purposes other than huge
pages, e.g. /dev/shm/ shared memory. Help text of -mem-path agrees.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1457378754-21649-2-git-send-email-armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 22:25:13 +03:00
|
|
|
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
|
2016-09-29 22:09:37 +03:00
|
|
|
"or larger than page size 0x%zx",
|
|
|
|
memory, block->page_size);
|
2017-06-02 17:12:22 +03:00
|
|
|
return NULL;
|
2016-11-02 04:05:51 +03:00
|
|
|
}
|
|
|
|
|
2016-09-29 22:09:37 +03:00
|
|
|
memory = ROUND_UP(memory, block->page_size);
|
2010-03-02 02:25:08 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* ftruncate is not supported by hugetlbfs in older
|
|
|
|
* hosts, so don't bother bailing out on errors.
|
|
|
|
* If anything goes wrong with it under other filesystems,
|
|
|
|
* mmap will fail.
|
2016-10-27 07:22:58 +03:00
|
|
|
*
|
|
|
|
* Do not truncate the non-empty backend file to avoid corrupting
|
|
|
|
* the existing data in the file. Disabling shrinking is not
|
|
|
|
* enough. For example, the current vNVDIMM implementation stores
|
|
|
|
* the guest NVDIMM labels at the end of the backend file. If the
|
|
|
|
* backend file is later extended, QEMU will not be able to find
|
|
|
|
* those labels. Therefore, extending the non-empty backend file
|
|
|
|
* is disabled as well.
|
2010-03-02 02:25:08 +03:00
|
|
|
*/
|
2023-04-04 01:14:21 +03:00
|
|
|
if (truncate && ftruncate(fd, offset + memory)) {
|
2010-08-18 08:30:13 +04:00
|
|
|
perror("ftruncate");
|
2014-05-14 13:43:20 +04:00
|
|
|
}
|
2010-03-02 02:25:08 +03:00
|
|
|
|
2023-09-06 15:04:54 +03:00
|
|
|
qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0;
|
2021-05-10 14:43:20 +03:00
|
|
|
qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
|
|
|
|
qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
|
2021-05-10 14:43:21 +03:00
|
|
|
qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
|
2021-05-10 14:43:20 +03:00
|
|
|
area = qemu_ram_mmap(fd, memory, block->mr->align, qemu_map_flags, offset);
|
2010-03-02 02:25:08 +03:00
|
|
|
if (area == MAP_FAILED) {
|
2014-05-14 13:43:20 +04:00
|
|
|
error_setg_errno(errp, errno,
|
exec: Fix memory allocation when memory path names new file
Commit 8d31d6b extended file_ram_alloc() to accept file names in
addition to directory names. Even though it passes O_CREAT to open(),
it actually works only for existing files. Reproducer adapted from
the commit's qemu-doc.texi update:
$ qemu-system-x86_64 -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1
qemu-system-x86_64: -object memory-backend-file,size=2M,mem-path=/dev/hugepages/my-shmem-file,id=mb1: failed to get page size of file /dev/hugepages/my-shmem-file: No such file or directory
This is because we first get the page size for @path, then open the
actual file. Unwise even before the flawed commit, because the
directory could change in between, invalidating the page size.
Unlikely to bite in practice.
Rearrange the code to create the file (if necessary) before getting
its page size. Carefully avoid TOCTTOU conditions with a method
suggested by Paolo Bonzini.
While there, replace "hugepages" by "guest RAM" in error messages,
because host memory backends can be used for purposes other than huge
pages, e.g. /dev/shm/ shared memory. Help text of -mem-path agrees.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1457378754-21649-2-git-send-email-armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-07 22:25:13 +03:00
|
|
|
"unable to map backing store for guest RAM");
|
2017-06-02 17:12:22 +03:00
|
|
|
return NULL;
|
2010-03-02 02:25:08 +03:00
|
|
|
}
|
2013-10-29 00:51:46 +04:00
|
|
|
|
2010-07-02 21:13:17 +04:00
|
|
|
block->fd = fd;
|
2023-04-04 01:14:21 +03:00
|
|
|
block->fd_offset = offset;
|
2010-03-02 02:25:08 +03:00
|
|
|
return area;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-01-05 20:01:37 +03:00
|
|
|
/* Allocate space within the ram_addr_t space that governs the
|
|
|
|
* dirty bitmaps.
|
|
|
|
* Called with the ramlist lock held.
|
|
|
|
*/
|
2010-06-25 21:08:38 +04:00
|
|
|
static ram_addr_t find_ram_offset(ram_addr_t size)
|
2010-07-02 21:13:17 +04:00
|
|
|
{
|
|
|
|
RAMBlock *block, *next_block;
|
2011-10-31 18:54:09 +04:00
|
|
|
ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
|
2010-07-02 21:13:17 +04:00
|
|
|
|
2013-03-11 13:20:21 +04:00
|
|
|
assert(size != 0); /* it would hand out same offset multiple times */
|
|
|
|
|
2013-09-05 22:41:35 +04:00
|
|
|
if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
|
2010-07-02 21:13:17 +04:00
|
|
|
return 0;
|
2015-01-21 15:45:24 +03:00
|
|
|
}
|
2010-07-02 21:13:17 +04:00
|
|
|
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
2018-01-05 20:01:37 +03:00
|
|
|
ram_addr_t candidate, next = RAM_ADDR_MAX;
|
2010-07-02 21:13:17 +04:00
|
|
|
|
2018-01-05 20:01:38 +03:00
|
|
|
/* Align blocks to start on a 'long' in the bitmap
|
|
|
|
* which makes the bitmap sync'ing take the fast path.
|
|
|
|
*/
|
2018-01-05 20:01:37 +03:00
|
|
|
candidate = block->offset + block->max_length;
|
2018-01-05 20:01:38 +03:00
|
|
|
candidate = ROUND_UP(candidate, BITS_PER_LONG << TARGET_PAGE_BITS);
|
2010-07-02 21:13:17 +04:00
|
|
|
|
2018-01-05 20:01:37 +03:00
|
|
|
/* Search for the closest following block
|
|
|
|
* and find the gap.
|
|
|
|
*/
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(next_block) {
|
2018-01-05 20:01:37 +03:00
|
|
|
if (next_block->offset >= candidate) {
|
2010-07-02 21:13:17 +04:00
|
|
|
next = MIN(next, next_block->offset);
|
|
|
|
}
|
|
|
|
}
|
2018-01-05 20:01:37 +03:00
|
|
|
|
|
|
|
/* If it fits remember our place and remember the size
|
|
|
|
* of gap, but keep going so that we might find a smaller
|
|
|
|
* gap to fill so avoiding fragmentation.
|
|
|
|
*/
|
|
|
|
if (next - candidate >= size && next - candidate < mingap) {
|
|
|
|
offset = candidate;
|
|
|
|
mingap = next - candidate;
|
2010-07-02 21:13:17 +04:00
|
|
|
}
|
2018-01-05 20:01:37 +03:00
|
|
|
|
|
|
|
trace_find_ram_offset_loop(size, candidate, offset, next, mingap);
|
2010-07-02 21:13:17 +04:00
|
|
|
}
|
2011-10-31 18:54:09 +04:00
|
|
|
|
|
|
|
if (offset == RAM_ADDR_MAX) {
|
|
|
|
fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
|
|
|
|
(uint64_t)size);
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2018-01-05 20:01:37 +03:00
|
|
|
trace_find_ram_offset(size, offset);
|
|
|
|
|
2010-07-02 21:13:17 +04:00
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
2018-06-20 23:27:36 +03:00
|
|
|
static unsigned long last_ram_page(void)
|
2010-06-25 21:08:38 +04:00
|
|
|
{
|
|
|
|
RAMBlock *block;
|
|
|
|
ram_addr_t last = 0;
|
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
2014-11-12 15:27:41 +03:00
|
|
|
last = MAX(last, block->offset + block->max_length);
|
2015-01-21 15:45:24 +03:00
|
|
|
}
|
2017-03-21 19:44:30 +03:00
|
|
|
return last >> TARGET_PAGE_BITS;
|
2010-06-25 21:08:38 +04:00
|
|
|
}
|
|
|
|
|
2012-08-02 23:44:16 +04:00
|
|
|
/*
 * Apply MADV_DONTDUMP to [@addr, @addr + @size) when the current machine
 * is configured not to include guest memory in core dumps.  A failing
 * madvise is reported on stderr and otherwise ignored.
 */
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    /* Nothing to do when the user wants the guest memory in the core. */
    if (machine_dump_guest_core(current_machine)) {
        return;
    }

    if (qemu_madvise(addr, size, QEMU_MADV_DONTDUMP)) {
        perror("qemu_madvise");
        fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                        "but dump_guest_core=off specified\n");
    }
}
|
|
|
|
|
2015-11-05 21:10:32 +03:00
|
|
|
/* Return the identifier string stored in @rb. */
const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}
|
|
|
|
|
2019-02-15 20:45:44 +03:00
|
|
|
/* Return the host pointer recorded in @rb (rb->host). */
void *qemu_ram_get_host_addr(RAMBlock *rb)
{
    return rb->host;
}
|
|
|
|
|
|
|
|
/* Return the offset of @rb within the ram_addr_t space. */
ram_addr_t qemu_ram_get_offset(RAMBlock *rb)
{
    return rb->offset;
}
|
|
|
|
|
|
|
|
/* Return the currently used length of @rb. */
ram_addr_t qemu_ram_get_used_length(RAMBlock *rb)
{
    return rb->used_length;
}
|
|
|
|
|
2021-04-29 14:26:59 +03:00
|
|
|
/* Return the maximum length of @rb. */
ram_addr_t qemu_ram_get_max_length(RAMBlock *rb)
{
    return rb->max_length;
}
|
|
|
|
|
2017-03-07 21:36:36 +03:00
|
|
|
/* Return true if @rb has the RAM_SHARED flag set. */
bool qemu_ram_is_shared(RAMBlock *rb)
{
    return (rb->flags & RAM_SHARED) != 0;
}
|
|
|
|
|
2021-05-10 14:43:21 +03:00
|
|
|
/* Return true if @rb has the RAM_NORESERVE flag set. */
bool qemu_ram_is_noreserve(RAMBlock *rb)
{
    return (rb->flags & RAM_NORESERVE) != 0;
}
|
|
|
|
|
2018-03-12 20:20:58 +03:00
|
|
|
/* Note: Only set at the start of postcopy */
|
|
|
|
bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
|
|
|
|
{
|
|
|
|
return rb->flags & RAM_UF_ZEROPAGE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set the RAM_UF_ZEROPAGE flag on @rb. */
void qemu_ram_set_uf_zeroable(RAMBlock *rb)
{
    rb->flags = rb->flags | RAM_UF_ZEROPAGE;
}
|
|
|
|
|
2018-05-14 09:57:00 +03:00
|
|
|
/* Return true if @rb has the RAM_MIGRATABLE flag set. */
bool qemu_ram_is_migratable(RAMBlock *rb)
{
    return (rb->flags & RAM_MIGRATABLE) != 0;
}
|
|
|
|
|
|
|
|
/* Set the RAM_MIGRATABLE flag on @rb. */
void qemu_ram_set_migratable(RAMBlock *rb)
{
    rb->flags = rb->flags | RAM_MIGRATABLE;
}
|
|
|
|
|
|
|
|
/* Clear the RAM_MIGRATABLE flag on @rb. */
void qemu_ram_unset_migratable(RAMBlock *rb)
{
    rb->flags = rb->flags & ~RAM_MIGRATABLE;
}
|
|
|
|
|
2023-06-07 18:18:36 +03:00
|
|
|
/* Return true if @rb has the RAM_NAMED_FILE flag set. */
bool qemu_ram_is_named_file(RAMBlock *rb)
{
    return (rb->flags & RAM_NAMED_FILE) != 0;
}
|
|
|
|
|
2022-10-13 21:59:05 +03:00
|
|
|
/* Return the file descriptor recorded in @rb (rb->fd). */
int qemu_ram_get_fd(RAMBlock *rb)
{
    return rb->fd;
}
|
|
|
|
|
2024-01-02 18:35:28 +03:00
|
|
|
/* Called with the BQL held. */
|
2016-05-10 05:04:59 +03:00
|
|
|
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
|
2014-04-02 11:13:26 +04:00
|
|
|
{
|
2016-05-10 05:04:59 +03:00
|
|
|
RAMBlock *block;
|
2014-04-02 11:13:26 +04:00
|
|
|
|
2011-12-20 17:59:12 +04:00
|
|
|
assert(new_block);
|
|
|
|
assert(!new_block->idstr[0]);
|
2010-07-27 04:10:57 +04:00
|
|
|
|
2012-02-03 22:28:43 +04:00
|
|
|
if (dev) {
|
|
|
|
char *id = qdev_get_dev_path(dev);
|
2010-07-27 04:10:57 +04:00
|
|
|
if (id) {
|
|
|
|
snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
|
2011-08-21 07:09:37 +04:00
|
|
|
g_free(id);
|
2010-07-27 04:10:57 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
|
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
2016-05-10 05:04:59 +03:00
|
|
|
if (block != new_block &&
|
|
|
|
!strcmp(block->idstr, new_block->idstr)) {
|
2010-07-27 04:10:57 +04:00
|
|
|
fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
|
|
|
|
new_block->idstr);
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
2011-12-20 17:59:12 +04:00
|
|
|
}
|
|
|
|
|
2024-01-02 18:35:28 +03:00
|
|
|
/* Called with the BQL held. */
|
2016-05-10 05:04:59 +03:00
|
|
|
void qemu_ram_unset_idstr(RAMBlock *block)
|
2014-04-02 11:13:26 +04:00
|
|
|
{
|
2013-09-05 22:41:35 +04:00
|
|
|
/* FIXME: arch_init.c assumes that this is not called throughout
|
|
|
|
* migration. Ignore the problem since hot-unplug during migration
|
|
|
|
* does not work anyway.
|
|
|
|
*/
|
2014-04-02 11:13:26 +04:00
|
|
|
if (block) {
|
|
|
|
memset(block->idstr, 0, sizeof(block->idstr));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-29 22:09:37 +03:00
|
|
|
/* Return the page size recorded for @rb. */
size_t qemu_ram_pagesize(RAMBlock *rb)
{
    return rb->page_size;
}
|
|
|
|
|
2017-02-24 21:28:34 +03:00
|
|
|
/* Returns the largest size of page in use */
|
|
|
|
size_t qemu_ram_pagesize_largest(void)
|
|
|
|
{
|
|
|
|
RAMBlock *block;
|
|
|
|
size_t largest = 0;
|
|
|
|
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
2017-02-24 21:28:34 +03:00
|
|
|
largest = MAX(largest, qemu_ram_pagesize(block));
|
|
|
|
}
|
|
|
|
|
|
|
|
return largest;
|
|
|
|
}
|
|
|
|
|
2012-09-05 23:50:16 +04:00
|
|
|
static int memory_try_enable_merging(void *addr, size_t len)
|
|
|
|
{
|
2015-02-04 18:43:55 +03:00
|
|
|
if (!machine_mem_merge(current_machine)) {
|
2012-09-05 23:50:16 +04:00
|
|
|
/* disabled by the user */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
|
|
|
|
}
|
|
|
|
|
2021-04-29 14:27:02 +03:00
|
|
|
/*
|
|
|
|
* Resizing RAM while migrating can result in the migration being canceled.
|
|
|
|
* Care has to be taken if the guest might have already detected the memory.
|
2014-11-12 15:27:41 +03:00
|
|
|
*
|
|
|
|
* As memory core doesn't know how is memory accessed, it is up to
|
|
|
|
* resize callback to update device state and/or add assertions to detect
|
|
|
|
* misuse, if necessary.
|
|
|
|
*/
|
2016-05-10 05:04:59 +03:00
|
|
|
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
|
2014-11-12 15:27:41 +03:00
|
|
|
{
|
2021-04-29 14:27:00 +03:00
|
|
|
const ram_addr_t oldsize = block->used_length;
|
2020-04-03 13:18:27 +03:00
|
|
|
const ram_addr_t unaligned_size = newsize;
|
|
|
|
|
2014-11-12 15:27:41 +03:00
|
|
|
assert(block);
|
|
|
|
|
2024-01-02 04:57:49 +03:00
|
|
|
newsize = TARGET_PAGE_ALIGN(newsize);
|
|
|
|
newsize = REAL_HOST_PAGE_ALIGN(newsize);
|
2015-02-17 12:15:30 +03:00
|
|
|
|
2014-11-12 15:27:41 +03:00
|
|
|
if (block->used_length == newsize) {
|
2020-04-03 13:18:27 +03:00
|
|
|
/*
|
|
|
|
* We don't have to resize the ram block (which only knows aligned
|
|
|
|
* sizes), however, we have to notify if the unaligned size changed.
|
|
|
|
*/
|
|
|
|
if (unaligned_size != memory_region_size(block->mr)) {
|
|
|
|
memory_region_set_size(block->mr, unaligned_size);
|
|
|
|
if (block->resized) {
|
|
|
|
block->resized(block->idstr, unaligned_size, block->host);
|
|
|
|
}
|
|
|
|
}
|
2014-11-12 15:27:41 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(block->flags & RAM_RESIZEABLE)) {
|
|
|
|
error_setg_errno(errp, EINVAL,
|
2020-10-22 14:13:02 +03:00
|
|
|
"Size mismatch: %s: 0x" RAM_ADDR_FMT
|
|
|
|
" != 0x" RAM_ADDR_FMT, block->idstr,
|
2014-11-12 15:27:41 +03:00
|
|
|
newsize, block->used_length);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (block->max_length < newsize) {
|
|
|
|
error_setg_errno(errp, EINVAL,
|
2020-10-22 14:13:02 +03:00
|
|
|
"Size too large: %s: 0x" RAM_ADDR_FMT
|
2014-11-12 15:27:41 +03:00
|
|
|
" > 0x" RAM_ADDR_FMT, block->idstr,
|
|
|
|
newsize, block->max_length);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-04-29 14:27:00 +03:00
|
|
|
/* Notify before modifying the ram block and touching the bitmaps. */
|
|
|
|
if (block->host) {
|
|
|
|
ram_block_notify_resize(block->host, oldsize, newsize);
|
|
|
|
}
|
|
|
|
|
2014-11-12 15:27:41 +03:00
|
|
|
cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
|
|
|
|
block->used_length = newsize;
|
2015-03-23 13:56:01 +03:00
|
|
|
cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
|
|
|
|
DIRTY_CLIENTS_ALL);
|
2020-04-03 13:18:27 +03:00
|
|
|
memory_region_set_size(block->mr, unaligned_size);
|
2014-11-12 15:27:41 +03:00
|
|
|
if (block->resized) {
|
2020-04-03 13:18:27 +03:00
|
|
|
block->resized(block->idstr, unaligned_size, block->host);
|
2014-11-12 15:27:41 +03:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-11-21 03:08:41 +03:00
|
|
|
/*
|
|
|
|
* Trigger sync on the given ram block for range [start, start + length]
|
|
|
|
* with the backing store if one is available.
|
|
|
|
* Otherwise no-op.
|
|
|
|
* @Note: this is supposed to be a synchronous op.
|
|
|
|
*/
|
2020-05-08 09:24:56 +03:00
|
|
|
void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
|
2019-11-21 03:08:41 +03:00
|
|
|
{
|
|
|
|
/* The requested range should fit in within the block range */
|
|
|
|
g_assert((start + length) <= block->used_length);
|
|
|
|
|
|
|
|
#ifdef CONFIG_LIBPMEM
|
|
|
|
/* The lack of support for pmem should not block the sync */
|
|
|
|
if (ramblock_is_pmem(block)) {
|
2019-12-19 18:43:22 +03:00
|
|
|
void *addr = ramblock_ptr(block, start);
|
2019-11-21 03:08:41 +03:00
|
|
|
pmem_persist(addr, length);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (block->fd >= 0) {
|
|
|
|
/**
|
|
|
|
* Case there is no support for PMEM or the memory has not been
|
|
|
|
* specified as persistent (or is not one) - use the msync.
|
|
|
|
* Less optimal but still achieves the same goal
|
|
|
|
*/
|
2019-12-19 18:43:22 +03:00
|
|
|
void *addr = ramblock_ptr(block, start);
|
2019-11-21 03:08:41 +03:00
|
|
|
if (qemu_msync(addr, length, block->fd)) {
|
|
|
|
warn_report("%s: failed to sync memory range: start: "
|
|
|
|
RAM_ADDR_FMT " length: " RAM_ADDR_FMT,
|
|
|
|
__func__, start, length);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-25 16:33:20 +03:00
|
|
|
/* Called with ram_list.mutex held */
|
|
|
|
static void dirty_memory_extend(ram_addr_t old_ram_size,
|
|
|
|
ram_addr_t new_ram_size)
|
|
|
|
{
|
|
|
|
ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
|
|
|
|
DIRTY_MEMORY_BLOCK_SIZE);
|
|
|
|
ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
|
|
|
|
DIRTY_MEMORY_BLOCK_SIZE);
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Only need to extend if block count increased */
|
|
|
|
if (new_num_blocks <= old_num_blocks) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
|
|
|
|
DirtyMemoryBlocks *old_blocks;
|
|
|
|
DirtyMemoryBlocks *new_blocks;
|
|
|
|
int j;
|
|
|
|
|
2020-09-23 13:56:46 +03:00
|
|
|
old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
|
2016-01-25 16:33:20 +03:00
|
|
|
new_blocks = g_malloc(sizeof(*new_blocks) +
|
|
|
|
sizeof(new_blocks->blocks[0]) * new_num_blocks);
|
|
|
|
|
|
|
|
if (old_num_blocks) {
|
|
|
|
memcpy(new_blocks->blocks, old_blocks->blocks,
|
|
|
|
old_num_blocks * sizeof(old_blocks->blocks[0]));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (j = old_num_blocks; j < new_num_blocks; j++) {
|
|
|
|
new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
|
|
|
|
}
|
|
|
|
|
2020-09-23 13:56:46 +03:00
|
|
|
qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
|
2016-01-25 16:33:20 +03:00
|
|
|
|
|
|
|
if (old_blocks) {
|
|
|
|
g_free_rcu(old_blocks, rcu);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-06 11:01:24 +03:00
|
|
|
/*
 * Register @new_block in the global RAM block list.
 *
 * Steps, in order:
 *  - pick an offset for the block with find_ram_offset();
 *  - if the block has no host memory yet, allocate it (through Xen when
 *    Xen is enabled, otherwise as anonymous RAM);
 *  - for RAM_GUEST_MEMFD blocks, require discards and create the
 *    guest_memfd;
 *  - extend the dirty memory bitmaps if the RAM space grew;
 *  - insert the block into ram_list, kept sorted from biggest to smallest
 *    max_length, and bump the list version;
 *  - mark the used range dirty, apply madvise settings and notify
 *    listeners.
 *
 * On failure @errp is set and the block is not inserted.  Anonymous host
 * memory allocated by this function is freed again on the guest_memfd
 * error paths.
 *
 * The ramlist lock is taken here and released on every exit path: each
 * error branch drops it itself before returning or jumping to out_free.
 */
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
    const bool noreserve = qemu_ram_is_noreserve(new_block);
    const bool shared = qemu_ram_is_shared(new_block);
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    bool free_on_error = false;
    ram_addr_t old_ram_size, new_ram_size;
    Error *err = NULL;

    old_ram_size = last_ram_page();

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr, &err);
            if (err) {
                error_propagate(errp, err);
                qemu_mutex_unlock_ramlist();
                return;
            }
        } else {
            new_block->host = qemu_anon_ram_alloc(new_block->max_length,
                                                  &new_block->mr->align,
                                                  shared, noreserve);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
            /* We own this mapping, so later failures must release it. */
            free_on_error = true;
        }
    }

    if (new_block->flags & RAM_GUEST_MEMFD) {
        assert(kvm_enabled());
        assert(new_block->guest_memfd < 0);

        if (ram_block_discard_require(true) < 0) {
            error_setg_errno(errp, errno,
                "cannot set up private guest memory: discard currently blocked");
            error_append_hint(errp, "Are you using assigned devices?\n");
            /* Release the ramlist lock; out_free does not do it for us. */
            qemu_mutex_unlock_ramlist();
            goto out_free;
        }

        new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
                                                        0, errp);
        if (new_block->guest_memfd < 0) {
            qemu_mutex_unlock_ramlist();
            goto out_free;
        }
    }

    new_ram_size = MAX(old_ram_size,
              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        dirty_memory_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    RAMBLOCK_FOREACH(block) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        /*
         * MADV_DONTFORK is also needed by KVM in absence of synchronous MMU
         * Configure it unless the machine is a qtest server, in which case
         * KVM is not used and it may be forked (eg for fuzzing purposes).
         */
        if (!qtest_enabled()) {
            qemu_madvise(new_block->host, new_block->max_length,
                         QEMU_MADV_DONTFORK);
        }
        ram_block_notify_add(new_block->host, new_block->used_length,
                             new_block->max_length);
    }
    return;

out_free:
    /* Reached with the ramlist lock already released. */
    if (free_on_error) {
        qemu_anon_ram_free(new_block->host, new_block->max_length);
        new_block->host = NULL;
    }
}
|
2007-02-09 02:08:38 +03:00
|
|
|
|
2018-09-24 15:32:05 +03:00
|
|
|
#ifdef CONFIG_POSIX
|
2017-06-02 17:12:23 +03:00
|
|
|
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
2021-01-29 19:46:04 +03:00
|
|
|
uint32_t ram_flags, int fd, off_t offset,
|
2023-09-06 15:04:54 +03:00
|
|
|
Error **errp)
|
2014-05-14 13:43:18 +04:00
|
|
|
{
|
|
|
|
RAMBlock *new_block;
|
2014-09-09 09:27:54 +04:00
|
|
|
Error *local_err = NULL;
|
2020-04-29 11:50:09 +03:00
|
|
|
int64_t file_size, file_align;
|
2014-05-14 13:43:18 +04:00
|
|
|
|
2018-07-18 10:48:00 +03:00
|
|
|
/* Just support these ram flags by now. */
|
2021-07-19 14:21:04 +03:00
|
|
|
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
|
2023-09-06 15:04:54 +03:00
|
|
|
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
|
2024-03-20 11:39:02 +03:00
|
|
|
RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
|
2018-07-18 10:48:00 +03:00
|
|
|
|
2014-05-14 13:43:18 +04:00
|
|
|
if (xen_enabled()) {
|
2014-05-14 13:43:20 +04:00
|
|
|
error_setg(errp, "-mem-path not supported with Xen");
|
2016-03-01 09:18:18 +03:00
|
|
|
return NULL;
|
2014-05-14 13:43:18 +04:00
|
|
|
}
|
|
|
|
|
2017-06-02 17:12:21 +03:00
|
|
|
if (kvm_enabled() && !kvm_has_sync_mmu()) {
|
|
|
|
error_setg(errp,
|
|
|
|
"host lacks kvm mmu notifiers, -mem-path unsupported");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2024-01-02 04:57:49 +03:00
|
|
|
size = TARGET_PAGE_ALIGN(size);
|
|
|
|
size = REAL_HOST_PAGE_ALIGN(size);
|
|
|
|
|
2017-06-02 17:12:22 +03:00
|
|
|
file_size = get_file_size(fd);
|
2023-04-04 01:14:21 +03:00
|
|
|
if (file_size > offset && file_size < (offset + size)) {
|
2020-02-19 19:09:48 +03:00
|
|
|
error_setg(errp, "backing store size 0x%" PRIx64
|
2017-06-02 17:12:22 +03:00
|
|
|
" does not match 'size' option 0x" RAM_ADDR_FMT,
|
2020-02-19 19:09:48 +03:00
|
|
|
file_size, size);
|
2017-06-02 17:12:22 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-04-29 11:50:09 +03:00
|
|
|
file_align = get_file_align(fd);
|
2021-08-12 18:06:24 +03:00
|
|
|
if (file_align > 0 && file_align > mr->align) {
|
2020-04-29 11:50:09 +03:00
|
|
|
error_setg(errp, "backing store align 0x%" PRIx64
|
2020-04-29 11:50:10 +03:00
|
|
|
" is larger than 'align' option 0x%" PRIx64,
|
2020-04-29 11:50:09 +03:00
|
|
|
file_align, mr->align);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-05-14 13:43:18 +04:00
|
|
|
new_block = g_malloc0(sizeof(*new_block));
|
|
|
|
new_block->mr = mr;
|
2014-12-15 23:55:32 +03:00
|
|
|
new_block->used_length = size;
|
|
|
|
new_block->max_length = size;
|
2018-07-18 10:47:58 +03:00
|
|
|
new_block->flags = ram_flags;
|
2024-03-20 11:39:02 +03:00
|
|
|
new_block->guest_memfd = -1;
|
2023-09-06 15:04:54 +03:00
|
|
|
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
|
|
|
|
errp);
|
2014-05-14 13:43:20 +04:00
|
|
|
if (!new_block->host) {
|
|
|
|
g_free(new_block);
|
2016-03-01 09:18:18 +03:00
|
|
|
return NULL;
|
2014-05-14 13:43:20 +04:00
|
|
|
}
|
|
|
|
|
2021-04-06 11:01:24 +03:00
|
|
|
ram_block_add(new_block, &local_err);
|
2014-09-09 09:27:54 +04:00
|
|
|
if (local_err) {
|
|
|
|
g_free(new_block);
|
|
|
|
error_propagate(errp, local_err);
|
2016-03-01 09:18:18 +03:00
|
|
|
return NULL;
|
2014-09-09 09:27:54 +04:00
|
|
|
}
|
2016-03-01 09:18:18 +03:00
|
|
|
return new_block;
|
2017-06-02 17:12:23 +03:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
|
2018-07-18 10:47:58 +03:00
|
|
|
uint32_t ram_flags, const char *mem_path,
|
2023-09-06 15:04:54 +03:00
|
|
|
off_t offset, Error **errp)
|
2017-06-02 17:12:23 +03:00
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
bool created;
|
|
|
|
RAMBlock *block;
|
|
|
|
|
2023-09-06 15:04:54 +03:00
|
|
|
fd = file_ram_open(mem_path, memory_region_name(mr),
|
softmmu/physmem: Fail creation of new files in file_ram_open() with readonly=true
Currently, if a file does not exist yet, file_ram_open() will create new
empty file and open it writable. However, it even does that when
readonly=true was specified.
Specifying O_RDONLY instead to create a new readonly file would
theoretically work, however, ftruncate() will refuse to resize the new
empty file and we'll get a warning:
ftruncate: Invalid argument
And later eventually more problems when actually mmap'ing that file and
accessing it.
If someone intends to let QEMU open+mmap a file read-only, better
create+resize+fill that file ahead of time outside of QEMU context.
We'll now fail with:
./qemu-system-x86_64 \
-object memory-backend-file,id=ram0,mem-path=tmp,readonly=true,size=1g
qemu-system-x86_64: can't open backing store tmp for guest RAM: No such file or directory
All use cases of readonly files (R/O NVDIMMs, VM templating) work on
existing files, so silently creating new files might just hide user
errors when accidentally specifying a non-existent file.
Note that the only memory-backend-file will end up calling
memory_region_init_ram_from_file() -> qemu_ram_alloc_from_file() ->
file_ram_open().
Move error reporting to the single caller.
Message-ID: <20230906120503.359863-7-david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-06 15:04:58 +03:00
|
|
|
!!(ram_flags & RAM_READONLY_FD), &created);
|
2017-06-02 17:12:23 +03:00
|
|
|
if (fd < 0) {
|
softmmu/physmem: Fail creation of new files in file_ram_open() with readonly=true
Currently, if a file does not exist yet, file_ram_open() will create new
empty file and open it writable. However, it even does that when
readonly=true was specified.
Specifying O_RDONLY instead to create a new readonly file would
theoretically work, however, ftruncate() will refuse to resize the new
empty file and we'll get a warning:
ftruncate: Invalid argument
And later eventually more problems when actually mmap'ing that file and
accessing it.
If someone intends to let QEMU open+mmap a file read-only, better
create+resize+fill that file ahead of time outside of QEMU context.
We'll now fail with:
./qemu-system-x86_64 \
-object memory-backend-file,id=ram0,mem-path=tmp,readonly=true,size=1g
qemu-system-x86_64: can't open backing store tmp for guest RAM: No such file or directory
All use cases of readonly files (R/O NVDIMMs, VM templating) work on
existing files, so silently creating new files might just hide user
errors when accidentally specifying a non-existent file.
Note that the only memory-backend-file will end up calling
memory_region_init_ram_from_file() -> qemu_ram_alloc_from_file() ->
file_ram_open().
Move error reporting to the single caller.
Message-ID: <20230906120503.359863-7-david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-06 15:04:58 +03:00
|
|
|
error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM",
|
|
|
|
mem_path);
|
softmmu/physmem: Hint that "readonly=on,rom=off" exists when opening file R/W for private mapping fails
It's easy to miss that memory-backend-file with "share=off" (default)
will always try opening the file R/W as default, and fail if we don't
have write permissions to the file.
In that case, the user has to explicit specify "readonly=on,rom=off" to
get usable RAM, for example, for VM templating.
Let's hint that '-object memory-backend-file,readonly=on,rom=off,...'
exists to consume R/O files in a private mapping to create writable RAM,
but only if we have permissions to open the file read-only.
Message-ID: <20230906120503.359863-11-david@redhat.com>
Suggested-by: ThinerLogoer <logoerthiner1@163.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-09-06 15:05:02 +03:00
|
|
|
if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) &&
|
|
|
|
fd == -EACCES) {
|
|
|
|
/*
|
|
|
|
* If we can open the file R/O (note: will never create a new file)
|
|
|
|
* and we are dealing with a private mapping, there are still ways
|
|
|
|
* to consume such files and get RAM instead of ROM.
|
|
|
|
*/
|
|
|
|
fd = file_ram_open(mem_path, memory_region_name(mr), true,
|
|
|
|
&created);
|
|
|
|
if (fd < 0) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
assert(!created);
|
|
|
|
close(fd);
|
|
|
|
error_append_hint(errp, "Consider opening the backing store"
|
|
|
|
" read-only but still creating writable RAM using"
|
|
|
|
" '-object memory-backend-file,readonly=on,rom=off...'"
|
|
|
|
" (see \"VM templating\" documentation)\n");
|
|
|
|
}
|
2017-06-02 17:12:23 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-09-06 15:04:54 +03:00
|
|
|
block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
|
2017-06-02 17:12:23 +03:00
|
|
|
if (!block) {
|
|
|
|
if (created) {
|
|
|
|
unlink(mem_path);
|
|
|
|
}
|
|
|
|
close(fd);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return block;
|
2014-05-14 13:43:18 +04:00
|
|
|
}
|
2014-05-14 13:43:19 +04:00
|
|
|
#endif
|
2014-05-14 13:43:18 +04:00
|
|
|
|
2014-11-12 15:27:41 +03:00
|
|
|
static
|
2016-03-01 09:18:18 +03:00
|
|
|
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
|
|
|
void (*resized)(const char*,
|
|
|
|
uint64_t length,
|
|
|
|
void *host),
|
2021-05-10 14:43:19 +03:00
|
|
|
void *host, uint32_t ram_flags,
|
2016-03-01 09:18:18 +03:00
|
|
|
MemoryRegion *mr, Error **errp)
|
2014-05-14 13:43:18 +04:00
|
|
|
{
|
|
|
|
RAMBlock *new_block;
|
2014-09-09 09:27:54 +04:00
|
|
|
Error *local_err = NULL;
|
2024-01-02 04:57:49 +03:00
|
|
|
int align;
|
2014-05-14 13:43:18 +04:00
|
|
|
|
2021-05-10 14:43:21 +03:00
|
|
|
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
|
2024-03-20 11:39:02 +03:00
|
|
|
RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
2021-05-10 14:43:19 +03:00
|
|
|
assert(!host ^ (ram_flags & RAM_PREALLOC));
|
|
|
|
|
2024-01-02 04:57:49 +03:00
|
|
|
align = qemu_real_host_page_size();
|
|
|
|
align = MAX(align, TARGET_PAGE_SIZE);
|
|
|
|
size = ROUND_UP(size, align);
|
|
|
|
max_size = ROUND_UP(max_size, align);
|
|
|
|
|
2014-05-14 13:43:18 +04:00
|
|
|
new_block = g_malloc0(sizeof(*new_block));
|
|
|
|
new_block->mr = mr;
|
2014-11-12 15:27:41 +03:00
|
|
|
new_block->resized = resized;
|
2014-12-15 23:55:32 +03:00
|
|
|
new_block->used_length = size;
|
|
|
|
new_block->max_length = max_size;
|
2014-11-12 15:27:41 +03:00
|
|
|
assert(max_size >= size);
|
2014-05-14 13:43:18 +04:00
|
|
|
new_block->fd = -1;
|
2024-03-20 11:39:02 +03:00
|
|
|
new_block->guest_memfd = -1;
|
2022-03-23 18:57:22 +03:00
|
|
|
new_block->page_size = qemu_real_host_page_size();
|
2014-05-14 13:43:18 +04:00
|
|
|
new_block->host = host;
|
2021-05-10 14:43:19 +03:00
|
|
|
new_block->flags = ram_flags;
|
2021-04-06 11:01:24 +03:00
|
|
|
ram_block_add(new_block, &local_err);
|
2014-09-09 09:27:54 +04:00
|
|
|
if (local_err) {
|
|
|
|
g_free(new_block);
|
|
|
|
error_propagate(errp, local_err);
|
2016-03-01 09:18:18 +03:00
|
|
|
return NULL;
|
2014-09-09 09:27:54 +04:00
|
|
|
}
|
2016-03-01 09:18:18 +03:00
|
|
|
return new_block;
|
2014-05-14 13:43:18 +04:00
|
|
|
}
|
|
|
|
|
2016-03-01 09:18:18 +03:00
|
|
|
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
|
2014-11-12 15:27:41 +03:00
|
|
|
MemoryRegion *mr, Error **errp)
|
|
|
|
{
|
2021-05-10 14:43:19 +03:00
|
|
|
return qemu_ram_alloc_internal(size, size, NULL, host, RAM_PREALLOC, mr,
|
|
|
|
errp);
|
2014-11-12 15:27:41 +03:00
|
|
|
}
|
|
|
|
|
2021-05-10 14:43:19 +03:00
|
|
|
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
|
2017-12-13 17:37:37 +03:00
|
|
|
MemoryRegion *mr, Error **errp)
|
2010-08-18 10:41:49 +04:00
|
|
|
{
|
2024-03-20 11:39:02 +03:00
|
|
|
assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
2021-05-10 14:43:19 +03:00
|
|
|
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
|
2014-11-12 15:27:41 +03:00
|
|
|
}
|
|
|
|
|
2016-03-01 09:18:18 +03:00
|
|
|
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
|
2014-11-12 15:27:41 +03:00
|
|
|
void (*resized)(const char*,
|
|
|
|
uint64_t length,
|
|
|
|
void *host),
|
|
|
|
MemoryRegion *mr, Error **errp)
|
|
|
|
{
|
2021-05-10 14:43:19 +03:00
|
|
|
return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
|
|
|
|
RAM_RESIZEABLE, mr, errp);
|
2010-08-18 10:41:49 +04:00
|
|
|
}
|
|
|
|
|
2013-09-09 19:58:40 +04:00
|
|
|
/*
 * Release the resources of @block and free the block itself.
 * Passed to call_rcu() by qemu_ram_free().
 */
static void reclaim_ramblock(RAMBlock *block)
{
    /* For RAM_PREALLOC blocks no host memory is released here. */
    if (!(block->flags & RAM_PREALLOC)) {
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
        } else if (block->fd >= 0) {
            /* File-backed block: unmap it and close its descriptor. */
            qemu_ram_munmap(block->fd, block->host, block->max_length);
            close(block->fd);
#endif
        } else {
            qemu_anon_ram_free(block->host, block->max_length);
        }
    }

    if (block->guest_memfd >= 0) {
        close(block->guest_memfd);
        ram_block_discard_require(false);
    }

    g_free(block);
}
|
|
|
|
|
2016-03-01 09:18:22 +03:00
|
|
|
/*
 * Unlink @block from the RAM list and schedule its reclamation through
 * call_rcu(). A NULL @block is ignored.
 */
void qemu_ram_free(RAMBlock *block)
{
    if (block == NULL) {
        return;
    }

    /* Notify only for blocks that currently have a host mapping. */
    if (block->host != NULL) {
        ram_block_notify_remove(block->host, block->used_length,
                                block->max_length);
    }

    qemu_mutex_lock_ramlist();

    QLIST_REMOVE_RCU(block, next);
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;

    /* Actual teardown happens in reclaim_ramblock(), deferred via RCU. */
    call_rcu(block, reclaim_ramblock, rcu);

    qemu_mutex_unlock_ramlist();
}
|
|
|
|
|
2011-03-02 10:56:19 +03:00
|
|
|
#ifndef _WIN32
|
|
|
|
/*
 * Re-create the host mapping for @length bytes at RAM address @addr.
 *
 * Every RAMBlock containing @addr is considered. RAM_PREALLOC blocks are
 * left alone, Xen aborts, and all other blocks get a fresh MAP_FIXED
 * mapping at the same host address (file-backed when the block has an fd,
 * anonymous otherwise). A failed remap is fatal: exit(1).
 */
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;

    RAMBLOCK_FOREACH(block) {
        ram_addr_t offset = addr - block->offset;
        void *vaddr, *area;
        int flags, prot;

        /* Unsigned compare also rejects addr below the block's start. */
        if (offset >= block->max_length) {
            continue;
        }

        vaddr = ramblock_ptr(block, offset);

        if (block->flags & RAM_PREALLOC) {
            continue;
        }
        if (xen_enabled()) {
            abort();
        }

        /* Derive mmap flags and protection from the block's RAM flags. */
        flags = MAP_FIXED;
        flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
        flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;

        prot = PROT_READ;
        prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;

        if (block->fd >= 0) {
            area = mmap(vaddr, length, prot, flags, block->fd,
                        offset + block->fd_offset);
        } else {
            flags |= MAP_ANONYMOUS;
            area = mmap(vaddr, length, prot, flags, -1, 0);
        }

        /* Anything but the requested address (incl. failure) is fatal. */
        if (area != vaddr) {
            error_report("Could not remap addr: "
                         RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
                         length, addr);
            exit(1);
        }

        memory_try_enable_merging(vaddr, length);
        qemu_ram_setup_dump(vaddr, length);
    }
}
|
|
|
|
#endif /* !_WIN32 */
|
|
|
|
|
2023-10-05 21:17:13 +03:00
|
|
|
/*
|
|
|
|
* Return a host pointer to guest's ram.
|
2024-04-30 19:49:35 +03:00
|
|
|
* For Xen, foreign mappings get created if they don't already exist.
|
2013-09-05 22:41:35 +04:00
|
|
|
*
|
2024-04-30 19:49:35 +03:00
|
|
|
* @block: block for the RAM to lookup (optional and may be NULL).
|
|
|
|
* @addr: address within the memory region.
|
|
|
|
* @size: pointer to requested size (optional and may be NULL).
|
|
|
|
* size may get modified and return a value smaller than
|
|
|
|
* what was requested.
|
|
|
|
* @lock: wether to lock the mapping in xen-mapcache until invalidated.
|
|
|
|
* @is_write: hint wether to map RW or RO in the xen-mapcache.
|
|
|
|
* (optional and may always be set to true).
|
2013-09-05 22:41:35 +04:00
|
|
|
*
|
2015-12-16 12:31:26 +03:00
|
|
|
* Called within RCU critical section.
|
2013-09-05 22:41:35 +04:00
|
|
|
*/
|
2024-02-15 12:15:06 +03:00
|
|
|
static void *qemu_ram_ptr_length(RAMBlock *block, ram_addr_t addr,
|
2024-04-30 19:49:35 +03:00
|
|
|
hwaddr *size, bool lock,
|
|
|
|
bool is_write)
|
2011-05-19 21:35:45 +04:00
|
|
|
{
|
2023-10-05 21:17:13 +03:00
|
|
|
hwaddr len = 0;
|
|
|
|
|
|
|
|
if (size && *size == 0) {
|
2011-06-27 21:26:06 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
2015-12-16 12:31:26 +03:00
|
|
|
|
2016-02-20 05:35:20 +03:00
|
|
|
if (block == NULL) {
|
|
|
|
block = qemu_get_ram_block(addr);
|
2016-02-22 13:02:12 +03:00
|
|
|
addr -= block->offset;
|
2016-02-20 05:35:20 +03:00
|
|
|
}
|
2023-10-05 21:17:13 +03:00
|
|
|
if (size) {
|
|
|
|
*size = MIN(*size, block->max_length - addr);
|
|
|
|
len = *size;
|
|
|
|
}
|
2015-12-16 12:31:26 +03:00
|
|
|
|
|
|
|
if (xen_enabled() && block->host == NULL) {
|
|
|
|
/* We need to check if the requested address is in the RAM
|
|
|
|
* because we don't want to map the entire memory in QEMU.
|
|
|
|
* In that case just map the requested area.
|
|
|
|
*/
|
|
|
|
if (block->offset == 0) {
|
2024-04-30 19:49:35 +03:00
|
|
|
return xen_map_cache(block->mr, addr, len, lock, lock,
|
|
|
|
is_write);
|
2011-05-19 21:35:45 +04:00
|
|
|
}
|
|
|
|
|
2024-04-30 19:49:35 +03:00
|
|
|
block->host = xen_map_cache(block->mr, block->offset,
|
|
|
|
block->max_length, 1,
|
|
|
|
lock, is_write);
|
2011-05-19 21:35:45 +04:00
|
|
|
}
|
2015-12-16 12:31:26 +03:00
|
|
|
|
2016-02-22 13:02:12 +03:00
|
|
|
return ramblock_ptr(block, addr);
|
2011-05-19 21:35:45 +04:00
|
|
|
}
|
|
|
|
|
2023-10-05 21:17:13 +03:00
|
|
|
/*
|
|
|
|
* Return a host pointer to ram allocated with qemu_ram_alloc.
|
|
|
|
* This should not be used for general purpose DMA. Use address_space_map
|
|
|
|
* or address_space_rw instead. For local memory (e.g. video ram) that the
|
|
|
|
* device owns, use memory_region_get_ram_ptr.
|
|
|
|
*
|
|
|
|
* Called within RCU critical section.
|
|
|
|
*/
|
|
|
|
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
|
|
|
|
{
|
2024-04-30 19:49:35 +03:00
|
|
|
return qemu_ram_ptr_length(ram_block, addr, NULL, false, true);
|
2023-10-05 21:17:13 +03:00
|
|
|
}
|
|
|
|
|
2018-03-12 20:20:57 +03:00
|
|
|
/* Return the offset of a hostpointer within a ramblock */
|
|
|
|
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host)
|
|
|
|
{
|
|
|
|
ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host;
|
|
|
|
assert((uintptr_t)host >= (uintptr_t)rb->host);
|
|
|
|
assert(res < rb->max_length);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:10:32 +03:00
|
|
|
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
|
|
|
|
ram_addr_t *offset)
|
2009-04-11 18:47:08 +04:00
|
|
|
{
|
2009-04-11 21:15:54 +04:00
|
|
|
RAMBlock *block;
|
|
|
|
uint8_t *host = ptr;
|
|
|
|
|
2011-06-22 00:59:09 +04:00
|
|
|
if (xen_enabled()) {
|
2016-05-26 11:07:50 +03:00
|
|
|
ram_addr_t ram_addr;
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2016-05-26 11:07:50 +03:00
|
|
|
ram_addr = xen_ram_addr_from_mapcache(ptr);
|
|
|
|
block = qemu_get_ram_block(ram_addr);
|
2015-11-05 21:10:32 +03:00
|
|
|
if (block) {
|
2016-06-09 18:56:17 +03:00
|
|
|
*offset = ram_addr - block->offset;
|
2015-11-05 21:10:32 +03:00
|
|
|
}
|
|
|
|
return block;
|
2011-05-19 21:35:46 +04:00
|
|
|
}
|
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2020-09-23 13:56:46 +03:00
|
|
|
block = qatomic_rcu_read(&ram_list.mru_block);
|
2014-12-15 23:55:32 +03:00
|
|
|
if (block && block->host && host - block->host < block->max_length) {
|
2013-05-06 16:28:39 +04:00
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
2010-08-31 19:41:25 +04:00
|
|
|
/* This case append when the block is not mapped. */
|
|
|
|
if (block->host == NULL) {
|
|
|
|
continue;
|
|
|
|
}
|
2014-12-15 23:55:32 +03:00
|
|
|
if (host - block->host < block->max_length) {
|
2013-05-06 16:28:39 +04:00
|
|
|
goto found;
|
2010-06-11 21:11:42 +04:00
|
|
|
}
|
2009-04-11 21:15:54 +04:00
|
|
|
}
|
2010-08-31 19:41:25 +04:00
|
|
|
|
2013-05-06 16:36:15 +04:00
|
|
|
return NULL;
|
2013-05-06 16:28:39 +04:00
|
|
|
|
|
|
|
found:
|
2015-11-05 21:10:32 +03:00
|
|
|
*offset = (host - block->host);
|
|
|
|
if (round_offset) {
|
|
|
|
*offset &= TARGET_PAGE_MASK;
|
|
|
|
}
|
|
|
|
return block;
|
|
|
|
}
|
|
|
|
|
2015-11-05 21:10:33 +03:00
|
|
|
/*
|
|
|
|
* Finds the named RAMBlock
|
|
|
|
*
|
|
|
|
* name: The name of RAMBlock to find
|
|
|
|
*
|
|
|
|
* Returns: RAMBlock (or NULL if not found)
|
|
|
|
*/
|
|
|
|
RAMBlock *qemu_ram_block_by_name(const char *name)
|
|
|
|
{
|
|
|
|
RAMBlock *block;
|
|
|
|
|
2017-05-12 07:17:39 +03:00
|
|
|
RAMBLOCK_FOREACH(block) {
|
2015-11-05 21:10:33 +03:00
|
|
|
if (!strcmp(name, block->idstr)) {
|
|
|
|
return block;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-10-04 12:06:28 +03:00
|
|
|
/*
|
|
|
|
* Some of the system routines need to translate from a host pointer
|
|
|
|
* (typically a TLB entry) back to a ram offset.
|
|
|
|
*/
|
2016-03-25 14:55:08 +03:00
|
|
|
ram_addr_t qemu_ram_addr_from_host(void *ptr)
|
2015-11-05 21:10:32 +03:00
|
|
|
{
|
|
|
|
RAMBlock *block;
|
2016-05-26 11:07:50 +03:00
|
|
|
ram_addr_t offset;
|
2015-11-05 21:10:32 +03:00
|
|
|
|
2016-05-26 11:07:50 +03:00
|
|
|
block = qemu_ram_block_from_host(ptr, false, &offset);
|
2015-11-05 21:10:32 +03:00
|
|
|
if (!block) {
|
2016-03-25 14:55:08 +03:00
|
|
|
return RAM_ADDR_INVALID;
|
2015-11-05 21:10:32 +03:00
|
|
|
}
|
|
|
|
|
2016-03-25 14:55:08 +03:00
|
|
|
return block->offset + offset;
|
2010-10-11 22:31:19 +04:00
|
|
|
}
|
2010-06-11 21:11:42 +04:00
|
|
|
|
2022-08-10 22:04:15 +03:00
|
|
|
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
|
|
|
|
{
|
|
|
|
ram_addr_t ram_addr;
|
|
|
|
|
|
|
|
ram_addr = qemu_ram_addr_from_host(ptr);
|
|
|
|
if (ram_addr == RAM_ADDR_INVALID) {
|
|
|
|
error_report("Bad ram pointer %p", ptr);
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
return ram_addr;
|
|
|
|
}
|
|
|
|
|
2018-03-05 02:19:49 +03:00
|
|
|
static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
|
2020-02-19 21:52:44 +03:00
|
|
|
MemTxAttrs attrs, void *buf, hwaddr len);
|
2017-09-21 11:50:58 +03:00
|
|
|
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
|
2020-02-19 21:52:44 +03:00
|
|
|
const void *buf, hwaddr len);
|
2019-01-17 15:49:01 +03:00
|
|
|
static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
|
2018-05-31 16:50:52 +03:00
|
|
|
bool is_write, MemTxAttrs attrs);
|
2017-09-21 11:50:58 +03:00
|
|
|
|
2015-04-26 18:49:24 +03:00
|
|
|
/*
 * Read handler for subpage regions: forwards the access to the FlatView
 * at subpage->base + @addr and decodes the @len bytes read into *@data.
 * Returns the flatview_read() result on failure, MEMTX_OK otherwise.
 */
static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *sp = opaque;
    uint8_t bytes[8];
    MemTxResult res;

#ifdef DEBUG_SUBPAGE
    printf("%s: subpage %p len %u addr " HWADDR_FMT_plx "\n", __func__,
           sp, len, addr);
#endif

    res = flatview_read(sp->fv, addr + sp->base, attrs, bytes, len);
    if (res) {
        return res;
    }

    *data = ldn_p(bytes, len);
    return MEMTX_OK;
}
|
|
|
|
|
2015-04-26 18:49:24 +03:00
|
|
|
/*
 * Write handler for subpage regions: encodes @value into @len bytes and
 * forwards them to the FlatView at subpage->base + @addr.
 * Returns the flatview_write() result.
 */
static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *sp = opaque;
    uint8_t bytes[8];

#ifdef DEBUG_SUBPAGE
    printf("%s: subpage %p len %u addr " HWADDR_FMT_plx
           " value %"PRIx64"\n",
           __func__, sp, len, addr, value);
#endif

    stn_p(bytes, len, value);

    return flatview_write(sp->fv, addr + sp->base, attrs, bytes, len);
}
|
|
|
|
|
2013-05-24 16:02:39 +04:00
|
|
|
static bool subpage_accepts(void *opaque, hwaddr addr,
|
2018-05-31 16:50:52 +03:00
|
|
|
unsigned len, bool is_write,
|
|
|
|
MemTxAttrs attrs)
|
2013-05-24 16:02:39 +04:00
|
|
|
{
|
2013-05-26 23:55:37 +04:00
|
|
|
subpage_t *subpage = opaque;
|
2013-05-24 16:02:39 +04:00
|
|
|
#if defined(DEBUG_SUBPAGE)
|
2023-01-11 00:29:47 +03:00
|
|
|
printf("%s: subpage %p %c len %u addr " HWADDR_FMT_plx "\n",
|
2013-05-26 23:55:37 +04:00
|
|
|
__func__, subpage, is_write ? 'w' : 'r', len, addr);
|
2013-05-24 16:02:39 +04:00
|
|
|
#endif
|
|
|
|
|
2017-09-21 11:50:58 +03:00
|
|
|
return flatview_access_valid(subpage->fv, addr + subpage->base,
|
2018-05-31 16:50:52 +03:00
|
|
|
len, is_write, attrs);
|
2013-05-24 16:02:39 +04:00
|
|
|
}
|
|
|
|
|
2012-01-02 14:32:48 +04:00
|
|
|
static const MemoryRegionOps subpage_ops = {
|
2015-04-26 18:49:24 +03:00
|
|
|
.read_with_attrs = subpage_read,
|
|
|
|
.write_with_attrs = subpage_write,
|
2014-12-22 15:11:39 +03:00
|
|
|
.impl.min_access_size = 1,
|
|
|
|
.impl.max_access_size = 8,
|
|
|
|
.valid.min_access_size = 1,
|
|
|
|
.valid.max_access_size = 8,
|
2013-05-24 16:02:39 +04:00
|
|
|
.valid.accepts = subpage_accepts,
|
2012-01-02 14:32:48 +04:00
|
|
|
.endianness = DEVICE_NATIVE_ENDIAN,
|
2007-05-26 21:36:03 +04:00
|
|
|
};
|
|
|
|
|
2019-03-21 11:25:53 +03:00
|
|
|
/*
 * Assign @section to every sub_section slot of @mmio from
 * SUBPAGE_IDX(@start) to SUBPAGE_IDX(@end), inclusive.
 *
 * Returns 0 on success, -1 if @start or @end is not below
 * TARGET_PAGE_SIZE.
 */
static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }

    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#ifdef DEBUG_SUBPAGE
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif

    while (idx <= eidx) {
        mmio->sub_section[idx] = section;
        idx++;
    }

    return 0;
}
|
|
|
|
|
2017-09-21 11:50:58 +03:00
|
|
|
/*
 * Allocate a subpage for the page at @base in @fv and set up its I/O
 * memory region (TARGET_PAGE_SIZE bytes, subpage_ops).
 * Returns the new subpage.
 */
static subpage_t *subpage_init(FlatView *fv, hwaddr base)
{
    subpage_t *sp;

    /* sp->sub_section is set to PHYS_SECTION_UNASSIGNED with g_malloc0 */
    sp = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
    sp->fv = fv;
    sp->base = base;

    memory_region_init_io(&sp->iomem, NULL, &subpage_ops, sp,
                          NULL, TARGET_PAGE_SIZE);
    sp->iomem.subpage = true;

#ifdef DEBUG_SUBPAGE
    printf("%s: %p base " HWADDR_FMT_plx " len %08x\n", __func__,
           sp, base, TARGET_PAGE_SIZE);
#endif

    return sp;
}
|
|
|
|
|
2017-09-21 11:50:58 +03:00
|
|
|
/*
 * Add a section for @mr to @map, starting at offset 0 in both the address
 * space and the region, with size int128_2_64().
 *
 * @fv must not be NULL.
 *
 * Returns the index that phys_section_add() assigned to the new section.
 */
static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr)
{
    assert(fv);
    MemoryRegionSection section = {
        .fv = fv,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    /* Pass the address of the local section descriptor. */
    return phys_section_add(map, &section);
}
|
|
|
|
|
2018-06-15 16:57:14 +03:00
|
|
|
MemoryRegionSection *iotlb_to_section(CPUState *cpu,
|
|
|
|
hwaddr index, MemTxAttrs attrs)
|
2012-03-08 19:06:55 +04:00
|
|
|
{
|
2016-01-21 17:15:05 +03:00
|
|
|
int asidx = cpu_asidx_from_attrs(cpu, attrs);
|
|
|
|
CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
|
2023-08-26 02:13:17 +03:00
|
|
|
AddressSpaceDispatch *d = cpuas->memory_dispatch;
|
2023-08-26 00:06:58 +03:00
|
|
|
int section_index = index & ~TARGET_PAGE_MASK;
|
|
|
|
MemoryRegionSection *ret;
|
|
|
|
|
|
|
|
assert(section_index < d->map.sections_nb);
|
|
|
|
ret = d->map.sections + section_index;
|
|
|
|
assert(ret->mr);
|
|
|
|
assert(ret->mr->ops);
|
2013-08-16 10:26:30 +04:00
|
|
|
|
2023-08-26 00:06:58 +03:00
|
|
|
return ret;
|
2012-03-08 19:06:55 +04:00
|
|
|
}
|
|
|
|
|
2009-06-14 12:38:52 +04:00
|
|
|
static void io_mem_init(void)
|
|
|
|
{
|
2013-06-06 13:41:28 +04:00
|
|
|
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
|
2014-06-13 12:48:06 +04:00
|
|
|
NULL, UINT64_MAX);
|
2009-06-14 12:38:52 +04:00
|
|
|
}
|
|
|
|
|
2017-09-21 11:51:00 +03:00
|
|
|
/*
 * Allocate a zeroed AddressSpaceDispatch for @fv.
 *
 * The first section added must be the unassigned one, so that it ends up
 * at index PHYS_SECTION_UNASSIGNED (asserted). The physical map starts out
 * as a single nil entry.
 */
AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv)
{
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t unassigned_idx = dummy_section(&d->map, fv, &io_mem_unassigned);

    assert(unassigned_idx == PHYS_SECTION_UNASSIGNED);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };

    return d;
}
|
|
|
|
|
2017-09-21 11:50:56 +03:00
|
|
|
/* Release the section map held by @d, then @d itself. */
void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);

    g_free(d);
}
|
|
|
|
|
2018-02-06 20:37:39 +03:00
|
|
|
/*
 * Empty run_on_cpu() work item: callers use it only for the side effect of
 * waiting until the target CPU has run it.
 */
static void do_nothing(CPUState *cpu, run_on_cpu_data d)
{
    /* Intentionally empty. */
}
|
|
|
|
|
|
|
|
/*
 * Memory listener hook: make the listener's vCPU run an empty work item,
 * unless record/replay is active.
 */
static void tcg_log_global_after_sync(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;

    /*
     * VGA can make calls to this function while updating the screen.
     * In record/replay mode this causes a deadlock, because
     * run_on_cpu waits for rr mutex.  Therefore no races are possible
     * in this case and no need for making run_on_cpu when
     * record/replay is enabled.
     */
    if (replay_mode != REPLAY_MODE_NONE) {
        return;
    }

    /*
     * Wait for the CPU to end the current TB.  This avoids the following
     * incorrect race:
     *
     *      vCPU                         migration
     *      ----------------------       -------------------------
     *      TLB check -> slow path
     *        notdirty_mem_write
     *          write to RAM
     *          mark dirty
     *                                   clear dirty flag
     *      TLB check -> fast path
     *                                   read memory
     *        write to RAM
     *
     * by pushing the migration thread's memory read after the vCPU thread
     * has written the memory.
     */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    run_on_cpu(cpuas->cpu, do_nothing, RUN_ON_CPU_NULL);
}
|
|
|
|
|
2023-08-26 02:13:17 +03:00
|
|
|
/*
 * Work item for tcg_commit(): refresh the cached dispatch pointer of the
 * CPUAddressSpace carried in @data, then flush @cpu's TLB.
 */
static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data)
{
    CPUAddressSpace *cpuas = data.host_ptr;
    AddressSpaceDispatch *dispatch = address_space_to_dispatch(cpuas->as);

    cpuas->memory_dispatch = dispatch;
    tlb_flush(cpu);
}
|
|
|
|
|
2012-10-02 20:54:45 +04:00
|
|
|
/*
 * Memory listener commit hook for a TCG CPU address space: arrange for the
 * owning CPU to pick up the new dispatch and flush its TLB.
 */
static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    CPUState *cpu;

    assert(tcg_enabled());

    /*
     * Each CPU stores ram addresses in its TLB cache, so the modified
     * entries have to be reset.
     */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu = cpuas->cpu;

    /*
     * Defer changes to as->memory_dispatch until the cpu is quiescent.
     * Otherwise we race between (1) other cpu threads and (2) ongoing
     * i/o for the current cpu thread, with data cached by mmu_lookup().
     *
     * In addition, queueing the work function will kick the cpu back to
     * the main loop, which will end the RCU critical section and reclaim
     * the memory data structures.
     *
     * That said, the listener is also called during realize, before
     * all of the tcg machinery for run-on is initialized: thus halt_cond.
     */
    if (!cpu->halt_cond) {
        /* Run-on machinery not ready yet: apply the change directly. */
        tcg_commit_cpu(cpu, RUN_ON_CPU_HOST_PTR(cpuas));
    } else {
        async_run_on_cpu(cpu, tcg_commit_cpu, RUN_ON_CPU_HOST_PTR(cpuas));
    }
}
|
|
|
|
|
2011-07-26 15:26:14 +04:00
|
|
|
static void memory_map_init(void)
|
|
|
|
{
|
2011-08-21 07:09:37 +04:00
|
|
|
system_memory = g_malloc(sizeof(*system_memory));
|
2013-11-07 20:14:36 +04:00
|
|
|
|
2013-11-07 20:14:37 +04:00
|
|
|
memory_region_init(system_memory, NULL, "system", UINT64_MAX);
|
2013-04-29 20:25:51 +04:00
|
|
|
address_space_init(&address_space_memory, system_memory, "memory");
|
2011-08-08 17:09:03 +04:00
|
|
|
|
2011-08-21 07:09:37 +04:00
|
|
|
system_io = g_malloc(sizeof(*system_io));
|
2013-09-02 20:43:30 +04:00
|
|
|
memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
|
|
|
|
65536);
|
2013-04-29 20:25:51 +04:00
|
|
|
address_space_init(&address_space_io, system_io, "I/O");
|
2011-07-26 15:26:14 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
MemoryRegion *get_system_memory(void)
|
|
|
|
{
|
|
|
|
return system_memory;
|
|
|
|
}
|
|
|
|
|
2011-08-08 17:09:03 +04:00
|
|
|
MemoryRegion *get_system_io(void)
|
|
|
|
{
|
|
|
|
return system_io;
|
|
|
|
}
|
|
|
|
|
2015-03-23 13:45:53 +03:00
|
|
|
/*
 * Record that @length bytes at offset @addr of RAM region @mr were written:
 * invalidate translated code covering the range when code dirty tracking
 * applies, then mark the range dirty for the remaining clients.
 */
static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    const uint8_t code_bit = 1 << DIRTY_MEMORY_CODE;
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);

    /* From here on @addr is a ram address, not a region offset. */
    addr += memory_region_get_ram_addr(mr);

    /*
     * There is deliberately no early return when dirty_log_mask is or
     * becomes 0: cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask = cpu_physical_memory_range_includes_clean(
            addr, length, dirty_log_mask);
    }

    if (dirty_log_mask & code_bit) {
        assert(tcg_enabled());
        tb_invalidate_phys_range(addr, addr + length - 1);
        dirty_log_mask &= ~code_bit;
    }

    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}
|
|
|
|
|
2019-01-29 14:46:04 +03:00
|
|
|
/*
 * Invalidate and mark dirty @size bytes at offset @addr of ROM device @mr.
 *
 * @mr must be a ROM device region (asserted).
 */
void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size)
{
    /*
     * Nothing here is specific to ROM devices, so in principle other
     * region types would work as well.  ROM devices are simply the only
     * case that needs this operation; every other kind of region should
     * go through the address_space_read/write() APIs.
     */
    assert(memory_region_is_romd(mr));

    invalidate_and_set_dirty(mr, addr, size);
}
|
|
|
|
|
2022-06-13 23:26:32 +03:00
|
|
|
/*
 * Compute how many bytes of an @l-byte request at @addr may be issued to
 * @mr as one access.
 *
 * The result is @l clamped to the region's maximum valid access size (4
 * when the region does not specify one), further limited by the natural
 * alignment of @addr unless the implementation accepts unaligned accesses,
 * and finally rounded down to a power of two.
 */
int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned limit = mr->ops->valid.max_access_size;

    /* An unspecified maximum means the region supports 1-4 byte accesses. */
    if (limit == 0) {
        limit = 4;
    }

    /* The alignment of the address bounds the access size as well. */
    if (!mr->ops->impl.unaligned) {
        /* Lowest set bit of addr; 0 when addr itself is 0. */
        unsigned alignment = addr & -addr;

        if (alignment != 0 && alignment < limit) {
            limit = alignment;
        }
    }

    /* Never go beyond the limit computed above. */
    if (l > limit) {
        l = limit;
    }
    l = pow2floor(l);

    return l;
}
|
|
|
|
|
2022-06-13 23:26:32 +03:00
|
|
|
/*
 * Get ready for an MMIO access to @mr: take the BQL if it is not already
 * held, and flush the coalesced MMIO buffer if the region asks for it.
 *
 * Returns true when this call took the BQL, in which case the caller must
 * release it with bql_unlock() after the access.
 */
bool prepare_mmio_access(MemoryRegion *mr)
{
    bool took_lock = !bql_locked();

    if (took_lock) {
        bql_lock();
    }

    if (mr->flush_coalesced_mmio) {
        qemu_flush_coalesced_mmio_buffer();
    }

    return took_lock;
}
|
|
|
|
|
2021-12-15 21:24:21 +03:00
|
|
|
/**
|
|
|
|
* flatview_access_allowed
|
|
|
|
* @mr: #MemoryRegion to be accessed
|
|
|
|
* @attrs: memory transaction attributes
|
|
|
|
* @addr: address within that memory region
|
|
|
|
* @len: the number of bytes to access
|
|
|
|
*
|
|
|
|
* Check if a memory transaction is allowed.
|
|
|
|
*
|
|
|
|
* Returns: true if transaction is allowed, false if denied.
|
|
|
|
*/
|
|
|
|
static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs,
|
|
|
|
hwaddr addr, hwaddr len)
|
|
|
|
{
|
|
|
|
if (likely(!attrs.memory)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (memory_region_is_ram(mr)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
qemu_log_mask(LOG_GUEST_ERROR,
|
|
|
|
"Invalid access to non-RAM device at "
|
|
|
|
"addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", "
|
|
|
|
"region '%s'\n", addr, len, memory_region_name(mr));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2024-03-07 18:37:09 +03:00
|
|
|
/*
 * Write one piece of @buf to @mr at region offset @mr_addr.
 *
 * @len is the number of bytes still available in @buf.  On entry *@l is
 * the candidate length for this piece.  On the MMIO path it is reduced to
 * a single access size; on the RAM path it is passed by pointer to
 * qemu_ram_ptr_length().  On exit the caller advances by *@l.
 *
 * Returns MEMTX_ACCESS_ERROR if the access is not allowed (*@l is left
 * untouched), the dispatch result for MMIO, or MEMTX_OK for RAM.
 */
static MemTxResult flatview_write_continue_step(MemTxAttrs attrs,
                                                const uint8_t *buf,
                                                hwaddr len, hwaddr mr_addr,
                                                hwaddr *l, MemoryRegion *mr)
{
    uint64_t val;
    MemTxResult result;
    bool release_lock;

    if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
        return MEMTX_ACCESS_ERROR;
    }

    if (memory_access_is_direct(mr, true)) {
        /* RAM case */
        uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
                                               false, true);

        memmove(ram_ptr, buf, *l);
        invalidate_and_set_dirty(mr, mr_addr, *l);

        return MEMTX_OK;
    }

    /* I/O case */
    release_lock = prepare_mmio_access(mr);
    *l = memory_access_size(mr, *l, mr_addr);
    /*
     * XXX: could force current_cpu to NULL to avoid
     * potential bugs
     */

    /*
     * Assure Coverity (and ourselves) that we are not going to OVERRUN
     * the buffer by following ldn_he_p().
     */
#ifdef QEMU_STATIC_ANALYSIS
    assert((*l == 1 && len >= 1) ||
           (*l == 2 && len >= 2) ||
           (*l == 4 && len >= 4) ||
           (*l == 8 && len >= 8));
#endif
    val = ldn_he_p(buf, *l);
    result = memory_region_dispatch_write(mr, mr_addr, val,
                                          size_memop(*l), attrs);
    if (release_lock) {
        bql_unlock();
    }

    return result;
}
|
|
|
|
|
2015-12-09 12:18:57 +03:00
|
|
|
/* Called within RCU critical section. */
|
2017-09-21 11:50:58 +03:00
|
|
|
static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr,
|
|
|
|
MemTxAttrs attrs,
|
2020-02-19 21:52:44 +03:00
|
|
|
const void *ptr,
|
2024-03-07 18:37:07 +03:00
|
|
|
hwaddr len, hwaddr mr_addr,
|
2017-09-21 11:50:58 +03:00
|
|
|
hwaddr l, MemoryRegion *mr)
|
2004-01-24 18:23:36 +03:00
|
|
|
{
|
2015-04-26 18:49:23 +03:00
|
|
|
MemTxResult result = MEMTX_OK;
|
2020-02-19 21:52:44 +03:00
|
|
|
const uint8_t *buf = ptr;
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2015-12-09 12:18:57 +03:00
|
|
|
for (;;) {
|
2024-03-07 18:37:09 +03:00
|
|
|
result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
|
|
|
|
mr);
|
2015-06-18 19:47:22 +03:00
|
|
|
|
2004-01-24 18:23:36 +03:00
|
|
|
len -= l;
|
|
|
|
buf += l;
|
|
|
|
addr += l;
|
2015-12-09 12:18:57 +03:00
|
|
|
|
|
|
|
if (!len) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
l = len;
|
2024-03-07 18:37:07 +03:00
|
|
|
mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
|
2004-01-24 18:23:36 +03:00
|
|
|
}
|
2013-05-21 11:56:55 +04:00
|
|
|
|
2015-04-26 18:49:23 +03:00
|
|
|
return result;
|
2004-01-24 18:23:36 +03:00
|
|
|
}
|
2005-01-29 01:37:22 +03:00
|
|
|
|
2018-03-05 11:23:56 +03:00
|
|
|
/* Called from RCU critical section. */
|
2017-09-21 11:50:58 +03:00
|
|
|
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
|
2020-02-19 21:52:44 +03:00
|
|
|
const void *buf, hwaddr len)
|
2012-10-03 18:22:53 +04:00
|
|
|
{
|
2015-12-09 12:06:31 +03:00
|
|
|
hwaddr l;
|
2024-03-07 18:37:07 +03:00
|
|
|
hwaddr mr_addr;
|
2015-12-09 12:06:31 +03:00
|
|
|
MemoryRegion *mr;
|
|
|
|
|
2018-03-05 11:23:56 +03:00
|
|
|
l = len;
|
2024-03-07 18:37:07 +03:00
|
|
|
mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
|
2021-12-15 21:24:21 +03:00
|
|
|
if (!flatview_access_allowed(mr, attrs, addr, len)) {
|
|
|
|
return MEMTX_ACCESS_ERROR;
|
|
|
|
}
|
2021-12-15 21:24:20 +03:00
|
|
|
return flatview_write_continue(fv, addr, attrs, buf, len,
|
2024-03-07 18:37:07 +03:00
|
|
|
mr_addr, l, mr);
|
2015-12-09 12:18:57 +03:00
|
|
|
}
|
|
|
|
|
2024-03-07 18:37:09 +03:00
|
|
|
static MemTxResult flatview_read_continue_step(MemTxAttrs attrs, uint8_t *buf,
|
|
|
|
hwaddr len, hwaddr mr_addr,
|
|
|
|
hwaddr *l,
|
|
|
|
MemoryRegion *mr)
|
|
|
|
{
|
|
|
|
if (!flatview_access_allowed(mr, attrs, mr_addr, *l)) {
|
|
|
|
return MEMTX_ACCESS_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!memory_access_is_direct(mr, false)) {
|
|
|
|
/* I/O case */
|
|
|
|
uint64_t val;
|
|
|
|
MemTxResult result;
|
|
|
|
bool release_lock = prepare_mmio_access(mr);
|
|
|
|
|
|
|
|
*l = memory_access_size(mr, *l, mr_addr);
|
|
|
|
result = memory_region_dispatch_read(mr, mr_addr, &val, size_memop(*l),
|
|
|
|
attrs);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assure Coverity (and ourselves) that we are not going to OVERRUN
|
|
|
|
* the buffer by following stn_he_p().
|
|
|
|
*/
|
|
|
|
#ifdef QEMU_STATIC_ANALYSIS
|
|
|
|
assert((*l == 1 && len >= 1) ||
|
|
|
|
(*l == 2 && len >= 2) ||
|
|
|
|
(*l == 4 && len >= 4) ||
|
|
|
|
(*l == 8 && len >= 8));
|
|
|
|
#endif
|
|
|
|
stn_he_p(buf, *l, val);
|
|
|
|
|
|
|
|
if (release_lock) {
|
|
|
|
bql_unlock();
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
} else {
|
|
|
|
/* RAM case */
|
|
|
|
uint8_t *ram_ptr = qemu_ram_ptr_length(mr->ram_block, mr_addr, l,
|
2024-04-30 19:49:35 +03:00
|
|
|
false, false);
|
2024-03-07 18:37:09 +03:00
|
|
|
|
|
|
|
memcpy(buf, ram_ptr, *l);
|
|
|
|
|
|
|
|
return MEMTX_OK;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-09 12:18:57 +03:00
|
|
|
/* Called within RCU critical section. */
|
2017-09-21 11:50:58 +03:00
|
|
|
MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr,
|
2020-02-19 21:52:44 +03:00
|
|
|
MemTxAttrs attrs, void *ptr,
|
2024-03-07 18:37:07 +03:00
|
|
|
hwaddr len, hwaddr mr_addr, hwaddr l,
|
2017-09-21 11:50:58 +03:00
|
|
|
MemoryRegion *mr)
|
2015-12-09 12:18:57 +03:00
|
|
|
{
|
|
|
|
MemTxResult result = MEMTX_OK;
|
2020-02-19 21:52:44 +03:00
|
|
|
uint8_t *buf = ptr;
|
2015-12-09 12:06:31 +03:00
|
|
|
|
2021-03-15 17:05:12 +03:00
|
|
|
fuzz_dma_read_cb(addr, len, mr);
|
2015-12-09 12:18:57 +03:00
|
|
|
for (;;) {
|
2024-03-07 18:37:09 +03:00
|
|
|
result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
|
2015-12-09 12:06:31 +03:00
|
|
|
|
|
|
|
len -= l;
|
|
|
|
buf += l;
|
|
|
|
addr += l;
|
2015-12-09 12:18:57 +03:00
|
|
|
|
|
|
|
if (!len) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
l = len;
|
2024-03-07 18:37:07 +03:00
|
|
|
mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
|
2015-12-09 12:18:57 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2018-03-05 02:19:49 +03:00
|
|
|
/* Called from RCU critical section. */
|
|
|
|
static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
|
2020-02-19 21:52:44 +03:00
|
|
|
MemTxAttrs attrs, void *buf, hwaddr len)
|
2015-12-09 12:18:57 +03:00
|
|
|
{
|
|
|
|
hwaddr l;
|
2024-03-07 18:37:07 +03:00
|
|
|
hwaddr mr_addr;
|
2015-12-09 12:18:57 +03:00
|
|
|
MemoryRegion *mr;
|
2015-12-09 12:06:31 +03:00
|
|
|
|
2018-03-05 02:19:49 +03:00
|
|
|
l = len;
|
2024-03-07 18:37:07 +03:00
|
|
|
mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
|
2021-12-15 21:24:21 +03:00
|
|
|
if (!flatview_access_allowed(mr, attrs, addr, len)) {
|
|
|
|
return MEMTX_ACCESS_ERROR;
|
|
|
|
}
|
2018-03-05 02:19:49 +03:00
|
|
|
return flatview_read_continue(fv, addr, attrs, buf, len,
|
2024-03-07 18:37:07 +03:00
|
|
|
mr_addr, l, mr);
|
2012-10-03 18:22:53 +04:00
|
|
|
}
|
|
|
|
|
2018-03-05 02:19:49 +03:00
|
|
|
MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
|
2020-02-19 21:54:35 +03:00
|
|
|
MemTxAttrs attrs, void *buf, hwaddr len)
|
2018-03-05 02:19:49 +03:00
|
|
|
{
|
|
|
|
MemTxResult result = MEMTX_OK;
|
|
|
|
FlatView *fv;
|
|
|
|
|
|
|
|
if (len > 0) {
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2018-03-05 02:19:49 +03:00
|
|
|
fv = address_space_to_flatview(as);
|
|
|
|
result = flatview_read(fv, addr, attrs, buf, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2018-03-05 11:23:56 +03:00
|
|
|
MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
|
|
|
|
MemTxAttrs attrs,
|
2020-02-19 21:54:35 +03:00
|
|
|
const void *buf, hwaddr len)
|
2018-03-05 11:23:56 +03:00
|
|
|
{
|
|
|
|
MemTxResult result = MEMTX_OK;
|
|
|
|
FlatView *fv;
|
|
|
|
|
|
|
|
if (len > 0) {
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2018-03-05 11:23:56 +03:00
|
|
|
fv = address_space_to_flatview(as);
|
|
|
|
result = flatview_write(fv, addr, attrs, buf, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2018-03-05 11:29:04 +03:00
|
|
|
/*
 * Direction-selecting wrapper: forwards to address_space_write() when
 * @is_write is true and to address_space_read_full() otherwise.
 */
MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             void *buf, hwaddr len, bool is_write)
{
    return is_write
        ? address_space_write(as, addr, attrs, buf, len)
        : address_space_read_full(as, addr, attrs, buf, len);
}
|
|
|
|
|
2022-01-15 23:37:23 +03:00
|
|
|
/*
 * Fill @len bytes of @as starting at @addr with the byte @c.
 *
 * The fill is issued as writes of at most FILLBUF_SIZE bytes from a stack
 * buffer; the results of all writes are OR-ed together and returned.
 */
MemTxResult address_space_set(AddressSpace *as, hwaddr addr,
                              uint8_t c, hwaddr len, MemTxAttrs attrs)
{
#define FILLBUF_SIZE 512
    uint8_t fillbuf[FILLBUF_SIZE];
    MemTxResult error = MEMTX_OK;
    int chunk;

    memset(fillbuf, c, FILLBUF_SIZE);

    for (; len > 0; len -= chunk, addr += chunk) {
        /* Last round may be shorter than the buffer. */
        if (len < FILLBUF_SIZE) {
            chunk = len;
        } else {
            chunk = FILLBUF_SIZE;
        }
        error |= address_space_write(as, addr, attrs, fillbuf, chunk);
    }

    return error;
}
|
|
|
|
|
2020-02-19 22:02:11 +03:00
|
|
|
/*
 * Read or write @len bytes at @addr of address_space_memory with
 * unspecified transaction attributes.  The transaction result is dropped.
 */
void cpu_physical_memory_rw(hwaddr addr, void *buf,
                            hwaddr len, bool is_write)
{
    AddressSpace *as = &address_space_memory;

    address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED, buf, len, is_write);
}
|
|
|
|
|
2013-12-11 17:17:44 +04:00
|
|
|
/* Operation selector for address_space_write_rom_internal(). */
enum write_rom_type {
    /* Copy the caller's buffer into the RAM/ROM backing store. */
    WRITE_DATA,
    /* Flush the host instruction/data cache for the range. */
    FLUSH_CACHE,
};
|
|
|
|
|
2018-12-14 16:30:48 +03:00
|
|
|
static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
|
|
|
|
hwaddr addr,
|
|
|
|
MemTxAttrs attrs,
|
2020-02-19 21:54:35 +03:00
|
|
|
const void *ptr,
|
2019-01-17 15:49:01 +03:00
|
|
|
hwaddr len,
|
2018-12-14 16:30:48 +03:00
|
|
|
enum write_rom_type type)
|
2006-04-23 21:14:48 +04:00
|
|
|
{
|
2013-05-24 14:59:37 +04:00
|
|
|
hwaddr l;
|
2020-02-19 20:01:32 +03:00
|
|
|
uint8_t *ram_ptr;
|
2013-05-24 14:59:37 +04:00
|
|
|
hwaddr addr1;
|
2013-05-29 14:42:00 +04:00
|
|
|
MemoryRegion *mr;
|
2020-02-19 21:54:35 +03:00
|
|
|
const uint8_t *buf = ptr;
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2006-04-23 21:14:48 +04:00
|
|
|
while (len > 0) {
|
2013-05-24 14:59:37 +04:00
|
|
|
l = len;
|
2018-12-14 16:30:48 +03:00
|
|
|
mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
|
2007-09-17 12:09:54 +04:00
|
|
|
|
2013-05-29 14:42:00 +04:00
|
|
|
if (!(memory_region_is_ram(mr) ||
|
|
|
|
memory_region_is_romd(mr))) {
|
exec: skip MMIO regions correctly in cpu_physical_memory_write_rom_internal
Loading the BIOS in the mac99 machine is interesting, because there is a
PROM in the middle of the BIOS region (from 16K to 32K). Before memory
region accesses were clamped, when QEMU was asked to load a BIOS from
0xfff00000 to 0xffffffff it would put even those 16K from the BIOS file
into the region. This is weird because those 16K were not actually
visible between 0xfff04000 and 0xfff07fff. However, it worked.
After clamping was added, this also worked. In this case, the
cpu_physical_memory_write_rom_internal function split the write in
three parts: the first 16K were copied, the PROM area (second 16K) were
ignored, then the rest was copied.
Problems then started with commit 965eb2f (exec: do not clamp accesses
to MMIO regions, 2015-06-17). Clamping accesses is not done for MMIO
regions because they can overlap wildly, and MMIO registers can be
expected to perform full-width accesses based only on their address
(with no respect for adjacent registers that could decode to completely
different MemoryRegions). However, this lack of clamping also applied
to the PROM area! cpu_physical_memory_write_rom_internal thus failed
to copy the third range above, i.e. only copied the first 16K of the BIOS.
In effect, address_space_translate is expecting _something else_ to do
the clamping for MMIO regions if the incoming length is large. This
"something else" is memory_access_size in the case of address_space_rw,
so use the same logic in cpu_physical_memory_write_rom_internal.
Reported-by: Alexander Graf <agraf@redhat.com>
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
Tested-by: Laurent Vivier <lvivier@redhat.com>
Fixes: 965eb2f
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-07-04 01:24:51 +03:00
|
|
|
l = memory_access_size(mr, l, addr1);
|
2006-04-23 21:14:48 +04:00
|
|
|
} else {
|
|
|
|
/* ROM/RAM case */
|
2020-02-19 20:01:32 +03:00
|
|
|
ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
|
2013-12-11 17:17:44 +04:00
|
|
|
switch (type) {
|
|
|
|
case WRITE_DATA:
|
2020-02-19 20:01:32 +03:00
|
|
|
memcpy(ram_ptr, buf, l);
|
2015-03-23 13:45:53 +03:00
|
|
|
invalidate_and_set_dirty(mr, addr1, l);
|
2013-12-11 17:17:44 +04:00
|
|
|
break;
|
|
|
|
case FLUSH_CACHE:
|
2020-12-12 19:38:21 +03:00
|
|
|
flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
|
2013-12-11 17:17:44 +04:00
|
|
|
break;
|
|
|
|
}
|
2006-04-23 21:14:48 +04:00
|
|
|
}
|
|
|
|
len -= l;
|
|
|
|
buf += l;
|
|
|
|
addr += l;
|
|
|
|
}
|
2018-12-14 16:30:48 +03:00
|
|
|
return MEMTX_OK;
|
2006-04-23 21:14:48 +04:00
|
|
|
}
|
|
|
|
|
2013-12-11 17:17:44 +04:00
|
|
|
/* used for ROM loading : can write in RAM and ROM */
|
2018-12-14 16:30:48 +03:00
|
|
|
MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
|
|
|
|
MemTxAttrs attrs,
|
2020-02-19 21:54:35 +03:00
|
|
|
const void *buf, hwaddr len)
|
2013-12-11 17:17:44 +04:00
|
|
|
{
|
2018-12-14 16:30:48 +03:00
|
|
|
return address_space_write_rom_internal(as, addr, attrs,
|
|
|
|
buf, len, WRITE_DATA);
|
2013-12-11 17:17:44 +04:00
|
|
|
}
|
|
|
|
|
2019-01-17 15:49:01 +03:00
|
|
|
/*
 * Flush the host instruction cache for @len bytes of guest physical memory
 * starting at @start.  Does nothing under TCG.
 */
void cpu_flush_icache_range(hwaddr start, hwaddr len)
{
    /*
     * The effect has to match an icache flush issued by the guest itself.
     * TCG is always cache coherent, so nothing needs flushing there.
     * For KVM / Xen the host's instruction cache has to be flushed at
     * least.
     */
    if (!tcg_enabled()) {
        address_space_write_rom_internal(&address_space_memory,
                                         start, MEMTXATTRS_UNSPECIFIED,
                                         NULL, len, FLUSH_CACHE);
    }
}
|
|
|
|
|
2023-09-07 16:04:23 +03:00
|
|
|
static void
|
|
|
|
address_space_unregister_map_client_do(AddressSpaceMapClient *client)
|
2015-03-16 12:03:37 +03:00
|
|
|
{
|
|
|
|
QLIST_REMOVE(client, link);
|
|
|
|
g_free(client);
|
|
|
|
}
|
|
|
|
|
2023-09-07 16:04:23 +03:00
|
|
|
/*
 * Drain @as's map-client list: schedule each client's bottom half, then
 * unlink and free the client.
 *
 * The "_locked" suffix and the callers in this file indicate that
 * as->map_client_list_lock is held on entry.
 */
static void address_space_notify_map_clients_locked(AddressSpace *as)
{
    while (!QLIST_EMPTY(&as->map_client_list)) {
        AddressSpaceMapClient *head = QLIST_FIRST(&as->map_client_list);

        qemu_bh_schedule(head->bh);
        address_space_unregister_map_client_do(head);
    }
}
|
|
|
|
|
2023-09-07 16:04:23 +03:00
|
|
|
/*
 * Register @bh to be scheduled when a map operation on @as is worth
 * retrying.  If the bounce buffer is not in use at registration time, all
 * registered clients (including this one) are notified immediately.
 */
void address_space_register_map_client(AddressSpace *as, QEMUBH *bh)
{
    AddressSpaceMapClient *client = g_malloc(sizeof(*client));

    /* Not yet visible to anyone else, so it can be filled in unlocked. */
    client->bh = bh;

    QEMU_LOCK_GUARD(&as->map_client_list_lock);
    QLIST_INSERT_HEAD(&as->map_client_list, client, link);
    /* Write map_client_list before reading in_use.  */
    smp_mb();
    if (!qatomic_read(&as->bounce.in_use)) {
        address_space_notify_map_clients_locked(as);
    }
}
|
|
|
|
|
2015-03-16 12:03:35 +03:00
|
|
|
void cpu_exec_init_all(void)
|
2009-01-22 19:59:16 +03:00
|
|
|
{
|
2015-03-16 12:03:35 +03:00
|
|
|
qemu_mutex_init(&ram_list.mutex);
|
2016-10-24 18:26:49 +03:00
|
|
|
/* The data structures we set up here depend on knowing the page size,
|
|
|
|
* so no more changes can be made after this point.
|
|
|
|
* In an ideal world, nothing we did before we had finished the
|
|
|
|
* machine setup would care about the target page size, and we could
|
|
|
|
* do this much later, rather than requiring board models to state
|
|
|
|
* up front what their requirements are.
|
|
|
|
*/
|
|
|
|
finalize_target_page_bits();
|
2015-03-16 12:03:35 +03:00
|
|
|
io_mem_init();
|
2015-11-02 11:23:52 +03:00
|
|
|
memory_map_init();
|
2009-01-22 19:59:16 +03:00
|
|
|
}
|
|
|
|
|
2023-09-07 16:04:23 +03:00
|
|
|
/*
 * Remove the first map client of @as whose bottom half is @bh, if any.
 * Takes as->map_client_list_lock for the duration of the search.
 */
void address_space_unregister_map_client(AddressSpace *as, QEMUBH *bh)
{
    AddressSpaceMapClient *entry;

    QEMU_LOCK_GUARD(&as->map_client_list_lock);
    QLIST_FOREACH(entry, &as->map_client_list, link) {
        if (entry->bh != bh) {
            continue;
        }
        /* entry is freed here, so the walk must stop right away. */
        address_space_unregister_map_client_do(entry);
        break;
    }
}
|
|
|
|
|
2023-09-07 16:04:23 +03:00
|
|
|
/*
 * Locking wrapper: take as->map_client_list_lock and notify every
 * registered map client of @as.
 */
static void address_space_notify_map_clients(AddressSpace *as)
{
    QEMU_LOCK_GUARD(&as->map_client_list_lock);

    address_space_notify_map_clients_locked(as);
}
|
|
|
|
|
2019-01-17 15:49:01 +03:00
|
|
|
/*
 * Check whether an access of @len bytes at @addr in @fv would be accepted.
 *
 * The range is walked piece by piece.  Pieces that can be accessed
 * directly are always accepted; every other piece is cut down to a single
 * access size and checked with memory_region_access_valid().
 *
 * Returns false at the first rejected piece, true if none is rejected.
 */
static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len,
                                  bool is_write, MemTxAttrs attrs)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    for (; len > 0; len -= l, addr += l) {
        l = len;
        mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
        if (memory_access_is_direct(mr, is_write)) {
            continue;
        }

        /*
         * NOTE(review): the access size is derived from @addr, while the
         * validity check below uses the region offset xlat.  The
         * read/write steps pass the region offset to memory_access_size()
         * -- confirm that @addr is intended here.
         */
        l = memory_access_size(mr, l, addr);
        if (!memory_region_access_valid(mr, xlat, l, is_write, attrs)) {
            return false;
        }
    }

    return true;
}
|
|
|
|
|
2017-09-21 11:50:58 +03:00
|
|
|
bool address_space_access_valid(AddressSpace *as, hwaddr addr,
|
2019-01-17 15:49:01 +03:00
|
|
|
hwaddr len, bool is_write,
|
2018-05-31 16:50:52 +03:00
|
|
|
MemTxAttrs attrs)
|
2017-09-21 11:50:58 +03:00
|
|
|
{
|
2018-03-05 02:23:26 +03:00
|
|
|
FlatView *fv;
|
|
|
|
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2018-03-05 02:23:26 +03:00
|
|
|
fv = address_space_to_flatview(as);
|
2021-12-15 21:24:20 +03:00
|
|
|
return flatview_access_valid(fv, addr, len, is_write, attrs);
|
2017-09-21 11:50:58 +03:00
|
|
|
}
|
|
|
|
|
2016-11-22 14:04:31 +03:00
|
|
|
static hwaddr
|
2017-09-21 11:50:58 +03:00
|
|
|
flatview_extend_translation(FlatView *fv, hwaddr addr,
|
2018-05-31 16:50:52 +03:00
|
|
|
hwaddr target_len,
|
|
|
|
MemoryRegion *mr, hwaddr base, hwaddr len,
|
|
|
|
bool is_write, MemTxAttrs attrs)
|
2016-11-22 14:04:31 +03:00
|
|
|
{
|
|
|
|
hwaddr done = 0;
|
|
|
|
hwaddr xlat;
|
|
|
|
MemoryRegion *this_mr;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
target_len -= len;
|
|
|
|
addr += len;
|
|
|
|
done += len;
|
|
|
|
if (target_len == 0) {
|
|
|
|
return done;
|
|
|
|
}
|
|
|
|
|
|
|
|
len = target_len;
|
2017-09-21 11:50:58 +03:00
|
|
|
this_mr = flatview_translate(fv, addr, &xlat,
|
2018-05-31 16:50:52 +03:00
|
|
|
&len, is_write, attrs);
|
2016-11-22 14:04:31 +03:00
|
|
|
if (this_mr != mr || xlat != base + done) {
|
|
|
|
return done;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-01-22 19:59:11 +03:00
|
|
|
/* Map a physical memory region into a host virtual address.
|
|
|
|
* May map a subset of the requested range, given by and returned in *plen.
|
|
|
|
* May return NULL if resources needed to perform the mapping are exhausted.
|
|
|
|
* Use only for reads OR writes - not for read-modify-write operations.
|
2023-09-07 16:04:23 +03:00
|
|
|
* Use address_space_register_map_client() to know when retrying the map
|
|
|
|
* operation is likely to succeed.
|
2009-01-22 19:59:11 +03:00
|
|
|
*/
|
2012-10-03 18:22:53 +04:00
|
|
|
void *address_space_map(AddressSpace *as,
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr addr,
|
|
|
|
hwaddr *plen,
|
2018-05-31 16:50:52 +03:00
|
|
|
bool is_write,
|
|
|
|
MemTxAttrs attrs)
|
2009-01-22 19:59:11 +03:00
|
|
|
{
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr len = *plen;
|
2016-11-22 14:04:31 +03:00
|
|
|
hwaddr l, xlat;
|
|
|
|
MemoryRegion *mr;
|
2018-03-05 02:23:26 +03:00
|
|
|
FlatView *fv;
|
2009-01-22 19:59:11 +03:00
|
|
|
|
2013-06-28 19:29:27 +04:00
|
|
|
if (len == 0) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2011-05-19 21:35:45 +04:00
|
|
|
|
2013-06-28 19:29:27 +04:00
|
|
|
l = len;
|
2019-10-07 17:36:41 +03:00
|
|
|
RCU_READ_LOCK_GUARD();
|
2018-03-05 02:23:26 +03:00
|
|
|
fv = address_space_to_flatview(as);
|
2018-05-31 16:50:52 +03:00
|
|
|
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
|
2015-03-18 16:21:43 +03:00
|
|
|
|
2013-06-28 19:29:27 +04:00
|
|
|
if (!memory_access_is_direct(mr, is_write)) {
|
2023-09-07 16:04:23 +03:00
|
|
|
if (qatomic_xchg(&as->bounce.in_use, true)) {
|
2020-05-26 14:17:43 +03:00
|
|
|
*plen = 0;
|
2013-06-28 19:29:27 +04:00
|
|
|
return NULL;
|
2009-01-22 19:59:11 +03:00
|
|
|
}
|
2013-07-22 16:30:23 +04:00
|
|
|
/* Avoid unbounded allocations */
|
|
|
|
l = MIN(l, TARGET_PAGE_SIZE);
|
2023-09-07 16:04:23 +03:00
|
|
|
as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
|
|
|
|
as->bounce.addr = addr;
|
|
|
|
as->bounce.len = l;
|
2013-06-28 19:33:29 +04:00
|
|
|
|
|
|
|
memory_region_ref(mr);
|
2023-09-07 16:04:23 +03:00
|
|
|
as->bounce.mr = mr;
|
2013-06-28 19:29:27 +04:00
|
|
|
if (!is_write) {
|
2017-09-21 11:50:58 +03:00
|
|
|
flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
|
2023-09-07 16:04:23 +03:00
|
|
|
as->bounce.buffer, l);
|
2011-06-27 21:26:06 +04:00
|
|
|
}
|
2009-01-22 19:59:11 +03:00
|
|
|
|
2013-06-28 19:29:27 +04:00
|
|
|
*plen = l;
|
2023-09-07 16:04:23 +03:00
|
|
|
return as->bounce.buffer;
|
2013-06-28 19:29:27 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-06-28 19:33:29 +04:00
|
|
|
memory_region_ref(mr);
|
2017-09-21 11:50:58 +03:00
|
|
|
*plen = flatview_extend_translation(fv, addr, len, mr, xlat,
|
2018-05-31 16:50:52 +03:00
|
|
|
l, is_write, attrs);
|
2021-01-20 09:02:55 +03:00
|
|
|
fuzz_dma_read_cb(addr, *plen, mr);
|
2024-04-30 19:49:35 +03:00
|
|
|
return qemu_ram_ptr_length(mr->ram_block, xlat, plen, true, is_write);
|
2009-01-22 19:59:11 +03:00
|
|
|
}
|
|
|
|
|
2012-10-03 18:22:53 +04:00
|
|
|
/* Unmaps a memory region previously mapped by address_space_map().
|
2020-02-19 22:12:01 +03:00
|
|
|
* Will also mark the memory as dirty if is_write is true. access_len gives
|
2009-01-22 19:59:11 +03:00
|
|
|
* the amount of memory that was actually read or written by the caller.
|
|
|
|
*/
|
2012-10-23 14:30:10 +04:00
|
|
|
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
|
2020-02-19 22:12:01 +03:00
|
|
|
bool is_write, hwaddr access_len)
|
2009-01-22 19:59:11 +03:00
|
|
|
{
|
2023-09-07 16:04:23 +03:00
|
|
|
if (buffer != as->bounce.buffer) {
|
2013-06-28 19:33:29 +04:00
|
|
|
MemoryRegion *mr;
|
|
|
|
ram_addr_t addr1;
|
|
|
|
|
2016-03-25 14:55:08 +03:00
|
|
|
mr = memory_region_from_host(buffer, &addr1);
|
2013-06-28 19:33:29 +04:00
|
|
|
assert(mr != NULL);
|
2009-01-22 19:59:11 +03:00
|
|
|
if (is_write) {
|
2015-03-23 13:45:53 +03:00
|
|
|
invalidate_and_set_dirty(mr, addr1, access_len);
|
2009-01-22 19:59:11 +03:00
|
|
|
}
|
2011-06-22 00:59:09 +04:00
|
|
|
if (xen_enabled()) {
|
2011-06-22 00:59:08 +04:00
|
|
|
xen_invalidate_map_cache_entry(buffer);
|
2010-09-16 16:57:49 +04:00
|
|
|
}
|
2013-06-28 19:33:29 +04:00
|
|
|
memory_region_unref(mr);
|
2009-01-22 19:59:11 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (is_write) {
|
2023-09-07 16:04:23 +03:00
|
|
|
address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED,
|
|
|
|
as->bounce.buffer, access_len);
|
2009-01-22 19:59:11 +03:00
|
|
|
}
|
2023-09-07 16:04:23 +03:00
|
|
|
qemu_vfree(as->bounce.buffer);
|
|
|
|
as->bounce.buffer = NULL;
|
|
|
|
memory_region_unref(as->bounce.mr);
|
2023-03-03 16:36:32 +03:00
|
|
|
/* Clear in_use before reading map_client_list. */
|
2023-09-07 16:04:23 +03:00
|
|
|
qatomic_set_mb(&as->bounce.in_use, false);
|
2023-09-07 16:04:23 +03:00
|
|
|
address_space_notify_map_clients(as);
|
2009-01-22 19:59:11 +03:00
|
|
|
}
|
2006-04-23 21:14:48 +04:00
|
|
|
|
2012-10-23 14:30:10 +04:00
|
|
|
/*
 * Convenience wrapper: address_space_map() on address_space_memory with
 * MEMTXATTRS_UNSPECIFIED attributes.
 */
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              bool is_write)
{
    AddressSpace *as = &address_space_memory;

    return address_space_map(as, addr, plen, is_write,
                             MEMTXATTRS_UNSPECIFIED);
}
|
|
|
|
|
2012-10-23 14:30:10 +04:00
|
|
|
/*
 * Convenience wrapper: address_space_unmap() on address_space_memory.
 *
 * @buffer:     pointer previously obtained from cpu_physical_memory_map()
 * @len:        length of the mapping
 * @is_write:   whether the mapping was used for writing
 * @access_len: number of bytes the caller actually accessed
 */
void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               bool is_write, hwaddr access_len)
{
    /*
     * Plain call, no "return": ISO C does not allow a return statement
     * with an expression in a function returning void.
     */
    address_space_unmap(&address_space_memory, buffer, len, is_write,
                        access_len);
}
|
|
|
|
|
2016-11-22 13:34:02 +03:00
|
|
|
/*
 * Instantiate the memory_ldst.c.inc template for plain AddressSpace
 * accesses: no name suffix, translation through address_space_translate(),
 * and a real RCU read lock taken around each access.
 *
 * RCU_READ_LOCK()/RCU_READ_UNLOCK() take no parameters, matching the
 * definitions used for the cached instantiation of the same template.
 */
#define ARG1_DECL AddressSpace *as
#define ARG1 as
#define SUFFIX
#define TRANSLATE(...) address_space_translate(as, __VA_ARGS__)
#define RCU_READ_LOCK() rcu_read_lock()
#define RCU_READ_UNLOCK() rcu_read_unlock()
#include "memory_ldst.c.inc"
|
2011-07-06 11:09:23 +04:00
|
|
|
|
2016-11-22 14:04:52 +03:00
|
|
|
int64_t address_space_cache_init(MemoryRegionCache *cache,
|
|
|
|
AddressSpace *as,
|
|
|
|
hwaddr addr,
|
|
|
|
hwaddr len,
|
|
|
|
bool is_write)
|
|
|
|
{
|
2018-03-18 20:26:36 +03:00
|
|
|
AddressSpaceDispatch *d;
|
|
|
|
hwaddr l;
|
|
|
|
MemoryRegion *mr;
|
memory: clamp cached translation in case it points to an MMIO region
In using the address_space_translate_internal API, address_space_cache_init
forgot one piece of advice that can be found in the code for
address_space_translate_internal:
/* MMIO registers can be expected to perform full-width accesses based only
* on their address, without considering adjacent registers that could
* decode to completely different MemoryRegions. When such registers
* exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
* regions overlap wildly. For this reason we cannot clamp the accesses
* here.
*
* If the length is small (as is the case for address_space_ldl/stl),
* everything works fine. If the incoming length is large, however,
* the caller really has to do the clamping through memory_access_size.
*/
address_space_cache_init is exactly one such case where "the incoming length
is large", therefore we need to clamp the resulting length---not to
memory_access_size though, since we are not doing an access yet, but to
the size of the resulting section. This ensures that subsequent accesses
to the cached MemoryRegionSection will be in range.
With this patch, the enclosed testcase notices that the used ring does
not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used"
error.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-12-01 17:29:56 +03:00
|
|
|
Int128 diff;
|
2018-03-18 20:26:36 +03:00
|
|
|
|
|
|
|
assert(len > 0);
|
|
|
|
|
|
|
|
l = len;
|
|
|
|
cache->fv = address_space_get_flatview(as);
|
|
|
|
d = flatview_to_dispatch(cache->fv);
|
|
|
|
cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true);
|
|
|
|
|
memory: clamp cached translation in case it points to an MMIO region
In using the address_space_translate_internal API, address_space_cache_init
forgot one piece of advice that can be found in the code for
address_space_translate_internal:
/* MMIO registers can be expected to perform full-width accesses based only
* on their address, without considering adjacent registers that could
* decode to completely different MemoryRegions. When such registers
* exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
* regions overlap wildly. For this reason we cannot clamp the accesses
* here.
*
* If the length is small (as is the case for address_space_ldl/stl),
* everything works fine. If the incoming length is large, however,
* the caller really has to do the clamping through memory_access_size.
*/
address_space_cache_init is exactly one such case where "the incoming length
is large", therefore we need to clamp the resulting length---not to
memory_access_size though, since we are not doing an access yet, but to
the size of the resulting section. This ensures that subsequent accesses
to the cached MemoryRegionSection will be in range.
With this patch, the enclosed testcase notices that the used ring does
not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used"
error.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2020-12-01 17:29:56 +03:00
|
|
|
/*
|
|
|
|
* cache->xlat is now relative to cache->mrs.mr, not to the section itself.
|
|
|
|
* Take that into account to compute how many bytes are there between
|
|
|
|
* cache->xlat and the end of the section.
|
|
|
|
*/
|
|
|
|
diff = int128_sub(cache->mrs.size,
|
|
|
|
int128_make64(cache->xlat - cache->mrs.offset_within_region));
|
|
|
|
l = int128_get64(int128_min(diff, int128_make64(l)));
|
|
|
|
|
2018-03-18 20:26:36 +03:00
|
|
|
mr = cache->mrs.mr;
|
|
|
|
memory_region_ref(mr);
|
|
|
|
if (memory_access_is_direct(mr, is_write)) {
|
2018-05-31 16:50:52 +03:00
|
|
|
/* We don't care about the memory attributes here as we're only
|
|
|
|
* doing this if we found actual RAM, which behaves the same
|
|
|
|
* regardless of attributes; so UNSPECIFIED is fine.
|
|
|
|
*/
|
2018-03-18 20:26:36 +03:00
|
|
|
l = flatview_extend_translation(cache->fv, addr, len, mr,
|
2018-05-31 16:50:52 +03:00
|
|
|
cache->xlat, l, is_write,
|
|
|
|
MEMTXATTRS_UNSPECIFIED);
|
2024-04-30 19:49:35 +03:00
|
|
|
cache->ptr = qemu_ram_ptr_length(mr->ram_block, cache->xlat, &l, true,
|
|
|
|
is_write);
|
2018-03-18 20:26:36 +03:00
|
|
|
} else {
|
|
|
|
cache->ptr = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
cache->len = l;
|
|
|
|
cache->is_write = is_write;
|
|
|
|
return l;
|
2016-11-22 14:04:52 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
void address_space_cache_invalidate(MemoryRegionCache *cache,
|
|
|
|
hwaddr addr,
|
|
|
|
hwaddr access_len)
|
|
|
|
{
|
2018-03-18 20:26:36 +03:00
|
|
|
assert(cache->is_write);
|
|
|
|
if (likely(cache->ptr)) {
|
|
|
|
invalidate_and_set_dirty(cache->mrs.mr, addr + cache->xlat, access_len);
|
|
|
|
}
|
2016-11-22 14:04:52 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Release the references held by @cache.
 *
 * Does nothing when the cache holds no MemoryRegion.  Otherwise drops the
 * MemoryRegion and FlatView references and clears both pointers, so that a
 * second call is a no-op.
 */
void address_space_cache_destroy(MemoryRegionCache *cache)
{
    MemoryRegion *mr = cache->mrs.mr;

    if (mr == NULL) {
        return;
    }

    if (xen_enabled()) {
        xen_invalidate_map_cache_entry(cache->ptr);
    }
    memory_region_unref(mr);
    flatview_unref(cache->fv);

    cache->fv = NULL;
    cache->mrs.mr = NULL;
}
|
|
|
|
|
|
|
|
/* Called from RCU critical section. This function has the same
|
|
|
|
* semantics as address_space_translate, but it only works on a
|
|
|
|
* predefined range of a MemoryRegion that was mapped with
|
|
|
|
* address_space_cache_init.
|
|
|
|
*/
|
|
|
|
static inline MemoryRegion *address_space_translate_cached(
|
|
|
|
MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
|
2018-05-31 16:50:52 +03:00
|
|
|
hwaddr *plen, bool is_write, MemTxAttrs attrs)
|
2018-03-18 20:26:36 +03:00
|
|
|
{
|
|
|
|
MemoryRegionSection section;
|
|
|
|
MemoryRegion *mr;
|
|
|
|
IOMMUMemoryRegion *iommu_mr;
|
|
|
|
AddressSpace *target_as;
|
|
|
|
|
|
|
|
assert(!cache->ptr);
|
|
|
|
*xlat = addr + cache->xlat;
|
|
|
|
|
|
|
|
mr = cache->mrs.mr;
|
|
|
|
iommu_mr = memory_region_get_iommu(mr);
|
|
|
|
if (!iommu_mr) {
|
|
|
|
/* MMIO region. */
|
|
|
|
return mr;
|
|
|
|
}
|
|
|
|
|
|
|
|
section = address_space_translate_iommu(iommu_mr, xlat, plen,
|
|
|
|
NULL, is_write, true,
|
2018-05-31 16:50:53 +03:00
|
|
|
&target_as, attrs);
|
2018-03-18 20:26:36 +03:00
|
|
|
return section.mr;
|
|
|
|
}
|
|
|
|
|
2024-03-07 18:37:10 +03:00
|
|
|
/* Called within RCU critical section. */
|
|
|
|
static MemTxResult address_space_write_continue_cached(MemTxAttrs attrs,
|
|
|
|
const void *ptr,
|
|
|
|
hwaddr len,
|
|
|
|
hwaddr mr_addr,
|
|
|
|
hwaddr l,
|
|
|
|
MemoryRegion *mr)
|
|
|
|
{
|
|
|
|
MemTxResult result = MEMTX_OK;
|
|
|
|
const uint8_t *buf = ptr;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
result |= flatview_write_continue_step(attrs, buf, len, mr_addr, &l,
|
|
|
|
mr);
|
|
|
|
|
|
|
|
len -= l;
|
|
|
|
buf += l;
|
|
|
|
mr_addr += l;
|
|
|
|
|
|
|
|
if (!len) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
l = len;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Called within RCU critical section. */
|
|
|
|
static MemTxResult address_space_read_continue_cached(MemTxAttrs attrs,
|
|
|
|
void *ptr, hwaddr len,
|
|
|
|
hwaddr mr_addr, hwaddr l,
|
|
|
|
MemoryRegion *mr)
|
|
|
|
{
|
|
|
|
MemTxResult result = MEMTX_OK;
|
|
|
|
uint8_t *buf = ptr;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
result |= flatview_read_continue_step(attrs, buf, len, mr_addr, &l, mr);
|
|
|
|
len -= l;
|
|
|
|
buf += l;
|
|
|
|
mr_addr += l;
|
|
|
|
|
|
|
|
if (!len) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
l = len;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2018-03-18 20:26:36 +03:00
|
|
|
/* Called from RCU critical section. address_space_read_cached uses this
|
|
|
|
* out of line function when the target is an MMIO or IOMMU region.
|
|
|
|
*/
|
2020-05-18 18:53:02 +03:00
|
|
|
MemTxResult
|
2018-03-18 20:26:36 +03:00
|
|
|
address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr,
|
2019-01-17 15:49:01 +03:00
|
|
|
void *buf, hwaddr len)
|
2018-03-18 20:26:36 +03:00
|
|
|
{
|
2024-03-07 18:37:07 +03:00
|
|
|
hwaddr mr_addr, l;
|
2018-03-18 20:26:36 +03:00
|
|
|
MemoryRegion *mr;
|
|
|
|
|
|
|
|
l = len;
|
2024-03-07 18:37:07 +03:00
|
|
|
mr = address_space_translate_cached(cache, addr, &mr_addr, &l, false,
|
2018-05-31 16:50:52 +03:00
|
|
|
MEMTXATTRS_UNSPECIFIED);
|
2024-03-07 18:37:10 +03:00
|
|
|
return address_space_read_continue_cached(MEMTXATTRS_UNSPECIFIED,
|
|
|
|
buf, len, mr_addr, l, mr);
|
2018-03-18 20:26:36 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Called from RCU critical section. address_space_write_cached uses this
|
|
|
|
* out of line function when the target is an MMIO or IOMMU region.
|
|
|
|
*/
|
2020-05-18 18:53:02 +03:00
|
|
|
MemTxResult
|
2018-03-18 20:26:36 +03:00
|
|
|
address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr,
|
2019-01-17 15:49:01 +03:00
|
|
|
const void *buf, hwaddr len)
|
2018-03-18 20:26:36 +03:00
|
|
|
{
|
2024-03-07 18:37:07 +03:00
|
|
|
hwaddr mr_addr, l;
|
2018-03-18 20:26:36 +03:00
|
|
|
MemoryRegion *mr;
|
|
|
|
|
|
|
|
l = len;
|
2024-03-07 18:37:07 +03:00
|
|
|
mr = address_space_translate_cached(cache, addr, &mr_addr, &l, true,
|
2018-05-31 16:50:52 +03:00
|
|
|
MEMTXATTRS_UNSPECIFIED);
|
2024-03-07 18:37:10 +03:00
|
|
|
return address_space_write_continue_cached(MEMTXATTRS_UNSPECIFIED,
|
|
|
|
buf, len, mr_addr, l, mr);
|
2016-11-22 14:04:52 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Instantiate the memory_ldst.c.inc template for MemoryRegionCache
 * accesses: the first argument is the cache, translation goes through
 * address_space_translate_cached(), and SUFFIX is "_cached_slow"
 * (presumably appended to the generated function names -- confirm in
 * memory_ldst.c.inc).
 *
 * The RCU macros expand to no-ops here; address_space_translate_cached()
 * is documented as being called from an RCU critical section, so the
 * caller is presumably expected to hold the lock already -- confirm.
 */
#define ARG1_DECL MemoryRegionCache *cache
#define ARG1 cache
#define SUFFIX _cached_slow
#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
#define RCU_READ_LOCK() ((void)0)
#define RCU_READ_UNLOCK() ((void)0)
#include "memory_ldst.c.inc"
|
2016-11-22 14:04:52 +03:00
|
|
|
|
2009-03-28 20:51:36 +03:00
|
|
|
/* virtual memory access for debug (includes writing to ROM) */
|
2022-02-03 04:13:28 +03:00
|
|
|
int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
|
|
|
|
void *ptr, size_t len, bool is_write)
|
2004-01-24 18:23:36 +03:00
|
|
|
{
|
2012-10-23 14:30:10 +04:00
|
|
|
hwaddr phys_addr;
|
2022-02-03 04:13:28 +03:00
|
|
|
vaddr l, page;
|
2020-02-19 22:02:11 +03:00
|
|
|
uint8_t *buf = ptr;
|
2004-01-24 18:23:36 +03:00
|
|
|
|
2017-03-07 17:19:08 +03:00
|
|
|
cpu_synchronize_state(cpu);
|
2004-01-24 18:23:36 +03:00
|
|
|
while (len > 0) {
|
2016-01-21 17:15:06 +03:00
|
|
|
int asidx;
|
|
|
|
MemTxAttrs attrs;
|
2020-05-18 18:53:03 +03:00
|
|
|
MemTxResult res;
|
2016-01-21 17:15:06 +03:00
|
|
|
|
2004-01-24 18:23:36 +03:00
|
|
|
page = addr & TARGET_PAGE_MASK;
|
2016-01-21 17:15:06 +03:00
|
|
|
phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
|
|
|
|
asidx = cpu_asidx_from_attrs(cpu, attrs);
|
2004-01-24 18:23:36 +03:00
|
|
|
/* if no physical page mapped, return an error */
|
|
|
|
if (phys_addr == -1)
|
|
|
|
return -1;
|
|
|
|
l = (page + TARGET_PAGE_SIZE) - addr;
|
|
|
|
if (l > len)
|
|
|
|
l = len;
|
2009-03-28 20:51:36 +03:00
|
|
|
phys_addr += (addr & ~TARGET_PAGE_MASK);
|
2013-12-13 10:31:02 +04:00
|
|
|
if (is_write) {
|
2020-05-18 18:53:03 +03:00
|
|
|
res = address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr,
|
|
|
|
attrs, buf, l);
|
2013-12-13 10:31:02 +04:00
|
|
|
} else {
|
2020-05-18 18:53:03 +03:00
|
|
|
res = address_space_read(cpu->cpu_ases[asidx].as, phys_addr,
|
|
|
|
attrs, buf, l);
|
|
|
|
}
|
|
|
|
if (res != MEMTX_OK) {
|
|
|
|
return -1;
|
2013-12-13 10:31:02 +04:00
|
|
|
}
|
2004-01-24 18:23:36 +03:00
|
|
|
len -= l;
|
|
|
|
buf += l;
|
|
|
|
addr += l;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2015-11-05 21:10:29 +03:00
|
|
|
|
2012-10-23 14:30:10 +04:00
|
|
|
/*
 * Return true when @phys_addr in address_space_memory translates to a
 * region that is neither RAM nor ROM-device memory.
 */
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    hwaddr probe_len = 1;
    MemoryRegion *mr;

    RCU_READ_LOCK_GUARD();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &probe_len, false,
                                 MEMTXATTRS_UNSPECIFIED);

    return !memory_region_is_ram(mr) && !memory_region_is_romd(mr);
}
|
2013-06-26 05:35:34 +04:00
|
|
|
|
2015-05-21 15:24:13 +03:00
|
|
|
/*
 * Call @func(block, @opaque) for each RAMBlock, with RCU_READ_LOCK_GUARD()
 * taken first.
 *
 * Iteration stops at the first non-zero return value of @func, which is
 * then returned.  Returns 0 when every call returned 0 (or there were no
 * blocks).
 */
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    RCU_READ_LOCK_GUARD();
    RAMBLOCK_FOREACH(block) {
        int rc = func(block, opaque);

        if (rc != 0) {
            return rc;
        }
    }
    return 0;
}
|
2017-02-24 21:28:32 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Unmap pages of memory from start to start+length such that
|
|
|
|
* they a) read as 0, b) Trigger whatever fault mechanism
|
|
|
|
* the OS provides for postcopy.
|
|
|
|
* The pages must be unmapped by the end of the function.
|
|
|
|
* Returns: 0 on success, none-0 on failure
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
|
|
|
|
{
|
|
|
|
int ret = -1;
|
|
|
|
|
|
|
|
uint8_t *host_startaddr = rb->host + start;
|
|
|
|
|
2020-01-03 10:39:58 +03:00
|
|
|
if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: Unaligned start address: %p",
|
|
|
|
__func__, host_startaddr);
|
2017-02-24 21:28:32 +03:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2021-04-29 14:27:03 +03:00
|
|
|
if ((start + length) <= rb->max_length) {
|
2018-03-12 20:20:56 +03:00
|
|
|
bool need_madvise, need_fallocate;
|
2020-01-03 10:39:58 +03:00
|
|
|
if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: Unaligned length: %zx", __func__, length);
|
2017-02-24 21:28:32 +03:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
errno = ENOTSUP; /* If we are missing MADVISE etc */
|
|
|
|
|
2018-03-12 20:20:56 +03:00
|
|
|
/* The logic here is messy;
|
|
|
|
* madvise DONTNEED fails for hugepages
|
|
|
|
* fallocate works on hugepages and shmem
|
softmmu/physmem: Fix ram_block_discard_range() to handle shared anonymous memory
We can create shared anonymous memory via
"-object memory-backend-ram,share=on,..."
which is, for example, required by PVRDMA for mremap() to work.
Shared anonymous memory is weird, though. Instead of MADV_DONTNEED, we
have to use MADV_REMOVE: MADV_DONTNEED will only remove / zap all
relevant page table entries of the current process, the backend storage
will not get removed, resulting in no reduced memory consumption and
a repopulation of previous content on next access.
Shared anonymous memory is internally really just shmem, but without a
fd exposed. As we cannot use fallocate() without the fd to discard the
backing storage, MADV_REMOVE gets the same job done without a fd as
documented in "man 2 madvise". Removing backing storage implicitly
invalidates all page table entries with relevant mappings - an additional
MADV_DONTNEED is not required.
Fixes: 06329ccecfa0 ("mem: add share parameter to memory-backend-ram")
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20210406080126.24010-3-david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-04-06 11:01:25 +03:00
|
|
|
* shared anonymous memory requires madvise REMOVE
|
2018-03-12 20:20:56 +03:00
|
|
|
*/
|
2024-01-02 04:57:48 +03:00
|
|
|
need_madvise = (rb->page_size == qemu_real_host_page_size());
|
2018-03-12 20:20:56 +03:00
|
|
|
need_fallocate = rb->fd != -1;
|
|
|
|
if (need_fallocate) {
|
|
|
|
/* For a file, this causes the area of the file to be zero'd
|
|
|
|
* if read, and for hugetlbfs also causes it to be unmapped
|
|
|
|
* so a userfault will trigger.
|
2017-02-24 21:28:33 +03:00
|
|
|
*/
|
|
|
|
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
|
2023-09-06 15:04:57 +03:00
|
|
|
/*
|
|
|
|
* fallocate() will fail with readonly files. Let's print a
|
|
|
|
* proper error message.
|
|
|
|
*/
|
|
|
|
if (rb->flags & RAM_READONLY_FD) {
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: Discarding RAM with readonly files is not"
|
|
|
|
" supported", __func__);
|
2023-09-06 15:04:57 +03:00
|
|
|
goto err;
|
|
|
|
|
|
|
|
}
|
2023-07-06 10:56:06 +03:00
|
|
|
/*
|
|
|
|
* We'll discard data from the actual file, even though we only
|
|
|
|
* have a MAP_PRIVATE mapping, possibly messing with other
|
|
|
|
* MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to
|
|
|
|
* change that behavior whithout violating the promised
|
|
|
|
* semantics of ram_block_discard_range().
|
|
|
|
*
|
|
|
|
* Only warn, because it works as long as nobody else uses that
|
|
|
|
* file.
|
|
|
|
*/
|
|
|
|
if (!qemu_ram_is_shared(rb)) {
|
2024-01-25 05:33:28 +03:00
|
|
|
warn_report_once("%s: Discarding RAM"
|
2023-07-06 10:56:06 +03:00
|
|
|
" in private file mappings is possibly"
|
|
|
|
" dangerous, because it will modify the"
|
|
|
|
" underlying file and will affect other"
|
2024-01-25 05:33:28 +03:00
|
|
|
" users of the file", __func__);
|
2023-07-06 10:56:06 +03:00
|
|
|
}
|
|
|
|
|
2017-02-24 21:28:33 +03:00
|
|
|
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
|
|
|
start, length);
|
2018-03-12 20:20:56 +03:00
|
|
|
if (ret) {
|
|
|
|
ret = -errno;
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
|
|
|
|
__func__, rb->idstr, start, length, ret);
|
2018-03-12 20:20:56 +03:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
ret = -ENOSYS;
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: fallocate not available/file"
|
2018-03-12 20:20:56 +03:00
|
|
|
"%s:%" PRIx64 " +%zx (%d)",
|
2024-01-25 05:33:28 +03:00
|
|
|
__func__, rb->idstr, start, length, ret);
|
2018-03-12 20:20:56 +03:00
|
|
|
goto err;
|
2017-02-24 21:28:33 +03:00
|
|
|
#endif
|
|
|
|
}
|
2018-03-12 20:20:56 +03:00
|
|
|
if (need_madvise) {
|
|
|
|
/* For normal RAM this causes it to be unmapped,
|
|
|
|
* for shared memory it causes the local mapping to disappear
|
|
|
|
* and to fall back on the file contents (which we just
|
|
|
|
* fallocate'd away).
|
|
|
|
*/
|
|
|
|
#if defined(CONFIG_MADVISE)
|
softmmu/physmem: Fix ram_block_discard_range() to handle shared anonymous memory
We can create shared anonymous memory via
"-object memory-backend-ram,share=on,..."
which is, for example, required by PVRDMA for mremap() to work.
Shared anonymous memory is weird, though. Instead of MADV_DONTNEED, we
have to use MADV_REMOVE: MADV_DONTNEED will only remove / zap all
relevant page table entries of the current process, the backend storage
will not get removed, resulting in no reduced memory consumption and
a repopulation of previous content on next access.
Shared anonymous memory is internally really just shmem, but without a
fd exposed. As we cannot use fallocate() without the fd to discard the
backing storage, MADV_REMOVE gets the same job done without a fd as
documented in "man 2 madvise". Removing backing storage implicitly
invalidates all page table entries with relevant mappings - an additional
MADV_DONTNEED is not required.
Fixes: 06329ccecfa0 ("mem: add share parameter to memory-backend-ram")
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20210406080126.24010-3-david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2021-04-06 11:01:25 +03:00
|
|
|
if (qemu_ram_is_shared(rb) && rb->fd < 0) {
|
|
|
|
ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE);
|
|
|
|
} else {
|
|
|
|
ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
|
|
|
|
}
|
2018-03-12 20:20:56 +03:00
|
|
|
if (ret) {
|
|
|
|
ret = -errno;
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: Failed to discard range "
|
2018-03-12 20:20:56 +03:00
|
|
|
"%s:%" PRIx64 " +%zx (%d)",
|
2024-01-25 05:33:28 +03:00
|
|
|
__func__, rb->idstr, start, length, ret);
|
2018-03-12 20:20:56 +03:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
ret = -ENOSYS;
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: MADVISE not available %s:%" PRIx64 " +%zx (%d)",
|
|
|
|
__func__, rb->idstr, start, length, ret);
|
2018-03-12 20:20:56 +03:00
|
|
|
goto err;
|
|
|
|
#endif
|
2017-02-24 21:28:32 +03:00
|
|
|
}
|
2018-03-12 20:20:56 +03:00
|
|
|
trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
|
|
|
|
need_madvise, need_fallocate, ret);
|
2017-02-24 21:28:32 +03:00
|
|
|
} else {
|
2024-01-25 05:33:28 +03:00
|
|
|
error_report("%s: Overrun block '%s' (%" PRIu64 "/%zx/" RAM_ADDR_FMT")",
|
|
|
|
__func__, rb->idstr, start, length, rb->max_length);
|
2017-02-24 21:28:32 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
err:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2024-03-20 11:39:07 +03:00
|
|
|
/*
 * Punch a hole of @length bytes at offset @start into the guest_memfd of
 * @rb, keeping the file size.
 *
 * Returns 0 on success, a negative errno value if fallocate() fails, or
 * -ENOSYS when built without CONFIG_FALLOCATE_PUNCH_HOLE.  Failures are
 * reported with error_report().
 */
int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
                                        size_t length)
{
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
    int ret = fallocate(rb->guest_memfd,
                        FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                        start, length);

    if (ret == 0) {
        return 0;
    }

    ret = -errno;
    error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
                 __func__, rb->idstr, start, length, ret);
    return ret;
#else
    int ret = -ENOSYS;

    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
                 __func__, rb->idstr, start, length, ret);
    return ret;
#endif
}
|
|
|
|
|
2018-07-18 10:48:00 +03:00
|
|
|
/* Return true when @rb has the RAM_PMEM flag set. */
bool ramblock_is_pmem(RAMBlock *rb)
{
    return (rb->flags & RAM_PMEM) != 0;
}
|
|
|
|
|
2019-04-17 22:17:56 +03:00
|
|
|
static void mtree_print_phys_entries(int start, int end, int skip, int ptr)
|
2017-09-21 11:51:06 +03:00
|
|
|
{
|
|
|
|
if (start == end - 1) {
|
2019-04-17 22:17:56 +03:00
|
|
|
qemu_printf("\t%3d ", start);
|
2017-09-21 11:51:06 +03:00
|
|
|
} else {
|
2019-04-17 22:17:56 +03:00
|
|
|
qemu_printf("\t%3d..%-3d ", start, end - 1);
|
2017-09-21 11:51:06 +03:00
|
|
|
}
|
2019-04-17 22:17:56 +03:00
|
|
|
qemu_printf(" skip=%d ", skip);
|
2017-09-21 11:51:06 +03:00
|
|
|
if (ptr == PHYS_MAP_NODE_NIL) {
|
2019-04-17 22:17:56 +03:00
|
|
|
qemu_printf(" ptr=NIL");
|
2017-09-21 11:51:06 +03:00
|
|
|
} else if (!skip) {
|
2019-04-17 22:17:56 +03:00
|
|
|
qemu_printf(" ptr=#%d", ptr);
|
2017-09-21 11:51:06 +03:00
|
|
|
} else {
|
2019-04-17 22:17:56 +03:00
|
|
|
qemu_printf(" ptr=[%d]", ptr);
|
2017-09-21 11:51:06 +03:00
|
|
|
}
|
2019-04-17 22:17:56 +03:00
|
|
|
qemu_printf("\n");
|
2017-09-21 11:51:06 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Convert an Int128 size into "size - 1" as a hwaddr, i.e. the offset of
 * the last byte of a range of that size.  A zero size yields 0.
 * Note that the argument is evaluated twice.
 */
#define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \
                       int128_sub((size), int128_one())) : 0)
|
|
|
|
|
2019-04-17 22:17:56 +03:00
|
|
|
/*
 * Dump the dispatch structures of @d with qemu_printf().
 *
 * First every physical section is listed with its address range, region
 * name and tags ([ROOT] when the region is @root, [MRU] for the dispatch's
 * mru_section, [iommu], and a fixed label for the first four section
 * indexes).  Then every node of the physical page map is listed, with runs
 * of identical entries collapsed into a single line.
 */
void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root)
{
    /* Labels for the first four section indexes. */
    static const char *const names[] = { " [unassigned]", " [not dirty]",
                                         " [ROM]", " [watch]" };
    int i;

    qemu_printf(" Dispatch\n");
    qemu_printf(" Physical sections\n");

    for (i = 0; i < d->map.sections_nb; ++i) {
        MemoryRegionSection *s = &d->map.sections[i];
        MemoryRegion *smr = s->mr;

        qemu_printf(" #%d @" HWADDR_FMT_plx ".." HWADDR_FMT_plx
                    " %s%s%s%s%s",
                    i,
                    s->offset_within_address_space,
                    s->offset_within_address_space + MR_SIZE(s->size),
                    smr->name ? smr->name : "(noname)",
                    i < ARRAY_SIZE(names) ? names[i] : "",
                    smr == root ? " [ROOT]" : "",
                    s == d->mru_section ? " [MRU]" : "",
                    smr->is_iommu ? " [iommu]" : "");

        if (smr->alias) {
            const char *alias_name = smr->alias->name;

            qemu_printf(" alias=%s", alias_name ? alias_name : "noname");
        }
        qemu_printf("\n");
    }

    qemu_printf(" Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n",
                P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip);

    for (i = 0; i < d->map.nodes_nb; ++i) {
        Node *node = &d->map.nodes[i];
        PhysPageEntry run = (*node)[0];   /* value shared by current run */
        int run_start = 0;                /* index where that run began */
        int j;

        qemu_printf(" [%d]\n", i);

        for (j = 0; j < ARRAY_SIZE(*node); ++j) {
            PhysPageEntry *pe = &(*node)[j];

            if (pe->ptr != run.ptr || pe->skip != run.skip) {
                /* Entry differs: flush the finished run, start a new one. */
                mtree_print_phys_entries(run_start, j, run.skip, run.ptr);
                run_start = j;
                run = *pe;
            }
        }

        /* Flush the run that reaches the end of the node. */
        if (run_start != ARRAY_SIZE(*node)) {
            mtree_print_phys_entries(run_start, j, run.skip, run.ptr);
        }
    }
}
|
|
|
|
|
2021-04-13 12:55:29 +03:00
|
|
|
/*
 * Number of requests that any discards work.  While non-zero,
 * ram_block_discard_disable() and ram_block_uncoordinated_discard_disable()
 * refuse to disable discards with -EBUSY.
 */
static unsigned int ram_block_discard_required_cnt;
/*
 * Number of requests that only coordinated discards work.  While non-zero,
 * ram_block_discard_disable() refuses with -EBUSY.
 */
static unsigned int ram_block_coordinated_discard_required_cnt;
/* Number of requests to disable any discards. */
static unsigned int ram_block_discard_disabled_cnt;
/* Number of requests to disable only uncoordinated discards. */
static unsigned int ram_block_uncoordinated_discard_disabled_cnt;
/*
 * Taken around updates of the counters above; initialised on first use by
 * ram_block_discard_disable_mutex_lock().
 */
static QemuMutex ram_block_discard_disable_mutex;
|
|
|
|
|
|
|
|
static void ram_block_discard_disable_mutex_lock(void)
|
|
|
|
{
|
|
|
|
static gsize initialized;
|
|
|
|
|
|
|
|
if (g_once_init_enter(&initialized)) {
|
|
|
|
qemu_mutex_init(&ram_block_discard_disable_mutex);
|
|
|
|
g_once_init_leave(&initialized, 1);
|
|
|
|
}
|
|
|
|
qemu_mutex_lock(&ram_block_discard_disable_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ram_block_discard_disable_mutex_unlock(void)
|
|
|
|
{
|
|
|
|
qemu_mutex_unlock(&ram_block_discard_disable_mutex);
|
|
|
|
}
|
2020-06-26 10:22:29 +03:00
|
|
|
|
|
|
|
int ram_block_discard_disable(bool state)
|
|
|
|
{
|
2021-04-13 12:55:28 +03:00
|
|
|
int ret = 0;
|
2020-06-26 10:22:29 +03:00
|
|
|
|
2021-04-13 12:55:28 +03:00
|
|
|
ram_block_discard_disable_mutex_lock();
|
2020-06-26 10:22:29 +03:00
|
|
|
if (!state) {
|
2021-04-13 12:55:28 +03:00
|
|
|
ram_block_discard_disabled_cnt--;
|
2021-04-13 12:55:29 +03:00
|
|
|
} else if (ram_block_discard_required_cnt ||
|
|
|
|
ram_block_coordinated_discard_required_cnt) {
|
|
|
|
ret = -EBUSY;
|
2021-04-13 12:55:28 +03:00
|
|
|
} else {
|
2021-04-13 12:55:29 +03:00
|
|
|
ram_block_discard_disabled_cnt++;
|
|
|
|
}
|
|
|
|
ram_block_discard_disable_mutex_unlock();
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ram_block_uncoordinated_discard_disable(bool state)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
ram_block_discard_disable_mutex_lock();
|
|
|
|
if (!state) {
|
|
|
|
ram_block_uncoordinated_discard_disabled_cnt--;
|
|
|
|
} else if (ram_block_discard_required_cnt) {
|
2021-04-13 12:55:28 +03:00
|
|
|
ret = -EBUSY;
|
2021-04-13 12:55:29 +03:00
|
|
|
} else {
|
|
|
|
ram_block_uncoordinated_discard_disabled_cnt++;
|
2020-06-26 10:22:29 +03:00
|
|
|
}
|
2021-04-13 12:55:28 +03:00
|
|
|
ram_block_discard_disable_mutex_unlock();
|
|
|
|
return ret;
|
2020-06-26 10:22:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
int ram_block_discard_require(bool state)
|
|
|
|
{
|
2021-04-13 12:55:28 +03:00
|
|
|
int ret = 0;
|
2020-06-26 10:22:29 +03:00
|
|
|
|
2021-04-13 12:55:28 +03:00
|
|
|
ram_block_discard_disable_mutex_lock();
|
2020-06-26 10:22:29 +03:00
|
|
|
if (!state) {
|
2021-04-13 12:55:28 +03:00
|
|
|
ram_block_discard_required_cnt--;
|
2021-04-13 12:55:29 +03:00
|
|
|
} else if (ram_block_discard_disabled_cnt ||
|
|
|
|
ram_block_uncoordinated_discard_disabled_cnt) {
|
|
|
|
ret = -EBUSY;
|
2021-04-13 12:55:28 +03:00
|
|
|
} else {
|
2021-04-13 12:55:29 +03:00
|
|
|
ram_block_discard_required_cnt++;
|
|
|
|
}
|
|
|
|
ram_block_discard_disable_mutex_unlock();
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ram_block_coordinated_discard_require(bool state)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
ram_block_discard_disable_mutex_lock();
|
|
|
|
if (!state) {
|
|
|
|
ram_block_coordinated_discard_required_cnt--;
|
|
|
|
} else if (ram_block_discard_disabled_cnt) {
|
2021-04-13 12:55:28 +03:00
|
|
|
ret = -EBUSY;
|
2021-04-13 12:55:29 +03:00
|
|
|
} else {
|
|
|
|
ram_block_coordinated_discard_required_cnt++;
|
2020-06-26 10:22:29 +03:00
|
|
|
}
|
2021-04-13 12:55:28 +03:00
|
|
|
ram_block_discard_disable_mutex_unlock();
|
|
|
|
return ret;
|
2020-06-26 10:22:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
bool ram_block_discard_is_disabled(void)
|
|
|
|
{
|
2021-04-13 12:55:29 +03:00
|
|
|
return qatomic_read(&ram_block_discard_disabled_cnt) ||
|
|
|
|
qatomic_read(&ram_block_uncoordinated_discard_disabled_cnt);
|
2020-06-26 10:22:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
bool ram_block_discard_is_required(void)
|
|
|
|
{
|
2021-04-13 12:55:29 +03:00
|
|
|
return qatomic_read(&ram_block_discard_required_cnt) ||
|
|
|
|
qatomic_read(&ram_block_coordinated_discard_required_cnt);
|
2020-06-26 10:22:29 +03:00
|
|
|
}
|