2008-12-04 23:33:06 +03:00
|
|
|
/*
|
2011-07-20 12:07:01 +04:00
|
|
|
* Virtio Balloon Device
|
2008-12-04 23:33:06 +03:00
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2008
|
2011-07-20 12:07:01 +04:00
|
|
|
* Copyright (C) 2011 Red Hat, Inc.
|
|
|
|
* Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
|
2008-12-04 23:33:06 +03:00
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2010-04-27 16:34:06 +04:00
|
|
|
#include "iov.h"
|
2008-12-04 23:33:06 +03:00
|
|
|
#include "qemu-common.h"
|
|
|
|
#include "virtio.h"
|
|
|
|
#include "pc.h"
|
|
|
|
#include "cpu.h"
|
|
|
|
#include "balloon.h"
|
|
|
|
#include "virtio-balloon.h"
|
|
|
|
#include "kvm.h"
|
2011-12-19 15:18:13 +04:00
|
|
|
#include "exec-memory.h"
|
2008-12-04 23:33:06 +03:00
|
|
|
|
|
|
|
#if defined(__linux__)
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
typedef struct VirtIOBalloon
|
|
|
|
{
|
|
|
|
VirtIODevice vdev;
|
2010-01-26 23:17:35 +03:00
|
|
|
VirtQueue *ivq, *dvq, *svq;
|
2008-12-04 23:33:06 +03:00
|
|
|
uint32_t num_pages;
|
|
|
|
uint32_t actual;
|
2010-01-26 23:17:35 +03:00
|
|
|
uint64_t stats[VIRTIO_BALLOON_S_NR];
|
|
|
|
VirtQueueElement stats_vq_elem;
|
|
|
|
size_t stats_vq_offset;
|
2011-07-28 10:06:26 +04:00
|
|
|
DeviceState *qdev;
|
2008-12-04 23:33:06 +03:00
|
|
|
} VirtIOBalloon;
|
|
|
|
|
|
|
|
/*
 * Convert a generic VirtIODevice pointer into the VirtIOBalloon that
 * contains it.  This is a plain pointer cast, so it relies on vdev being
 * the first member of VirtIOBalloon.
 */
static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
{
    VirtIOBalloon *balloon = (VirtIOBalloon *)vdev;

    return balloon;
}
|
|
|
|
|
|
|
|
static void balloon_page(void *addr, int deflate)
|
|
|
|
{
|
|
|
|
#if defined(__linux__)
|
|
|
|
if (!kvm_enabled() || kvm_has_sync_mmu())
|
2010-09-25 15:26:05 +04:00
|
|
|
qemu_madvise(addr, TARGET_PAGE_SIZE,
|
|
|
|
deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
|
2008-12-04 23:33:06 +03:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2010-01-26 23:17:35 +03:00
|
|
|
/*
|
|
|
|
* reset_stats - Mark all items in the stats array as unset
|
|
|
|
*
|
|
|
|
* This function needs to be called at device intialization and before
|
|
|
|
* before updating to a set of newly-generated stats. This will ensure that no
|
|
|
|
* stale values stick around in case the guest reports a subset of the supported
|
|
|
|
* statistics.
|
|
|
|
*/
|
|
|
|
static inline void reset_stats(VirtIOBalloon *dev)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
|
|
|
|
}
|
|
|
|
|
2008-12-04 23:33:06 +03:00
|
|
|
/*
 * Queue handler shared by the two page queues.
 *
 * Every element popped from @vq carries an array of 32-bit page frame
 * numbers in its out buffers.  Each PFN is turned into a guest physical
 * address, looked up in system memory, and, if it is backed by RAM,
 * handed to balloon_page().  Requests on the device's dvq are deflate
 * requests; anything else is treated as inflate.  The element is then
 * returned to the guest with the number of bytes consumed.
 */
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *balloon = to_virtio_balloon(vdev);
    const int deflate = (vq == balloon->dvq);
    VirtQueueElement elem;

    while (virtqueue_pop(vq, &elem)) {
        size_t consumed = 0;
        uint32_t pfn;

        for (;;) {
            MemoryRegionSection section;
            ram_addr_t guest_pa;
            ram_addr_t region_off;

            if (iov_to_buf(elem.out_sg, elem.out_num, &pfn, consumed,
                           sizeof(pfn)) != sizeof(pfn)) {
                break;
            }
            consumed += sizeof(pfn);

            guest_pa = (ram_addr_t)ldl_p(&pfn) << VIRTIO_BALLOON_PFN_SHIFT;

            /* FIXME: remove get_system_memory(), but how? */
            section = memory_region_find(get_system_memory(), guest_pa, 1);
            if (section.size && memory_region_is_ram(section.mr)) {
                /* Using memory_region_get_ram_ptr is bending the rules a
                   bit, but should be OK because we only want a single
                   page. */
                region_off = section.offset_within_region;
                balloon_page(memory_region_get_ram_ptr(section.mr) + region_off,
                             deflate);
            }
        }

        virtqueue_push(vq, &elem, consumed);
        virtio_notify(vdev, vq);
    }
}
|
|
|
|
|
2010-01-26 23:17:35 +03:00
|
|
|
/*
 * Handler for the stats queue.
 *
 * Pops one element into s->stats_vq_elem and parses its out buffers as a
 * sequence of VirtIOBalloonStat records.  Records whose tag is within
 * range update s->stats[]; others are skipped.  The element is NOT pushed
 * back here; the number of bytes parsed is remembered in
 * s->stats_vq_offset.
 */
static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
    VirtQueueElement *elem = &s->stats_vq_elem;
    VirtIOBalloonStat rec;
    size_t consumed;

    if (!virtqueue_pop(vq, elem)) {
        return;
    }

    /* Start from a clean slate so that values reported earlier do not
     * linger when the guest now reports fewer stats than it used to
     * (e.g. it has booted into an old kernel).
     */
    reset_stats(s);

    for (consumed = 0;
         iov_to_buf(elem->out_sg, elem->out_num, &rec, consumed, sizeof(rec))
             == sizeof(rec);
         consumed += sizeof(rec)) {
        uint16_t tag = tswap16(rec.tag);
        uint64_t val = tswap64(rec.val);

        if (tag < VIRTIO_BALLOON_S_NR) {
            s->stats[tag] = val;
        }
    }

    s->stats_vq_offset = consumed;
}
|
|
|
|
|
2008-12-04 23:33:06 +03:00
|
|
|
/*
 * Fill the guest-visible config space from the device state.
 *
 * @vdev:        the balloon device
 * @config_data: destination buffer; 8 bytes are written
 *
 * Both fields are stored little-endian.
 */
static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOBalloon *balloon = to_virtio_balloon(vdev);
    struct virtio_balloon_config cfg;

    cfg.actual = cpu_to_le32(balloon->actual);
    cfg.num_pages = cpu_to_le32(balloon->num_pages);

    /* 8 is the config size this device passes to virtio_common_init(). */
    memcpy(config_data, &cfg, 8);
}
|
|
|
|
|
|
|
|
/*
 * Take over a config space update written by the guest.
 *
 * @vdev:        the balloon device
 * @config_data: source buffer; 8 bytes are read
 *
 * Only the 'actual' field is taken from the guest.
 */
static void virtio_balloon_set_config(VirtIODevice *vdev,
                                      const uint8_t *config_data)
{
    struct virtio_balloon_config cfg;
    VirtIOBalloon *balloon = to_virtio_balloon(vdev);

    memcpy(&cfg, config_data, 8);

    /* The balloon config space is little-endian rather than guest-native,
     * so convert to host order, mirroring the cpu_to_le32() done in
     * virtio_balloon_get_config(). */
    balloon->actual = le32_to_cpu(cfg.actual);
}
|
|
|
|
|
2010-01-10 14:52:53 +03:00
|
|
|
/*
 * Return the feature bits offered by the device: the incoming mask @f
 * with the stats virtqueue feature bit added.  @vdev is not used.
 */
static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
{
    uint32_t offered = f;

    offered |= (1 << VIRTIO_BALLOON_F_STATS_VQ);

    return offered;
}
|
|
|
|
|
2011-10-21 17:41:37 +04:00
|
|
|
/*
 * Balloon "stat" callback registered with qemu_add_balloon_handler().
 *
 * @opaque: the VirtIOBalloon registered in virtio_balloon_init()
 * @info:   result structure; only info->actual is filled in, computed as
 *          ram_size minus the balloon size reported by the guest
 *          (dev->actual pages shifted by VIRTIO_BALLOON_PFN_SHIFT)
 */
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
    VirtIOBalloon *dev = opaque;
    uint64_t ballooned_bytes;

#if 0
    /* Disable guest-provided stats for now. For more details please check:
     * https://bugzilla.redhat.com/show_bug.cgi?id=623903
     *
     * If you do enable it (which is probably not going to happen as we
     * need a new command for it), remember that you also need to fill the
     * appropriate members of the BalloonInfo structure so that the stats
     * are returned to the client.
     */
    if (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) {
        virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset);
        virtio_notify(&dev->vdev, dev->svq);
        return;
    }
#endif

    /* Stats are not supported.  Clear out any stale values that might
     * have been set by a more featureful guest kernel.
     */
    reset_stats(dev);

    ballooned_bytes = (uint64_t)dev->actual << VIRTIO_BALLOON_PFN_SHIFT;
    info->actual = ram_size - ballooned_bytes;
}
|
|
|
|
|
balloon: Separate out stat and balloon handling
Passing on '0' as ballooning target to indicate retrieval of stats is
bad API. It also makes 'balloon 0' in the monitor cause a segfault.
Have two different functions handle the different functionality instead.
Detailed explanation from Markus's review:
1. do_info_balloon() is an info_async() method. It receives a callback
with argument, to be called exactly once (callback frees the
argument). It passes the callback via qemu_balloon_status() and
indirectly through qemu_balloon_event to virtio_balloon_to_target().
virtio_balloon_to_target() executes its balloon stats half. It
stores the callback in the device state.
If it can't send a stats request, it resets stats and calls the
callback right away.
Else, it sends a stats request. The device model runs the callback
when it receives the answer.
Works.
2. do_balloon() is a cmd_async() method. It receives a callback with
argument, to be called when the command completes. do_balloon()
calls it right before it succeeds. Odd, but should work.
Nevertheless, it passes the callback on via qemu_ballon() and
indirectly through qemu_balloon_event to virtio_balloon_to_target().
a. If the argument is non-zero, virtio_balloon_to_target() executes
its balloon half, which doesn't use the callback in any way.
Odd, but works.
b. If the argument is zero, virtio_balloon_to_target() executes its
balloon stats half, just like in 1. It either calls the callback
right away, or arranges for it to be called later.
Thus, the callback runs twice: use after free and double free.
Test case: start with -S -device virtio-balloon, execute "balloon 0" in
human monitor. Runs the callback first from virtio_balloon_to_target(),
then again from do_balloon().
Reported-by: Mike Cao <bcao@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2011-07-20 12:00:56 +04:00
|
|
|
/*
 * Balloon "set target" callback registered with qemu_add_balloon_handler().
 *
 * @opaque: the VirtIOBalloon registered in virtio_balloon_init()
 * @target: requested guest memory size in bytes; values above ram_size
 *          are clamped to ram_size
 *
 * Stores the resulting balloon size (in balloon pages) in num_pages and
 * raises a config-change notification.
 *
 * NOTE(review): a target of 0 is silently ignored.  This looks like a
 * leftover from the time when 0 meant "query stats" -- confirm whether it
 * is still intended.
 */
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
    VirtIOBalloon *balloon = opaque;
    ram_addr_t wanted = (target > ram_size) ? ram_size : target;

    if (!wanted) {
        return;
    }

    balloon->num_pages = (ram_size - wanted) >> VIRTIO_BALLOON_PFN_SHIFT;
    virtio_notify_config(&balloon->vdev);
}
|
|
|
|
|
|
|
|
/*
 * savevm handler: write the device state to @f.
 *
 * The common virtio state goes first, followed by num_pages and actual
 * as big-endian 32-bit values.  virtio_balloon_load() reads them back in
 * the same order.
 */
static void virtio_balloon_save(QEMUFile *f, void *opaque)
{
    VirtIOBalloon *balloon = opaque;
    VirtIODevice *vdev = &balloon->vdev;

    virtio_save(vdev, f);

    qemu_put_be32(f, balloon->num_pages);
    qemu_put_be32(f, balloon->actual);
}
|
|
|
|
|
|
|
|
/*
 * loadvm handler: restore the device state from @f.
 *
 * @f:          migration stream positioned at this device's section
 * @opaque:     the VirtIOBalloon registered with register_savevm()
 * @version_id: section version; only version 1 is understood
 *
 * Returns 0 on success, -EINVAL for an unknown version, or the error
 * reported by virtio_load() when the common virtio state cannot be
 * restored.
 */
static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBalloon *s = opaque;
    int ret;

    if (version_id != 1) {
        return -EINVAL;
    }

    /* Do not carry on after a failed common-state load: the stream
     * position is then unreliable and the values read below would be
     * garbage, while the caller would be told the load succeeded. */
    ret = virtio_load(&s->vdev, f);
    if (ret) {
        return ret;
    }

    /* Same order as written by virtio_balloon_save(). */
    s->num_pages = qemu_get_be32(f);
    s->actual = qemu_get_be32(f);

    return 0;
}
|
|
|
|
|
2009-05-18 17:51:59 +04:00
|
|
|
/*
 * Create and set up a virtio balloon device.
 *
 * @dev: owning qdev device; remembered in the balloon state so that
 *       virtio_balloon_exit() can unregister the savevm handlers
 *
 * Returns the embedded VirtIODevice, or NULL when the balloon handlers
 * could not be registered (the device is cleaned up in that case).
 */
VirtIODevice *virtio_balloon_init(DeviceState *dev)
{
    VirtIOBalloon *s;
    VirtIODevice *vdev;

    /* 8 is the size of the config space exposed by this device. */
    s = (VirtIOBalloon *)virtio_common_init("virtio-balloon",
                                            VIRTIO_ID_BALLOON,
                                            8, sizeof(VirtIOBalloon));
    vdev = &s->vdev;

    vdev->get_config = virtio_balloon_get_config;
    vdev->set_config = virtio_balloon_set_config;
    vdev->get_features = virtio_balloon_get_features;

    if (qemu_add_balloon_handler(virtio_balloon_to_target,
                                 virtio_balloon_stat, s) < 0) {
        virtio_cleanup(vdev);
        return NULL;
    }

    /* Queue creation order: first page queue, deflate queue, stats queue. */
    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);

    reset_stats(s);

    s->qdev = dev;
    register_savevm(dev, "virtio-balloon", -1, 1,
                    virtio_balloon_save, virtio_balloon_load, s);

    return vdev;
}
|
2011-07-27 12:20:41 +04:00
|
|
|
|
|
|
|
/*
 * Tear down a balloon device created by virtio_balloon_init(): remove the
 * balloon handlers, unregister the savevm handlers, then release the
 * common virtio state.
 */
void virtio_balloon_exit(VirtIODevice *vdev)
{
    VirtIOBalloon *balloon = DO_UPCAST(VirtIOBalloon, vdev, vdev);
    DeviceState *owner = balloon->qdev;

    qemu_remove_balloon_handler(balloon);
    unregister_savevm(owner, "virtio-balloon", balloon);
    virtio_cleanup(vdev);
}
|