2008-12-04 23:33:06 +03:00
|
|
|
/*
|
2011-07-20 12:07:01 +04:00
|
|
|
* Virtio Balloon Device
|
2008-12-04 23:33:06 +03:00
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2008
|
2011-07-20 12:07:01 +04:00
|
|
|
* Copyright (C) 2011 Red Hat, Inc.
|
|
|
|
* Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
|
2008-12-04 23:33:06 +03:00
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/iov.h"
|
2008-12-04 23:33:06 +03:00
|
|
|
#include "qemu-common.h"
|
|
|
|
#include "virtio.h"
|
|
|
|
#include "pc.h"
|
|
|
|
#include "cpu.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/balloon.h"
|
2008-12-04 23:33:06 +03:00
|
|
|
#include "virtio-balloon.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/kvm.h"
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/address-spaces.h"
|
2008-12-04 23:33:06 +03:00
|
|
|
|
|
|
|
#if defined(__linux__)
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
typedef struct VirtIOBalloon
|
|
|
|
{
|
|
|
|
VirtIODevice vdev;
|
2010-01-26 23:17:35 +03:00
|
|
|
VirtQueue *ivq, *dvq, *svq;
|
2008-12-04 23:33:06 +03:00
|
|
|
uint32_t num_pages;
|
|
|
|
uint32_t actual;
|
2010-01-26 23:17:35 +03:00
|
|
|
uint64_t stats[VIRTIO_BALLOON_S_NR];
|
|
|
|
VirtQueueElement stats_vq_elem;
|
|
|
|
size_t stats_vq_offset;
|
2011-07-28 10:06:26 +04:00
|
|
|
DeviceState *qdev;
|
2008-12-04 23:33:06 +03:00
|
|
|
} VirtIOBalloon;
|
|
|
|
|
|
|
|
/*
 * Convert a generic VirtIODevice pointer into the VirtIOBalloon that holds
 * it.  This is an unchecked pointer cast, valid because 'vdev' is the first
 * member of VirtIOBalloon.
 */
static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
{
    VirtIOBalloon *balloon = (VirtIOBalloon *)vdev;

    return balloon;
}
|
|
|
|
|
|
|
|
static void balloon_page(void *addr, int deflate)
|
|
|
|
{
|
|
|
|
#if defined(__linux__)
|
|
|
|
if (!kvm_enabled() || kvm_has_sync_mmu())
|
2010-09-25 15:26:05 +04:00
|
|
|
qemu_madvise(addr, TARGET_PAGE_SIZE,
|
|
|
|
deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
|
2008-12-04 23:33:06 +03:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2010-01-26 23:17:35 +03:00
|
|
|
/*
|
|
|
|
* reset_stats - Mark all items in the stats array as unset
|
|
|
|
*
|
|
|
|
* This function needs to be called at device intialization and before
|
|
|
|
* before updating to a set of newly-generated stats. This will ensure that no
|
|
|
|
* stale values stick around in case the guest reports a subset of the supported
|
|
|
|
* statistics.
|
|
|
|
*/
|
|
|
|
static inline void reset_stats(VirtIOBalloon *dev)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
|
|
|
|
}
|
|
|
|
|
2008-12-04 23:33:06 +03:00
|
|
|
/*
 * Queue handler registered for both page queues (ivq and dvq).
 *
 * Every element popped from 'vq' carries an array of 32-bit page frame
 * numbers in its out buffers.  Each PFN is turned into a physical address,
 * looked up in system memory and, when it is backed by RAM, handed to
 * balloon_page().  Pages arriving on dvq are deflated; pages arriving on
 * any other queue are not.
 *
 * Once an element is exhausted it is pushed back with the number of bytes
 * consumed and the guest is notified.
 */
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = to_virtio_balloon(vdev);
    const int deflate = (vq == s->dvq);
    VirtQueueElement elem;

    while (virtqueue_pop(vq, &elem)) {
        size_t consumed;
        uint32_t pfn;

        for (consumed = 0;
             iov_to_buf(elem.out_sg, elem.out_num, consumed,
                        &pfn, sizeof(pfn)) == sizeof(pfn);
             consumed += sizeof(pfn)) {
            MemoryRegionSection section;
            ram_addr_t pa;

            pa = (ram_addr_t)ldl_p(&pfn) << VIRTIO_BALLOON_PFN_SHIFT;

            /* FIXME: remove get_system_memory(), but how? */
            section = memory_region_find(get_system_memory(), pa, 1);

            /* Only pages that resolve to RAM are ballooned; anything else
             * is silently skipped. */
            if (section.size && memory_region_is_ram(section.mr)) {
                ram_addr_t addr = section.offset_within_region;

                /* Using memory_region_get_ram_ptr is bending the rules a
                 * bit, but should be OK because we only want a single
                 * page. */
                balloon_page(memory_region_get_ram_ptr(section.mr) + addr,
                             deflate);
            }
        }

        virtqueue_push(vq, &elem, consumed);
        virtio_notify(vdev, vq);
    }
}
|
|
|
|
|
2010-01-26 23:17:35 +03:00
|
|
|
/*
 * Queue handler for the statistics queue.
 *
 * Pops one element into s->stats_vq_elem, clears the cached statistics and
 * then parses the element's out buffers as a sequence of VirtIOBalloonStat
 * records.  Records whose tag is below VIRTIO_BALLOON_S_NR are stored in
 * s->stats[]; other tags are ignored.  The number of bytes parsed is saved
 * in s->stats_vq_offset.  The element is not pushed back here.
 */
static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
    VirtQueueElement *elem = &s->stats_vq_elem;
    VirtIOBalloonStat record;
    size_t parsed = 0;

    if (!virtqueue_pop(vq, elem)) {
        return;
    }

    /* Initialize the stats to get rid of any stale values.  This is only
     * needed to handle the case where a guest supports fewer stats than it
     * used to (ie. it has booted into an old kernel).
     */
    reset_stats(s);

    for (;;) {
        uint16_t tag;
        uint64_t val;

        if (iov_to_buf(elem->out_sg, elem->out_num, parsed,
                       &record, sizeof(record)) != sizeof(record)) {
            break;
        }

        tag = tswap16(record.tag);
        val = tswap64(record.val);
        parsed += sizeof(record);

        if (tag < VIRTIO_BALLOON_S_NR) {
            s->stats[tag] = val;
        }
    }

    s->stats_vq_offset = parsed;
}
|
|
|
|
|
2008-12-04 23:33:06 +03:00
|
|
|
/*
 * get_config callback: serialise the device configuration for the guest.
 *
 * Both fields are converted to little-endian before the first 8 bytes of
 * the config structure are copied into 'config_data'.
 */
static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOBalloon *s = to_virtio_balloon(vdev);
    struct virtio_balloon_config config = {
        .num_pages = cpu_to_le32(s->num_pages),
        .actual = cpu_to_le32(s->actual),
    };

    memcpy(config_data, &config, 8);
}
|
|
|
|
|
|
|
|
/*
 * set_config callback: accept a configuration write from the guest.
 *
 * Only the 'actual' field is taken from the guest; it is little-endian in
 * the config space and converted to host order here.  When the value
 * differs from the previous one, qemu_balloon_changed() is called with the
 * resulting amount of guest memory in bytes.
 *
 * @vdev:        the balloon's VirtIODevice
 * @config_data: raw config space contents; the first 8 bytes are read
 */
static void virtio_balloon_set_config(VirtIODevice *vdev,
                                      const uint8_t *config_data)
{
    VirtIOBalloon *dev = to_virtio_balloon(vdev);
    struct virtio_balloon_config config;
    uint32_t oldactual = dev->actual;

    memcpy(&config, config_data, 8);
    dev->actual = le32_to_cpu(config.actual);

    if (dev->actual != oldactual) {
        /* Widen before shifting: 'actual' is a 32-bit page count, so a
         * 32-bit shift would wrap for balloons of 4 GiB or more.  This
         * matches the computation in virtio_balloon_stat(). */
        uint64_t ballooned = (uint64_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT;

        qemu_balloon_changed(ram_size - ballooned);
    }
}
|
|
|
|
|
2010-01-10 14:52:53 +03:00
|
|
|
/*
 * get_features callback: return the feature bits in 'f' with
 * VIRTIO_BALLOON_F_STATS_VQ added.  'vdev' is not used.
 */
static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
{
    return f | (1 << VIRTIO_BALLOON_F_STATS_VQ);
}
|
|
|
|
|
2011-10-21 17:41:37 +04:00
|
|
|
/*
 * Balloon stat handler registered with qemu_add_balloon_handler().
 *
 * @opaque: the VirtIOBalloon passed at registration time
 * @info:   result structure; only info->actual is filled in
 *
 * Guest-provided statistics are currently compiled out, so the cached
 * stats are simply reset and info->actual is set to ram_size minus the
 * number of bytes the guest reports as ballooned.
 */
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
    VirtIOBalloon *dev = opaque;
    uint64_t ballooned;

#if 0
    /* Disable guest-provided stats for now. For more details please check:
     * https://bugzilla.redhat.com/show_bug.cgi?id=623903
     *
     * If you do enable it (which is probably not going to happen as we
     * need a new command for it), remember that you also need to fill the
     * appropriate members of the BalloonInfo structure so that the stats
     * are returned to the client.
     */
    if (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) {
        virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset);
        virtio_notify(&dev->vdev, dev->svq);
        return;
    }
#endif

    /* Stats are not supported.  Clear out any stale values that might
     * have been set by a more featureful guest kernel.
     */
    reset_stats(dev);

    /* Widen to 64 bits before shifting so large balloons do not wrap. */
    ballooned = (uint64_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT;
    info->actual = ram_size - ballooned;
}
|
|
|
|
|
balloon: Separate out stat and balloon handling
Passing on '0' as ballooning target to indicate retrieval of stats is
bad API. It also makes 'balloon 0' in the monitor cause a segfault.
Have two different functions handle the different functionality instead.
Detailed explanation from Markus's review:
1. do_info_balloon() is an info_async() method. It receives a callback
with argument, to be called exactly once (callback frees the
argument). It passes the callback via qemu_balloon_status() and
indirectly through qemu_balloon_event to virtio_balloon_to_target().
virtio_balloon_to_target() executes its balloon stats half. It
stores the callback in the device state.
If it can't send a stats request, it resets stats and calls the
callback right away.
Else, it sends a stats request. The device model runs the callback
when it receives the answer.
Works.
2. do_balloon() is a cmd_async() method. It receives a callback with
argument, to be called when the command completes. do_balloon()
calls it right before it succeeds. Odd, but should work.
Nevertheless, it passes the callback on via qemu_ballon() and
indirectly through qemu_balloon_event to virtio_balloon_to_target().
a. If the argument is non-zero, virtio_balloon_to_target() executes
its balloon half, which doesn't use the callback in any way.
Odd, but works.
b. If the argument is zero, virtio_balloon_to_target() executes its
balloon stats half, just like in 1. It either calls the callback
right away, or arranges for it to be called later.
Thus, the callback runs twice: use after free and double free.
Test case: start with -S -device virtio-balloon, execute "balloon 0" in
human monitor. Runs the callback first from virtio_balloon_to_target(),
then again from do_balloon().
Reported-by: Mike Cao <bcao@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2011-07-20 12:00:56 +04:00
|
|
|
/*
 * Balloon event handler registered with qemu_add_balloon_handler().
 *
 * @opaque: the VirtIOBalloon passed at registration time
 * @target: requested amount of guest memory in bytes
 *
 * The target is clamped to ram_size.  A resulting target of zero is
 * ignored; otherwise num_pages is recomputed from the difference to
 * ram_size and the guest is told that the config space changed.
 */
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
    VirtIOBalloon *s = opaque;
    ram_addr_t wanted = target > ram_size ? ram_size : target;

    if (!wanted) {
        return;
    }

    s->num_pages = (ram_size - wanted) >> VIRTIO_BALLOON_PFN_SHIFT;
    virtio_notify_config(&s->vdev);
}
|
|
|
|
|
|
|
|
/*
 * savevm handler: write the common virtio state followed by num_pages and
 * actual, each as a big-endian 32-bit value.  virtio_balloon_load() reads
 * them back in the same order.
 */
static void virtio_balloon_save(QEMUFile *f, void *opaque)
{
    VirtIOBalloon *balloon = opaque;

    /* Generic virtio device state comes first in the stream. */
    virtio_save(&balloon->vdev, f);

    /* Balloon-specific fields. */
    qemu_put_be32(f, balloon->num_pages);
    qemu_put_be32(f, balloon->actual);
}
|
|
|
|
|
|
|
|
/*
 * loadvm handler, counterpart of virtio_balloon_save().
 *
 * Returns -EINVAL for any version_id other than 1, the non-zero result of
 * virtio_load() if that fails, and 0 on success.
 */
static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIOBalloon *balloon = opaque;
    int rc;

    if (version_id != 1) {
        return -EINVAL;
    }

    rc = virtio_load(&balloon->vdev, f);
    if (rc != 0) {
        return rc;
    }

    /* Same order as written by virtio_balloon_save(). */
    balloon->num_pages = qemu_get_be32(f);
    balloon->actual = qemu_get_be32(f);

    return 0;
}
|
|
|
|
|
2009-05-18 17:51:59 +04:00
|
|
|
/*
 * Create and initialise a virtio balloon device for 'dev'.
 *
 * Installs the config and feature callbacks, registers the balloon
 * handlers, adds the three virtqueues (two page queues and the stats
 * queue, 128 entries each), resets the cached statistics and registers the
 * savevm handlers.
 *
 * Returns the embedded VirtIODevice, or NULL when the balloon handlers
 * could not be registered (the device is cleaned up in that case).
 */
VirtIODevice *virtio_balloon_init(DeviceState *dev)
{
    VirtIOBalloon *s;
    VirtIODevice *vdev;
    int ret;

    s = (VirtIOBalloon *)virtio_common_init("virtio-balloon",
                                            VIRTIO_ID_BALLOON,
                                            8, sizeof(VirtIOBalloon));
    vdev = &s->vdev;

    vdev->get_config = virtio_balloon_get_config;
    vdev->set_config = virtio_balloon_set_config;
    vdev->get_features = virtio_balloon_get_features;

    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                   virtio_balloon_stat, s);
    if (ret < 0) {
        virtio_cleanup(vdev);
        return NULL;
    }

    /* Queue order is kept as is: ivq, dvq, then svq. */
    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);

    reset_stats(s);

    /* Remembered so virtio_balloon_exit() can unregister the savevm
     * handlers again. */
    s->qdev = dev;
    register_savevm(dev, "virtio-balloon", -1, 1,
                    virtio_balloon_save, virtio_balloon_load, s);

    return vdev;
}
|
2011-07-27 12:20:41 +04:00
|
|
|
|
|
|
|
/*
 * Tear down a balloon device created by virtio_balloon_init(): remove the
 * balloon handlers, unregister the savevm handlers and release the common
 * virtio state, in that order.
 */
void virtio_balloon_exit(VirtIODevice *vdev)
{
    VirtIOBalloon *balloon = DO_UPCAST(VirtIOBalloon, vdev, vdev);

    qemu_remove_balloon_handler(balloon);
    unregister_savevm(balloon->qdev, "virtio-balloon", balloon);
    virtio_cleanup(vdev);
}
|