2008-12-04 23:33:06 +03:00
|
|
|
/*
|
2011-07-20 12:07:01 +04:00
|
|
|
* Virtio Balloon Device
|
2008-12-04 23:33:06 +03:00
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2008
|
2011-07-20 12:07:01 +04:00
|
|
|
* Copyright (C) 2011 Red Hat, Inc.
|
|
|
|
* Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
|
2008-12-04 23:33:06 +03:00
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2016-01-26 21:17:07 +03:00
|
|
|
#include "qemu/osdep.h"
|
2012-12-17 21:20:00 +04:00
|
|
|
#include "qemu/iov.h"
|
2019-05-23 17:35:07 +03:00
|
|
|
#include "qemu/module.h"
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
#include "qemu/timer.h"
|
2022-02-08 23:08:52 +03:00
|
|
|
#include "qemu/madvise.h"
|
2013-02-05 20:06:20 +04:00
|
|
|
#include "hw/virtio/virtio.h"
|
2017-10-17 19:44:13 +03:00
|
|
|
#include "hw/mem/pc-dimm.h"
|
2019-08-12 08:23:51 +03:00
|
|
|
#include "hw/qdev-properties.h"
|
2020-10-28 13:24:22 +03:00
|
|
|
#include "hw/boards.h"
|
2012-12-17 21:20:04 +04:00
|
|
|
#include "sysemu/balloon.h"
|
2013-02-05 20:06:20 +04:00
|
|
|
#include "hw/virtio/virtio-balloon.h"
|
2012-12-17 21:19:49 +04:00
|
|
|
#include "exec/address-spaces.h"
|
2018-02-01 14:18:31 +03:00
|
|
|
#include "qapi/error.h"
|
2020-09-13 22:53:43 +03:00
|
|
|
#include "qapi/qapi-events-machine.h"
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
#include "qapi/visitor.h"
|
2014-11-17 08:11:10 +03:00
|
|
|
#include "trace.h"
|
2017-09-11 22:52:50 +03:00
|
|
|
#include "qemu/error-report.h"
|
2018-12-11 11:24:53 +03:00
|
|
|
#include "migration/misc.h"
|
virtio-balloon: don't start free page hinting if postcopy is possible
Postcopy never worked properly with 'free-page-hint=on', as there are
at least two issues:
1) With postcopy, the guest will never receive a VIRTIO_BALLOON_CMD_ID_DONE
and consequently won't release free pages back to the OS once
migration finishes.
The issue is that for postcopy, we won't do a final bitmap sync while
the guest is stopped on the source and
virtio_balloon_free_page_hint_notify() will only call
virtio_balloon_free_page_done() on the source during
PRECOPY_NOTIFY_CLEANUP, after the VM state was already migrated to
the destination.
2) Once the VM touches a page on the destination that has been excluded
from migration on the source via qemu_guest_free_page_hint() while
postcopy is active, that thread will stall until postcopy finishes
and all threads are woken up. (with older Linux kernels that won't
retry faults when woken up via userfaultfd, we might actually get a
SEGFAULT)
The issue is that the source will refuse to migrate any pages that
are not marked as dirty in the dirty bmap -- for example, because the
page might just have been sent. Consequently, the faulting thread will
stall, waiting for the page to be migrated -- which could take quite
a while and result in guest OS issues.
While we could fix 1) comparatively easily, 2) is harder to get right and
might require more involved RAM migration changes on source and destination
[1].
As it never worked properly, let's not start free page hinting in the
precopy notifier if the postcopy migration capability was enabled to fix
it easily. Capabilities cannot be enabled once migration is already
running.
Note 1: in the future we might either adjust migration code on the source
to track pages that have actually been sent or adjust
migration code on source and destination to eventually send
pages multiple times from the source and deal with pages
that are sent multiple times on the destination.
Note 2: virtio-mem has similar issues, however, access to "unplugged"
memory by the guest is very rare and we would have to be very
lucky for it to happen during migration. The spec states
"The driver SHOULD NOT read from unplugged memory blocks ..."
and "The driver MUST NOT write to unplugged memory blocks".
virtio-mem will move away from virtio_balloon_free_page_done()
soon and handle this case explicitly on the destination.
[1] https://lkml.kernel.org/r/e79fd18c-aa62-c1d8-c7f3-ba3fc2c25fc8@redhat.com
Fixes: c13c4153f76d ("virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT")
Cc: qemu-stable@nongnu.org
Cc: Wei Wang <wei.w.wang@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Cc: Juan Quintela <quintela@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20210708095339.20274-2-david@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2021-07-08 12:53:38 +03:00
|
|
|
#include "migration/migration.h"
|
2008-12-04 23:33:06 +03:00
|
|
|
|
2013-02-05 20:06:20 +04:00
|
|
|
#include "hw/virtio/virtio-bus.h"
|
2014-06-24 21:43:22 +04:00
|
|
|
#include "hw/virtio/virtio-access.h"
|
2013-03-27 13:49:10 +04:00
|
|
|
|
2016-04-14 11:50:07 +03:00
|
|
|
/* Size in bytes of one balloon page: 2^VIRTIO_BALLOON_PFN_SHIFT. */
#define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT)
|
|
|
|
|
2019-07-22 16:41:08 +03:00
|
|
|
/*
 * Tracking state for a single host page of which the guest has so far
 * ballooned only some of the subpages.
 */
typedef struct PartiallyBalloonedPage {
    /*
     * Address identifying the page currently being tracked; this is the
     * value virtio_balloon_pbp_matches() compares against.
     */
    ram_addr_t base_gpa;
    /*
     * Bitmap created by bitmap_new() with one bit per subpage, or NULL when
     * no page is being tracked.
     * NOTE(review): presumably a set bit means "subpage already ballooned"
     * -- confirm against balloon_inflate_page().
     */
    unsigned long *bitmap;
} PartiallyBalloonedPage;
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
|
2019-07-22 16:41:07 +03:00
|
|
|
/*
 * Release the subpage bitmap held by @pbp and mark @pbp as tracking nothing.
 *
 * Safe to call when no bitmap is allocated: g_free() is a no-op on NULL, so
 * no explicit guard is required. @pbp->base_gpa is left untouched; only the
 * bitmap pointer is reset, which also protects against a double free if this
 * is called again.
 */
static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp)
{
    g_free(pbp->bitmap);
    pbp->bitmap = NULL;
}
|
|
|
|
|
2019-07-25 14:54:25 +03:00
|
|
|
static void virtio_balloon_pbp_alloc(PartiallyBalloonedPage *pbp,
|
|
|
|
ram_addr_t base_gpa,
|
|
|
|
long subpages)
|
2019-07-22 16:41:07 +03:00
|
|
|
{
|
|
|
|
pbp->base_gpa = base_gpa;
|
|
|
|
pbp->bitmap = bitmap_new(subpages);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp,
|
2019-07-25 14:36:38 +03:00
|
|
|
ram_addr_t base_gpa)
|
2019-07-22 16:41:07 +03:00
|
|
|
{
|
2019-07-25 14:36:38 +03:00
|
|
|
return pbp->base_gpa == base_gpa;
|
2019-07-22 16:41:07 +03:00
|
|
|
}
|
|
|
|
|
2020-06-26 10:22:33 +03:00
|
|
|
/*
 * Report whether the balloon must currently refrain from discarding memory.
 *
 * Returns true if RAM block discards are disabled, if an incoming postcopy
 * migration is in progress, or if a background snapshot is being taken.
 * The checks are evaluated in that order and stop at the first hit.
 */
static bool virtio_balloon_inhibited(void)
{
    if (ram_block_discard_is_disabled()) {
        return true;
    }

    /*
     * Postcopy cannot cope with discards happening concurrently, so it is
     * handled as a special case; the same applies to background snapshots.
     */
    if (migration_in_incoming_postcopy()) {
        return true;
    }

    return migration_in_bg_snapshot();
}
|
|
|
|
|
2019-02-14 07:39:14 +03:00
|
|
|
static void balloon_inflate_page(VirtIOBalloon *balloon,
|
2019-07-22 16:41:08 +03:00
|
|
|
MemoryRegion *mr, hwaddr mr_offset,
|
2019-07-25 14:54:25 +03:00
|
|
|
PartiallyBalloonedPage *pbp)
|
2008-12-04 23:33:06 +03:00
|
|
|
{
|
2019-07-22 16:41:06 +03:00
|
|
|
void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
|
2019-07-22 16:41:07 +03:00
|
|
|
ram_addr_t rb_offset, rb_aligned_offset, base_gpa;
|
virtio-balloon: Use ram_block_discard_range() instead of raw madvise()
Currently, virtio-balloon uses madvise() with MADV_DONTNEED to actually
discard RAM pages inserted into the balloon. This is basically a Linux
only interface (MADV_DONTNEED exists on some other platforms, but doesn't
always have the same semantics). It also doesn't work on hugepages and has
some other limitations.
It turns out that postcopy also needs to discard chunks of memory, and uses
a better interface for it: ram_block_discard_range(). It doesn't cover
every case, but it covers more than going direct to madvise() and this
gives us a single place to update for more possibilities in future.
There are some subtleties here to maintain the current balloon behaviour:
* For now, we just ignore requests to balloon in a hugepage backed region.
That matches current behaviour, because MADV_DONTNEED on a hugepage would
simply fail, and we ignore the error.
* If host page size is > BALLOON_PAGE_SIZE we can frequently call this on
non-host-page-aligned addresses. These would also fail in madvise(),
which we then ignored. ram_block_discard_range() error_report()s calls
on unaligned addresses, so we explicitly check that case to avoid
spamming the logs.
* We now call ram_block_discard_range() with the *host* page size, whereas
we previously called madvise() with BALLOON_PAGE_SIZE. Surprisingly,
this also matches existing behaviour. Although the kernel fails madvise
on unaligned addresses, it will round unaligned sizes *up* to the host
page size. Yes, this means that if BALLOON_PAGE_SIZE < guest page size
we can incorrectly discard more memory than the guest asked us to. I'm
planning to address that soon.
Errors other than the ones discussed above, will now be reported by
ram_block_discard_range(), rather than silently ignored, which means we
have a much better chance of seeing when something is going wrong.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20190214043916.22128-5-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:15 +03:00
|
|
|
RAMBlock *rb;
|
|
|
|
size_t rb_page_size;
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
int subpages;
|
2019-02-14 07:39:14 +03:00
|
|
|
|
virtio-balloon: Use ram_block_discard_range() instead of raw madvise()
Currently, virtio-balloon uses madvise() with MADV_DONTNEED to actually
discard RAM pages inserted into the balloon. This is basically a Linux
only interface (MADV_DONTNEED exists on some other platforms, but doesn't
always have the same semantics). It also doesn't work on hugepages and has
some other limitations.
It turns out that postcopy also needs to discard chunks of memory, and uses
a better interface for it: ram_block_discard_range(). It doesn't cover
every case, but it covers more than going direct to madvise() and this
gives us a single place to update for more possibilities in future.
There are some subtleties here to maintain the current balloon behaviour:
* For now, we just ignore requests to balloon in a hugepage backed region.
That matches current behaviour, because MADV_DONTNEED on a hugepage would
simply fail, and we ignore the error.
* If host page size is > BALLOON_PAGE_SIZE we can frequently call this on
non-host-page-aligned addresses. These would also fail in madvise(),
which we then ignored. ram_block_discard_range() error_report()s calls
on unaligned addresses, so we explicitly check that case to avoid
spamming the logs.
* We now call ram_block_discard_range() with the *host* page size, whereas
we previously called madvise() with BALLOON_PAGE_SIZE. Surprisingly,
this also matches existing behaviour. Although the kernel fails madvise
on unaligned addresses, it will round unaligned sizes *up* to the host
page size. Yes, this means that if BALLOON_PAGE_SIZE < guest page size
we can incorrectly discard more memory than the guest asked us to. I'm
planning to address that soon.
Errors other than the ones discussed above will now be reported by
ram_block_discard_range(), rather than silently ignored, which means we
have a much better chance of seeing when something is going wrong.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20190214043916.22128-5-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:15 +03:00
|
|
|
/* XXX is there a better way to get to the RAMBlock than via a
|
|
|
|
* host address? */
|
2019-07-22 16:41:06 +03:00
|
|
|
rb = qemu_ram_block_from_host(addr, false, &rb_offset);
|
virtio-balloon: Use ram_block_discard_range() instead of raw madvise()
Currently, virtio-balloon uses madvise() with MADV_DONTNEED to actually
discard RAM pages inserted into the balloon. This is basically a Linux
only interface (MADV_DONTNEED exists on some other platforms, but doesn't
always have the same semantics). It also doesn't work on hugepages and has
some other limitations.
It turns out that postcopy also needs to discard chunks of memory, and uses
a better interface for it: ram_block_discard_range(). It doesn't cover
every case, but it covers more than going direct to madvise() and this
gives us a single place to update for more possibilities in future.
There are some subtleties here to maintain the current balloon behaviour:
* For now, we just ignore requests to balloon in a hugepage backed region.
That matches current behaviour, because MADV_DONTNEED on a hugepage would
simply fail, and we ignore the error.
* If host page size is > BALLOON_PAGE_SIZE we can frequently call this on
non-host-page-aligned addresses. These would also fail in madvise(),
which we then ignored. ram_block_discard_range() error_report()s calls
on unaligned addresses, so we explicitly check that case to avoid
spamming the logs.
* We now call ram_block_discard_range() with the *host* page size, whereas
we previously called madvise() with BALLOON_PAGE_SIZE. Surprisingly,
this also matches existing behaviour. Although the kernel fails madvise
on unaligned addresses, it will round unaligned sizes *up* to the host
page size. Yes, this means that if BALLOON_PAGE_SIZE < guest page size
we can incorrectly discard more memory than the guest asked us to. I'm
planning to address that soon.
Errors other than the ones discussed above will now be reported by
ram_block_discard_range(), rather than silently ignored, which means we
have a much better chance of seeing when something is going wrong.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20190214043916.22128-5-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:15 +03:00
|
|
|
rb_page_size = qemu_ram_pagesize(rb);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non-ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
|
|
|
|
if (rb_page_size == BALLOON_PAGE_SIZE) {
|
|
|
|
/* Easy case */
|
virtio-balloon: Use ram_block_discard_range() instead of raw madvise()
Currently, virtio-balloon uses madvise() with MADV_DONTNEED to actually
discard RAM pages inserted into the balloon. This is basically a Linux
only interface (MADV_DONTNEED exists on some other platforms, but doesn't
always have the same semantics). It also doesn't work on hugepages and has
some other limitations.
It turns out that postcopy also needs to discard chunks of memory, and uses
a better interface for it: ram_block_discard_range(). It doesn't cover
every case, but it covers more than going direct to madvise() and this
gives us a single place to update for more possibilities in future.
There are some subtleties here to maintain the current balloon behaviour:
* For now, we just ignore requests to balloon in a hugepage backed region.
That matches current behaviour, because MADV_DONTNEED on a hugepage would
simply fail, and we ignore the error.
* If host page size is > BALLOON_PAGE_SIZE we can frequently call this on
non-host-page-aligned addresses. These would also fail in madvise(),
which we then ignored. ram_block_discard_range() error_report()s calls
on unaligned addresses, so we explicitly check that case to avoid
spamming the logs.
* We now call ram_block_discard_range() with the *host* page size, whereas
we previously called madvise() with BALLOON_PAGE_SIZE. Surprisingly,
this also matches existing behaviour. Although the kernel fails madvise
on unaligned addresses, it will round unaligned sizes *up* to the host
page size. Yes, this means that if BALLOON_PAGE_SIZE < guest page size
we can incorrectly discard more memory than the guest asked us to. I'm
planning to address that soon.
Errors other than the ones discussed above will now be reported by
ram_block_discard_range(), rather than silently ignored, which means we
have a much better chance of seeing when something is going wrong.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20190214043916.22128-5-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:15 +03:00
|
|
|
|
2019-07-22 16:41:06 +03:00
|
|
|
ram_block_discard_range(rb, rb_offset, rb_page_size);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non-ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
/* We ignore errors from ram_block_discard_range(), because it
|
|
|
|
* has already reported them, and failing to discard a balloon
|
|
|
|
* page is not fatal */
|
virtio-balloon: Use ram_block_discard_range() instead of raw madvise()
Currently, virtio-balloon uses madvise() with MADV_DONTNEED to actually
discard RAM pages inserted into the balloon. This is basically a Linux
only interface (MADV_DONTNEED exists on some other platforms, but doesn't
always have the same semantics). It also doesn't work on hugepages and has
some other limitations.
It turns out that postcopy also needs to discard chunks of memory, and uses
a better interface for it: ram_block_discard_range(). It doesn't cover
every case, but it covers more than going direct to madvise() and this
gives us a single place to update for more possibilities in future.
There are some subtleties here to maintain the current balloon behaviour:
* For now, we just ignore requests to balloon in a hugepage backed region.
That matches current behaviour, because MADV_DONTNEED on a hugepage would
simply fail, and we ignore the error.
* If host page size is > BALLOON_PAGE_SIZE we can frequently call this on
non-host-page-aligned addresses. These would also fail in madvise(),
which we then ignored. ram_block_discard_range() error_report()s calls
on unaligned addresses, so we explicitly check that case to avoid
spamming the logs.
* We now call ram_block_discard_range() with the *host* page size, whereas
we previously called madvise() with BALLOON_PAGE_SIZE. Surprisingly,
this also matches existing behaviour. Although the kernel fails madvise
on unaligned addresses, it will round unaligned sizes *up* to the host
page size. Yes, this means that if BALLOON_PAGE_SIZE < guest page size
we can incorrectly discard more memory than the guest asked us to. I'm
planning to address that soon.
Errors other than the ones discussed above will now be reported by
ram_block_discard_range(), rather than silently ignored, which means we
have a much better chance of seeing when something is going wrong.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20190214043916.22128-5-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:15 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non-ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
/* Hard case
|
|
|
|
*
|
|
|
|
* We've put a piece of a larger host page into the balloon - we
|
|
|
|
* need to keep track until we have a whole host page to
|
|
|
|
* discard
|
|
|
|
*/
|
|
|
|
warn_report_once(
|
|
|
|
"Balloon used with backing page size > 4kiB, this may not be reliable");
|
|
|
|
|
2019-07-22 16:41:06 +03:00
|
|
|
rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non-ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
subpages = rb_page_size / BALLOON_PAGE_SIZE;
|
2019-07-22 16:41:07 +03:00
|
|
|
base_gpa = memory_region_get_ram_addr(mr) + mr_offset -
|
|
|
|
(rb_offset - rb_aligned_offset);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non-ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
|
2019-07-25 14:54:25 +03:00
|
|
|
if (pbp->bitmap && !virtio_balloon_pbp_matches(pbp, base_gpa)) {
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non-ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
/* We've partially ballooned part of a host page, but now
|
|
|
|
* we're trying to balloon part of a different one. Too hard,
|
|
|
|
* give up on the old partial page */
|
2019-07-25 14:54:25 +03:00
|
|
|
virtio_balloon_pbp_free(pbp);
|
virtio-balloon: Use ram_block_discard_range() instead of raw madvise()
Currently, virtio-balloon uses madvise() with MADV_DONTNEED to actually
discard RAM pages inserted into the balloon. This is basically a Linux
only interface (MADV_DONTNEED exists on some other platforms, but doesn't
always have the same semantics). It also doesn't work on hugepages and has
some other limitations.
It turns out that postcopy also needs to discard chunks of memory, and uses
a better interface for it: ram_block_discard_range(). It doesn't cover
every case, but it covers more than going direct to madvise() and this
gives us a single place to update for more possibilities in future.
There are some subtleties here to maintain the current balloon behaviour:
* For now, we just ignore requests to balloon in a hugepage backed region.
That matches current behaviour, because MADV_DONTNEED on a hugepage would
simply fail, and we ignore the error.
* If host page size is > BALLOON_PAGE_SIZE we can frequently call this on
non-host-page-aligned addresses. These would also fail in madvise(),
which we then ignored. ram_block_discard_range() error_report()s calls
on unaligned addresses, so we explicitly check that case to avoid
spamming the logs.
* We now call ram_block_discard_range() with the *host* page size, whereas
we previously called madvise() with BALLOON_PAGE_SIZE. Surprisingly,
this also matches existing behaviour. Although the kernel fails madvise
on unaligned addresses, it will round unaligned sizes *up* to the host
page size. Yes, this means that if BALLOON_PAGE_SIZE < guest page size
we can incorrectly discard more memory than the guest asked us to. I'm
planning to address that soon.
Errors other than the ones discussed above will now be reported by
ram_block_discard_range(), rather than silently ignored, which means we
have a much better chance of seeing when something is going wrong.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20190214043916.22128-5-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:15 +03:00
|
|
|
}
|
|
|
|
|
2019-07-25 14:54:25 +03:00
|
|
|
if (!pbp->bitmap) {
|
|
|
|
virtio_balloon_pbp_alloc(pbp, base_gpa, subpages);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non-ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
}
|
|
|
|
|
2019-07-22 16:41:07 +03:00
|
|
|
set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE,
|
2019-07-25 14:54:25 +03:00
|
|
|
pbp->bitmap);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
|
2019-07-25 14:54:25 +03:00
|
|
|
if (bitmap_full(pbp->bitmap, subpages)) {
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
/* We've accumulated a full host page, we can actually discard
|
|
|
|
* it now */
|
|
|
|
|
2019-07-22 16:41:07 +03:00
|
|
|
ram_block_discard_range(rb, rb_aligned_offset, rb_page_size);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
/* We ignore errors from ram_block_discard_range(), because it
|
|
|
|
* has already reported them, and failing to discard a balloon
|
|
|
|
* page is not fatal */
|
2019-07-25 14:54:25 +03:00
|
|
|
virtio_balloon_pbp_free(pbp);
|
virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size
The virtio-balloon always works in units of 4kiB (BALLOON_PAGE_SIZE), but
we can only actually discard memory in units of the host page size.
Now, we handle this very badly: we silently ignore balloon requests that
aren't host page aligned, and for requests that are host page aligned we
discard the entire host page. The latter can corrupt guest memory if its
page size is smaller than the host's.
The obvious choice would be to disable the balloon if the host page size is
not 4kiB. However, that would break the special case where host and guest
have the same page size, but that's larger than 4kiB. That case currently
works by accident[1] - and is used in practice on many production POWER
systems where 64kiB has long been the Linux default page size on both host
and guest.
To make the balloon safe, without breaking that useful special case, we
need to accumulate 4kiB balloon requests until we have a whole contiguous
host page to discard.
We could in principle do that across all guest memory, but it would require
a large bitmap to track. This patch represents a compromise: we track
ballooned subpages for a single contiguous host page at a time. This means
that if the guest discards all 4kiB chunks of a host page in succession,
we will discard it. This is the expected behaviour in the (host page) ==
(guest page) != 4kiB case we want to support.
If the guest scatters 4kiB requests across different host pages, we don't
discard anything, and issue a warning. Not ideal, but at least we don't
corrupt guest memory as the previous version could.
Warning reporting is kind of a compromise here. Determining whether we're
in a problematic state at realize() time is tricky, because we'd have to
look at the host pagesizes of all memory backends, but we can't really know
if some of those backends could be for special purpose memory that's not
subject to ballooning.
Reporting only when the guest tries to balloon a partial page also isn't
great because if the guest page size happens to line up it won't indicate
that we're in a non ideal situation. It could also cause alarming repeated
warnings whenever a migration is attempted.
So, what we do is warn the first time the guest attempts to balloon a partial
host page, whether or not it will end up ballooning the rest of the page
immediately afterwards.
[1] Because when the guest attempts to balloon a page, it will submit
requests for each 4kiB subpage. Most will be ignored, but the one
which happens to be host page aligned will discard the whole lot.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190214043916.22128-6-david@gibson.dropbear.id.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-02-14 07:39:16 +03:00
|
|
|
}
|
2008-12-04 23:33:06 +03:00
|
|
|
}
|
|
|
|
|
2019-03-06 06:06:00 +03:00
|
|
|
static void balloon_deflate_page(VirtIOBalloon *balloon,
|
2019-07-22 16:41:06 +03:00
|
|
|
MemoryRegion *mr, hwaddr mr_offset)
|
2019-03-06 06:06:00 +03:00
|
|
|
{
|
2019-07-22 16:41:06 +03:00
|
|
|
void *addr = memory_region_get_ram_ptr(mr) + mr_offset;
|
|
|
|
ram_addr_t rb_offset;
|
2019-03-06 06:06:00 +03:00
|
|
|
RAMBlock *rb;
|
|
|
|
size_t rb_page_size;
|
2019-03-06 06:06:01 +03:00
|
|
|
void *host_addr;
|
|
|
|
int ret;
|
2019-03-06 06:06:00 +03:00
|
|
|
|
|
|
|
/* XXX is there a better way to get to the RAMBlock than via a
|
|
|
|
* host address? */
|
2019-07-22 16:41:06 +03:00
|
|
|
rb = qemu_ram_block_from_host(addr, false, &rb_offset);
|
2019-03-06 06:06:00 +03:00
|
|
|
rb_page_size = qemu_ram_pagesize(rb);
|
|
|
|
|
2019-03-06 06:06:01 +03:00
|
|
|
host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));
|
|
|
|
|
|
|
|
/* When a page is deflated, we hint the whole host page it lives
|
|
|
|
* on, since we can't do anything smaller */
|
|
|
|
ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
|
|
|
|
if (ret != 0) {
|
|
|
|
warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
|
|
|
|
strerror(errno));
|
|
|
|
/* Otherwise ignore, failing to page hint shouldn't be fatal */
|
|
|
|
}
|
2019-03-06 06:06:00 +03:00
|
|
|
}
|
|
|
|
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
static const char *balloon_stat_names[] = {
|
|
|
|
[VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
|
|
|
|
[VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
|
|
|
|
[VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
|
|
|
|
[VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
|
|
|
|
[VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
|
|
|
|
[VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
|
2016-02-24 10:50:48 +03:00
|
|
|
[VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory",
|
2017-12-05 15:14:46 +03:00
|
|
|
[VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches",
|
2018-03-20 01:28:49 +03:00
|
|
|
[VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc",
|
|
|
|
[VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail",
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
[VIRTIO_BALLOON_S_NR] = NULL
|
|
|
|
};
|
|
|
|
|
2010-01-26 23:17:35 +03:00
|
|
|
/*
|
|
|
|
* reset_stats - Mark all items in the stats array as unset
|
|
|
|
*
|
2013-07-24 21:48:56 +04:00
|
|
|
* This function needs to be called at device initialization and before
|
|
|
|
* updating to a set of newly-generated stats. This will ensure that no
|
2010-01-26 23:17:35 +03:00
|
|
|
* stale values stick around in case the guest reports a subset of the supported
|
|
|
|
* statistics.
|
|
|
|
*/
|
|
|
|
static inline void reset_stats(VirtIOBalloon *dev)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
|
|
|
|
}
|
|
|
|
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
static bool balloon_stats_supported(const VirtIOBalloon *s)
|
|
|
|
{
|
2013-03-27 13:49:14 +04:00
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(s);
|
2015-08-17 12:48:29 +03:00
|
|
|
return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_STATS_VQ);
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool balloon_stats_enabled(const VirtIOBalloon *s)
|
|
|
|
{
|
|
|
|
return s->stats_poll_interval > 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * balloon_stats_destroy_timer - tear down the stats polling timer
 *
 * Does nothing when polling is not enabled.  Otherwise the timer is
 * freed, its pointer cleared, and the poll interval reset to 0 so that
 * balloon_stats_enabled() reports false afterwards.
 */
static void balloon_stats_destroy_timer(VirtIOBalloon *s)
{
    if (!balloon_stats_enabled(s)) {
        return;
    }

    timer_free(s->stats_timer);
    s->stats_timer = NULL;
    s->stats_poll_interval = 0;
}
|
|
|
|
|
2014-09-15 20:00:11 +04:00
|
|
|
/*
 * balloon_stats_change_timer - (re)arm the stats polling timer
 *
 * @s:    the balloon device owning the timer
 * @secs: delay in seconds from now
 *
 * The expiry is computed in milliseconds relative to the current
 * QEMU_CLOCK_VIRTUAL reading.
 */
static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
{
    int64_t now_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);

    timer_mod(s->stats_timer, now_ms + secs * 1000);
}
|
|
|
|
|
|
|
|
static void balloon_stats_poll_cb(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIOBalloon *s = opaque;
|
2013-03-27 13:49:14 +04:00
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(s);
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
|
2016-03-01 14:14:03 +03:00
|
|
|
if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
/* re-schedule */
|
|
|
|
balloon_stats_change_timer(s, s->stats_poll_interval);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-11-29 06:08:41 +03:00
|
|
|
virtqueue_push(s->svq, s->stats_vq_elem, 0);
|
2013-03-27 13:49:14 +04:00
|
|
|
virtio_notify(vdev, s->svq);
|
2016-02-04 17:26:51 +03:00
|
|
|
g_free(s->stats_vq_elem);
|
|
|
|
s->stats_vq_elem = NULL;
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
}
|
|
|
|
|
qom: Swap 'name' next to visitor in ObjectPropertyAccessor
Similar to the previous patch, it's nice to have all functions
in the tree that involve a visitor and a name for conversion to
or from QAPI to consistently stick the 'name' parameter next
to the Visitor parameter.
Done by manually changing include/qom/object.h and qom/object.c,
then running this Coccinelle script and touching up the fallout
(Coccinelle insisted on adding some trailing whitespace).
@ rule1 @
identifier fn;
typedef Object, Visitor, Error;
identifier obj, v, opaque, name, errp;
@@
void fn
- (Object *obj, Visitor *v, void *opaque, const char *name,
+ (Object *obj, Visitor *v, const char *name, void *opaque,
Error **errp) { ... }
@@
identifier rule1.fn;
expression obj, v, opaque, name, errp;
@@
fn(obj, v,
- opaque, name,
+ name, opaque,
errp)
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1454075341-13658-20-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:55 +03:00
|
|
|
static void balloon_stats_get_all(Object *obj, Visitor *v, const char *name,
|
|
|
|
void *opaque, Error **errp)
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
{
|
2014-05-07 11:53:52 +04:00
|
|
|
Error *err = NULL;
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
VirtIOBalloon *s = opaque;
|
|
|
|
int i;
|
|
|
|
|
2020-07-07 19:05:46 +03:00
|
|
|
if (!visit_start_struct(v, name, NULL, 0, &err)) {
|
2014-05-07 11:53:52 +04:00
|
|
|
goto out;
|
|
|
|
}
|
2020-07-07 19:05:46 +03:00
|
|
|
if (!visit_type_int(v, "last-update", &s->stats_last_update, &err)) {
|
qapi: Replace uncommon use of the error API by the common one
We commonly use the error API like this:
err = NULL;
foo(..., &err);
if (err) {
goto out;
}
bar(..., &err);
Every error source is checked separately. The second function is only
called when the first one succeeds. Both functions are free to pass
their argument to error_set(). Because error_set() asserts no error
has been set, this effectively means they must not be called with an
error set.
The qapi-generated code uses the error API differently:
// *errp was initialized to NULL somewhere up the call chain
frob(..., errp);
gnat(..., errp);
Errors accumulate in *errp: first error wins, subsequent errors get
dropped. To make this work, the second function does nothing when
called with an error set. Requires non-null errp, or else the second
function can't see the first one fail.
This usage has also bled into visitor tests, and two device model
object property getters rtc_get_date() and balloon_stats_get_all().
With the "accumulate" technique, you need fewer error checks in
callers, and buy that with an error check in every callee. Can be
nice.
However, mixing the two techniques is confusing. You can't use the
"accumulate" technique with functions designed for the "check
separately" technique. You can use the "check separately" technique
with functions designed for the "accumulate" technique, but then
error_set() can't catch you setting an error more than once.
Standardize on the "check separately" technique for now, because it's
overwhelmingly prevalent.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
2014-05-07 11:53:54 +04:00
|
|
|
goto out_end;
|
|
|
|
}
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
|
2020-07-07 19:05:46 +03:00
|
|
|
if (!visit_start_struct(v, "stats", NULL, 0, &err)) {
|
2014-05-07 11:53:52 +04:00
|
|
|
goto out_end;
|
|
|
|
}
|
2016-01-29 16:48:45 +03:00
|
|
|
for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
|
2020-07-07 19:05:46 +03:00
|
|
|
if (!visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err)) {
|
qapi: Split visit_end_struct() into pieces
As mentioned in previous patches, we want to call visit_end_struct()
functions unconditionally, so that visitors can release resources
tied up since the matching visit_start_struct() without also having
to worry about error priority if more than one error occurs.
Even though error_propagate() can be safely used to ignore a second
error during cleanup caused by a first error, it is simpler if the
cleanup cannot set an error. So, split out the error checking
portion (basically, input visitors checking for unvisited keys) into
a new function visit_check_struct(), which can be safely skipped if
any earlier errors are encountered, and leave the cleanup portion
(which never fails, but must be called unconditionally if
visit_start_struct() succeeded) in visit_end_struct().
Generated code in qapi-visit.c has diffs resembling:
|@@ -59,10 +59,12 @@ void visit_type_ACPIOSTInfo(Visitor *v,
| goto out_obj;
| }
| visit_type_ACPIOSTInfo_members(v, obj, &err);
|- error_propagate(errp, err);
|- err = NULL;
|+ if (err) {
|+ goto out_obj;
|+ }
|+ visit_check_struct(v, &err);
| out_obj:
|- visit_end_struct(v, &err);
|+ visit_end_struct(v);
| out:
and in qapi-event.c:
@@ -47,7 +47,10 @@ void qapi_event_send_acpi_device_ost(ACP
| goto out;
| }
| visit_type_q_obj_ACPI_DEVICE_OST_arg_members(v, &param, &err);
|- visit_end_struct(v, err ? NULL : &err);
|+ if (!err) {
|+ visit_check_struct(v, &err);
|+ }
|+ visit_end_struct(v);
| if (err) {
| goto out;
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1461879932-9020-20-git-send-email-eblake@redhat.com>
[Conflict with a doc fixup resolved]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-04-29 00:45:27 +03:00
|
|
|
goto out_nested;
|
2016-01-29 16:48:45 +03:00
|
|
|
}
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
}
|
qapi: Split visit_end_struct() into pieces
As mentioned in previous patches, we want to call visit_end_struct()
functions unconditionally, so that visitors can release resources
tied up since the matching visit_start_struct() without also having
to worry about error priority if more than one error occurs.
Even though error_propagate() can be safely used to ignore a second
error during cleanup caused by a first error, it is simpler if the
cleanup cannot set an error. So, split out the error checking
portion (basically, input visitors checking for unvisited keys) into
a new function visit_check_struct(), which can be safely skipped if
any earlier errors are encountered, and leave the cleanup portion
(which never fails, but must be called unconditionally if
visit_start_struct() succeeded) in visit_end_struct().
Generated code in qapi-visit.c has diffs resembling:
|@@ -59,10 +59,12 @@ void visit_type_ACPIOSTInfo(Visitor *v,
| goto out_obj;
| }
| visit_type_ACPIOSTInfo_members(v, obj, &err);
|- error_propagate(errp, err);
|- err = NULL;
|+ if (err) {
|+ goto out_obj;
|+ }
|+ visit_check_struct(v, &err);
| out_obj:
|- visit_end_struct(v, &err);
|+ visit_end_struct(v);
| out:
and in qapi-event.c:
@@ -47,7 +47,10 @@ void qapi_event_send_acpi_device_ost(ACP
| goto out;
| }
| visit_type_q_obj_ACPI_DEVICE_OST_arg_members(v, &param, &err);
|- visit_end_struct(v, err ? NULL : &err);
|+ if (!err) {
|+ visit_check_struct(v, &err);
|+ }
|+ visit_end_struct(v);
| if (err) {
| goto out;
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1461879932-9020-20-git-send-email-eblake@redhat.com>
[Conflict with a doc fixup resolved]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-04-29 00:45:27 +03:00
|
|
|
visit_check_struct(v, &err);
|
|
|
|
out_nested:
|
qapi: Add parameter to visit_end_*
Rather than making the dealloc visitor track a stack of pointers
remembered during visit_start_* in order to free them during
visit_end_*, it's a lot easier to just make all callers pass the
same pointer to visit_end_*. The generated code has access to the
same pointer, while all other users are doing virtual walks and
can pass NULL. The dealloc visitor is then greatly simplified.
All three visit_end_*() functions intentionally take a void**,
even though the visit_start_*() functions differ between void**,
GenericList**, and GenericAlternate**. This is done for several
reasons: when doing a virtual walk, passing NULL doesn't care
what the type is, but when doing a generated walk, we already
have to cast the caller's specific FOO* to call visit_start,
while using void** lets us use visit_end without a cast. Also,
an upcoming patch will add a clone visitor that wants to use
the same implementation for all three visit_end callbacks,
which is made easier if all three share the same signature.
For visitors that already track per-object state (the QMP visitors
via a stack, and the string visitors which do not allow nesting),
add an assertion that the caller is indeed passing the same
pointer to paired calls.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-4-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 19:48:34 +03:00
|
|
|
visit_end_struct(v, NULL);
|
2014-05-07 11:53:52 +04:00
|
|
|
|
qapi: Split visit_end_struct() into pieces
As mentioned in previous patches, we want to call visit_end_struct()
functions unconditionally, so that visitors can release resources
tied up since the matching visit_start_struct() without also having
to worry about error priority if more than one error occurs.
Even though error_propagate() can be safely used to ignore a second
error during cleanup caused by a first error, it is simpler if the
cleanup cannot set an error. So, split out the error checking
portion (basically, input visitors checking for unvisited keys) into
a new function visit_check_struct(), which can be safely skipped if
any earlier errors are encountered, and leave the cleanup portion
(which never fails, but must be called unconditionally if
visit_start_struct() succeeded) in visit_end_struct().
Generated code in qapi-visit.c has diffs resembling:
|@@ -59,10 +59,12 @@ void visit_type_ACPIOSTInfo(Visitor *v,
| goto out_obj;
| }
| visit_type_ACPIOSTInfo_members(v, obj, &err);
|- error_propagate(errp, err);
|- err = NULL;
|+ if (err) {
|+ goto out_obj;
|+ }
|+ visit_check_struct(v, &err);
| out_obj:
|- visit_end_struct(v, &err);
|+ visit_end_struct(v);
| out:
and in qapi-event.c:
@@ -47,7 +47,10 @@ void qapi_event_send_acpi_device_ost(ACP
| goto out;
| }
| visit_type_q_obj_ACPI_DEVICE_OST_arg_members(v, &param, &err);
|- visit_end_struct(v, err ? NULL : &err);
|+ if (!err) {
|+ visit_check_struct(v, &err);
|+ }
|+ visit_end_struct(v);
| if (err) {
| goto out;
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1461879932-9020-20-git-send-email-eblake@redhat.com>
[Conflict with a doc fixup resolved]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-04-29 00:45:27 +03:00
|
|
|
if (!err) {
|
|
|
|
visit_check_struct(v, &err);
|
|
|
|
}
|
2014-05-07 11:53:52 +04:00
|
|
|
out_end:
|
qapi: Add parameter to visit_end_*
Rather than making the dealloc visitor track a stack of pointers
remembered during visit_start_* in order to free them during
visit_end_*, it's a lot easier to just make all callers pass the
same pointer to visit_end_*. The generated code has access to the
same pointer, while all other users are doing virtual walks and
can pass NULL. The dealloc visitor is then greatly simplified.
All three visit_end_*() functions intentionally take a void**,
even though the visit_start_*() functions differ between void**,
GenericList**, and GenericAlternate**. This is done for several
reasons: when doing a virtual walk, passing NULL doesn't care
what the type is, but when doing a generated walk, we already
have to cast the caller's specific FOO* to call visit_start,
while using void** lets us use visit_end without a cast. Also,
an upcoming patch will add a clone visitor that wants to use
the same implementation for all three visit_end callbacks,
which is made easier if all three share the same signature.
For visitors that already track per-object state (the QMP visitors
via a stack, and the string visitors which do not allow nesting),
add an assertion that the caller is indeed passing the same
pointer to paired calls.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1465490926-28625-4-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-06-09 19:48:34 +03:00
|
|
|
visit_end_struct(v, NULL);
|
2014-05-07 11:53:52 +04:00
|
|
|
out:
|
|
|
|
error_propagate(errp, err);
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
}
|
|
|
|
|
2016-01-29 16:48:53 +03:00
|
|
|
static void balloon_stats_get_poll_interval(Object *obj, Visitor *v,
|
qom: Swap 'name' next to visitor in ObjectPropertyAccessor
Similar to the previous patch, it's nice to have all functions
in the tree that involve a visitor and a name for conversion to
or from QAPI to consistently stick the 'name' parameter next
to the Visitor parameter.
Done by manually changing include/qom/object.h and qom/object.c,
then running this Coccinelle script and touching up the fallout
(Coccinelle insisted on adding some trailing whitespace).
@ rule1 @
identifier fn;
typedef Object, Visitor, Error;
identifier obj, v, opaque, name, errp;
@@
void fn
- (Object *obj, Visitor *v, void *opaque, const char *name,
+ (Object *obj, Visitor *v, const char *name, void *opaque,
Error **errp) { ... }
@@
identifier rule1.fn;
expression obj, v, opaque, name, errp;
@@
fn(obj, v,
- opaque, name,
+ name, opaque,
errp)
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1454075341-13658-20-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:55 +03:00
|
|
|
const char *name, void *opaque,
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
VirtIOBalloon *s = opaque;
|
qapi: Swap visit_* arguments for consistent 'name' placement
JSON uses "name":value, but many of our visitor interfaces were
called with visit_type_FOO(v, &value, name, errp). This can be
a bit confusing to have to mentally swap the parameter order to
match JSON order. It's particularly bad for visit_start_struct(),
where the 'name' parameter is smack in the middle of the
otherwise-related group of 'obj, kind, size' parameters! It's
time to do a global swap of the parameter ordering, so that the
'name' parameter is always immediately after the Visitor argument.
Additional reason in favor of the swap: the existing include/qjson.h
prefers listing 'name' first in json_prop_*(), and I have plans to
unify that file with the qapi visitors; listing 'name' first in
qapi will minimize churn to the (admittedly few) qjson.h clients.
Later patches will then fix docs, object.h, visitor-impl.h, and
those clients to match.
Done by first patching scripts/qapi*.py by hand to make generated
files do what I want, then by running the following Coccinelle
script to affect the rest of the code base:
$ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'`
I then had to apply some touchups (Coccinelle insisted on TAB
indentation in visitor.h, and botched the signature of
visit_type_enum() by rewriting 'const char *const strings[]' to
the syntactically invalid 'const char*const[] strings'). The
movement of parameters is sufficient to provoke compiler errors
if any callers were missed.
// Part 1: Swap declaration order
@@
type TV, TErr, TObj, T1, T2;
identifier OBJ, ARG1, ARG2;
@@
void visit_start_struct
-(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp)
+(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp)
{ ... }
@@
type bool, TV, T1;
identifier ARG1;
@@
bool visit_optional
-(TV v, T1 ARG1, const char *name)
+(TV v, const char *name, T1 ARG1)
{ ... }
@@
type TV, TErr, TObj, T1;
identifier OBJ, ARG1;
@@
void visit_get_next_type
-(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp)
+(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp)
{ ... }
@@
type TV, TErr, TObj, T1, T2;
identifier OBJ, ARG1, ARG2;
@@
void visit_type_enum
-(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp)
+(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp)
{ ... }
@@
type TV, TErr, TObj;
identifier OBJ;
identifier VISIT_TYPE =~ "^visit_type_";
@@
void VISIT_TYPE
-(TV v, TObj OBJ, const char *name, TErr errp)
+(TV v, const char *name, TObj OBJ, TErr errp)
{ ... }
// Part 2: swap caller order
@@
expression V, NAME, OBJ, ARG1, ARG2, ERR;
identifier VISIT_TYPE =~ "^visit_type_";
@@
(
-visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR)
+visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR)
|
-visit_optional(V, ARG1, NAME)
+visit_optional(V, NAME, ARG1)
|
-visit_get_next_type(V, OBJ, ARG1, NAME, ERR)
+visit_get_next_type(V, NAME, OBJ, ARG1, ERR)
|
-visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR)
+visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR)
|
-VISIT_TYPE(V, OBJ, NAME, ERR)
+VISIT_TYPE(V, NAME, OBJ, ERR)
)
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:54 +03:00
|
|
|
visit_type_int(v, name, &s->stats_poll_interval, errp);
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
}
|
|
|
|
|
2016-01-29 16:48:53 +03:00
|
|
|
static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
|
qom: Swap 'name' next to visitor in ObjectPropertyAccessor
Similar to the previous patch, it's nice to have all functions
in the tree that involve a visitor and a name for conversion to
or from QAPI to consistently stick the 'name' parameter next
to the Visitor parameter.
Done by manually changing include/qom/object.h and qom/object.c,
then running this Coccinelle script and touching up the fallout
(Coccinelle insisted on adding some trailing whitespace).
@ rule1 @
identifier fn;
typedef Object, Visitor, Error;
identifier obj, v, opaque, name, errp;
@@
void fn
- (Object *obj, Visitor *v, void *opaque, const char *name,
+ (Object *obj, Visitor *v, const char *name, void *opaque,
Error **errp) { ... }
@@
identifier rule1.fn;
expression obj, v, opaque, name, errp;
@@
fn(obj, v,
- opaque, name,
+ name, opaque,
errp)
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <1454075341-13658-20-git-send-email-eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 16:48:55 +03:00
|
|
|
const char *name, void *opaque,
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
VirtIOBalloon *s = opaque;
|
|
|
|
int64_t value;
|
|
|
|
|
error: Eliminate error_propagate() with Coccinelle, part 1
When all we do with an Error we receive into a local variable is
propagating to somewhere else, we can just as well receive it there
right away. Convert
if (!foo(..., &err)) {
...
error_propagate(errp, err);
...
return ...
}
to
if (!foo(..., errp)) {
...
...
return ...
}
where nothing else needs @err. Coccinelle script:
@rule1 forall@
identifier fun, err, errp, lbl;
expression list args, args2;
binary operator op;
constant c1, c2;
symbol false;
@@
if (
(
- fun(args, &err, args2)
+ fun(args, errp, args2)
|
- !fun(args, &err, args2)
+ !fun(args, errp, args2)
|
- fun(args, &err, args2) op c1
+ fun(args, errp, args2) op c1
)
)
{
... when != err
when != lbl:
when strict
- error_propagate(errp, err);
... when != err
(
return;
|
return c2;
|
return false;
)
}
@rule2 forall@
identifier fun, err, errp, lbl;
expression list args, args2;
expression var;
binary operator op;
constant c1, c2;
symbol false;
@@
- var = fun(args, &err, args2);
+ var = fun(args, errp, args2);
... when != err
if (
(
var
|
!var
|
var op c1
)
)
{
... when != err
when != lbl:
when strict
- error_propagate(errp, err);
... when != err
(
return;
|
return c2;
|
return false;
|
return var;
)
}
@depends on rule1 || rule2@
identifier err;
@@
- Error *err = NULL;
... when != err
Not exactly elegant, I'm afraid.
The "when != lbl:" is necessary to avoid transforming
if (fun(args, &err)) {
goto out
}
...
out:
error_propagate(errp, err);
even though other paths to label out still need the error_propagate().
For an actual example, see sclp_realize().
Without the "when strict", Coccinelle transforms vfio_msix_setup(),
incorrectly. I don't know what exactly "when strict" does, only that
it helps here.
The match of return is narrower than what I want, but I can't figure
out how to express "return where the operand doesn't use @err". For
an example where it's too narrow, see vfio_intx_enable().
Silently fails to convert hw/arm/armsse.c, because Coccinelle gets
confused by ARMSSE being used both as typedef and function-like macro
there. Converted manually.
Line breaks tidied up manually. One nested declaration of @local_err
deleted manually. Preexisting unwanted blank line dropped in
hw/riscv/sifive_e.c.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200707160613.848843-35-armbru@redhat.com>
2020-07-07 19:06:02 +03:00
|
|
|
if (!visit_type_int(v, name, &value, errp)) {
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (value < 0) {
|
|
|
|
error_setg(errp, "timer value must be greater than zero");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-10-01 20:43:44 +04:00
|
|
|
if (value > UINT32_MAX) {
|
2014-09-15 20:00:11 +04:00
|
|
|
error_setg(errp, "timer value is too big");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
if (value == s->stats_poll_interval) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (value == 0) {
|
|
|
|
/* timer=0 disables the timer */
|
|
|
|
balloon_stats_destroy_timer(s);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (balloon_stats_enabled(s)) {
|
|
|
|
/* timer interval change */
|
|
|
|
s->stats_poll_interval = value;
|
|
|
|
balloon_stats_change_timer(s, value);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* create a new timer */
|
|
|
|
g_assert(s->stats_timer == NULL);
|
2013-08-21 19:03:08 +04:00
|
|
|
s->stats_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, balloon_stats_poll_cb, s);
|
balloon: re-enable balloon stats
The statistics are now available through device properties via a
polling mechanism. First a client has to enable polling, then it
can query available stats.
Polling is enabled by setting an update interval (in seconds)
to a property named guest-stats-polling-interval, like this:
{ "execute": "qom-set",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats-polling-interval", "value": 4 } }
Then the available stats can be retrieved by querying the
guest-stats property. The returned object is a dict containing
all available stats. Example:
{ "execute": "qom-get",
"arguments": { "path": "/machine/peripheral-anon/device[1]",
"property": "guest-stats" } }
{
"return": {
"stats": {
"stat-swap-out": 0,
"stat-free-memory": 844943360,
"stat-minor-faults": 219028,
"stat-major-faults": 235,
"stat-total-memory": 1044406272,
"stat-swap-in": 0
},
"last-update": 1358529861
}
}
Please, check the next commit for full documentation.
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2012-12-01 06:14:57 +04:00
|
|
|
s->stats_poll_interval = value;
|
|
|
|
balloon_stats_change_timer(s, 0);
|
|
|
|
}
|
|
|
|
|
2020-05-27 07:14:07 +03:00
|
|
|
/*
 * Virtqueue handler for the reporting queue.
 *
 * Every element popped from @vq is walked buffer by buffer: an in_sg buffer
 * that resolves to a RAMBlock, is aligned to that block's page size and ends
 * within the block's used length is passed to ram_block_discard_range().
 * Buffers that do not qualify are skipped.  Each element is then pushed back
 * with a length of 0, the guest is notified, and the element is freed.
 */
static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;

    for (;;) {
        unsigned int idx;

        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        /*
         * Discarding a page removes it from the hypervisor and causes it to
         * be zeroed when it is handed back to us.  Pages must therefore be
         * left alone while ballooning is inhibited (another device or
         * process may be accessing them) or while the guest expects them to
         * retain a non-zero value.
         */
        if (!virtio_balloon_inhibited() && !dev->poison_val) {
            for (idx = 0; idx < elem->in_num; idx++) {
                void *host = elem->in_sg[idx].iov_base;
                size_t len = elem->in_sg[idx].iov_len;
                ram_addr_t off;
                RAMBlock *block;

                /*
                 * Unlike handle_output below, the memory section does not
                 * need to be checked for ram/readonly/romd here.  If the
                 * region is not meant to be written to, address_space_map
                 * will have allocated a bounce buffer, which is freed in
                 * address_space_unmap and triggers an unassigned_mem_write
                 * before failing to copy the buffer over.  With more than
                 * one bad descriptor, NULL is returned after the first
                 * bounce buffer and no resources get mapped.
                 */
                block = qemu_ram_block_from_host(host, false, &off);
                if (!block) {
                    trace_virtio_balloon_bad_addr(elem->in_addr[idx]);
                    continue;
                }

                /*
                 * For now, unaligned regions and regions that overrun the
                 * end of the RAMBlock are simply ignored.
                 */
                if (QEMU_IS_ALIGNED(off | len, qemu_ram_pagesize(block)) &&
                    (off + len) <= qemu_ram_get_used_length(block)) {
                    ram_block_discard_range(block, off, len);
                }
            }
        }

        virtqueue_push(vq, elem, 0);
        virtio_notify(vdev, vq);
        g_free(elem);
    }
}
|
|
|
|
|
2008-12-04 23:33:06 +03:00
|
|
|
/*
 * Handler shared by the inflate and deflate virtqueues.
 *
 * Every element popped from @vq carries a sequence of 32-bit page frame
 * numbers in its out buffers.  Each PFN is turned into a guest physical
 * address, looked up in system memory and, unless ballooning is inhibited,
 * passed to balloon_inflate_page() (vq == s->ivq) or balloon_deflate_page()
 * (vq == s->dvq).  Addresses that do not resolve to plain RAM are traced
 * and skipped.  Once an element is consumed it is pushed back with a
 * written length of 0 and the guest is notified.
 */
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem;

    while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement))) != NULL) {
        PartiallyBalloonedPage pbp = {};
        size_t offset = 0;
        uint32_t pfn;

        while (iov_to_buf(elem->out_sg, elem->out_num, offset,
                          &pfn, sizeof(pfn)) == sizeof(pfn)) {
            /* Go through an unsigned value so the PFN is never sign-extended. */
            unsigned int frame = virtio_ldl_p(vdev, &pfn);
            hwaddr pa = (hwaddr) frame << VIRTIO_BALLOON_PFN_SHIFT;
            MemoryRegionSection section;
            bool is_plain_ram;

            offset += sizeof(pfn);

            section = memory_region_find(get_system_memory(), pa,
                                         BALLOON_PAGE_SIZE);
            if (!section.mr) {
                /* Nothing was found, so there is no reference to drop. */
                trace_virtio_balloon_bad_addr(pa);
                continue;
            }

            is_plain_ram = memory_region_is_ram(section.mr) &&
                           !memory_region_is_rom(section.mr) &&
                           !memory_region_is_romd(section.mr);
            if (!is_plain_ram) {
                trace_virtio_balloon_bad_addr(pa);
                memory_region_unref(section.mr);
                continue;
            }

            trace_virtio_balloon_handle_output(memory_region_name(section.mr),
                                               pa);
            if (!virtio_balloon_inhibited()) {
                if (vq == s->ivq) {
                    balloon_inflate_page(s, section.mr,
                                         section.offset_within_region, &pbp);
                } else if (vq == s->dvq) {
                    balloon_deflate_page(s, section.mr,
                                         section.offset_within_region);
                } else {
                    g_assert_not_reached();
                }
            }
            memory_region_unref(section.mr);
        }

        virtqueue_push(vq, elem, 0);
        virtio_notify(vdev, vq);
        g_free(elem);
        virtio_balloon_pbp_free(&pbp);
    }
}
|
|
|
|
|
2010-01-26 23:17:35 +03:00
|
|
|
/*
 * Stats virtqueue handler.
 *
 * Pops one element from @vq and keeps it in s->stats_vq_elem; an element
 * that was still held from an earlier call is pushed back and freed first.
 * The (tag, value) records found in the element's out buffers are stored
 * in s->stats[] after the array has been reset, and the time of the update
 * is recorded in s->stats_last_update.  Whether or not an element was
 * available, the polling timer is re-armed when stats polling is enabled.
 */
static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
    VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (elem) {
        VirtIOBalloonStat stat;
        qemu_timeval tv;
        size_t offset;

        if (s->stats_vq_elem != NULL) {
            /* This should never happen if the driver follows the spec. */
            virtqueue_push(vq, s->stats_vq_elem, 0);
            virtio_notify(vdev, vq);
            g_free(s->stats_vq_elem);
        }
        s->stats_vq_elem = elem;

        /*
         * Initialize the stats to get rid of any stale values.  This is
         * only needed to handle the case where a guest supports fewer
         * stats than it used to (ie. it has booted into an old kernel).
         */
        reset_stats(s);

        for (offset = 0;
             iov_to_buf(elem->out_sg, elem->out_num, offset,
                        &stat, sizeof(stat)) == sizeof(stat);
             offset += sizeof(stat)) {
            uint16_t tag = virtio_tswap16(vdev, stat.tag);
            uint64_t val = virtio_tswap64(vdev, stat.val);

            /* Tags outside the stats array are ignored. */
            if (tag < VIRTIO_BALLOON_S_NR) {
                s->stats[tag] = val;
            }
        }
        s->stats_vq_offset = offset;

        if (qemu_gettimeofday(&tv) < 0) {
            warn_report("%s: failed to get time of day", __func__);
        } else {
            s->stats_last_update = tv.tv_sec;
        }
    }

    if (balloon_stats_enabled(s)) {
        balloon_stats_change_timer(s, s->stats_poll_interval);
    }
}
|
|
|
|
|
2018-12-11 11:24:53 +03:00
|
|
|
static void virtio_balloon_handle_free_page_vq(VirtIODevice *vdev,
|
|
|
|
VirtQueue *vq)
|
|
|
|
{
|
|
|
|
VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
|
|
|
|
qemu_bh_schedule(s->free_page_bh);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Process a single element from the free page hint virtqueue.
 *
 * The function waits on free_page_cond with free_page_lock, so the caller
 * must hold dev->free_page_lock.
 *
 * An element with out buffers carries a 32-bit command id:
 *   - in state REQUESTED, an id equal to dev->free_page_hint_cmd_id moves
 *     the state to START;
 *   - otherwise, in state START, the state moves to STOP.
 * An element with in buffers is a batch of hinted ranges; while in state
 * START each range is forwarded to qemu_guest_free_page_hint().
 *
 * Returns false when the queue is empty or when the command id could not
 * be read in full (the device is then marked broken via virtio_error());
 * returns true otherwise.  A popped element is always pushed back and
 * freed before returning.
 */
static bool get_free_page_hints(VirtIOBalloon *dev)
{
    VirtQueueElement *elem;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtQueue *vq = dev->free_page_vq;
    bool ret = true;
    unsigned int i;

    while (dev->block_iothread) {
        qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
    }

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
    if (!elem) {
        return false;
    }

    if (elem->out_num) {
        uint32_t id;
        size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
                                 &id, sizeof(id));

        /*
         * Validate the length before touching the value: on a short read
         * 'id' is only partially written and must not be byte-swapped.
         */
        if (unlikely(size != sizeof(id))) {
            virtio_error(vdev, "received an incorrect cmd id");
            ret = false;
            goto out;
        }
        virtio_tswap32s(vdev, &id);

        if (dev->free_page_hint_status == FREE_PAGE_HINT_S_REQUESTED &&
            id == dev->free_page_hint_cmd_id) {
            dev->free_page_hint_status = FREE_PAGE_HINT_S_START;
        } else if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) {
            /*
             * Stop the optimization only when it has started. This
             * avoids a stale stop sign for the previous command.
             */
            dev->free_page_hint_status = FREE_PAGE_HINT_S_STOP;
        }
    }

    if (elem->in_num && dev->free_page_hint_status == FREE_PAGE_HINT_S_START) {
        for (i = 0; i < elem->in_num; i++) {
            qemu_guest_free_page_hint(elem->in_sg[i].iov_base,
                                      elem->in_sg[i].iov_len);
        }
    }

out:
    virtqueue_push(vq, elem, 0);
    g_free(elem);
    return ret;
}
|
|
|
|
|
|
|
|
static void virtio_ballloon_get_free_page_hints(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIOBalloon *dev = opaque;
|
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
|
|
|
VirtQueue *vq = dev->free_page_vq;
|
|
|
|
bool continue_to_get_hints;
|
|
|
|
|
|
|
|
do {
|
|
|
|
qemu_mutex_lock(&dev->free_page_lock);
|
|
|
|
virtio_queue_set_notification(vq, 0);
|
|
|
|
continue_to_get_hints = get_free_page_hints(dev);
|
|
|
|
qemu_mutex_unlock(&dev->free_page_lock);
|
|
|
|
virtio_notify(vdev, vq);
|
|
|
|
/*
|
2020-07-20 20:51:28 +03:00
|
|
|
* Start to poll the vq once the hinting started. Otherwise, continue
|
2018-12-11 11:24:53 +03:00
|
|
|
* only when there are entries on the vq, which need to be given back.
|
|
|
|
*/
|
|
|
|
} while (continue_to_get_hints ||
|
2020-07-20 20:51:28 +03:00
|
|
|
dev->free_page_hint_status == FREE_PAGE_HINT_S_START);
|
2018-12-11 11:24:53 +03:00
|
|
|
virtio_queue_set_notification(vq, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool virtio_balloon_free_page_support(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIOBalloon *s = opaque;
|
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(s);
|
|
|
|
|
|
|
|
return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Request a new round of free page hinting.
 *
 * Under free_page_lock the command id is advanced (wrapping from UINT_MAX
 * back to VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN) and the hint status is
 * set to REQUESTED.  A config change notification is sent afterwards.
 */
static void virtio_balloon_free_page_start(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    qemu_mutex_lock(&s->free_page_lock);
    s->free_page_hint_cmd_id =
        (s->free_page_hint_cmd_id == UINT_MAX)
            ? VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN
            : s->free_page_hint_cmd_id + 1;
    s->free_page_hint_status = FREE_PAGE_HINT_S_REQUESTED;
    qemu_mutex_unlock(&s->free_page_lock);

    virtio_notify_config(vdev);
}
|
|
|
|
|
|
|
|
/*
 * Move free page hinting to the STOP state and send a config change
 * notification.  Nothing is done if the state is already STOP.
 */
static void virtio_balloon_free_page_stop(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->free_page_hint_status == FREE_PAGE_HINT_S_STOP) {
        return;
    }

    /*
     * The lock also guarantees us that the
     * virtio_ballloon_get_free_page_hints exits after the
     * free_page_hint_status is set to S_STOP.
     */
    qemu_mutex_lock(&s->free_page_lock);
    /*
     * The guest isn't done hinting, so send a notification
     * to the guest to actively stop the hinting.
     */
    s->free_page_hint_status = FREE_PAGE_HINT_S_STOP;
    qemu_mutex_unlock(&s->free_page_lock);

    virtio_notify_config(vdev);
}
|
|
|
|
|
|
|
|
/*
 * Move free page hinting to the DONE state and send a config change
 * notification.  Nothing is done if the state is already DONE.
 */
static void virtio_balloon_free_page_done(VirtIOBalloon *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    if (s->free_page_hint_status == FREE_PAGE_HINT_S_DONE) {
        return;
    }

    /* See virtio_balloon_free_page_stop() */
    qemu_mutex_lock(&s->free_page_lock);
    s->free_page_hint_status = FREE_PAGE_HINT_S_DONE;
    qemu_mutex_unlock(&s->free_page_lock);

    virtio_notify_config(vdev);
}
|
|
|
|
|
|
|
|
static int
|
2020-07-20 20:51:28 +03:00
|
|
|
virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data)
|
2018-12-11 11:24:53 +03:00
|
|
|
{
|
2021-07-08 12:53:39 +03:00
|
|
|
VirtIOBalloon *dev = container_of(n, VirtIOBalloon, free_page_hint_notify);
|
2018-12-11 11:24:53 +03:00
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
|
|
|
PrecopyNotifyData *pnd = data;
|
|
|
|
|
|
|
|
if (!virtio_balloon_free_page_support(dev)) {
|
|
|
|
/*
|
|
|
|
* This is an optimization provided to migration, so just return 0 to
|
|
|
|
* have the normal migration process not affected when this feature is
|
|
|
|
* not supported.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
virtio-balloon: don't start free page hinting if postcopy is possible
Postcopy never worked properly with 'free-page-hint=on', as there are
at least two issues:
1) With postcopy, the guest will never receive a VIRTIO_BALLOON_CMD_ID_DONE
and consequently won't release free pages back to the OS once
migration finishes.
The issue is that for postcopy, we won't do a final bitmap sync while
the guest is stopped on the source and
virtio_balloon_free_page_hint_notify() will only call
virtio_balloon_free_page_done() on the source during
PRECOPY_NOTIFY_CLEANUP, after the VM state was already migrated to
the destination.
2) Once the VM touches a page on the destination that has been excluded
from migration on the source via qemu_guest_free_page_hint() while
postcopy is active, that thread will stall until postcopy finishes
and all threads are woken up. (with older Linux kernels that won't
retry faults when woken up via userfaultfd, we might actually get a
SEGFAULT)
The issue is that the source will refuse to migrate any pages that
are not marked as dirty in the dirty bmap -- for example, because the
page might just have been sent. Consequently, the faulting thread will
stall, waiting for the page to be migrated -- which could take quite
a while and result in guest OS issues.
While we could fix 1) comparatively easily, 2) is harder to get right and
might require more involved RAM migration changes on source and destination
[1].
As it never worked properly, let's not start free page hinting in the
precopy notifier if the postcopy migration capability was enabled to fix
it easily. Capabilities cannot be enabled once migration is already
running.
Note 1: in the future we might either adjust migration code on the source
to track pages that have actually been sent or adjust
migration code on source and destination to eventually send
pages multiple times from the source and and deal with pages
that are sent multiple times on the destination.
Note 2: virtio-mem has similar issues, however, access to "unplugged"
memory by the guest is very rare and we would have to be very
lucky for it to happen during migration. The spec states
"The driver SHOULD NOT read from unplugged memory blocks ..."
and "The driver MUST NOT write to unplugged memory blocks".
virtio-mem will move away from virtio_balloon_free_page_done()
soon and handle this case explicitly on the destination.
[1] https://lkml.kernel.org/r/e79fd18c-aa62-c1d8-c7f3-ba3fc2c25fc8@redhat.com
Fixes: c13c4153f76d ("virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT")
Cc: qemu-stable@nongnu.org
Cc: Wei Wang <wei.w.wang@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Cc: Juan Quintela <quintela@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20210708095339.20274-2-david@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
2021-07-08 12:53:38 +03:00
|
|
|
/*
|
|
|
|
* Pages hinted via qemu_guest_free_page_hint() are cleared from the dirty
|
|
|
|
* bitmap and will not get migrated, especially also not when the postcopy
|
|
|
|
* destination starts using them and requests migration from the source; the
|
|
|
|
* faulting thread will stall until postcopy migration finishes and
|
|
|
|
* all threads are woken up. Let's not start free page hinting if postcopy
|
|
|
|
* is possible.
|
|
|
|
*/
|
|
|
|
if (migrate_postcopy_ram()) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-12-11 11:24:53 +03:00
|
|
|
switch (pnd->reason) {
|
|
|
|
case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
|
|
|
|
virtio_balloon_free_page_stop(dev);
|
|
|
|
break;
|
|
|
|
case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
|
|
|
|
if (vdev->vm_running) {
|
|
|
|
virtio_balloon_free_page_start(dev);
|
2020-06-29 11:06:15 +03:00
|
|
|
break;
|
2018-12-11 11:24:53 +03:00
|
|
|
}
|
2020-06-29 11:06:15 +03:00
|
|
|
/*
|
|
|
|
* Set S_DONE before migrating the vmstate, so the guest will reuse
|
|
|
|
* all hinted pages once running on the destination. Fall through.
|
|
|
|
*/
|
|
|
|
case PRECOPY_NOTIFY_CLEANUP:
|
|
|
|
/*
|
|
|
|
* Especially, if something goes wrong during precopy or if migration
|
|
|
|
* is canceled, we have to properly communicate S_DONE to the VM.
|
|
|
|
*/
|
|
|
|
virtio_balloon_free_page_done(dev);
|
|
|
|
break;
|
migrate/ram: remove "ram_bulk_stage" and "fpo_enabled"
The bulk stage is kind of weird: migration_bitmap_find_dirty() will
indicate a dirty page, however, ram_save_host_page() will never save it, as
migration_bitmap_clear_dirty() detects that it is not dirty.
We already fill the bitmap in ram_list_init_bitmaps() with ones, marking
everything dirty - it didn't used to be that way, which is why we needed
an explicit first bulk stage.
Let's simplify: make the bitmap the single source of thuth. Explicitly
handle the "xbzrle_enabled after first round" case.
Regarding XBZRLE (implicitly handled via "ram_bulk_stage = false" right
now), there is now a slight change in behavior:
- Colo: When starting, it will be disabled (was implicitly enabled)
until the first round actually finishes.
- Free page hinting: When starting, XBZRLE will be disabled (was implicitly
enabled) until the first round actually finished.
- Snapshots: When starting, XBZRLE will be disabled. We essentially only
do a single run, so I guess it will never actually get disabled.
Postcopy seems to indirectly disable it in ram_save_page(), so there
shouldn't be really any change.
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Juan Quintela <quintela@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20210216105039.40680-1-david@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
2021-02-16 13:50:39 +03:00
|
|
|
case PRECOPY_NOTIFY_SETUP:
|
2020-06-29 11:06:15 +03:00
|
|
|
case PRECOPY_NOTIFY_COMPLETE:
|
2018-12-11 11:24:53 +03:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-07-10 17:14:40 +03:00
|
|
|
/*
 * Size in bytes of the config space exposed for this device.
 *
 * The full struct virtio_balloon_config is used when the
 * qemu_4_0_config_size flag is set or when the host offers
 * VIRTIO_BALLOON_F_PAGE_POISON.  Otherwise the struct is cut off before
 * poison_val if VIRTIO_BALLOON_F_FREE_PAGE_HINT is offered, and before
 * free_page_hint_cmd_id if it is not.
 */
static size_t virtio_balloon_config_size(VirtIOBalloon *s)
{
    uint64_t host_features = s->host_features;
    size_t size;

    if (s->qemu_4_0_config_size ||
        virtio_has_feature(host_features, VIRTIO_BALLOON_F_PAGE_POISON)) {
        size = sizeof(struct virtio_balloon_config);
    } else if (virtio_has_feature(host_features,
                                  VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        size = offsetof(struct virtio_balloon_config, poison_val);
    } else {
        size = offsetof(struct virtio_balloon_config, free_page_hint_cmd_id);
    }

    return size;
}
|
|
|
|
|
2008-12-04 23:33:06 +03:00
|
|
|
/*
 * Fill @config_data with the device's config space contents.
 *
 * A zeroed struct virtio_balloon_config is populated with little-endian
 * copies of num_pages, actual and poison_val.  free_page_hint_cmd_id is
 * derived from the hinting state: the current command id while REQUESTED,
 * VIRTIO_BALLOON_CMD_ID_STOP while STOP, VIRTIO_BALLOON_CMD_ID_DONE while
 * DONE, and zero in any other state.  Only the first
 * virtio_balloon_config_size() bytes are copied out.
 */
static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config = {};

    config.num_pages = cpu_to_le32(dev->num_pages);
    config.actual = cpu_to_le32(dev->actual);
    config.poison_val = cpu_to_le32(dev->poison_val);

    if (dev->free_page_hint_status == FREE_PAGE_HINT_S_REQUESTED) {
        config.free_page_hint_cmd_id =
                       cpu_to_le32(dev->free_page_hint_cmd_id);
    } else if (dev->free_page_hint_status == FREE_PAGE_HINT_S_STOP) {
        config.free_page_hint_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
    } else if (dev->free_page_hint_status == FREE_PAGE_HINT_S_DONE) {
        config.free_page_hint_cmd_id =
                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
    }

    /*
     * Trace the host-endian values: the fields of 'config' have already
     * been converted to little-endian and would be logged byte-swapped on
     * big-endian hosts.
     */
    trace_virtio_balloon_get_config(dev->num_pages, dev->actual);
    memcpy(config_data, &config, virtio_balloon_config_size(dev));
}
|
|
|
|
|
2016-02-10 11:49:26 +03:00
|
|
|
/*
 * object_child_foreach() callback that collects realized PC-DIMM devices.
 *
 * @opaque points to a GSList head.  If @obj can be cast to TYPE_PC_DIMM
 * and is realized it is prepended to that list; the children of @obj are
 * then visited recursively with this same callback.  Always returns 0.
 */
static int build_dimm_list(Object *obj, void *opaque)
{
    GSList **dimms = opaque;

    if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
        DeviceState *dimm = DEVICE(obj);

        /* only realized DIMMs matter */
        if (dimm->realized) {
            *dimms = g_slist_prepend(*dimms, dimm);
        }
    }

    object_child_foreach(obj, build_dimm_list, opaque);

    return 0;
}
|
|
|
|
|
2016-02-10 11:49:22 +03:00
|
|
|
static ram_addr_t get_current_ram_size(void)
|
|
|
|
{
|
2016-02-10 11:49:25 +03:00
|
|
|
GSList *list = NULL, *item;
|
2020-10-28 13:24:22 +03:00
|
|
|
ram_addr_t size = current_machine->ram_size;
|
2016-02-10 11:49:22 +03:00
|
|
|
|
2016-02-10 11:49:26 +03:00
|
|
|
build_dimm_list(qdev_get_machine(), &list);
|
2016-02-10 11:49:25 +03:00
|
|
|
for (item = list; item; item = g_slist_next(item)) {
|
|
|
|
Object *obj = OBJECT(item->data);
|
2016-02-10 11:49:26 +03:00
|
|
|
if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) {
|
|
|
|
size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
|
|
|
|
&error_abort);
|
|
|
|
}
|
2016-02-10 11:49:22 +03:00
|
|
|
}
|
2016-02-10 11:49:25 +03:00
|
|
|
g_slist_free(list);
|
2016-02-10 11:49:22 +03:00
|
|
|
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2020-05-27 07:14:00 +03:00
|
|
|
static bool virtio_balloon_page_poison_support(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIOBalloon *s = opaque;
|
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(s);
|
|
|
|
|
|
|
|
return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
|
|
|
|
}
|
|
|
|
|
2008-12-04 23:33:06 +03:00
|
|
|
/*
 * Apply a guest write to the config space.
 *
 * The first virtio_balloon_config_size() bytes of @config_data are read.
 * 'actual' is converted from little-endian and stored; when it changed, a
 * balloon change event is sent with the current RAM size minus the
 * ballooned amount.  poison_val is taken from the config only when the
 * page poison feature is set on the device and is zero otherwise.
 */
static void virtio_balloon_set_config(VirtIODevice *vdev,
                                      const uint8_t *config_data)
{
    VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
    struct virtio_balloon_config config;
    const uint32_t previous = dev->actual;
    const ram_addr_t vm_ram_size = get_current_ram_size();

    memcpy(&config, config_data, virtio_balloon_config_size(dev));

    dev->actual = le32_to_cpu(config.actual);
    if (dev->actual != previous) {
        ram_addr_t ballooned =
            (ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT;

        qapi_event_send_balloon_change(vm_ram_size - ballooned);
    }

    dev->poison_val = virtio_balloon_page_poison_support(dev)
                          ? le32_to_cpu(config.poison_val)
                          : 0;

    trace_virtio_balloon_set_config(dev->actual, previous);
}
|
|
|
|
|
2015-07-27 12:49:19 +03:00
|
|
|
static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f,
|
|
|
|
Error **errp)
|
2008-12-04 23:33:06 +03:00
|
|
|
{
|
2015-06-15 13:52:52 +03:00
|
|
|
VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
|
|
|
|
f |= dev->host_features;
|
2015-06-04 13:34:32 +03:00
|
|
|
virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ);
|
2018-12-11 11:24:53 +03:00
|
|
|
|
2010-01-10 14:52:53 +03:00
|
|
|
return f;
|
2008-12-04 23:33:06 +03:00
|
|
|
}
|
|
|
|
|
2011-10-21 17:41:37 +04:00
|
|
|
/*
 * Fill info->actual with the current RAM size minus the amount given by
 * the device's 'actual' page count shifted by VIRTIO_BALLOON_PFN_SHIFT.
 * @opaque is the VirtIOBalloon instance.
 */
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
    VirtIOBalloon *dev = opaque;
    uint64_t ballooned = (uint64_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT;

    info->actual = get_current_ram_size() - ballooned;
}
|
|
|
|
|
balloon: Separate out stat and balloon handling
Passing on '0' as ballooning target to indicate retrieval of stats is
bad API. It also makes 'balloon 0' in the monitor cause a segfault.
Have two different functions handle the different functionality instead.
Detailed explanation from Markus's review:
1. do_info_balloon() is an info_async() method. It receives a callback
with argument, to be called exactly once (callback frees the
argument). It passes the callback via qemu_balloon_status() and
indirectly through qemu_balloon_event to virtio_balloon_to_target().
virtio_balloon_to_target() executes its balloon stats half. It
stores the callback in the device state.
If it can't send a stats request, it resets stats and calls the
callback right away.
Else, it sends a stats request. The device model runs the callback
when it receives the answer.
Works.
2. do_balloon() is a cmd_async() method. It receives a callback with
argument, to be called when the command completes. do_balloon()
calls it right before it succeeds. Odd, but should work.
Nevertheless, it passes the callback on via qemu_balloon() and
indirectly through qemu_balloon_event to virtio_balloon_to_target().
a. If the argument is non-zero, virtio_balloon_to_target() executes
its balloon half, which doesn't use the callback in any way.
Odd, but works.
b. If the argument is zero, virtio_balloon_to_target() executes its
balloon stats half, just like in 1. It either calls the callback
right away, or arranges for it to be called later.
Thus, the callback runs twice: use after free and double free.
Test case: start with -S -device virtio-balloon, execute "balloon 0" in
human monitor. Runs the callback first from virtio_balloon_to_target(),
then again from do_balloon().
Reported-by: Mike Cao <bcao@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2011-07-20 12:00:56 +04:00
|
|
|
/*
 * Balloon "set target" callback.
 *
 * @opaque: the VirtIOBalloon instance.
 * @target: requested amount of guest RAM, in bytes; values larger than
 *          get_current_ram_size() are clamped to that size.
 *
 * For a non-zero (clamped) target, the difference between the current RAM
 * size and the target is shifted right by VIRTIO_BALLOON_PFN_SHIFT, stored
 * in num_pages, and a config-change notification is raised. A zero target
 * leaves num_pages untouched and sends no notification. The trace event is
 * emitted in both cases.
 */
static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
{
    VirtIOBalloon *balloon = VIRTIO_BALLOON(opaque);
    VirtIODevice *vdev = VIRTIO_DEVICE(balloon);
    const ram_addr_t ram_size = get_current_ram_size();
    const ram_addr_t wanted = target > ram_size ? ram_size : target;

    if (wanted != 0) {
        balloon->num_pages = (ram_size - wanted) >> VIRTIO_BALLOON_PFN_SHIFT;
        virtio_notify_config(vdev);
    }

    trace_virtio_balloon_to_target(wanted, balloon->num_pages);
}
|
|
|
|
|
2016-10-27 20:36:37 +03:00
|
|
|
static int virtio_balloon_post_load_device(void *opaque, int version_id)
|
2014-06-24 21:20:08 +04:00
|
|
|
{
|
2016-10-27 20:36:37 +03:00
|
|
|
VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
|
2016-03-29 17:00:49 +03:00
|
|
|
|
|
|
|
if (balloon_stats_enabled(s)) {
|
|
|
|
balloon_stats_change_timer(s, s->stats_poll_interval);
|
|
|
|
}
|
2008-12-04 23:33:06 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-07-20 20:51:28 +03:00
|
|
|
static const VMStateDescription vmstate_virtio_balloon_free_page_hint = {
|
2018-12-11 11:24:53 +03:00
|
|
|
.name = "virtio-balloon-device/free-page-report",
|
|
|
|
.version_id = 1,
|
|
|
|
.minimum_version_id = 1,
|
|
|
|
.needed = virtio_balloon_free_page_support,
|
|
|
|
.fields = (VMStateField[]) {
|
2020-07-20 20:51:28 +03:00
|
|
|
VMSTATE_UINT32(free_page_hint_cmd_id, VirtIOBalloon),
|
|
|
|
VMSTATE_UINT32(free_page_hint_status, VirtIOBalloon),
|
2018-12-11 11:24:53 +03:00
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-05-27 07:14:00 +03:00
|
|
|
static const VMStateDescription vmstate_virtio_balloon_page_poison = {
|
2021-09-14 16:17:16 +03:00
|
|
|
.name = "virtio-balloon-device/page-poison",
|
2020-05-27 07:14:00 +03:00
|
|
|
.version_id = 1,
|
|
|
|
.minimum_version_id = 1,
|
|
|
|
.needed = virtio_balloon_page_poison_support,
|
|
|
|
.fields = (VMStateField[]) {
|
|
|
|
VMSTATE_UINT32(poison_val, VirtIOBalloon),
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-10-27 20:36:37 +03:00
|
|
|
static const VMStateDescription vmstate_virtio_balloon_device = {
|
|
|
|
.name = "virtio-balloon-device",
|
|
|
|
.version_id = 1,
|
|
|
|
.minimum_version_id = 1,
|
|
|
|
.post_load = virtio_balloon_post_load_device,
|
|
|
|
.fields = (VMStateField[]) {
|
|
|
|
VMSTATE_UINT32(num_pages, VirtIOBalloon),
|
|
|
|
VMSTATE_UINT32(actual, VirtIOBalloon),
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
},
|
2018-12-11 11:24:53 +03:00
|
|
|
.subsections = (const VMStateDescription * []) {
|
2020-07-20 20:51:28 +03:00
|
|
|
&vmstate_virtio_balloon_free_page_hint,
|
2020-05-27 07:14:00 +03:00
|
|
|
&vmstate_virtio_balloon_page_poison,
|
2018-12-11 11:24:53 +03:00
|
|
|
NULL
|
|
|
|
}
|
2016-10-27 20:36:37 +03:00
|
|
|
};
|
|
|
|
|
2013-07-30 04:51:37 +04:00
|
|
|
/*
 * Realize the virtio-balloon device.
 *
 * @dev:  the device being realized.
 * @errp: set on failure; the device is left cleaned up in that case.
 *
 * Steps:
 *  - initialise the virtio core state with the balloon config size;
 *  - register this device as the balloon handler (fails if a handler is
 *    already registered);
 *  - reject 'free-page-hint' without an 'iothread';
 *  - create the inflate, deflate and stats queues, plus the optional
 *    free-page-hint and reporting queues depending on host_features;
 *  - reset the statistics.
 */
static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOBalloon *s = VIRTIO_BALLOON(dev);
    int ret;

    virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
                virtio_balloon_config_size(s));

    ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                   virtio_balloon_stat, s);
    if (ret < 0) {
        error_setg(errp, "Only one balloon device is supported");
        virtio_cleanup(vdev);
        return;
    }

    if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT) &&
        !s->iothread) {
        error_setg(errp, "'free-page-hint' requires 'iothread' to be set");
        /*
         * The handler was registered just above; drop it again so that no
         * registration pointing at this failed device is left behind.
         */
        qemu_remove_balloon_handler(s);
        virtio_cleanup(vdev);
        return;
    }

    s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
    s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);

    if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
        s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
                                           virtio_balloon_handle_free_page_vq);
        precopy_add_notifier(&s->free_page_hint_notify);

        /* Hold a reference on the iothread; unrealize drops it again. */
        object_ref(OBJECT(s->iothread));
        s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
                                     virtio_ballloon_get_free_page_hints, s);
    }

    if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
        s->reporting_vq = virtio_add_queue(vdev, 32,
                                           virtio_balloon_handle_report);
    }

    reset_stats(s);
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize method no longer ignores such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 18:29:24 +03:00
|
|
|
/*
 * Tear down the virtio-balloon device.
 *
 * If a free-page bottom half exists, it is deleted, the iothread reference
 * is dropped, free-page hinting is stopped and the precopy notifier is
 * removed. After that the stats timer and the balloon handler are removed,
 * every virtqueue is deleted (the free-page and reporting queues only if
 * present) and the virtio core state is cleaned up.
 */
static void virtio_balloon_device_unrealize(DeviceState *dev)
{
    VirtIOBalloon *balloon = VIRTIO_BALLOON(dev);
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);

    if (balloon->free_page_bh) {
        qemu_bh_delete(balloon->free_page_bh);
        object_unref(OBJECT(balloon->iothread));
        virtio_balloon_free_page_stop(balloon);
        precopy_remove_notifier(&balloon->free_page_hint_notify);
    }

    balloon_stats_destroy_timer(balloon);
    qemu_remove_balloon_handler(balloon);

    /* Inflate, deflate and stats queues. */
    virtio_delete_queue(balloon->ivq);
    virtio_delete_queue(balloon->dvq);
    virtio_delete_queue(balloon->svq);

    /* The remaining queues are deleted only when non-NULL. */
    if (balloon->free_page_vq) {
        virtio_delete_queue(balloon->free_page_vq);
    }
    if (balloon->reporting_vq) {
        virtio_delete_queue(balloon->reporting_vq);
    }

    virtio_cleanup(vdev);
}
|
|
|
|
|
2016-03-01 14:14:03 +03:00
|
|
|
/*
 * Device reset handler.
 *
 * Stops free-page hinting when that feature is supported, hands a pending
 * stats element back to the stats queue and frees it, and clears the
 * poison value.
 */
static void virtio_balloon_device_reset(VirtIODevice *vdev)
{
    VirtIOBalloon *balloon = VIRTIO_BALLOON(vdev);

    if (virtio_balloon_free_page_support(balloon)) {
        virtio_balloon_free_page_stop(balloon);
    }

    if (balloon->stats_vq_elem) {
        /* Un-pop the element before freeing our copy of it. */
        virtqueue_unpop(balloon->svq, balloon->stats_vq_elem, 0);
        g_free(balloon->stats_vq_elem);
        balloon->stats_vq_elem = NULL;
    }

    balloon->poison_val = 0;
}
|
|
|
|
|
2016-09-07 18:20:49 +03:00
|
|
|
/*
 * Status-change handler.
 *
 * Re-polls the stats queue when no stats element is held, the VM is
 * running, the driver is OK and the queue could be rewound by one element.
 * When free-page hinting is supported, it also blocks or unblocks the
 * iothread according to the VM run state.
 */
static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
    VirtIOBalloon *balloon = VIRTIO_BALLOON(vdev);

    /*
     * The element discarded while the VM was stopped is fetched again.
     * Evaluation order matters: the rewind is attempted only after all the
     * other conditions hold.
     */
    if (!balloon->stats_vq_elem && vdev->vm_running &&
        (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        virtqueue_rewind(balloon->svq, 1)) {
        virtio_balloon_receive_stats(vdev, balloon->svq);
    }

    if (!virtio_balloon_free_page_support(balloon)) {
        return;
    }

    /* VM resumed while the iothread was blocked: let it continue. */
    if (vdev->vm_running && balloon->block_iothread) {
        qemu_mutex_lock(&balloon->free_page_lock);
        balloon->block_iothread = false;
        qemu_cond_signal(&balloon->free_page_cond);
        qemu_mutex_unlock(&balloon->free_page_lock);
    }

    /* VM stopped: make the iothread block. */
    if (!vdev->vm_running) {
        qemu_mutex_lock(&balloon->free_page_lock);
        balloon->block_iothread = true;
        qemu_mutex_unlock(&balloon->free_page_lock);
    }
}
|
|
|
|
|
2015-05-11 12:34:05 +03:00
|
|
|
/*
 * QOM instance initialiser.
 *
 * Sets up the free-page lock and condition variable, the initial
 * free-page-hint command id and the precopy notifier callback. It then
 * adds the "guest-stats" (getter only) and
 * "guest-stats-polling-interval" (getter and setter) properties.
 */
static void virtio_balloon_instance_init(Object *obj)
{
    VirtIOBalloon *balloon = VIRTIO_BALLOON(obj);

    qemu_mutex_init(&balloon->free_page_lock);
    qemu_cond_init(&balloon->free_page_cond);
    balloon->free_page_hint_cmd_id = VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN;
    balloon->free_page_hint_notify.notify =
        virtio_balloon_free_page_hint_notify;

    object_property_add(obj, "guest-stats", "guest statistics",
                        balloon_stats_get_all,
                        NULL,
                        NULL,
                        balloon);

    object_property_add(obj, "guest-stats-polling-interval", "int",
                        balloon_stats_get_poll_interval,
                        balloon_stats_set_poll_interval,
                        NULL,
                        balloon);
}
|
|
|
|
|
2016-10-06 15:55:47 +03:00
|
|
|
static const VMStateDescription vmstate_virtio_balloon = {
|
|
|
|
.name = "virtio-balloon",
|
|
|
|
.minimum_version_id = 1,
|
|
|
|
.version_id = 1,
|
|
|
|
.fields = (VMStateField[]) {
|
|
|
|
VMSTATE_VIRTIO_DEVICE,
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
},
|
|
|
|
};
|
2016-07-14 20:22:49 +03:00
|
|
|
|
2013-03-27 13:49:10 +04:00
|
|
|
static Property virtio_balloon_properties[] = {
|
2015-06-15 13:52:52 +03:00
|
|
|
DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features,
|
|
|
|
VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
|
2018-12-11 11:24:53 +03:00
|
|
|
DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
|
|
|
|
VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
|
2020-05-27 07:14:00 +03:00
|
|
|
DEFINE_PROP_BIT("page-poison", VirtIOBalloon, host_features,
|
|
|
|
VIRTIO_BALLOON_F_PAGE_POISON, true),
|
2020-05-27 07:14:07 +03:00
|
|
|
DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features,
|
|
|
|
VIRTIO_BALLOON_F_REPORTING, false),
|
2019-07-10 17:14:40 +03:00
|
|
|
/* QEMU 4.0 accidentally changed the config size even when free-page-hint
|
|
|
|
* is disabled, resulting in QEMU 3.1 migration incompatibility. This
|
|
|
|
* property retains this quirk for QEMU 4.1 machine types.
|
|
|
|
*/
|
|
|
|
DEFINE_PROP_BOOL("qemu-4-0-config-size", VirtIOBalloon,
|
|
|
|
qemu_4_0_config_size, false),
|
2018-12-11 11:24:53 +03:00
|
|
|
DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD,
|
|
|
|
IOThread *),
|
2013-03-27 13:49:10 +04:00
|
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * QOM class initialiser: installs the property list, the migration
 * descriptions, the device category and the virtio device callbacks.
 * @data is unused.
 */
static void virtio_balloon_class_init(ObjectClass *klass, void *data)
{
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *device_class = DEVICE_CLASS(klass);

    /* Generic device class setup. */
    device_class_set_props(device_class, virtio_balloon_properties);
    device_class->vmsd = &vmstate_virtio_balloon;
    set_bit(DEVICE_CATEGORY_MISC, device_class->categories);

    /* Virtio device hooks. */
    virtio_class->realize = virtio_balloon_device_realize;
    virtio_class->unrealize = virtio_balloon_device_unrealize;
    virtio_class->reset = virtio_balloon_device_reset;
    virtio_class->get_config = virtio_balloon_get_config;
    virtio_class->set_config = virtio_balloon_set_config;
    virtio_class->get_features = virtio_balloon_get_features;
    virtio_class->set_status = virtio_balloon_set_status;
    virtio_class->vmsd = &vmstate_virtio_balloon_device;
}
|
|
|
|
|
|
|
|
static const TypeInfo virtio_balloon_info = {
|
|
|
|
.name = TYPE_VIRTIO_BALLOON,
|
|
|
|
.parent = TYPE_VIRTIO_DEVICE,
|
|
|
|
.instance_size = sizeof(VirtIOBalloon),
|
2015-05-11 12:34:05 +03:00
|
|
|
.instance_init = virtio_balloon_instance_init,
|
2013-03-27 13:49:10 +04:00
|
|
|
.class_init = virtio_balloon_class_init,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void virtio_register_types(void)
|
|
|
|
{
|
|
|
|
type_register_static(&virtio_balloon_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(virtio_register_types)
|