qemu/hw/hyperv/hyperv.c

659 lines
18 KiB
C

/*
* Hyper-V guest/hypervisor interaction
*
* Copyright (c) 2015-2018 Virtuozzo International GmbH.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "exec/address-spaces.h"
#include "sysemu/kvm.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "qemu/queue.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "hw/hyperv/hyperv.h"
typedef struct SynICState {
DeviceState parent_obj;
CPUState *cs;
bool enabled;
hwaddr msg_page_addr;
hwaddr event_page_addr;
MemoryRegion msg_page_mr;
MemoryRegion event_page_mr;
struct hyperv_message_page *msg_page;
struct hyperv_event_flags_page *event_page;
} SynICState;
#define TYPE_SYNIC "hyperv-synic"
#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
static SynICState *get_synic(CPUState *cs)
{
return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
}
static void synic_update(SynICState *synic, bool enable,
hwaddr msg_page_addr, hwaddr event_page_addr)
{
synic->enabled = enable;
if (synic->msg_page_addr != msg_page_addr) {
if (synic->msg_page_addr) {
memory_region_del_subregion(get_system_memory(),
&synic->msg_page_mr);
}
if (msg_page_addr) {
memory_region_add_subregion(get_system_memory(), msg_page_addr,
&synic->msg_page_mr);
}
synic->msg_page_addr = msg_page_addr;
}
if (synic->event_page_addr != event_page_addr) {
if (synic->event_page_addr) {
memory_region_del_subregion(get_system_memory(),
&synic->event_page_mr);
}
if (event_page_addr) {
memory_region_add_subregion(get_system_memory(), event_page_addr,
&synic->event_page_mr);
}
synic->event_page_addr = event_page_addr;
}
}
void hyperv_synic_update(CPUState *cs, bool enable,
hwaddr msg_page_addr, hwaddr event_page_addr)
{
SynICState *synic = get_synic(cs);
if (!synic) {
return;
}
synic_update(synic, enable, msg_page_addr, event_page_addr);
}
static void synic_realize(DeviceState *dev, Error **errp)
{
Object *obj = OBJECT(dev);
SynICState *synic = SYNIC(dev);
char *msgp_name, *eventp_name;
uint32_t vp_index;
/* memory region names have to be globally unique */
vp_index = hyperv_vp_index(synic->cs);
msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
sizeof(*synic->msg_page), &error_abort);
memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
sizeof(*synic->event_page), &error_abort);
synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
g_free(msgp_name);
g_free(eventp_name);
}
static void synic_reset(DeviceState *dev)
{
SynICState *synic = SYNIC(dev);
memset(synic->msg_page, 0, sizeof(*synic->msg_page));
memset(synic->event_page, 0, sizeof(*synic->event_page));
synic_update(synic, false, 0, 0);
}
static void synic_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = synic_realize;
dc->reset = synic_reset;
dc->user_creatable = false;
}
void hyperv_synic_add(CPUState *cs)
{
Object *obj;
SynICState *synic;
obj = object_new(TYPE_SYNIC);
synic = SYNIC(obj);
synic->cs = cs;
object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
object_unref(obj);
object_property_set_bool(obj, true, "realized", &error_abort);
}
void hyperv_synic_reset(CPUState *cs)
{
SynICState *synic = get_synic(cs);
if (synic) {
device_reset(DEVICE(synic));
}
}
static const TypeInfo synic_type_info = {
.name = TYPE_SYNIC,
.parent = TYPE_DEVICE,
.instance_size = sizeof(SynICState),
.class_init = synic_class_init,
};
static void synic_register_types(void)
{
type_register_static(&synic_type_info);
}
type_init(synic_register_types)
/*
* KVM has its own message producers (SynIC timers). To guarantee
* serialization with both KVM vcpu and the guest cpu, the messages are first
* staged in an intermediate area and then posted to the SynIC message page in
* the vcpu thread.
*/
typedef struct HvSintStagedMessage {
/* message content staged by hyperv_post_msg */
struct hyperv_message msg;
/* callback + data (r/o) to complete the processing in a BH */
HvSintMsgCb cb;
void *cb_data;
/* message posting status filled by cpu_post_msg */
int status;
/* passing the buck: */
enum {
/* initial state */
HV_STAGED_MSG_FREE,
/*
* hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
* BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
*/
HV_STAGED_MSG_BUSY,
/*
* cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
* notify the guest, records the status, marks the posting done (BUSY
* -> POSTED), and schedules sint_msg_bh BH
*/
HV_STAGED_MSG_POSTED,
/*
* sint_msg_bh (BH) verifies that the posting is done, runs the
* callback, and starts over (POSTED -> FREE)
*/
} state;
} HvSintStagedMessage;
struct HvSintRoute {
uint32_t sint;
SynICState *synic;
int gsi;
EventNotifier sint_set_notifier;
EventNotifier sint_ack_notifier;
HvSintStagedMessage *staged_msg;
unsigned refcount;
};
static CPUState *hyperv_find_vcpu(uint32_t vp_index)
{
CPUState *cs = qemu_get_cpu(vp_index);
assert(hyperv_vp_index(cs) == vp_index);
return cs;
}
/*
* BH to complete the processing of a staged message.
*/
static void sint_msg_bh(void *opaque)
{
HvSintRoute *sint_route = opaque;
HvSintStagedMessage *staged_msg = sint_route->staged_msg;
if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
/* status nor ready yet (spurious ack from guest?), ignore */
return;
}
staged_msg->cb(staged_msg->cb_data, staged_msg->status);
staged_msg->status = 0;
/* staged message processing finished, ready to start over */
atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
/* drop the reference taken in hyperv_post_msg */
hyperv_sint_route_unref(sint_route);
}
/*
* Worker to transfer the message from the staging area into the SynIC message
* page in vcpu context.
*/
static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
{
HvSintRoute *sint_route = data.host_ptr;
HvSintStagedMessage *staged_msg = sint_route->staged_msg;
SynICState *synic = sint_route->synic;
struct hyperv_message *dst_msg;
bool wait_for_sint_ack = false;
assert(staged_msg->state == HV_STAGED_MSG_BUSY);
if (!synic->enabled || !synic->msg_page_addr) {
staged_msg->status = -ENXIO;
goto posted;
}
dst_msg = &synic->msg_page->slot[sint_route->sint];
if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
staged_msg->status = -EAGAIN;
wait_for_sint_ack = true;
} else {
memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
staged_msg->status = hyperv_sint_route_set_sint(sint_route);
}
memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
posted:
atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
/*
* Notify the msg originator of the progress made; if the slot was busy we
* set msg_pending flag in it so it will be the guest who will do EOM and
* trigger the notification from KVM via sint_ack_notifier
*/
if (!wait_for_sint_ack) {
aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
sint_route);
}
}
/*
* Post a Hyper-V message to the staging area, for delivery to guest in the
* vcpu thread.
*/
int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
{
HvSintStagedMessage *staged_msg = sint_route->staged_msg;
assert(staged_msg);
/* grab the staging area */
if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
return -EAGAIN;
}
memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
/* hold a reference on sint_route until the callback is finished */
hyperv_sint_route_ref(sint_route);
/* schedule message posting attempt in vcpu thread */
async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
RUN_ON_CPU_HOST_PTR(sint_route));
return 0;
}
static void sint_ack_handler(EventNotifier *notifier)
{
HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
sint_ack_notifier);
event_notifier_test_and_clear(notifier);
/*
* the guest consumed the previous message so complete the current one with
* -EAGAIN and let the msg originator retry
*/
aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
}
/*
* Set given event flag for a given sint on a given vcpu, and signal the sint.
*/
int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
{
int ret;
SynICState *synic = sint_route->synic;
unsigned long *flags, set_mask;
unsigned set_idx;
if (eventno > HV_EVENT_FLAGS_COUNT) {
return -EINVAL;
}
if (!synic->enabled || !synic->event_page_addr) {
return -ENXIO;
}
set_idx = BIT_WORD(eventno);
set_mask = BIT_MASK(eventno);
flags = synic->event_page->slot[sint_route->sint].flags;
if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
memory_region_set_dirty(&synic->event_page_mr, 0,
sizeof(*synic->event_page));
ret = hyperv_sint_route_set_sint(sint_route);
} else {
ret = 0;
}
return ret;
}
HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
HvSintMsgCb cb, void *cb_data)
{
HvSintRoute *sint_route;
EventNotifier *ack_notifier;
int r, gsi;
CPUState *cs;
SynICState *synic;
cs = hyperv_find_vcpu(vp_index);
if (!cs) {
return NULL;
}
synic = get_synic(cs);
if (!synic) {
return NULL;
}
sint_route = g_new0(HvSintRoute, 1);
r = event_notifier_init(&sint_route->sint_set_notifier, false);
if (r) {
goto err;
}
ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
if (ack_notifier) {
sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
sint_route->staged_msg->cb = cb;
sint_route->staged_msg->cb_data = cb_data;
r = event_notifier_init(ack_notifier, false);
if (r) {
goto err_sint_set_notifier;
}
event_notifier_set_handler(ack_notifier, sint_ack_handler);
}
gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
if (gsi < 0) {
goto err_gsi;
}
r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
&sint_route->sint_set_notifier,
ack_notifier, gsi);
if (r) {
goto err_irqfd;
}
sint_route->gsi = gsi;
sint_route->synic = synic;
sint_route->sint = sint;
sint_route->refcount = 1;
return sint_route;
err_irqfd:
kvm_irqchip_release_virq(kvm_state, gsi);
err_gsi:
if (ack_notifier) {
event_notifier_set_handler(ack_notifier, NULL);
event_notifier_cleanup(ack_notifier);
g_free(sint_route->staged_msg);
}
err_sint_set_notifier:
event_notifier_cleanup(&sint_route->sint_set_notifier);
err:
g_free(sint_route);
return NULL;
}
void hyperv_sint_route_ref(HvSintRoute *sint_route)
{
sint_route->refcount++;
}
void hyperv_sint_route_unref(HvSintRoute *sint_route)
{
if (!sint_route) {
return;
}
assert(sint_route->refcount > 0);
if (--sint_route->refcount) {
return;
}
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
&sint_route->sint_set_notifier,
sint_route->gsi);
kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
if (sint_route->staged_msg) {
event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
event_notifier_cleanup(&sint_route->sint_ack_notifier);
g_free(sint_route->staged_msg);
}
event_notifier_cleanup(&sint_route->sint_set_notifier);
g_free(sint_route);
}
int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
{
return event_notifier_set(&sint_route->sint_set_notifier);
}
typedef struct MsgHandler {
struct rcu_head rcu;
QLIST_ENTRY(MsgHandler) link;
uint32_t conn_id;
HvMsgHandler handler;
void *data;
} MsgHandler;
typedef struct EventFlagHandler {
struct rcu_head rcu;
QLIST_ENTRY(EventFlagHandler) link;
uint32_t conn_id;
EventNotifier *notifier;
} EventFlagHandler;
static QLIST_HEAD(, MsgHandler) msg_handlers;
static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;
static void __attribute__((constructor)) hv_init(void)
{
QLIST_INIT(&msg_handlers);
QLIST_INIT(&event_flag_handlers);
qemu_mutex_init(&handlers_mutex);
}
int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
{
int ret;
MsgHandler *mh;
qemu_mutex_lock(&handlers_mutex);
QLIST_FOREACH(mh, &msg_handlers, link) {
if (mh->conn_id == conn_id) {
if (handler) {
ret = -EEXIST;
} else {
QLIST_REMOVE_RCU(mh, link);
g_free_rcu(mh, rcu);
ret = 0;
}
goto unlock;
}
}
if (handler) {
mh = g_new(MsgHandler, 1);
mh->conn_id = conn_id;
mh->handler = handler;
mh->data = data;
QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
ret = 0;
} else {
ret = -ENOENT;
}
unlock:
qemu_mutex_unlock(&handlers_mutex);
return ret;
}
uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
{
uint16_t ret;
hwaddr len;
struct hyperv_post_message_input *msg;
MsgHandler *mh;
if (fast) {
return HV_STATUS_INVALID_HYPERCALL_CODE;
}
if (param & (__alignof__(*msg) - 1)) {
return HV_STATUS_INVALID_ALIGNMENT;
}
len = sizeof(*msg);
msg = cpu_physical_memory_map(param, &len, 0);
if (len < sizeof(*msg)) {
ret = HV_STATUS_INSUFFICIENT_MEMORY;
goto unmap;
}
if (msg->payload_size > sizeof(msg->payload)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
goto unmap;
}
ret = HV_STATUS_INVALID_CONNECTION_ID;
rcu_read_lock();
QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
ret = mh->handler(msg, mh->data);
break;
}
}
rcu_read_unlock();
unmap:
cpu_physical_memory_unmap(msg, len, 0, 0);
return ret;
}
static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
{
int ret;
EventFlagHandler *handler;
qemu_mutex_lock(&handlers_mutex);
QLIST_FOREACH(handler, &event_flag_handlers, link) {
if (handler->conn_id == conn_id) {
if (notifier) {
ret = -EEXIST;
} else {
QLIST_REMOVE_RCU(handler, link);
g_free_rcu(handler, rcu);
ret = 0;
}
goto unlock;
}
}
if (notifier) {
handler = g_new(EventFlagHandler, 1);
handler->conn_id = conn_id;
handler->notifier = notifier;
QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
ret = 0;
} else {
ret = -ENOENT;
}
unlock:
qemu_mutex_unlock(&handlers_mutex);
return ret;
}
static bool process_event_flags_userspace;
int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
{
if (!process_event_flags_userspace &&
!kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
process_event_flags_userspace = true;
warn_report("Hyper-V event signaling is not supported by this kernel; "
"using slower userspace hypercall processing");
}
if (!process_event_flags_userspace) {
struct kvm_hyperv_eventfd hvevfd = {
.conn_id = conn_id,
.fd = notifier ? event_notifier_get_fd(notifier) : -1,
.flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
};
return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
}
return set_event_flag_handler(conn_id, notifier);
}
uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
{
uint16_t ret;
EventFlagHandler *handler;
if (unlikely(!fast)) {
hwaddr addr = param;
if (addr & (__alignof__(addr) - 1)) {
return HV_STATUS_INVALID_ALIGNMENT;
}
param = ldq_phys(&address_space_memory, addr);
}
/*
* Per spec, bits 32-47 contain the extra "flag number". However, we
* have no use for it, and in all known usecases it is zero, so just
* report lookup failure if it isn't.
*/
if (param & 0xffff00000000ULL) {
return HV_STATUS_INVALID_PORT_ID;
}
/* remaining bits are reserved-zero */
if (param & ~HV_CONNECTION_ID_MASK) {
return HV_STATUS_INVALID_HYPERCALL_INPUT;
}
ret = HV_STATUS_INVALID_CONNECTION_ID;
rcu_read_lock();
QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
if (handler->conn_id == param) {
event_notifier_set(handler->notifier);
ret = 0;
break;
}
}
rcu_read_unlock();
return ret;
}