hypervisor_unmask_event(): don't check/update evtchn_pending_sel for the
current CPU, but for any CPU which may accept this event.
xen/xenevt.c: more use of atomic ops and locks where appropriate, and some
  other SMP fixes. Handle all events on the primary CPU (may be revisited
  later). Set/clear ci_evtmask[] for watched events.

This should fix the problems on dom0 kernels reported by jym@.
This commit is contained in:
bouyer 2011-12-03 22:41:40 +00:00
parent 4d61ee8d61
commit ad7affb170
2 changed files with 110 additions and 45 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: hypervisor_machdep.c,v 1.17 2011/11/19 17:13:39 cherry Exp $ */ /* $NetBSD: hypervisor_machdep.c,v 1.18 2011/12/03 22:41:40 bouyer Exp $ */
/* /*
* *
@ -54,7 +54,7 @@
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.17 2011/11/19 17:13:39 cherry Exp $"); __KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.18 2011/12/03 22:41:40 bouyer Exp $");
#include <sys/param.h> #include <sys/param.h>
#include <sys/systm.h> #include <sys/systm.h>
@ -305,8 +305,9 @@ hypervisor_send_event(struct cpu_info *ci, unsigned int ev)
if (__predict_true(ci == curcpu())) { if (__predict_true(ci == curcpu())) {
hypervisor_force_callback(); hypervisor_force_callback();
} else { } else {
if (xen_send_ipi(ci, XEN_IPI_HVCB)) { if (__predict_false(xen_send_ipi(ci, XEN_IPI_HVCB))) {
panic("xen_send_ipi(cpu%d, XEN_IPI_HVCB) failed\n", (int) ci->ci_cpuid); panic("xen_send_ipi(cpu%d, XEN_IPI_HVCB) failed\n",
(int) ci->ci_cpuid);
} }
} }
} }
@ -315,7 +316,9 @@ void
hypervisor_unmask_event(unsigned int ev) hypervisor_unmask_event(unsigned int ev)
{ {
volatile shared_info_t *s = HYPERVISOR_shared_info; volatile shared_info_t *s = HYPERVISOR_shared_info;
volatile struct vcpu_info *vci = curcpu()->ci_vcpu; CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
volatile struct vcpu_info *vci;
#ifdef PORT_DEBUG #ifdef PORT_DEBUG
if (ev == PORT_DEBUG) if (ev == PORT_DEBUG)
@ -328,11 +331,28 @@ hypervisor_unmask_event(unsigned int ev)
* 'hw_resend_irq'. Just like a real IO-APIC we 'lose the * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
* interrupt edge' if the channel is masked. * interrupt edge' if the channel is masked.
*/ */
if (xen_atomic_test_bit(&s->evtchn_pending[0], ev) && if (!xen_atomic_test_bit(&s->evtchn_pending[0], ev))
!xen_atomic_test_and_set_bit(&vci->evtchn_pending_sel, ev>>LONG_SHIFT)) { return;
xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
if (!vci->evtchn_upcall_mask) for (CPU_INFO_FOREACH(cii, ci)) {
hypervisor_force_callback(); if (!xen_atomic_test_bit(&ci->ci_evtmask[0], ev))
continue;
vci = ci->ci_vcpu;
if (!xen_atomic_test_and_set_bit(&vci->evtchn_pending_sel,
ev>>LONG_SHIFT))
xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
if (!vci->evtchn_upcall_mask) {
if (__predict_true(ci == curcpu())) {
hypervisor_force_callback();
} else {
if (__predict_false(
xen_send_ipi(ci, XEN_IPI_HVCB))) {
panic("xen_send_ipi(cpu%d, "
"XEN_IPI_HVCB) failed\n",
(int) ci->ci_cpuid);
}
}
}
} }
} }

View File

@ -1,4 +1,4 @@
/* $NetBSD: xenevt.c,v 1.38 2011/08/11 17:59:00 cherry Exp $ */ /* $NetBSD: xenevt.c,v 1.39 2011/12/03 22:41:40 bouyer Exp $ */
/* /*
* Copyright (c) 2005 Manuel Bouyer. * Copyright (c) 2005 Manuel Bouyer.
@ -26,7 +26,7 @@
*/ */
#include <sys/cdefs.h> #include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xenevt.c,v 1.38 2011/08/11 17:59:00 cherry Exp $"); __KERNEL_RCSID(0, "$NetBSD: xenevt.c,v 1.39 2011/12/03 22:41:40 bouyer Exp $");
#include "opt_xen.h" #include "opt_xen.h"
#include <sys/param.h> #include <sys/param.h>
@ -112,7 +112,9 @@ struct xenevt_d {
u_int ring_write; /* pointer of the writer */ u_int ring_write; /* pointer of the writer */
u_int flags; u_int flags;
#define XENEVT_F_OVERFLOW 0x01 /* ring overflow */ #define XENEVT_F_OVERFLOW 0x01 /* ring overflow */
#define XENEVT_F_FREE 0x02 /* free entry */
struct selinfo sel; /* used by poll */ struct selinfo sel; /* used by poll */
struct cpu_info *ci; /* prefered CPU for events for this device */
}; };
/* event -> user device mapping */ /* event -> user device mapping */
@ -123,8 +125,8 @@ static void *devevent_sih;
static kmutex_t devevent_lock; static kmutex_t devevent_lock;
static STAILQ_HEAD(, xenevt_d) devevent_pending; static STAILQ_HEAD(, xenevt_d) devevent_pending;
static void xenevt_donotify(struct xenevt_d *);
static void xenevt_record(struct xenevt_d *, evtchn_port_t); static void xenevt_record(struct xenevt_d *, evtchn_port_t);
static void xenevt_free(struct xenevt_d *);
/* pending events */ /* pending events */
long xenevt_ev1; long xenevt_ev1;
@ -160,7 +162,7 @@ xenevtattach(int n)
ih->ih_fun = ih->ih_realfun = xenevt_processevt; ih->ih_fun = ih->ih_realfun = xenevt_processevt;
ih->ih_arg = ih->ih_realarg = NULL; ih->ih_arg = ih->ih_realarg = NULL;
ih->ih_ipl_next = NULL; ih->ih_ipl_next = NULL;
ih->ih_cpu = curcpu(); ih->ih_cpu = &cpu_info_primary;
#ifdef MULTIPROCESSOR #ifdef MULTIPROCESSOR
if (!mpsafe) { if (!mpsafe) {
ih->ih_fun = intr_biglock_wrapper; ih->ih_fun = intr_biglock_wrapper;
@ -177,9 +179,9 @@ xenevtattach(int n)
void void
xenevt_setipending(int l1, int l2) xenevt_setipending(int l1, int l2)
{ {
xenevt_ev1 |= 1UL << l1; atomic_or_ulong(&xenevt_ev1, 1UL << l1);
xenevt_ev2[l1] |= 1UL << l2; atomic_or_ulong(&xenevt_ev2[l1], 1UL << l2);
curcpu()/*XXX*/->ci_ipending |= 1 << IPL_HIGH; atomic_or_32(&cpu_info_primary.ci_ipending, 1 << IPL_HIGH);
} }
/* process pending events */ /* process pending events */
@ -213,21 +215,20 @@ xenevt_event(int port)
{ {
struct xenevt_d *d; struct xenevt_d *d;
mutex_enter(&devevent_lock);
d = devevent[port]; d = devevent[port];
if (d != NULL) { if (d != NULL) {
xenevt_record(d, port); xenevt_record(d, port);
if (d->pending) { if (d->pending == false) {
STAILQ_INSERT_TAIL(&devevent_pending, d, pendingq);
d->pending = true;
mutex_exit(&devevent_lock);
softint_schedule(devevent_sih);
return; return;
} }
mutex_enter(&devevent_lock);
STAILQ_INSERT_TAIL(&devevent_pending, d, pendingq);
d->pending = true;
mutex_exit(&devevent_lock);
softint_schedule(devevent_sih);
} }
mutex_exit(&devevent_lock);
} }
void void
@ -244,22 +245,19 @@ xenevt_notify(void)
} }
STAILQ_REMOVE_HEAD(&devevent_pending, pendingq); STAILQ_REMOVE_HEAD(&devevent_pending, pendingq);
d->pending = false; d->pending = false;
mutex_exit(&devevent_lock); mutex_enter(&d->lock);
if (d->flags & XENEVT_F_FREE) {
xenevt_donotify(d); xenevt_free(d);
mutex_exit(&devevent_lock);
} else {
mutex_exit(&devevent_lock);
selnotify(&d->sel, 0, 1);
cv_broadcast(&d->cv);
mutex_exit(&d->lock);
}
} }
} }
static void
xenevt_donotify(struct xenevt_d *d)
{
mutex_enter(&d->lock);
selnotify(&d->sel, 0, 1);
cv_broadcast(&d->cv);
mutex_exit(&d->lock);
}
static void static void
xenevt_record(struct xenevt_d *d, evtchn_port_t port) xenevt_record(struct xenevt_d *d, evtchn_port_t port)
{ {
@ -270,6 +268,7 @@ xenevt_record(struct xenevt_d *d, evtchn_port_t port)
* complex * complex
*/ */
mutex_enter(&d->lock);
if (d->ring_read == if (d->ring_read ==
((d->ring_write + 1) & XENEVT_RING_MASK)) { ((d->ring_write + 1) & XENEVT_RING_MASK)) {
d->flags |= XENEVT_F_OVERFLOW; d->flags |= XENEVT_F_OVERFLOW;
@ -278,6 +277,7 @@ xenevt_record(struct xenevt_d *d, evtchn_port_t port)
d->ring[d->ring_write] = port; d->ring[d->ring_write] = port;
d->ring_write = (d->ring_write + 1) & XENEVT_RING_MASK; d->ring_write = (d->ring_write + 1) & XENEVT_RING_MASK;
} }
mutex_exit(&d->lock);
} }
/* open the xenevt device; this is where we clone */ /* open the xenevt device; this is where we clone */
@ -295,7 +295,8 @@ xenevtopen(dev_t dev, int flags, int mode, struct lwp *l)
return error; return error;
d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_ZERO); d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_ZERO);
mutex_init(&d->lock, MUTEX_DEFAULT, IPL_SOFTSERIAL); d->ci = &cpu_info_primary;
mutex_init(&d->lock, MUTEX_DEFAULT, IPL_HIGH);
cv_init(&d->cv, "xenevt"); cv_init(&d->cv, "xenevt");
selinit(&d->sel); selinit(&d->sel);
return fd_clone(fp, fd, flags, &xenevt_fileops, d); return fd_clone(fp, fd, flags, &xenevt_fileops, d);
@ -350,11 +351,12 @@ xenevtmmap(dev_t dev, off_t off, int prot)
return -1; return -1;
} }
static int static void
xenevt_fclose(struct file *fp) xenevt_free(struct xenevt_d *d)
{ {
struct xenevt_d *d = fp->f_data;
int i; int i;
KASSERT(mutex_owned(&devevent_lock));
KASSERT(mutex_owned(&d->lock));
for (i = 0; i < NR_EVENT_CHANNELS; i++ ) { for (i = 0; i < NR_EVENT_CHANNELS; i++ ) {
if (devevent[i] == d) { if (devevent[i] == d) {
@ -362,6 +364,7 @@ xenevt_fclose(struct file *fp)
int error; int error;
hypervisor_mask_event(i); hypervisor_mask_event(i);
xen_atomic_clear_bit(&d->ci->ci_evtmask[0], i);
devevent[i] = NULL; devevent[i] = NULL;
op.cmd = EVTCHNOP_close; op.cmd = EVTCHNOP_close;
@ -372,12 +375,29 @@ xenevt_fclose(struct file *fp)
} }
} }
} }
mutex_exit(&d->lock);
seldestroy(&d->sel); seldestroy(&d->sel);
cv_destroy(&d->cv); cv_destroy(&d->cv);
mutex_destroy(&d->lock); mutex_destroy(&d->lock);
fp->f_data = NULL;
free(d, M_DEVBUF); free(d, M_DEVBUF);
}
static int
xenevt_fclose(struct file *fp)
{
struct xenevt_d *d = fp->f_data;
mutex_enter(&devevent_lock);
mutex_enter(&d->lock);
if (d->pending) {
d->flags |= XENEVT_F_FREE;
mutex_exit(&d->lock);
} else {
xenevt_free(d);
}
mutex_exit(&devevent_lock);
fp->f_data = NULL;
return (0); return (0);
} }
@ -467,12 +487,14 @@ xenevt_fwrite(struct file *fp, off_t *offp, struct uio *uio,
error = uiomove(chans, uio->uio_resid, uio); error = uiomove(chans, uio->uio_resid, uio);
if (error) if (error)
goto out; goto out;
mutex_enter(&devevent_lock);
for (i = 0; i < nentries; i++) { for (i = 0; i < nentries; i++) {
if (chans[i] < NR_EVENT_CHANNELS && if (chans[i] < NR_EVENT_CHANNELS &&
devevent[chans[i]] == d) { devevent[chans[i]] == d) {
hypervisor_unmask_event(chans[i]); hypervisor_unmask_event(chans[i]);
} }
} }
mutex_exit(&devevent_lock);
out: out:
kmem_free(chans, nentries * sizeof(uint16_t)); kmem_free(chans, nentries * sizeof(uint16_t));
return 0; return 0;
@ -488,8 +510,10 @@ xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
switch(cmd) { switch(cmd) {
case EVTCHN_RESET: case EVTCHN_RESET:
case IOCTL_EVTCHN_RESET: case IOCTL_EVTCHN_RESET:
mutex_enter(&d->lock);
d->ring_read = d->ring_write = 0; d->ring_read = d->ring_write = 0;
d->flags = 0; d->flags = 0;
mutex_exit(&d->lock);
break; break;
case IOCTL_EVTCHN_BIND_VIRQ: case IOCTL_EVTCHN_BIND_VIRQ:
{ {
@ -502,7 +526,11 @@ xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
return -error; return -error;
} }
bind_virq->port = op.u.bind_virq.port; bind_virq->port = op.u.bind_virq.port;
mutex_enter(&devevent_lock);
KASSERT(devevent[bind_virq->port] == NULL);
devevent[bind_virq->port] = d; devevent[bind_virq->port] = d;
mutex_exit(&devevent_lock);
xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_virq->port);
hypervisor_unmask_event(bind_virq->port); hypervisor_unmask_event(bind_virq->port);
break; break;
} }
@ -515,7 +543,11 @@ xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
if ((error = HYPERVISOR_event_channel_op(&op))) if ((error = HYPERVISOR_event_channel_op(&op)))
return -error; return -error;
bind_intd->port = op.u.bind_interdomain.local_port; bind_intd->port = op.u.bind_interdomain.local_port;
mutex_enter(&devevent_lock);
KASSERT(devevent[bind_intd->port] == NULL);
devevent[bind_intd->port] = d; devevent[bind_intd->port] = d;
mutex_exit(&devevent_lock);
xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_intd->port);
hypervisor_unmask_event(bind_intd->port); hypervisor_unmask_event(bind_intd->port);
break; break;
} }
@ -528,7 +560,11 @@ xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
if ((error = HYPERVISOR_event_channel_op(&op))) if ((error = HYPERVISOR_event_channel_op(&op)))
return -error; return -error;
bind_unbound->port = op.u.alloc_unbound.port; bind_unbound->port = op.u.alloc_unbound.port;
mutex_enter(&devevent_lock);
KASSERT(devevent[bind_unbound->port] == NULL);
devevent[bind_unbound->port] = d; devevent[bind_unbound->port] = d;
mutex_exit(&devevent_lock);
xen_atomic_set_bit(&d->ci->ci_evtmask[0], bind_unbound->port);
hypervisor_unmask_event(bind_unbound->port); hypervisor_unmask_event(bind_unbound->port);
break; break;
} }
@ -538,10 +574,15 @@ xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
if (unbind->port > NR_EVENT_CHANNELS) if (unbind->port > NR_EVENT_CHANNELS)
return EINVAL; return EINVAL;
if (devevent[unbind->port] != d) mutex_enter(&devevent_lock);
if (devevent[unbind->port] != d) {
mutex_exit(&devevent_lock);
return ENOTCONN; return ENOTCONN;
}
devevent[unbind->port] = NULL; devevent[unbind->port] = NULL;
mutex_exit(&devevent_lock);
hypervisor_mask_event(unbind->port); hypervisor_mask_event(unbind->port);
xen_atomic_clear_bit(&d->ci->ci_evtmask[0], unbind->port);
op.cmd = EVTCHNOP_close; op.cmd = EVTCHNOP_close;
op.u.close.port = unbind->port; op.u.close.port = unbind->port;
if ((error = HYPERVISOR_event_channel_op(&op))) if ((error = HYPERVISOR_event_channel_op(&op)))
@ -554,9 +595,13 @@ xenevt_fioctl(struct file *fp, u_long cmd, void *addr)
if (notify->port > NR_EVENT_CHANNELS) if (notify->port > NR_EVENT_CHANNELS)
return EINVAL; return EINVAL;
if (devevent[notify->port] != d) mutex_enter(&devevent_lock);
if (devevent[notify->port] != d) {
mutex_exit(&devevent_lock);
return ENOTCONN; return ENOTCONN;
}
hypervisor_notify_via_evtchn(notify->port); hypervisor_notify_via_evtchn(notify->port);
mutex_exit(&devevent_lock);
break; break;
} }
case FIONBIO: case FIONBIO: