Implement feature-rx-copy support in xennet (domU network frontend).

Rather than flipping pages back and forth between dom0 and domU for the
network RX queue, feature-rx-copy tells the frontend to use content
copies.

This is the only mode supported by the dom0 Linux pv_ops backend. NetBSD
domU and dom0 can still fall back to flipping when needed.

Copying is expected to be faster than flipping, as it requires neither
MMU manipulation nor TLB shootdowns.
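
The negotiation itself is a small xenstore handshake. The snippet below is
a condensed sketch of the xennet_xenbus_resume() change in the diff that
follows (the xenbus transaction setup, error handling and ring setup are
elided, so it is illustrative rather than compilable on its own):

    unsigned long rx_copy;

    /* Does the backend offer copy mode? A missing key means "no". */
    if (xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
        "feature-rx-copy", &rx_copy, 10) != 0)
            rx_copy = 0;
    sc->sc_rx_feature = (rx_copy == 1) ? FEATURE_RX_COPY : FEATURE_RX_FLIP;

    /* Tell the backend, inside the xenbus transaction, which mode we picked. */
    xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
        "request-rx-copy", "%lu", rx_copy);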

Based on a patch provided by Hideki ONO. Thanks!

See also http://mail-index.netbsd.org/port-xen/2010/09/24/msg006265.html
and http://mail-index.netbsd.org/port-xen/2010/10/16/msg006312.html

ok bouyer@.

XXX will ask for a pull-up after 5.1 is branched.
jym 2010-10-17 17:10:44 +00:00
parent 5b00191068
commit 5c3f6e3eb4

--- if_xennet_xenbus.c (revision 1.44)
+++ if_xennet_xenbus.c (revision 1.45)

@@ -1,4 +1,4 @@
-/* $NetBSD: if_xennet_xenbus.c,v 1.44 2010/10/16 00:20:05 jym Exp $ */
+/* $NetBSD: if_xennet_xenbus.c,v 1.45 2010/10/17 17:10:44 jym Exp $ */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -85,7 +85,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.44 2010/10/16 00:20:05 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.45 2010/10/17 17:10:44 jym Exp $");
 
 #include "opt_xen.h"
 #include "opt_nfs_boot.h"
@@ -199,6 +199,9 @@ struct xennet_xenbus_softc {
 #define BEST_DISCONNECTED 1
 #define BEST_CONNECTED    2
 #define BEST_SUSPENDED    3
+    unsigned long sc_rx_feature;
+#define FEATURE_RX_FLIP   0
+#define FEATURE_RX_COPY   1
 #if NRND > 0
     rndsource_element_t sc_rnd_source;
 #endif
@@ -430,6 +433,7 @@ xennet_xenbus_resume(void *p)
 {
     struct xennet_xenbus_softc *sc = p;
     struct xenbus_transaction *xbt;
+    unsigned long rx_copy;
     int error;
     netif_tx_sring_t *tx_ring;
     netif_rx_sring_t *rx_ring;
@@ -439,7 +443,6 @@ xennet_xenbus_resume(void *p)
     sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
     sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
-
 
     /* setup device: alloc event channel and shared rings */
     tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
         UVM_KMF_WIRED | UVM_KMF_ZERO);
@@ -469,6 +472,19 @@ xennet_xenbus_resume(void *p)
     event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
         IPL_NET, device_xname(sc->sc_dev));
 
+    error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+        "feature-rx-copy", &rx_copy, 10);
+    if (error)
+        rx_copy = 0; /* default value if key is absent */
+
+    if (rx_copy == 1) {
+        aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
+        sc->sc_rx_feature = FEATURE_RX_COPY;
+    } else {
+        aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
+        sc->sc_rx_feature = FEATURE_RX_FLIP;
+    }
+
 again:
     xbt = xenbus_transaction_start();
     if (xbt == NULL)
@@ -485,6 +501,12 @@ again:
         errmsg = "writing rx ring-ref";
         goto abort_transaction;
     }
+    error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
+        "request-rx-copy", "%lu", rx_copy);
+    if (error) {
+        errmsg = "writing request-rx-copy";
+        goto abort_transaction;
+    }
     error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
         "feature-rx-notify", "%u", 1);
     if (error) {
@@ -553,9 +575,11 @@ xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
     RING_IDX i;
     struct xennet_rxreq *req;
     struct xen_memory_reservation reservation;
-    int s1, s2;
+    int s1, s2, otherend_id;
     paddr_t pfn;
 
+    otherend_id = sc->sc_xbusd->xbusd_otherend_id;
+
     s1 = splnet();
     for (i = 0; sc->sc_free_rxreql != 0; i++) {
         req = SLIST_FIRST(&sc->sc_rxreq_head);
@@ -563,53 +587,80 @@ xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
         KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
         RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
             req->rxreq_id;
-        if (xengnt_grant_transfer(sc->sc_xbusd->xbusd_otherend_id,
-            &req->rxreq_gntref) != 0) {
+
+        switch (sc->sc_rx_feature) {
+        case FEATURE_RX_COPY:
+            if (xengnt_grant_access(otherend_id,
+                xpmap_ptom_masked(req->rxreq_pa),
+                0, &req->rxreq_gntref) != 0) {
+                goto out_loop;
+            }
+            break;
+        case FEATURE_RX_FLIP:
+            if (xengnt_grant_transfer(otherend_id,
+                &req->rxreq_gntref) != 0) {
+                goto out_loop;
+            }
             break;
+        default:
+            panic("%s: unsupported RX feature mode: %ld\n",
+                __func__, sc->sc_rx_feature);
         }
+
         RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
             req->rxreq_gntref;
+
         SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
         sc->sc_free_rxreql--;
-        /* unmap the page */
-        MULTI_update_va_mapping(&rx_mcl[i], req->rxreq_va, 0, 0);
-        /*
-         * Remove this page from pseudo phys map before
-         * passing back to Xen.
-         */
-        pfn = (req->rxreq_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
-        xennet_pages[i] = xpmap_phys_to_machine_mapping[pfn];
-        xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+
+        if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
+            /* unmap the page */
+            MULTI_update_va_mapping(&rx_mcl[i],
+                req->rxreq_va, 0, 0);
+            /*
+             * Remove this page from pseudo phys map before
+             * passing back to Xen.
+             */
+            pfn = (req->rxreq_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
+            xennet_pages[i] = xpmap_phys_to_machine_mapping[pfn];
+            xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+        }
     }
+
+out_loop:
     if (i == 0) {
         splx(s1);
         return;
     }
-    /* also make sure to flush all TLB entries */
-    rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
-    /*
-     * We may have allocated buffers which have entries
-     * outstanding in the page update queue -- make sure we flush
-     * those first!
-     */
-    s2 = splvm();
-    xpq_flush_queue();
-    splx(s2);
-    /* now decrease reservation */
-    xenguest_handle(reservation.extent_start) = xennet_pages;
-    reservation.nr_extents = i;
-    reservation.extent_order = 0;
-    reservation.address_bits = 0;
-    reservation.domid = DOMID_SELF;
-    rx_mcl[i].op = __HYPERVISOR_memory_op;
-    rx_mcl[i].args[0] = XENMEM_decrease_reservation;
-    rx_mcl[i].args[1] = (unsigned long)&reservation;
-    HYPERVISOR_multicall(rx_mcl, i+1);
-    if (__predict_false(rx_mcl[i].result != i)) {
-        panic("xennet_alloc_rx_buffer: XENMEM_decrease_reservation");
+
+    if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
+        /* also make sure to flush all TLB entries */
+        rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+            UVMF_TLB_FLUSH | UVMF_ALL;
+        /*
+         * We may have allocated buffers which have entries
+         * outstanding in the page update queue -- make sure we flush
+         * those first!
+         */
+        s2 = splvm();
+        xpq_flush_queue();
+        splx(s2);
+        /* now decrease reservation */
+        xenguest_handle(reservation.extent_start) = xennet_pages;
+        reservation.nr_extents = i;
+        reservation.extent_order = 0;
+        reservation.address_bits = 0;
+        reservation.domid = DOMID_SELF;
+        rx_mcl[i].op = __HYPERVISOR_memory_op;
+        rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+        rx_mcl[i].args[1] = (unsigned long)&reservation;
+        HYPERVISOR_multicall(rx_mcl, i+1);
+        if (__predict_false(rx_mcl[i].result != i)) {
+            panic("xennet_alloc_rx_buffer: "
+                "XENMEM_decrease_reservation");
+        }
     }
+
     sc->sc_rx_ring.req_prod_pvt = req_prod + i;
     RING_PUSH_REQUESTS(&sc->sc_rx_ring);
@@ -652,44 +703,57 @@ xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
             SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
                 rxreq_next);
             sc->sc_free_rxreql++;
 
-            ma = xengnt_revoke_transfer(rxreq->rxreq_gntref);
-            rxreq->rxreq_gntref = GRANT_INVALID_REF;
-            if (ma == 0) {
-                u_long pfn;
-                struct xen_memory_reservation xenres;
-                /*
-                 * transfer not complete, we lost the page.
-                 * Get one from hypervisor
-                 */
-                xenguest_handle(xenres.extent_start) = &pfn;
-                xenres.nr_extents = 1;
-                xenres.extent_order = 0;
-                xenres.address_bits = 31;
-                xenres.domid = DOMID_SELF;
-                if (HYPERVISOR_memory_op(
-                    XENMEM_increase_reservation, &xenres) < 0) {
-                    panic("xennet_free_rx_buffer: "
-                        "can't get memory back");
-                }
-                ma = pfn;
-                KASSERT(ma != 0);
-            }
-            pa = rxreq->rxreq_pa;
-            va = rxreq->rxreq_va;
-            /* remap the page */
-            mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-            mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
-            MULTI_update_va_mapping(&mcl[0], va,
-                (ma << PAGE_SHIFT) | PG_V | PG_KW,
-                UVMF_TLB_FLUSH|UVMF_ALL);
-            xpmap_phys_to_machine_mapping[
-                (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
-            mcl[1].op = __HYPERVISOR_mmu_update;
-            mcl[1].args[0] = (unsigned long)mmu;
-            mcl[1].args[1] = 1;
-            mcl[1].args[2] = 0;
-            mcl[1].args[3] = DOMID_SELF;
-            HYPERVISOR_multicall(mcl, 2);
+            switch (sc->sc_rx_feature) {
+            case FEATURE_RX_COPY:
+                xengnt_revoke_access(rxreq->rxreq_gntref);
+                rxreq->rxreq_gntref = GRANT_INVALID_REF;
+                break;
+            case FEATURE_RX_FLIP:
+                ma = xengnt_revoke_transfer(
+                    rxreq->rxreq_gntref);
+                rxreq->rxreq_gntref = GRANT_INVALID_REF;
+                if (ma == 0) {
+                    u_long pfn;
+                    struct xen_memory_reservation xenres;
+                    /*
+                     * transfer not complete, we lost the page.
+                     * Get one from hypervisor
+                     */
+                    xenguest_handle(xenres.extent_start) = &pfn;
+                    xenres.nr_extents = 1;
+                    xenres.extent_order = 0;
+                    xenres.address_bits = 31;
+                    xenres.domid = DOMID_SELF;
+                    if (HYPERVISOR_memory_op(
+                        XENMEM_increase_reservation, &xenres) < 0) {
+                        panic("xennet_free_rx_buffer: "
+                            "can't get memory back");
+                    }
+                    ma = pfn;
+                    KASSERT(ma != 0);
+                }
+                pa = rxreq->rxreq_pa;
+                va = rxreq->rxreq_va;
+                /* remap the page */
+                mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+                mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
+                MULTI_update_va_mapping(&mcl[0], va,
+                    (ma << PAGE_SHIFT) | PG_V | PG_KW,
+                    UVMF_TLB_FLUSH|UVMF_ALL);
+                xpmap_phys_to_machine_mapping[
+                    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
+                mcl[1].op = __HYPERVISOR_mmu_update;
+                mcl[1].args[0] = (unsigned long)mmu;
+                mcl[1].args[1] = 1;
+                mcl[1].args[2] = 0;
+                mcl[1].args[3] = DOMID_SELF;
+                HYPERVISOR_multicall(mcl, 2);
+                break;
+            default:
+                panic("%s: unsupported RX feature mode: %ld\n",
+                    __func__, sc->sc_rx_feature);
+            }
         }
     }
@@ -820,41 +884,58 @@ again:
         req = &sc->sc_rxreqs[rx->id];
         KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
         KASSERT(req->rxreq_id == rx->id);
-        ma = xengnt_revoke_transfer(req->rxreq_gntref);
-        if (ma == 0) {
-            DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
-            /*
-             * the remote could't send us a packet.
-             * we can't free this rxreq as no page will be mapped
-             * here. Instead give it back immediatly to backend.
-             */
-            ifp->if_ierrors++;
-            RING_GET_REQUEST(&sc->sc_rx_ring,
-                sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
-            RING_GET_REQUEST(&sc->sc_rx_ring,
-                sc->sc_rx_ring.req_prod_pvt)->gref =
-                    req->rxreq_gntref;
-            sc->sc_rx_ring.req_prod_pvt++;
-            RING_PUSH_REQUESTS(&sc->sc_rx_ring);
-            continue;
+
+        ma = 0;
+        switch (sc->sc_rx_feature) {
+        case FEATURE_RX_COPY:
+            xengnt_revoke_access(req->rxreq_gntref);
+            break;
+        case FEATURE_RX_FLIP:
+            ma = xengnt_revoke_transfer(req->rxreq_gntref);
+            if (ma == 0) {
+                DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
+                /*
+                 * the remote could't send us a packet.
+                 * we can't free this rxreq as no page will be mapped
+                 * here. Instead give it back immediatly to backend.
+                 */
+                ifp->if_ierrors++;
+                RING_GET_REQUEST(&sc->sc_rx_ring,
+                    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
+                RING_GET_REQUEST(&sc->sc_rx_ring,
+                    sc->sc_rx_ring.req_prod_pvt)->gref =
+                        req->rxreq_gntref;
+                sc->sc_rx_ring.req_prod_pvt++;
+                RING_PUSH_REQUESTS(&sc->sc_rx_ring);
+                continue;
+            }
+            break;
+        default:
+            panic("%s: unsupported RX feature mode: %ld\n",
+                __func__, sc->sc_rx_feature);
         }
+
         req->rxreq_gntref = GRANT_INVALID_REF;
 
         pa = req->rxreq_pa;
         va = req->rxreq_va;
-        /* remap the page */
-        mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
-        mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
-        MULTI_update_va_mapping(&mcl[0], va,
-            (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
-        xpmap_phys_to_machine_mapping[
-            (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
-        mcl[1].op = __HYPERVISOR_mmu_update;
-        mcl[1].args[0] = (unsigned long)mmu;
-        mcl[1].args[1] = 1;
-        mcl[1].args[2] = 0;
-        mcl[1].args[3] = DOMID_SELF;
-        HYPERVISOR_multicall(mcl, 2);
+
+        if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
+            /* remap the page */
+            mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+            mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
+            MULTI_update_va_mapping(&mcl[0], va,
+                (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
+            xpmap_phys_to_machine_mapping[
+                (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
+            mcl[1].op = __HYPERVISOR_mmu_update;
+            mcl[1].args[0] = (unsigned long)mmu;
+            mcl[1].args[1] = 1;
+            mcl[1].args[2] = 0;
+            mcl[1].args[3] = DOMID_SELF;
+            HYPERVISOR_multicall(mcl, 2);
+        }
+
         pktp = (void *)(va + rx->offset);
 #ifdef XENNET_DEBUG_DUMP
         xennet_hex_dump(pktp, rx->status, "r", rx->id);