commit dd0c84983d
Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging

# gpg: Signature made Mon 18 Sep 2023 02:56:11 EDT
# gpg:                using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [full]
# Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211

* tag 'net-pull-request' of https://github.com/jasowang/qemu:
  net/tap: Avoid variable-length array
  net/dump: Avoid variable length array
  hw/net/rocker: Avoid variable length array
  hw/net/fsl_etsec/rings.c: Avoid variable length array
  net: add initial support for AF_XDP network backend
  tests: bump libvirt-ci for libasan and libxdp
  e1000e: rename e1000e_ba_state and e1000e_write_hdr_to_rx_buffers
  igb: packet-split descriptors support
  igb: add IPv6 extended headers traffic detection
  igb: RX payload guest writting refactoring
  igb: RX descriptors guest writting refactoring
  igb: rename E1000E_RingInfo_st
  igb: remove TCP ACK detection
  virtio-net: Add support for USO features
  virtio-net: Add USO flags to vhost support.
  tap: Add check for USO features
  tap: Add USO support to tap device.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
@@ -2957,6 +2957,10 @@ W: http://info.iet.unipi.it/~luigi/netmap/
 S: Maintained
 F: net/netmap.c
 
+AF_XDP network backend
+R: Ilya Maximets <i.maximets@ovn.org>
+F: net/af-xdp.c
+
 Host Memory Backends
 M: David Hildenbrand <david@redhat.com>
 M: Igor Mammedov <imammedo@redhat.com>
@@ -1296,6 +1296,9 @@ ERST
         .name       = "netdev_add",
         .args_type  = "netdev:O",
         .params     = "[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user"
+#ifdef CONFIG_AF_XDP
+                      "|af-xdp"
+#endif
 #ifdef CONFIG_VMNET
                       "|vmnet-host|vmnet-shared|vmnet-bridged"
 #endif
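The #ifdef'd fragments above rely on C's adjacent-string-literal concatenation: "|af-xdp" is spliced into the monitor's netdev_add help text only when the backend is compiled in. A minimal, self-contained sketch of that mechanism (illustrative, not QEMU code):

    #include <stdio.h>

    #define CONFIG_AF_XDP 1   /* pretend the backend was enabled at configure time */

    static const char *params =
        "[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user"
    #ifdef CONFIG_AF_XDP
        "|af-xdp"
    #endif
        "]";

    int main(void)
    {
        puts(params);   /* the literals are concatenated at compile time */
        return 0;
    }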
@@ -38,6 +38,7 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-net.h"
 
 GlobalProperty hw_compat_8_1[] = {};
 const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
@@ -45,6 +46,9 @@ const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
 GlobalProperty hw_compat_8_0[] = {
     { "migration", "multifd-flush-after-each-section", "on"},
     { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
+    { TYPE_VIRTIO_NET, "host_uso", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso4", "off"},
+    { TYPE_VIRTIO_NET, "guest_uso6", "off"},
 };
 const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
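The three hw_compat_8_0 entries keep the new USO properties off for 8.0-and-older machine types, so migration from an older QEMU does not change the guest-visible feature set. A rough model of what such a compat table expresses (illustrative only; "virtio-net-device" standing in for TYPE_VIRTIO_NET is this sketch's assumption):

    #include <stdio.h>
    #include <string.h>

    typedef struct { const char *driver, *property, *value; } CompatProp;

    static const CompatProp hw_compat_8_0[] = {
        { "virtio-net-device", "host_uso",   "off" },
        { "virtio-net-device", "guest_uso4", "off" },
        { "virtio-net-device", "guest_uso6", "off" },
    };

    /* Returns the compat override for a property, or NULL to use the
     * device's normal default ("on" for the USO properties). */
    static const char *compat_value(const char *drv, const char *prop)
    {
        for (size_t i = 0; i < sizeof(hw_compat_8_0) / sizeof(hw_compat_8_0[0]); i++) {
            if (!strcmp(hw_compat_8_0[i].driver, drv) &&
                !strcmp(hw_compat_8_0[i].property, prop)) {
                return hw_compat_8_0[i].value;
            }
        }
        return NULL;
    }

    int main(void)
    {
        printf("host_uso on an 8.0 machine: %s\n",
               compat_value("virtio-net-device", "host_uso"));   /* off */
        return 0;
    }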
@@ -810,24 +810,24 @@ e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base,
     return e1000e_tx_wb_interrupt_cause(core, queue_idx);
 }
 
-typedef struct E1000E_RingInfo_st {
+typedef struct E1000ERingInfo {
     int dbah;
     int dbal;
     int dlen;
     int dh;
     int dt;
     int idx;
-} E1000E_RingInfo;
+} E1000ERingInfo;
 
 static inline bool
-e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_empty(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dh] == core->mac[r->dt] ||
            core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 }
 
 static inline uint64_t
-e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_base(E1000ECore *core, const E1000ERingInfo *r)
 {
     uint64_t bah = core->mac[r->dbah];
     uint64_t bal = core->mac[r->dbal];
@@ -836,13 +836,13 @@ e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
 }
 
 static inline uint64_t
-e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_head_descr(E1000ECore *core, const E1000ERingInfo *r)
 {
     return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 }
 
 static inline void
-e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
+e1000e_ring_advance(E1000ECore *core, const E1000ERingInfo *r, uint32_t count)
 {
     core->mac[r->dh] += count;
 
@@ -852,7 +852,7 @@ e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
 }
 
 static inline uint32_t
-e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_free_descr_num(E1000ECore *core, const E1000ERingInfo *r)
 {
     trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
                                  core->mac[r->dh], core->mac[r->dt]);
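The rename is mechanical, but the ring arithmetic the struct supports is worth spelling out. A self-contained sketch of the free-descriptor computation, with plain integers standing in for the dh/dt/dlen registers (this sketch's own simplification, not the device model):

    #include <stdint.h>
    #include <stdio.h>

    /* Descriptors available to the device lie between head (consumer)
     * and tail (producer), modulo the ring size. */
    static uint32_t ring_free_descr_num(uint32_t head, uint32_t tail, uint32_t num)
    {
        if (head <= tail) {
            return tail - head;
        }
        return num + tail - head;   /* tail has wrapped past the end */
    }

    int main(void)
    {
        printf("%u\n", ring_free_descr_num(2, 6, 8));   /* 4 */
        printf("%u\n", ring_free_descr_num(6, 2, 8));   /* 4, wrapped */
        return 0;
    }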
@@ -871,19 +871,19 @@ e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
 }
 
 static inline bool
-e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_enabled(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen] > 0;
 }
 
 static inline uint32_t
-e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_len(E1000ECore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen];
 }
 
 typedef struct E1000E_TxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
     struct e1000e_tx *tx;
 } E1000E_TxRing;
 
@@ -896,7 +896,7 @@ e1000e_mq_queue_idx(int base_reg_idx, int reg_idx)
 static inline void
 e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
 {
-    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+    static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
         { TDBAH, TDBAL, TDLEN, TDH, TDT, 0 },
         { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }
     };
@@ -908,13 +908,13 @@ e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
 }
 
 typedef struct E1000E_RxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
 } E1000E_RxRing;
 
 static inline void
 e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
 {
-    static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+    static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
         { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
         { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }
     };
@@ -930,7 +930,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
     dma_addr_t base;
     struct e1000_tx_desc desc;
     bool ide = false;
-    const E1000E_RingInfo *txi = txr->i;
+    const E1000ERingInfo *txi = txr->i;
     uint32_t cause = E1000_ICS_TXQE;
 
     if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
@@ -960,7 +960,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
 }
 
 static bool
-e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r,
+e1000e_has_rxbufs(E1000ECore *core, const E1000ERingInfo *r,
                   size_t total_size)
 {
     uint32_t bufs = e1000e_ring_free_descr_num(core, r);
@@ -1397,15 +1397,15 @@ e1000e_pci_dma_write_rx_desc(E1000ECore *core, dma_addr_t addr,
     }
 }
 
-typedef struct e1000e_ba_state_st {
+typedef struct E1000EBAState {
     uint16_t written[MAX_PS_BUFFERS];
     uint8_t cur_idx;
-} e1000e_ba_state;
+} E1000EBAState;
 
 static inline void
-e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
-                               hwaddr ba[MAX_PS_BUFFERS],
-                               e1000e_ba_state *bastate,
-                               const char *data,
-                               dma_addr_t data_len)
+e1000e_write_hdr_frag_to_rx_buffers(E1000ECore *core,
+                                    hwaddr ba[MAX_PS_BUFFERS],
+                                    E1000EBAState *bastate,
+                                    const char *data,
+                                    dma_addr_t data_len)
 {
@@ -1418,9 +1418,9 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
 }
 
 static void
-e1000e_write_to_rx_buffers(E1000ECore *core,
-                           hwaddr ba[MAX_PS_BUFFERS],
-                           e1000e_ba_state *bastate,
-                           const char *data,
-                           dma_addr_t data_len)
+e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
+                                        hwaddr ba[MAX_PS_BUFFERS],
+                                        E1000EBAState *bastate,
+                                        const char *data,
+                                        dma_addr_t data_len)
 {
@@ -1460,7 +1460,7 @@ e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size)
 }
 
 static inline bool
-e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi)
+e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000ERingInfo *rxi)
 {
     return e1000e_ring_free_descr_num(core, rxi) ==
            e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift;
@@ -1521,7 +1521,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
     struct iovec *iov = net_rx_pkt_get_iovec(pkt);
     size_t size = net_rx_pkt_get_total_len(pkt);
     size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000E_RingInfo *rxi;
+    const E1000ERingInfo *rxi;
     size_t ps_hdr_len = 0;
     bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len);
     bool is_first = true;
@@ -1530,7 +1530,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
 
     do {
         hwaddr ba[MAX_PS_BUFFERS];
-        e1000e_ba_state bastate = { { 0 } };
+        E1000EBAState bastate = { { 0 } };
         bool is_last = false;
 
         desc_size = total_size - desc_offset;
@@ -1568,8 +1568,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                 iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
                                iov->iov_len - iov_ofs);
 
-                e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
-                                               iov->iov_base, iov_copy);
+                e1000e_write_hdr_frag_to_rx_buffers(core, ba,
+                                                    &bastate,
+                                                    iov->iov_base,
+                                                    iov_copy);
 
                 copy_size -= iov_copy;
                 ps_hdr_copied += iov_copy;
@@ -1585,7 +1587,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
             } else {
                 /* Leave buffer 0 of each descriptor except first */
                 /* empty as per spec 7.1.5.1 */
-                e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
+                e1000e_write_hdr_frag_to_rx_buffers(core, ba, &bastate,
                                                NULL, 0);
             }
         }
@@ -1594,8 +1596,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
         while (copy_size) {
             iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 
-            e1000e_write_to_rx_buffers(core, ba, &bastate,
-                                       iov->iov_base + iov_ofs, iov_copy);
+            e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
+                                                    iov->iov_base +
+                                                    iov_ofs,
+                                                    iov_copy);
 
             copy_size -= iov_copy;
             iov_ofs += iov_copy;
@@ -1607,7 +1611,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
 
         if (desc_offset + desc_size >= total_size) {
             /* Simulate FCS checksum presence in the last descriptor */
-            e1000e_write_to_rx_buffers(core, ba, &bastate,
+            e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
                   (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
         }
     }
@@ -2852,7 +2856,7 @@ e1000e_update_rx_offloads(E1000ECore *core)
 
     if (core->has_vnet) {
         qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
-                         cso_state, 0, 0, 0, 0);
+                         cso_state, 0, 0, 0, 0, 0, 0);
     }
 }
@@ -372,6 +372,12 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr)
     etsec->regs[TSTAT].value |= 1 << (31 - ring_nbr);
 }
 
+/*
+ * rx_init_frame() ensures we never do more padding than this
+ * (checksum plus minimum data packet size)
+ */
+#define MAX_RX_PADDING 64
+
 static void fill_rx_bd(eTSEC          *etsec,
                        eTSEC_rxtx_bd  *bd,
                        const uint8_t **buf,
@@ -380,9 +386,11 @@ static void fill_rx_bd(eTSEC *etsec,
     uint16_t to_write;
     hwaddr   bufptr = bd->bufptr +
         ((hwaddr)(etsec->regs[TBDBPH].value & 0xF) << 32);
-    uint8_t  padd[etsec->rx_padding];
+    uint8_t  padd[MAX_RX_PADDING];
     uint8_t  rem;
 
+    assert(etsec->rx_padding <= MAX_RX_PADDING);
+
     RING_DEBUG("eTSEC fill Rx buffer @ 0x%016" HWADDR_PRIx
                " size:%zu(padding + crc:%u) + fcb:%u\n",
                bufptr, *size, etsec->rx_padding, etsec->rx_fcb_size);
@@ -426,7 +434,7 @@ static void fill_rx_bd(eTSEC *etsec,
     rem = MIN(etsec->regs[MRBLR].value - bd->length, etsec->rx_padding);
 
     if (rem > 0) {
-        memset(padd, 0x0, sizeof(padd));
+        memset(padd, 0x0, rem);
         etsec->rx_padding -= rem;
         *size -= rem;
         bd->length += rem;
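The fix above is the general VLA-removal pattern of this series: a guest-controlled array size becomes a compile-time bound plus an assertion, and memset only touches the bytes in use. A generic sketch with hypothetical names, not the driver's:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define MAX_PAD 64   /* upper bound guaranteed elsewhere, as by rx_init_frame() */

    static void write_padding(uint8_t *dst, size_t pad_len)
    {
        uint8_t padd[MAX_PAD];        /* fixed size: bounded stack usage */

        assert(pad_len <= MAX_PAD);   /* make the invariant explicit */
        memset(padd, 0, pad_len);     /* zero only what will be written */
        memcpy(dst, padd, pad_len);
    }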
@@ -267,6 +267,29 @@ igb_rx_use_legacy_descriptor(IGBCore *core)
     return false;
 }
 
+typedef struct E1000ERingInfo {
+    int dbah;
+    int dbal;
+    int dlen;
+    int dh;
+    int dt;
+    int idx;
+} E1000ERingInfo;
+
+static uint32_t
+igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r)
+{
+    return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
+}
+
+static bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+           desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static inline bool
 igb_rss_enabled(IGBCore *core)
 {
@@ -694,24 +717,15 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
     return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
 }
 
-typedef struct E1000E_RingInfo_st {
-    int dbah;
-    int dbal;
-    int dlen;
-    int dh;
-    int dt;
-    int idx;
-} E1000E_RingInfo;
-
 static inline bool
-igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dh] == core->mac[r->dt] ||
            core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 }
 
 static inline uint64_t
-igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_base(IGBCore *core, const E1000ERingInfo *r)
 {
     uint64_t bah = core->mac[r->dbah];
     uint64_t bal = core->mac[r->dbal];
@@ -720,13 +734,13 @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline uint64_t
-igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r)
 {
     return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 }
 
 static inline void
-igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
+igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count)
 {
     core->mac[r->dh] += count;
 
@@ -736,7 +750,7 @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
 }
 
 static inline uint32_t
-igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r)
 {
     trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
                                  core->mac[r->dh], core->mac[r->dt]);
@@ -755,13 +769,13 @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline bool
-igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen] > 0;
 }
 
 typedef struct IGB_TxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
     struct igb_tx *tx;
 } IGB_TxRing;
 
@@ -774,7 +788,7 @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx)
 static inline void
 igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 },
         { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 },
         { TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 },
@@ -800,13 +814,13 @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 }
 
 typedef struct E1000E_RxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
 } E1000E_RxRing;
 
 static inline void
 igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
         { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 },
         { RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 },
@@ -833,7 +847,7 @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 static uint32_t
 igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
                      union e1000_adv_tx_desc *tx_desc,
-                     const E1000E_RingInfo *txi)
+                     const E1000ERingInfo *txi)
 {
     PCIDevice *d;
     uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len);
@@ -866,7 +880,7 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 }
 
 static inline bool
-igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi)
 {
     bool vmdq = core->mac[MRQC] & 1;
     uint16_t qn = txi->idx;
@@ -883,7 +897,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
     PCIDevice *d;
     dma_addr_t base;
     union e1000_adv_tx_desc desc;
-    const E1000E_RingInfo *txi = txr->i;
+    const E1000ERingInfo *txi = txr->i;
     uint32_t eic = 0;
 
     if (!igb_tx_enabled(core, txi)) {
@@ -918,7 +932,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 }
 
 static uint32_t
-igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
+igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r)
 {
     uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
     uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK;
@@ -930,7 +944,7 @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static bool
-igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
+igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
 {
     uint32_t bufs = igb_ring_free_descr_num(core, r);
     uint32_t bufsize = igb_rxbufsize(core, r);
@@ -941,6 +955,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
            bufsize;
 }
 
+static uint32_t
+igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
+    return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
+           E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+}
+
 void
 igb_start_recv(IGBCore *core)
 {
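A worked example of the igb_rxhdrbufsize() decode added above. SRRCTL keeps the header-buffer size in bits 8..11 in 64-byte units, so masking and shifting right by 2 yields the byte count directly ((field << 8) >> 2 == field << 6 == field * 64). Illustrative, compilable standalone:

    #include <stdint.h>
    #include <stdio.h>

    #define SRRCTL_BSIZEHDRSIZE_MASK  0x00000F00
    #define SRRCTL_BSIZEHDRSIZE_SHIFT 2

    int main(void)
    {
        uint32_t srrctl = 0x400;   /* size field = 4 */
        uint32_t hdr_bytes = (srrctl & SRRCTL_BSIZEHDRSIZE_MASK)
                             >> SRRCTL_BSIZEHDRSIZE_SHIFT;
        printf("%u\n", hdr_bytes);   /* 256 = 4 * 64 */
        return 0;
    }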
@@ -1225,21 +1247,77 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
 }
 
 static inline void
-igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
-                      hwaddr *buff_addr)
+igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                 hwaddr *buff_addr)
 {
     *buff_addr = le64_to_cpu(desc->read.pkt_addr);
 }
 
 static inline void
-igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                  hwaddr *buff_addr)
+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                hwaddr *buff_addr)
 {
-    if (igb_rx_use_legacy_descriptor(core)) {
-        igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr);
-    } else {
-        igb_read_adv_rx_descr(core, &desc->adv, buff_addr);
-    }
+    buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+    buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
 }
 
+typedef struct IGBBAState {
+    uint16_t written[IGB_MAX_PS_BUFFERS];
+    uint8_t cur_idx;
+} IGBBAState;
+
+typedef struct IGBSplitDescriptorData {
+    bool sph;
+    bool hbo;
+    size_t hdr_len;
+} IGBSplitDescriptorData;
+
+typedef struct IGBPacketRxDMAState {
+    size_t size;
+    size_t total_size;
+    size_t ps_hdr_len;
+    size_t desc_size;
+    size_t desc_offset;
+    uint32_t rx_desc_packet_buf_size;
+    uint32_t rx_desc_header_buf_size;
+    struct iovec *iov;
+    size_t iov_ofs;
+    bool do_ps;
+    bool is_first;
+    IGBBAState bastate;
+    hwaddr ba[IGB_MAX_PS_BUFFERS];
+    IGBSplitDescriptorData ps_desc_data;
+} IGBPacketRxDMAState;
+
+static inline void
+igb_read_rx_descr(IGBCore *core,
+                  union e1000_rx_desc_union *desc,
+                  IGBPacketRxDMAState *pdma_st,
+                  const E1000ERingInfo *r)
+{
+    uint32_t desc_type;
+
+    if (igb_rx_use_legacy_descriptor(core)) {
+        igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]);
+        pdma_st->ba[0] = 0;
+        return;
+    }
+
+    /* advanced header split descriptor */
+    if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]);
+        return;
+    }
+
+    /* descriptor replication modes not supported */
+    desc_type = igb_rx_queue_desctyp_get(core, r);
+    if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
+        trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
+    }
+
+    /* advanced single buffer descriptor */
+    igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]);
+    pdma_st->ba[0] = 0;
+}
+
 static void
@@ -1281,14 +1359,10 @@ igb_verify_csum_in_sw(IGBCore *core,
 }
 
 static void
-igb_build_rx_metadata(IGBCore *core,
-                      struct NetRxPkt *pkt,
-                      bool is_eop,
-                      const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
-                      uint16_t *pkt_info, uint16_t *hdr_info,
-                      uint32_t *rss,
-                      uint32_t *status_flags,
-                      uint16_t *ip_id,
-                      uint16_t *vlan_tag)
+igb_build_rx_metadata_common(IGBCore *core,
+                             struct NetRxPkt *pkt,
+                             bool is_eop,
+                             uint32_t *status_flags,
+                             uint16_t *vlan_tag)
 {
     struct virtio_net_hdr *vhdr;
@@ -1298,7 +1372,6 @@ igb_build_rx_metadata(IGBCore *core,
     *status_flags = E1000_RXD_STAT_DD;
 
     /* No additional metadata needed for non-EOP descriptors */
-    /* TODO: EOP apply only to status so don't skip whole function. */
     if (!is_eop) {
         goto func_exit;
     }
@@ -1315,64 +1388,6 @@ igb_build_rx_metadata(IGBCore *core,
         trace_e1000e_rx_metadata_vlan(*vlan_tag);
     }
 
-    /* Packet parsing results */
-    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
-        if (rss_info->enabled) {
-            *rss = cpu_to_le32(rss_info->hash);
-            trace_igb_rx_metadata_rss(*rss);
-        }
-    } else if (hasip4) {
-        *status_flags |= E1000_RXD_STAT_IPIDV;
-        *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
-        trace_e1000e_rx_metadata_ip_id(*ip_id);
-    }
-
-    if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) {
-        *status_flags |= E1000_RXD_STAT_ACK;
-        trace_e1000e_rx_metadata_ack();
-    }
-
-    if (pkt_info) {
-        *pkt_info = rss_info->enabled ? rss_info->type : 0;
-
-        if (etqf < 8) {
-            *pkt_info |= (BIT(11) | etqf) << 4;
-        } else {
-            if (hasip4) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP4;
-            }
-
-            if (hasip6) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP6;
-            }
-
-            switch (l4hdr_proto) {
-            case ETH_L4_HDR_PROTO_TCP:
-                *pkt_info |= E1000_ADVRXD_PKT_TCP;
-                break;
-
-            case ETH_L4_HDR_PROTO_UDP:
-                *pkt_info |= E1000_ADVRXD_PKT_UDP;
-                break;
-
-            case ETH_L4_HDR_PROTO_SCTP:
-                *pkt_info |= E1000_ADVRXD_PKT_SCTP;
-                break;
-
-            default:
-                break;
-            }
-        }
-    }
-
-    if (hdr_info) {
-        *hdr_info = 0;
-    }
-
-    if (ts) {
-        *status_flags |= BIT(16);
-    }
-
     /* RX CSO information */
     if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
         trace_e1000e_rx_metadata_ipv6_sum_disabled();
@@ -1428,56 +1443,168 @@ func_exit:
 static inline void
 igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
                         struct NetRxPkt *pkt,
-                        const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
+                        const E1000E_RSSInfo *rss_info,
                         uint16_t length)
 {
-    uint32_t status_flags, rss;
-    uint16_t ip_id;
+    uint32_t status_flags;
 
     assert(!rss_info->enabled);
-    desc->length = cpu_to_le16(length);
-    desc->csum = 0;
 
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          NULL, NULL, &rss,
-                          &status_flags, &ip_id,
-                          &desc->special);
+    memset(desc, 0, sizeof(*desc));
+    desc->length = cpu_to_le16(length);
+    igb_build_rx_metadata_common(core, pkt, pkt != NULL,
+                                 &status_flags,
+                                 &desc->special);
 
     desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
     desc->status = (uint8_t) le32_to_cpu(status_flags);
 }
 
+static bool
+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
+static uint16_t
+igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
+{
+    uint16_t pkt_type;
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+
+    if (etqf < 8) {
+        pkt_type = BIT(11) | etqf;
+        return pkt_type;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
+        eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt);
+        pkt_type = ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E :
+                                               E1000_ADVRXD_PKT_IP6;
+    } else if (hasip4) {
+        pkt_type = E1000_ADVRXD_PKT_IP4;
+    } else {
+        pkt_type = 0;
+    }
+
+    switch (l4hdr_proto) {
+    case ETH_L4_HDR_PROTO_TCP:
+        pkt_type |= E1000_ADVRXD_PKT_TCP;
+        break;
+    case ETH_L4_HDR_PROTO_UDP:
+        pkt_type |= E1000_ADVRXD_PKT_UDP;
+        break;
+    case ETH_L4_HDR_PROTO_SCTP:
+        pkt_type |= E1000_ADVRXD_PKT_SCTP;
+        break;
+    default:
+        break;
+    }
+
+    return pkt_type;
+}
+
 static inline void
 igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
                        struct NetRxPkt *pkt,
                        const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
                        uint16_t length)
 {
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    uint16_t rss_type = 0, pkt_type;
+    bool eop = (pkt != NULL);
+    uint32_t adv_desc_status_error = 0;
+
     memset(&desc->wb, 0, sizeof(desc->wb));
 
     desc->wb.upper.length = cpu_to_le16(length);
-
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          &desc->wb.lower.lo_dword.pkt_info,
-                          &desc->wb.lower.lo_dword.hdr_info,
-                          &desc->wb.lower.hi_dword.rss,
-                          &desc->wb.upper.status_error,
-                          &desc->wb.lower.hi_dword.csum_ip.ip_id,
-                          &desc->wb.upper.vlan);
+    igb_build_rx_metadata_common(core, pkt, eop,
+                                 &desc->wb.upper.status_error,
+                                 &desc->wb.upper.vlan);
+
+    if (!eop) {
+        return;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
+        if (rss_info->enabled) {
+            desc->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash);
+            rss_type = rss_info->type;
+            trace_igb_rx_metadata_rss(desc->wb.lower.hi_dword.rss, rss_type);
+        }
+    } else if (hasip4) {
+        adv_desc_status_error |= E1000_RXD_STAT_IPIDV;
+        desc->wb.lower.hi_dword.csum_ip.ip_id =
+            cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
+        trace_e1000e_rx_metadata_ip_id(
+            desc->wb.lower.hi_dword.csum_ip.ip_id);
+    }
+
+    if (ts) {
+        adv_desc_status_error |= BIT(16);
+    }
+
+    pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf);
+    trace_e1000e_rx_metadata_pkt_type(pkt_type);
+    desc->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
+    desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error);
+}
+
+static inline void
+igb_write_adv_ps_rx_descr(IGBCore *core,
+                          union e1000_adv_rx_desc *desc,
+                          struct NetRxPkt *pkt,
+                          const E1000E_RSSInfo *rss_info,
+                          const E1000ERingInfo *r,
+                          uint16_t etqf,
+                          bool ts,
+                          IGBPacketRxDMAState *pdma_st)
+{
+    size_t pkt_len;
+    uint16_t hdr_info = 0;
+
+    if (pdma_st->do_ps) {
+        pkt_len = pdma_st->bastate.written[1];
+    } else {
+        pkt_len = pdma_st->bastate.written[0] + pdma_st->bastate.written[1];
+    }
+
+    igb_write_adv_rx_descr(core, desc, pkt, rss_info, etqf, ts, pkt_len);
+
+    hdr_info = (pdma_st->ps_desc_data.hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) &
+               E1000_ADVRXD_ADV_HDR_LEN_MASK;
+    hdr_info |= pdma_st->ps_desc_data.sph ? E1000_ADVRXD_HDR_SPH : 0;
+    desc->wb.lower.lo_dword.hdr_info = cpu_to_le16(hdr_info);
+
+    desc->wb.upper.status_error |= cpu_to_le32(
+        pdma_st->ps_desc_data.hbo ? E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0);
 }
 
 static inline void
-igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                   struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
-                   uint16_t etqf, bool ts, uint16_t length)
+igb_write_rx_descr(IGBCore *core,
+                   union e1000_rx_desc_union *desc,
+                   struct NetRxPkt *pkt,
+                   const E1000E_RSSInfo *rss_info,
+                   uint16_t etqf,
+                   bool ts,
+                   IGBPacketRxDMAState *pdma_st,
+                   const E1000ERingInfo *r)
 {
     if (igb_rx_use_legacy_descriptor(core)) {
         igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
-                                etqf, ts, length);
+                                pdma_st->bastate.written[1]);
+    } else if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_write_adv_ps_rx_descr(core, &desc->adv, pkt, rss_info, r, etqf, ts,
+                                  pdma_st);
     } else {
         igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
-                               etqf, ts, length);
+                               etqf, ts, pdma_st->bastate.written[1]);
     }
 }
 
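The pkt_info writeback above packs the RSS type into the low four bits and the packet-type flags above them. A small standalone illustration of that layout (the bit positions follow the new E1000_ADVRXD_PKT_* values later in this series; treat them as this sketch's assumption):

    #include <stdint.h>
    #include <stdio.h>

    #define PKT_IP4 (1u << 0)
    #define PKT_TCP (1u << 4)

    int main(void)
    {
        uint16_t rss_type = 0x1;                  /* example RSS hash type */
        uint16_t pkt_type = PKT_IP4 | PKT_TCP;    /* IPv4 + TCP */
        uint16_t pkt_info = rss_type | (pkt_type << 4);
        printf("0x%04x\n", pkt_info);             /* 0x0111 */
        return 0;
    }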
@@ -1514,20 +1641,7 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr,
 }
 
 static void
-igb_write_to_rx_buffers(IGBCore *core,
-                        PCIDevice *d,
-                        hwaddr ba,
-                        uint16_t *written,
-                        const char *data,
-                        dma_addr_t data_len)
-{
-    trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
-    pci_dma_write(d, ba + *written, data, data_len);
-    *written += data_len;
-}
-
-static void
-igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
                     size_t pkt_size, size_t pkt_fcs_size)
 {
     eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
@@ -1545,12 +1659,256 @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
 }
 
 static inline bool
-igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
 {
     return igb_ring_free_descr_num(core, rxi) ==
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
 }
 
+static bool
+igb_do_ps(IGBCore *core,
+          const E1000ERingInfo *r,
+          struct NetRxPkt *pkt,
+          IGBPacketRxDMAState *pdma_st)
+{
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    bool fragment;
+    bool split_always;
+    size_t bheader_size;
+    size_t total_pkt_len;
+
+    if (!igb_rx_use_ps_descriptor(core, r)) {
+        return false;
+    }
+
+    total_pkt_len = net_rx_pkt_get_total_len(pkt);
+    bheader_size = igb_rxhdrbufsize(core, r);
+    split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (split_always && total_pkt_len <= bheader_size) {
+        pdma_st->ps_hdr_len = total_pkt_len;
+        pdma_st->ps_desc_data.hdr_len = total_pkt_len;
+        return true;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip4) {
+        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+    } else if (hasip6) {
+        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+    } else {
+        pdma_st->ps_desc_data.hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+        pdma_st->ps_desc_data.hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    /* no header splitting for SCTP */
+    if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP ||
+                      l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) {
+        pdma_st->ps_hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+    } else {
+        pdma_st->ps_hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+    }
+
+    pdma_st->ps_desc_data.sph = true;
+    pdma_st->ps_desc_data.hdr_len = pdma_st->ps_hdr_len;
+
+    if (pdma_st->ps_hdr_len > bheader_size) {
+        pdma_st->ps_desc_data.hbo = true;
+        goto header_not_handled;
+    }
+
+    return true;
+
+header_not_handled:
+    if (split_always) {
+        pdma_st->ps_hdr_len = bheader_size;
+        return true;
+    }
+
+    return false;
+}
+
+static void
+igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
+{
+    if (pdma_st->do_ps && pdma_st->is_first) {
+        if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) {
+            *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len;
+        }
+    } else {
+        if (*size > pdma_st->rx_desc_packet_buf_size) {
+            *size = pdma_st->rx_desc_packet_buf_size;
+        }
+    }
+}
+
+static inline void
+igb_write_hdr_frag_to_rx_buffers(IGBCore *core,
+                                 PCIDevice *d,
+                                 IGBPacketRxDMAState *pdma_st,
+                                 const char *data,
+                                 dma_addr_t data_len)
+{
+    assert(data_len <= pdma_st->rx_desc_header_buf_size -
+                       pdma_st->bastate.written[0]);
+    pci_dma_write(d,
+                  pdma_st->ba[0] + pdma_st->bastate.written[0],
+                  data, data_len);
+    pdma_st->bastate.written[0] += data_len;
+    pdma_st->bastate.cur_idx = 1;
+}
+
+static void
+igb_write_header_to_rx_buffers(IGBCore *core,
+                               struct NetRxPkt *pkt,
+                               PCIDevice *d,
+                               IGBPacketRxDMAState *pdma_st,
+                               size_t *copy_size)
+{
+    size_t iov_copy;
+    size_t ps_hdr_copied = 0;
+
+    if (!pdma_st->is_first) {
+        /* Leave buffer 0 of each descriptor except first */
+        /* empty */
+        pdma_st->bastate.cur_idx = 1;
+        return;
+    }
+
+    do {
+        iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied,
+                       pdma_st->iov->iov_len - pdma_st->iov_ofs);
+
+        igb_write_hdr_frag_to_rx_buffers(core, d, pdma_st,
+                                         pdma_st->iov->iov_base,
+                                         iov_copy);
+
+        *copy_size -= iov_copy;
+        ps_hdr_copied += iov_copy;
+
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    } while (ps_hdr_copied < pdma_st->ps_hdr_len);
+
+    pdma_st->is_first = false;
+}
+
+static void
+igb_write_payload_frag_to_rx_buffers(IGBCore *core,
+                                     PCIDevice *d,
+                                     IGBPacketRxDMAState *pdma_st,
+                                     const char *data,
+                                     dma_addr_t data_len)
+{
+    while (data_len > 0) {
+        assert(pdma_st->bastate.cur_idx < IGB_MAX_PS_BUFFERS);
+
+        uint32_t cur_buf_bytes_left =
+            pdma_st->rx_desc_packet_buf_size -
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx];
+        uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left);
+
+        trace_igb_rx_desc_buff_write(
+            pdma_st->bastate.cur_idx,
+            pdma_st->ba[pdma_st->bastate.cur_idx],
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+            data,
+            bytes_to_write);
+
+        pci_dma_write(d,
+                      pdma_st->ba[pdma_st->bastate.cur_idx] +
+                      pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+                      data, bytes_to_write);
+
+        pdma_st->bastate.written[pdma_st->bastate.cur_idx] += bytes_to_write;
+        data += bytes_to_write;
+        data_len -= bytes_to_write;
+
+        if (pdma_st->bastate.written[pdma_st->bastate.cur_idx] ==
+            pdma_st->rx_desc_packet_buf_size) {
+            pdma_st->bastate.cur_idx++;
+        }
+    }
+}
+
+static void
+igb_write_payload_to_rx_buffers(IGBCore *core,
+                                struct NetRxPkt *pkt,
+                                PCIDevice *d,
+                                IGBPacketRxDMAState *pdma_st,
+                                size_t *copy_size)
+{
+    static const uint32_t fcs_pad;
+    size_t iov_copy;
+
+    /* Copy packet payload */
+    while (*copy_size) {
+        iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st,
+                                             pdma_st->iov->iov_base +
+                                             pdma_st->iov_ofs,
+                                             iov_copy);
+
+        *copy_size -= iov_copy;
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    }
+
+    if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
+        /* Simulate FCS checksum presence in the last descriptor */
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st,
+                                             (const char *) &fcs_pad,
+                                             e1000x_fcs_len(core->mac));
+    }
+}
+
+static void
+igb_write_to_rx_buffers(IGBCore *core,
+                        struct NetRxPkt *pkt,
+                        PCIDevice *d,
+                        IGBPacketRxDMAState *pdma_st)
+{
+    size_t copy_size;
+
+    if (!(pdma_st->ba)[1] || (pdma_st->do_ps && !(pdma_st->ba[0]))) {
+        /* as per intel docs; skip descriptors with null buf addr */
+        trace_e1000e_rx_null_descriptor();
+        return;
+    }
+
+    if (pdma_st->desc_offset >= pdma_st->size) {
+        return;
+    }
+
+    pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
+    copy_size = pdma_st->size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &copy_size);
+
+    /* For PS mode copy the packet header first */
+    if (pdma_st->do_ps) {
+        igb_write_header_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+    } else {
+        pdma_st->bastate.cur_idx = 1;
+    }
+
+    igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+}
+
 static void
 igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
                           const E1000E_RxRing *rxr,
@@ -1560,95 +1918,61 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     PCIDevice *d;
     dma_addr_t base;
     union e1000_rx_desc_union desc;
-    size_t desc_size;
-    size_t desc_offset = 0;
-    size_t iov_ofs = 0;
+    const E1000ERingInfo *rxi;
+    size_t rx_desc_len;
 
-    struct iovec *iov = net_rx_pkt_get_iovec(pkt);
-    size_t size = net_rx_pkt_get_total_len(pkt);
-    size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000E_RingInfo *rxi = rxr->i;
-    size_t bufsize = igb_rxbufsize(core, rxi);
+    IGBPacketRxDMAState pdma_st = {0};
+    pdma_st.is_first = true;
+    pdma_st.size = net_rx_pkt_get_total_len(pkt);
+    pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac);
 
+    rxi = rxr->i;
+    rx_desc_len = core->rx_desc_len;
+    pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi);
+    pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi);
+    pdma_st.iov = net_rx_pkt_get_iovec(pkt);
     d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
     if (!d) {
         d = core->owner;
     }
 
+    pdma_st.do_ps = igb_do_ps(core, rxi, pkt, &pdma_st);
+
     do {
-        hwaddr ba;
-        uint16_t written = 0;
+        memset(&pdma_st.bastate, 0, sizeof(IGBBAState));
         bool is_last = false;
 
-        desc_size = total_size - desc_offset;
-
-        if (desc_size > bufsize) {
-            desc_size = bufsize;
-        }
-
         if (igb_ring_empty(core, rxi)) {
             return;
         }
 
         base = igb_ring_head_descr(core, rxi);
+        pci_dma_read(d, base, &desc, rx_desc_len);
+        trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
 
-        pci_dma_read(d, base, &desc, core->rx_desc_len);
-
-        trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
-
-        igb_read_rx_descr(core, &desc, &ba);
-
-        if (ba) {
-            if (desc_offset < size) {
-                static const uint32_t fcs_pad;
-                size_t iov_copy;
-                size_t copy_size = size - desc_offset;
-                if (copy_size > bufsize) {
-                    copy_size = bufsize;
-                }
-
-                /* Copy packet payload */
-                while (copy_size) {
-                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
-
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                                            iov->iov_base + iov_ofs, iov_copy);
-
-                    copy_size -= iov_copy;
-                    iov_ofs += iov_copy;
-                    if (iov_ofs == iov->iov_len) {
-                        iov++;
-                        iov_ofs = 0;
-                    }
-                }
-
-                if (desc_offset + desc_size >= total_size) {
-                    /* Simulate FCS checksum presence in the last descriptor */
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                          (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
-                }
-            }
-        } else { /* as per intel docs; skip descriptors with null buf addr */
-            trace_e1000e_rx_null_descriptor();
-        }
-        desc_offset += desc_size;
-        if (desc_offset >= total_size) {
+        igb_read_rx_descr(core, &desc, &pdma_st, rxi);
+
+        igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
+        pdma_st.desc_offset += pdma_st.desc_size;
+        if (pdma_st.desc_offset >= pdma_st.total_size) {
             is_last = true;
         }
 
-        igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
-                           rss_info, etqf, ts, written);
-        igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
-
-        igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
-
-    } while (desc_offset < total_size);
-
-    igb_update_rx_stats(core, rxi, size, total_size);
+        igb_write_rx_descr(core, &desc,
+                           is_last ? pkt : NULL,
+                           rss_info,
+                           etqf, ts,
+                           &pdma_st,
+                           rxi);
+        igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len);
+        igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN);
+    } while (pdma_st.desc_offset < pdma_st.total_size);
+
+    igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size);
 }
 
 static bool
-igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi)
 {
     if (core->mac[MRQC] & 1) {
         uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
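For orientation, the split point igb_do_ps() computes in the common case is simply the start of the L4 payload. A throwaway worked example for a plain TCPv4 packet, with hand-written header sizes (this example's own assumptions, not values taken from the code):

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        size_t eth = 14, ipv4 = 20, tcp = 20;
        size_t ps_hdr_len = eth + ipv4 + tcp;   /* L5 offset: 54 bytes */
        size_t hdr_buf = 256;                   /* from SRRCTL.BSIZEHDRSIZE */
        bool hbo = ps_hdr_len > hdr_buf;        /* header-buffer overflow bit */

        printf("split at %zu bytes, hbo=%d\n", ps_hdr_len, hbo);
        return 0;
    }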
@@ -2753,7 +3077,7 @@ igb_update_rx_offloads(IGBCore *core)
 
     if (core->has_vnet) {
         qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
-                         cso_state, 0, 0, 0, 0);
+                         cso_state, 0, 0, 0, 0, 0, 0);
     }
 }
@@ -452,6 +452,7 @@ union e1000_adv_rx_desc {
 #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00
 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */
 #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000
 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
 #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000
 #define E1000_SRRCTL_DROP_EN 0x80000000
@@ -692,11 +693,20 @@ union e1000_adv_rx_desc {
 
 #define E1000_STATUS_NUM_VFS_SHIFT 14
 
-#define E1000_ADVRXD_PKT_IP4 BIT(4)
-#define E1000_ADVRXD_PKT_IP6 BIT(6)
-#define E1000_ADVRXD_PKT_TCP BIT(8)
-#define E1000_ADVRXD_PKT_UDP BIT(9)
-#define E1000_ADVRXD_PKT_SCTP BIT(10)
+#define E1000_ADVRXD_PKT_IP4 BIT(0)
+#define E1000_ADVRXD_PKT_IP6 BIT(2)
+#define E1000_ADVRXD_PKT_IP6E BIT(3)
+#define E1000_ADVRXD_PKT_TCP BIT(4)
+#define E1000_ADVRXD_PKT_UDP BIT(5)
+#define E1000_ADVRXD_PKT_SCTP BIT(6)
+
+#define IGB_MAX_PS_BUFFERS 2
+
+#define E1000_ADVRXD_HDR_LEN_OFFSET (21 - 16)
+#define E1000_ADVRXD_ADV_HDR_LEN_MASK ((BIT(10) - 1) << \
+                                       E1000_ADVRXD_HDR_LEN_OFFSET)
+#define E1000_ADVRXD_HDR_SPH BIT(15)
+#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20)
 
 static inline uint8_t igb_ivar_entry_rx(uint8_t i)
 {
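The new hdr_info defines encode a 10-bit header length at bits 5..14 of the 16-bit field (bits 21..30 of the descriptor DWORD, hence the (21 - 16) offset) plus the SPH flag at bit 15. A compilable check of the packing:

    #include <stdint.h>
    #include <stdio.h>

    #define BIT(n) (1u << (n))
    #define HDR_LEN_OFFSET   (21 - 16)                         /* = 5 */
    #define ADV_HDR_LEN_MASK ((BIT(10) - 1) << HDR_LEN_OFFSET)
    #define HDR_SPH          BIT(15)

    int main(void)
    {
        uint16_t hdr_len = 54;   /* bytes DMA'd into the header buffer */
        uint16_t hdr_info =
            (uint16_t)((hdr_len << HDR_LEN_OFFSET) & ADV_HDR_LEN_MASK);
        hdr_info |= HDR_SPH;     /* header was split */
        printf("0x%04x\n", hdr_info);   /* 0x86c0 */
        return 0;
    }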
@@ -1043,7 +1043,7 @@ static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id)
 static ssize_t of_dpa_ig(World *world, uint32_t pport,
                          const struct iovec *iov, int iovcnt)
 {
-    struct iovec iov_copy[iovcnt + 2];
+    g_autofree struct iovec *iov_copy = g_new(struct iovec, iovcnt + 2);
     OfDpaFlowContext fc = {
         .of_dpa = world_private(world),
         .in_pport = pport,
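The rocker fix swaps the variable-length array for a heap allocation whose lifetime is tied to the scope via GLib's g_autofree. A minimal sketch of the idiom (requires GLib; the function and its use are illustrative, not the device code):

    #include <glib.h>
    #include <string.h>
    #include <sys/uio.h>

    static void with_two_extra_slots(const struct iovec *iov, int iovcnt)
    {
        /* freed automatically when iov_copy goes out of scope */
        g_autofree struct iovec *iov_copy = g_new(struct iovec, iovcnt + 2);

        memcpy(iov_copy + 2, iov, sizeof(*iov) * iovcnt);
        /* ... use iov_copy; no explicit g_free() needed ... */
    }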
@@ -278,9 +278,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
 
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
+igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X"
+igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X"
 
 igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled"
 igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x"
@@ -295,6 +295,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x"
 igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x"
 igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x"
 
+igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d"
+
 # igbvf.c
 igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64
@@ -78,6 +78,9 @@ static const int user_feature_bits[] = {
     VIRTIO_F_RING_RESET,
     VIRTIO_NET_F_RSS,
     VIRTIO_NET_F_HASH_REPORT,
+    VIRTIO_NET_F_GUEST_USO4,
+    VIRTIO_NET_F_GUEST_USO6,
+    VIRTIO_NET_F_HOST_USO,
 
     /* This bit implies RARP isn't sent by QEMU out of band */
     VIRTIO_NET_F_GUEST_ANNOUNCE,
@@ -659,6 +659,15 @@ static int peer_has_ufo(VirtIONet *n)
     return n->has_ufo;
 }
 
+static int peer_has_uso(VirtIONet *n)
+{
+    if (!peer_has_vnet_hdr(n)) {
+        return 0;
+    }
+
+    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
+}
+
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                        int version_1, int hash_report)
 {
@@ -796,6 +805,10 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 
+        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+
         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
     }
 
@@ -804,6 +817,12 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
     }
 
+    if (!peer_has_uso(n)) {
+        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+    }
+
     if (!get_vhost_net(nc->peer)) {
         return features;
     }
@@ -859,17 +878,21 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
         !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
         !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
         !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
-        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
+        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
+        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
+        !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
 }
 
-static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
+static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
 {
     static const uint64_t guest_offloads_mask =
         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
-        (1ULL << VIRTIO_NET_F_GUEST_UFO);
+        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
+        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
+        (1ULL << VIRTIO_NET_F_GUEST_USO6);
 
     return guest_offloads_mask & features;
 }
@@ -3922,6 +3945,12 @@ static Property virtio_net_properties[] = {
     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+    DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
+                      VIRTIO_NET_F_GUEST_USO4, true),
+    DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
+                      VIRTIO_NET_F_GUEST_USO6, true),
+    DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
+                      VIRTIO_NET_F_HOST_USO, true),
     DEFINE_PROP_END_OF_LIST(),
 };
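The negotiation logic above is plain bit masking: any USO feature the peer cannot back is withdrawn from the offer. A standalone model of that step (the bit numbers 54/55/56 for GUEST_USO4/GUEST_USO6/HOST_USO follow the virtio spec, but treat them as this sketch's assumption):

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { F_GUEST_USO4 = 54, F_GUEST_USO6 = 55, F_HOST_USO = 56 };

    static void clear_feature(uint64_t *features, unsigned bit)
    {
        *features &= ~(1ULL << bit);
    }

    int main(void)
    {
        uint64_t features = (1ULL << F_HOST_USO) | (1ULL << F_GUEST_USO4);
        bool peer_has_uso = false;   /* e.g. a tap backend without USO support */

        if (!peer_has_uso) {
            clear_feature(&features, F_HOST_USO);
            clear_feature(&features, F_GUEST_USO4);
            clear_feature(&features, F_GUEST_USO6);
        }
        printf("0x%016" PRIx64 "\n", features);   /* all USO bits withdrawn */
        return 0;
    }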
@@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State *s)
                          s->lro_supported,
                          s->lro_supported,
                          0,
+                         0,
+                         0,
                          0);
     }
 }
@@ -54,11 +54,12 @@ typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
 typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasUso)(NetClientState *);
 typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef bool (GetUsingVnetHdr)(NetClientState *);
 typedef void (UsingVnetHdr)(NetClientState *, bool);
-typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int);
 typedef int (GetVnetHdrLen)(NetClientState *);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
 typedef int (SetVnetLE)(NetClientState *, bool);
@@ -84,6 +85,7 @@ typedef struct NetClientInfo {
     QueryRxFilter *query_rx_filter;
     NetPoll *poll;
     HasUfo *has_ufo;
+    HasUso *has_uso;
     HasVnetHdr *has_vnet_hdr;
     HasVnetHdrLen *has_vnet_hdr_len;
     GetUsingVnetHdr *get_using_vnet_hdr;
@@ -187,12 +189,13 @@ void qemu_set_info_str(NetClientState *nc,
                        const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
 bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_uso(NetClientState *nc);
 bool qemu_has_vnet_hdr(NetClientState *nc);
 bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
 bool qemu_get_using_vnet_hdr(NetClientState *nc);
 void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                      int ecn, int ufo);
+                      int ecn, int ufo, int uso4, int uso6);
 int qemu_get_vnet_hdr_len(NetClientState *nc);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 int qemu_set_vnet_le(NetClientState *nc, bool is_le);
@@ -1873,6 +1873,13 @@ if libbpf.found() and not cc.links('''
   endif
 endif
 
+# libxdp
+libxdp = not_found
+if not get_option('af_xdp').auto() or have_system
+  libxdp = dependency('libxdp', required: get_option('af_xdp'),
+                      version: '>=1.4.0', method: 'pkg-config')
+endif
+
 # libdw
 libdw = not_found
 if not get_option('libdw').auto() or \
@@ -2099,6 +2106,7 @@ config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_parser'))
 config_host_data.set('CONFIG_LIBATTR', have_old_libattr)
 config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found())
 config_host_data.set('CONFIG_EBPF', libbpf.found())
+config_host_data.set('CONFIG_AF_XDP', libxdp.found())
 config_host_data.set('CONFIG_LIBDAXCTL', libdaxctl.found())
 config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
 config_host_data.set('CONFIG_LIBNFS', libnfs.found())
@@ -4270,6 +4278,7 @@ summary_info = {}
 if targetos == 'darwin'
   summary_info += {'vmnet.framework support': vmnet}
 endif
+summary_info += {'AF_XDP support': libxdp}
 summary_info += {'slirp support': slirp}
 summary_info += {'vde support': vde}
 summary_info += {'netmap support': have_netmap}
@@ -122,6 +122,8 @@ option('avx512bw', type: 'feature', value: 'auto',
 option('keyring', type: 'feature', value: 'auto',
        description: 'Linux keyring support')
 
+option('af_xdp', type : 'feature', value : 'auto',
+       description: 'AF_XDP network backend support')
 option('attr', type : 'feature', value : 'auto',
        description: 'attr/xattr support')
 option('auth_pam', type : 'feature', value : 'auto',
net/af-xdp.c
Normal file
526
net/af-xdp.c
Normal file
@ -0,0 +1,526 @@
/*
 * AF_XDP network backend.
 *
 * Copyright (c) 2023 Red Hat, Inc.
 *
 * Authors:
 *  Ilya Maximets <i.maximets@ovn.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <bpf/bpf.h>
#include <inttypes.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <net/if.h>
#include <xdp/xsk.h>

#include "clients.h"
#include "monitor/monitor.h"
#include "net/net.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/memalign.h"

typedef struct AFXDPState {
    NetClientState       nc;

    struct xsk_socket    *xsk;
    struct xsk_ring_cons rx;
    struct xsk_ring_prod tx;
    struct xsk_ring_cons cq;
    struct xsk_ring_prod fq;

    char                 ifname[IFNAMSIZ];
    int                  ifindex;
    bool                 read_poll;
    bool                 write_poll;
    uint32_t             outstanding_tx;

    uint64_t             *pool;
    uint32_t             n_pool;
    char                 *buffer;
    struct xsk_umem      *umem;

    uint32_t             n_queues;
    uint32_t             xdp_flags;
    bool                 inhibit;
} AFXDPState;

#define AF_XDP_BATCH_SIZE 64

static void af_xdp_send(void *opaque);
static void af_xdp_writable(void *opaque);
/* Set the event-loop handlers for the af-xdp backend. */
static void af_xdp_update_fd_handler(AFXDPState *s)
{
    qemu_set_fd_handler(xsk_socket__fd(s->xsk),
                        s->read_poll ? af_xdp_send : NULL,
                        s->write_poll ? af_xdp_writable : NULL,
                        s);
}

/* Update the read handler. */
static void af_xdp_read_poll(AFXDPState *s, bool enable)
{
    if (s->read_poll != enable) {
        s->read_poll = enable;
        af_xdp_update_fd_handler(s);
    }
}

/* Update the write handler. */
static void af_xdp_write_poll(AFXDPState *s, bool enable)
{
    if (s->write_poll != enable) {
        s->write_poll = enable;
        af_xdp_update_fd_handler(s);
    }
}

static void af_xdp_poll(NetClientState *nc, bool enable)
{
    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);

    if (s->read_poll != enable || s->write_poll != enable) {
        s->write_poll = enable;
        s->read_poll = enable;
        af_xdp_update_fd_handler(s);
    }
}
static void af_xdp_complete_tx(AFXDPState *s)
{
    uint32_t idx = 0;
    uint32_t done, i;
    uint64_t *addr;

    done = xsk_ring_cons__peek(&s->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx);

    for (i = 0; i < done; i++) {
        addr = (void *) xsk_ring_cons__comp_addr(&s->cq, idx++);
        s->pool[s->n_pool++] = *addr;
        s->outstanding_tx--;
    }

    if (done) {
        xsk_ring_cons__release(&s->cq, done);
    }
}

/*
 * The fd_write() callback, invoked if the fd is marked as writable
 * after a poll.
 */
static void af_xdp_writable(void *opaque)
{
    AFXDPState *s = opaque;

    /* Try to recover buffers that are already sent. */
    af_xdp_complete_tx(s);

    /*
     * Unregister the handler, unless we still have packets to transmit
     * and the kernel needs a wake up.
     */
    if (!s->outstanding_tx || !xsk_ring_prod__needs_wakeup(&s->tx)) {
        af_xdp_write_poll(s, false);
    }

    /* Flush any buffered packets. */
    qemu_flush_queued_packets(&s->nc);
}
static ssize_t af_xdp_receive(NetClientState *nc,
                              const uint8_t *buf, size_t size)
{
    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);
    struct xdp_desc *desc;
    uint32_t idx;
    void *data;

    /* Try to recover buffers that are already sent. */
    af_xdp_complete_tx(s);

    if (size > XSK_UMEM__DEFAULT_FRAME_SIZE) {
        /* We can't transmit packet this size... */
        return size;
    }

    if (!s->n_pool || !xsk_ring_prod__reserve(&s->tx, 1, &idx)) {
        /*
         * Out of buffers or space in tx ring. Poll until we can write.
         * This will also kick the Tx, if it was waiting on CQ.
         */
        af_xdp_write_poll(s, true);
        return 0;
    }

    desc = xsk_ring_prod__tx_desc(&s->tx, idx);
    desc->addr = s->pool[--s->n_pool];
    desc->len = size;

    data = xsk_umem__get_data(s->buffer, desc->addr);
    memcpy(data, buf, size);

    xsk_ring_prod__submit(&s->tx, 1);
    s->outstanding_tx++;

    if (xsk_ring_prod__needs_wakeup(&s->tx)) {
        af_xdp_write_poll(s, true);
    }

    return size;
}

/*
 * Complete a previous send (backend --> guest) and enable the
 * fd_read callback.
 */
static void af_xdp_send_completed(NetClientState *nc, ssize_t len)
{
    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);

    af_xdp_read_poll(s, true);
}
static void af_xdp_fq_refill(AFXDPState *s, uint32_t n)
{
    uint32_t i, idx = 0;

    /* Leave one packet for Tx, just in case. */
    if (s->n_pool < n + 1) {
        n = s->n_pool;
    }

    if (!n || !xsk_ring_prod__reserve(&s->fq, n, &idx)) {
        return;
    }

    for (i = 0; i < n; i++) {
        *xsk_ring_prod__fill_addr(&s->fq, idx++) = s->pool[--s->n_pool];
    }
    xsk_ring_prod__submit(&s->fq, n);

    if (xsk_ring_prod__needs_wakeup(&s->fq)) {
        /* Receive was blocked by not having enough buffers. Wake it up. */
        af_xdp_read_poll(s, true);
    }
}

static void af_xdp_send(void *opaque)
{
    uint32_t i, n_rx, idx = 0;
    AFXDPState *s = opaque;

    n_rx = xsk_ring_cons__peek(&s->rx, AF_XDP_BATCH_SIZE, &idx);
    if (!n_rx) {
        return;
    }

    for (i = 0; i < n_rx; i++) {
        const struct xdp_desc *desc;
        struct iovec iov;

        desc = xsk_ring_cons__rx_desc(&s->rx, idx++);

        iov.iov_base = xsk_umem__get_data(s->buffer, desc->addr);
        iov.iov_len = desc->len;

        s->pool[s->n_pool++] = desc->addr;

        if (!qemu_sendv_packet_async(&s->nc, &iov, 1,
                                     af_xdp_send_completed)) {
            /*
             * The peer does not receive anymore. Packet is queued, stop
             * reading from the backend until af_xdp_send_completed().
             */
            af_xdp_read_poll(s, false);

            /* Return unused descriptors to not break the ring cache. */
            xsk_ring_cons__cancel(&s->rx, n_rx - i - 1);
            n_rx = i + 1;
            break;
        }
    }

    /* Release actually sent descriptors and try to re-fill. */
    xsk_ring_cons__release(&s->rx, n_rx);
    af_xdp_fq_refill(s, AF_XDP_BATCH_SIZE);
}

/* Flush and close. */
static void af_xdp_cleanup(NetClientState *nc)
{
    AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc);

    qemu_purge_queued_packets(nc);

    af_xdp_poll(nc, false);

    xsk_socket__delete(s->xsk);
    s->xsk = NULL;
    g_free(s->pool);
    s->pool = NULL;
    xsk_umem__delete(s->umem);
    s->umem = NULL;
    qemu_vfree(s->buffer);
    s->buffer = NULL;

    /* Remove the program if it's the last open queue. */
    if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags
        && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) {
        fprintf(stderr,
                "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n",
                s->ifname, s->ifindex);
    }
}
static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp)
{
    struct xsk_umem_config config = {
        .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
        .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
        .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
        .frame_headroom = 0,
    };
    uint64_t n_descs;
    uint64_t size;
    int64_t i;
    int ret;

    /* Number of descriptors if all 4 queues (rx, tx, cq, fq) are full. */
    n_descs = (XSK_RING_PROD__DEFAULT_NUM_DESCS
               + XSK_RING_CONS__DEFAULT_NUM_DESCS) * 2;
    size = n_descs * XSK_UMEM__DEFAULT_FRAME_SIZE;

    s->buffer = qemu_memalign(qemu_real_host_page_size(), size);
    memset(s->buffer, 0, size);

    if (sock_fd < 0) {
        ret = xsk_umem__create(&s->umem, s->buffer, size,
                               &s->fq, &s->cq, &config);
    } else {
        ret = xsk_umem__create_with_fd(&s->umem, sock_fd, s->buffer, size,
                                       &s->fq, &s->cq, &config);
    }

    if (ret) {
        qemu_vfree(s->buffer);
        error_setg_errno(errp, errno,
                         "failed to create umem for %s queue_index: %d",
                         s->ifname, s->nc.queue_index);
        return -1;
    }

    s->pool = g_new(uint64_t, n_descs);
    /* Fill the pool in the opposite order, because it's a LIFO queue. */
    for (i = n_descs - 1; i >= 0; i--) {
        s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
    }
    s->n_pool = n_descs;

    af_xdp_fq_refill(s, XSK_RING_PROD__DEFAULT_NUM_DESCS);

    return 0;
}
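/*
 * A worked example of the sizing above, assuming the libxdp defaults of
 * 2048 descriptors per ring and 4096-byte frames (values taken from
 * <xdp/xsk.h>; other library versions may differ):
 *
 *     n_descs = (2048 + 2048) * 2 = 8192 frames
 *     size    = 8192 * 4096 bytes = 32 MiB of umem
 *
 * The umem is effectively locked memory once registered with the kernel,
 * so each af-xdp queue costs roughly 32 MiB of pinned pages.
 */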
static int af_xdp_socket_create(AFXDPState *s,
                                const NetdevAFXDPOptions *opts, Error **errp)
{
    struct xsk_socket_config cfg = {
        .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
        .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
        .libxdp_flags = 0,
        .bind_flags = XDP_USE_NEED_WAKEUP,
        .xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
    };
    int queue_id, error = 0;

    s->inhibit = opts->has_inhibit && opts->inhibit;
    if (s->inhibit) {
        cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD;
    }

    if (opts->has_force_copy && opts->force_copy) {
        cfg.bind_flags |= XDP_COPY;
    }

    queue_id = s->nc.queue_index;
    if (opts->has_start_queue && opts->start_queue > 0) {
        queue_id += opts->start_queue;
    }

    if (opts->has_mode) {
        /* Specific mode requested. */
        cfg.xdp_flags |= (opts->mode == AFXDP_MODE_NATIVE)
                         ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE;
        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
                               s->umem, &s->rx, &s->tx, &cfg)) {
            error = errno;
        }
    } else {
        /* No mode requested, try native first. */
        cfg.xdp_flags |= XDP_FLAGS_DRV_MODE;

        if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
                               s->umem, &s->rx, &s->tx, &cfg)) {
            /* Can't use native mode, try skb. */
            cfg.xdp_flags &= ~XDP_FLAGS_DRV_MODE;
            cfg.xdp_flags |= XDP_FLAGS_SKB_MODE;

            if (xsk_socket__create(&s->xsk, s->ifname, queue_id,
                                   s->umem, &s->rx, &s->tx, &cfg)) {
                error = errno;
            }
        }
    }

    if (error) {
        error_setg_errno(errp, error,
                         "failed to create AF_XDP socket for %s queue_id: %d",
                         s->ifname, queue_id);
        return -1;
    }

    s->xdp_flags = cfg.xdp_flags;

    return 0;
}
/* NetClientInfo methods. */
static NetClientInfo net_af_xdp_info = {
    .type = NET_CLIENT_DRIVER_AF_XDP,
    .size = sizeof(AFXDPState),
    .receive = af_xdp_receive,
    .poll = af_xdp_poll,
    .cleanup = af_xdp_cleanup,
};

static int *parse_socket_fds(const char *sock_fds_str,
                             int64_t n_expected, Error **errp)
{
    gchar **substrings = g_strsplit(sock_fds_str, ":", -1);
    int64_t i, n_sock_fds = g_strv_length(substrings);
    int *sock_fds = NULL;

    if (n_sock_fds != n_expected) {
        error_setg(errp, "expected %"PRIi64" socket fds, got %"PRIi64,
                   n_expected, n_sock_fds);
        goto exit;
    }

    sock_fds = g_new(int, n_sock_fds);

    for (i = 0; i < n_sock_fds; i++) {
        sock_fds[i] = monitor_fd_param(monitor_cur(), substrings[i], errp);
        if (sock_fds[i] < 0) {
            g_free(sock_fds);
            sock_fds = NULL;
            goto exit;
        }
    }

exit:
    g_strfreev(substrings);
    return sock_fds;
}
/*
 * The exported init function.
 *
 * ... -netdev af-xdp,ifname="..."
 */
int net_init_af_xdp(const Netdev *netdev,
                    const char *name, NetClientState *peer, Error **errp)
{
    const NetdevAFXDPOptions *opts = &netdev->u.af_xdp;
    NetClientState *nc, *nc0 = NULL;
    unsigned int ifindex;
    uint32_t prog_id = 0;
    int *sock_fds = NULL;
    int64_t i, queues;
    AFXDPState *s;

    ifindex = if_nametoindex(opts->ifname);
    if (!ifindex) {
        error_setg_errno(errp, errno, "failed to get ifindex for '%s'",
                         opts->ifname);
        return -1;
    }

    queues = opts->has_queues ? opts->queues : 1;
    if (queues < 1) {
        error_setg(errp, "invalid number of queues (%" PRIi64 ") for '%s'",
                   queues, opts->ifname);
        return -1;
    }

    if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) {
        error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa");
        return -1;
    }

    if (opts->sock_fds) {
        sock_fds = parse_socket_fds(opts->sock_fds, queues, errp);
        if (!sock_fds) {
            return -1;
        }
    }

    for (i = 0; i < queues; i++) {
        nc = qemu_new_net_client(&net_af_xdp_info, peer, "af-xdp", name);
        qemu_set_info_str(nc, "af-xdp%"PRIi64" to %s", i, opts->ifname);
        nc->queue_index = i;

        if (!nc0) {
            nc0 = nc;
        }

        s = DO_UPCAST(AFXDPState, nc, nc);

        pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname);
        s->ifindex = ifindex;
        s->n_queues = queues;

        if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp)
            || af_xdp_socket_create(s, opts, errp)) {
            /* Make sure the XDP program will be removed. */
            s->n_queues = i;
            goto err;
        }
    }

    if (nc0) {
        s = DO_UPCAST(AFXDPState, nc, nc0);
        if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) {
            error_setg_errno(errp, errno,
                             "no XDP program loaded on '%s', ifindex: %d",
                             s->ifname, s->ifindex);
            goto err;
        }
    }

    af_xdp_read_poll(s, true); /* Initially only poll for reads. */

    g_free(sock_fds);

    return 0;

err:
    g_free(sock_fds);
    if (nc0) {
        qemu_del_net_client(nc0);
    }

    return -1;
}
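The buffer pool manipulated throughout this file is a plain LIFO stack of umem frame addresses: the receive and fill paths pop with `s->pool[--s->n_pool]`, and the completion and rx paths push with `s->pool[s->n_pool++] = addr`. A toy, self-contained model of that invariant, with hypothetical names, to make the frame ownership hand-off explicit:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define N_FRAMES   8
    #define FRAME_SIZE 4096

    /* Toy stand-ins for AFXDPState.pool / n_pool (names hypothetical). */
    static uint64_t pool[N_FRAMES];
    static uint32_t n_pool;

    /* Pop: like desc->addr = s->pool[--s->n_pool] in af_xdp_receive(). */
    static uint64_t pool_get(void)
    {
        assert(n_pool > 0);
        return pool[--n_pool];
    }

    /* Push: like s->pool[s->n_pool++] = *addr in af_xdp_complete_tx(). */
    static void pool_put(uint64_t addr)
    {
        assert(n_pool < N_FRAMES);
        pool[n_pool++] = addr;
    }

    int main(void)
    {
        /* Every frame starts owned by the pool, one address per frame. */
        for (uint32_t i = 0; i < N_FRAMES; i++) {
            pool[i] = (uint64_t)i * FRAME_SIZE;
        }
        n_pool = N_FRAMES;

        uint64_t a = pool_get();   /* frame handed to the tx or fill ring */
        pool_put(a);               /* frame returned via cq or rx processing */
        printf("frames available again: %u\n", n_pool);
        return 0;
    }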
net/clients.h
@@ -64,6 +64,11 @@ int net_init_netmap(const Netdev *netdev, const char *name,
                     NetClientState *peer, Error **errp);
 #endif

+#ifdef CONFIG_AF_XDP
+int net_init_af_xdp(const Netdev *netdev, const char *name,
+                    NetClientState *peer, Error **errp);
+#endif
+
 int net_init_vhost_user(const Netdev *netdev, const char *name,
                         NetClientState *peer, Error **errp);
net/dump.c
@@ -68,7 +68,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt,
     int64_t ts;
     int caplen;
     size_t size = iov_size(iov, cnt) - offset;
-    struct iovec dumpiov[cnt + 1];
+    g_autofree struct iovec *dumpiov = g_new(struct iovec, cnt + 1);

     /* Early return in case of previous error. */
     if (s->fd < 0) {
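This change, like the net/tap.c hunk further down, replaces a variable-length array whose size is influenced by the peer with a heap allocation, so stack growth stays bounded. A minimal before/after sketch (function names are illustrative, not from the patch):

    #include <glib.h>
    #include <string.h>
    #include <sys/uio.h>

    /* Before: a VLA; 'cnt' can be large, so stack usage is unbounded. */
    void fill_vla(const struct iovec *iov, int cnt)
    {
        struct iovec dumpiov[cnt + 1];              /* removed by the patch */
        memcpy(&dumpiov[1], iov, cnt * sizeof(*iov));
    }

    /* After: bounded stack; g_autofree releases the heap copy on return. */
    void fill_heap(const struct iovec *iov, int cnt)
    {
        g_autofree struct iovec *dumpiov = g_new(struct iovec, cnt + 1);
        memcpy(&dumpiov[1], iov, cnt * sizeof(*iov));
    }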
net/meson.build
@@ -36,6 +36,9 @@ system_ss.add(when: vde, if_true: files('vde.c'))
 if have_netmap
   system_ss.add(files('netmap.c'))
 endif
+
+system_ss.add(when: libxdp, if_true: files('af-xdp.c'))
+
 if have_vhost_net_user
   system_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c'))
   system_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c'))
net/net.c (19 lines changed)
@@ -495,6 +495,15 @@ bool qemu_has_ufo(NetClientState *nc)
     return nc->info->has_ufo(nc);
 }

+bool qemu_has_uso(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_uso) {
+        return false;
+    }
+
+    return nc->info->has_uso(nc);
+}
+
 bool qemu_has_vnet_hdr(NetClientState *nc)
 {
     if (!nc || !nc->info->has_vnet_hdr) {
@@ -532,13 +541,13 @@ void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
 }

 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                      int ecn, int ufo)
+                      int ecn, int ufo, int uso4, int uso6)
 {
     if (!nc || !nc->info->set_offload) {
         return;
     }

-    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }

 int qemu_get_vnet_hdr_len(NetClientState *nc)
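Every NetClientInfo implementation and every caller must follow the widened signature. Callers with no USO state simply pass zeros for the new parameters, as the tap initialization later in this series does; a minimal sketch of the mechanical migration, with `peer` and the flag variables assumed to be in scope:

    /* Old call site (no longer compiles after this change): */
    qemu_set_offload(peer, csum, tso4, tso6, ecn, ufo);

    /* New call site; a caller that never offloads USO passes 0, 0: */
    qemu_set_offload(peer, csum, tso4, tso6, ecn, ufo, 0, 0);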
@@ -1082,6 +1091,9 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
 #ifdef CONFIG_NETMAP
         [NET_CLIENT_DRIVER_NETMAP]    = net_init_netmap,
 #endif
+#ifdef CONFIG_AF_XDP
+        [NET_CLIENT_DRIVER_AF_XDP]    = net_init_af_xdp,
+#endif
 #ifdef CONFIG_NET_BRIDGE
         [NET_CLIENT_DRIVER_BRIDGE]    = net_init_bridge,
 #endif
@@ -1186,6 +1198,9 @@ void show_netdevs(void)
 #ifdef CONFIG_NETMAP
         "netmap",
 #endif
+#ifdef CONFIG_AF_XDP
+        "af-xdp",
+#endif
 #ifdef CONFIG_POSIX
         "vhost-user",
 #endif
net/netmap.c
@@ -371,7 +371,7 @@ static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
 }

 static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-                               int ecn, int ufo)
+                               int ecn, int ufo, int uso4, int uso6)
 {
     NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
net/tap-bsd.c
@@ -212,6 +212,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }

+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -232,7 +237,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }

 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
net/tap-linux.c
@@ -173,6 +173,18 @@ int tap_probe_has_ufo(int fd)
     return 1;
 }

+int tap_probe_has_uso(int fd)
+{
+    unsigned offload;
+
+    offload = TUN_F_CSUM | TUN_F_USO4 | TUN_F_USO6;
+
+    if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) {
+        return 0;
+    }
+    return 1;
+}
+
 /* Verify that we can assign given length */
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
@@ -237,7 +249,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }

 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
     unsigned int offload = 0;

@@ -256,8 +268,16 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
             offload |= TUN_F_TSO_ECN;
         if (ufo)
             offload |= TUN_F_UFO;
+        if (uso4) {
+            offload |= TUN_F_USO4;
+        }
+        if (uso6) {
+            offload |= TUN_F_USO6;
+        }
     }

     if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
+        offload &= ~(TUN_F_USO4 | TUN_F_USO6);
+        if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
             offload &= ~TUN_F_UFO;
             if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
@@ -266,6 +286,7 @@ void tap_fd_set_offload(int fd, int csum, int tso4,
+            }
         }
     }
 }

 /* Enable a specific queue of tap. */
 int tap_fd_enable(int fd)
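The probe-then-degrade pattern above (try USO, strip it, then strip UFO) can be exercised outside of QEMU. A hedged, self-contained sketch of probing kernel USO support on a freshly created tap device; it needs CAP_NET_ADMIN, the TUN_F_USO* fallback defines cover older kernel headers, and the printed kernel-version hint is an assumption, not something the patch states:

    #include <fcntl.h>
    #include <linux/if.h>
    #include <linux/if_tun.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    #ifndef TUN_F_USO4
    #define TUN_F_USO4 0x20
    #endif
    #ifndef TUN_F_USO6
    #define TUN_F_USO6 0x40
    #endif

    int main(void)
    {
        struct ifreq ifr;
        int fd = open("/dev/net/tun", O_RDWR);

        if (fd < 0) {
            perror("open(/dev/net/tun)");
            return 1;
        }

        /* Offload flags require a vnet header, hence IFF_VNET_HDR. */
        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
        if (ioctl(fd, TUNSETIFF, &ifr) < 0) {   /* kernel picks a name */
            perror("TUNSETIFF");
            close(fd);
            return 1;
        }

        if (ioctl(fd, TUNSETOFFLOAD, TUN_F_CSUM | TUN_F_USO4 | TUN_F_USO6) < 0) {
            printf("tap driver: no USO support\n");
        } else {
            printf("tap driver: USO supported (recent kernels)\n");
        }
        close(fd);
        return 0;
    }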
net/tap-linux.h
@@ -50,5 +50,7 @@
 #define TUN_F_TSO6    0x04 /* I can handle TSO for IPv6 packets */
 #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */
 #define TUN_F_UFO     0x10 /* I can handle UFO packets */
+#define TUN_F_USO4    0x20 /* I can handle USO for IPv4 packets */
+#define TUN_F_USO6    0x40 /* I can handle USO for IPv6 packets */

 #endif /* QEMU_TAP_LINUX_H */
net/tap-solaris.c
@@ -216,6 +216,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }

+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -236,7 +241,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }

 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
net/tap-stub.c
@@ -47,6 +47,11 @@ int tap_probe_has_ufo(int fd)
     return 0;
 }

+int tap_probe_has_uso(int fd)
+{
+    return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
     return 0;
@@ -67,7 +72,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }

 void tap_fd_set_offload(int fd, int csum, int tso4,
-                        int tso6, int ecn, int ufo)
+                        int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
net/tap-win32.c
@@ -741,7 +741,7 @@ static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 }

 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                            int tso6, int ecn, int ufo)
+                            int tso6, int ecn, int ufo, int uso4, int uso6)
 {
 }
net/tap.c (21 lines changed)
@@ -57,6 +57,7 @@ typedef struct TAPState {
     bool write_poll;
     bool using_vnet_hdr;
     bool has_ufo;
+    bool has_uso;
     bool enabled;
     VHostNetState *vhost_net;
     unsigned host_vnet_hdr_len;
@@ -117,10 +118,11 @@ static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     const struct iovec *iovp = iov;
-    struct iovec iov_copy[iovcnt + 1];
+    g_autofree struct iovec *iov_copy = NULL;
     struct virtio_net_hdr_mrg_rxbuf hdr = { };

     if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
+        iov_copy = g_new(struct iovec, iovcnt + 1);
         iov_copy[0].iov_base = &hdr;
         iov_copy[0].iov_len =  s->host_vnet_hdr_len;
         memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
@@ -237,6 +239,15 @@ static bool tap_has_ufo(NetClientState *nc)
     return s->has_ufo;
 }

+static bool tap_has_uso(NetClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
+
+    return s->has_uso;
+}
+
 static bool tap_has_vnet_hdr(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
@@ -307,14 +318,14 @@ static int tap_set_vnet_be(NetClientState *nc, bool is_be)
 }

 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                            int tso6, int ecn, int ufo)
+                            int tso6, int ecn, int ufo, int uso4, int uso6)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
     if (s->fd < 0) {
         return;
     }

-    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
+    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }

 static void tap_exit_notify(Notifier *notifier, void *data)
@@ -384,6 +395,7 @@ static NetClientInfo net_tap_info = {
     .poll = tap_poll,
     .cleanup = tap_cleanup,
     .has_ufo = tap_has_ufo,
+    .has_uso = tap_has_uso,
     .has_vnet_hdr = tap_has_vnet_hdr,
     .has_vnet_hdr_len = tap_has_vnet_hdr_len,
     .get_using_vnet_hdr = tap_get_using_vnet_hdr,
@@ -413,8 +425,9 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
     s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
     s->using_vnet_hdr = false;
     s->has_ufo = tap_probe_has_ufo(s->fd);
+    s->has_uso = tap_probe_has_uso(s->fd);
     s->enabled = true;
-    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
+    tap_set_offload(&s->nc, 0, 0, 0, 0, 0, 0, 0);
     /*
      * Make sure host header length is set correctly in tap:
      * it might have been modified by another instance of qemu.
net/tap_int.h
@@ -37,7 +37,9 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
 int tap_probe_vnet_hdr(int fd, Error **errp);
 int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
-void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
+int tap_probe_has_uso(int fd);
+void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo,
+                        int uso4, int uso6);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
 int tap_fd_set_vnet_le(int fd, int vnet_is_le);
 int tap_fd_set_vnet_be(int fd, int vnet_is_be);
net/vhost-vdpa.c
@@ -75,11 +75,14 @@ const int vdpa_feature_bits[] = {
     VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6,
     VIRTIO_NET_F_GUEST_UFO,
+    VIRTIO_NET_F_GUEST_USO4,
+    VIRTIO_NET_F_GUEST_USO6,
     VIRTIO_NET_F_HASH_REPORT,
     VIRTIO_NET_F_HOST_ECN,
     VIRTIO_NET_F_HOST_TSO4,
     VIRTIO_NET_F_HOST_TSO6,
     VIRTIO_NET_F_HOST_UFO,
+    VIRTIO_NET_F_HOST_USO,
     VIRTIO_NET_F_MQ,
     VIRTIO_NET_F_MRG_RXBUF,
     VIRTIO_NET_F_MTU,
qapi/net.json
@@ -408,6 +408,60 @@
     'ifname': 'str',
     '*devname': 'str' } }

+##
+# @AFXDPMode:
+#
+# Attach mode for a default XDP program
+#
+# @skb: generic mode, no driver support necessary
+#
+# @native: DRV mode, program is attached to a driver, packets are passed to
+#     the socket without allocation of skb.
+#
+# Since: 8.2
+##
+{ 'enum': 'AFXDPMode',
+  'data': [ 'native', 'skb' ],
+  'if': 'CONFIG_AF_XDP' }
+
+##
+# @NetdevAFXDPOptions:
+#
+# AF_XDP network backend
+#
+# @ifname: The name of an existing network interface.
+#
+# @mode: Attach mode for a default XDP program.  If not specified, then
+#     'native' will be tried first, then 'skb'.
+#
+# @force-copy: Force XDP copy mode even if device supports zero-copy.
+#     (default: false)
+#
+# @queues: number of queues to be used for multiqueue interfaces (default: 1).
+#
+# @start-queue: Use @queues starting from this queue number (default: 0).
+#
+# @inhibit: Don't load a default XDP program, use one already loaded to
+#     the interface (default: false).  Requires @sock-fds.
+#
+# @sock-fds: A colon (:) separated list of file descriptors for already open
+#     but not bound AF_XDP sockets in the queue order.  One fd per queue.
+#     These descriptors should already be added into XDP socket map for
+#     corresponding queues.  Requires @inhibit.
+#
+# Since: 8.2
+##
+{ 'struct': 'NetdevAFXDPOptions',
+  'data': {
+    'ifname':       'str',
+    '*mode':        'AFXDPMode',
+    '*force-copy':  'bool',
+    '*queues':      'int',
+    '*start-queue': 'int',
+    '*inhibit':     'bool',
+    '*sock-fds':    'str' },
+  'if': 'CONFIG_AF_XDP' }
+
 ##
 # @NetdevVhostUserOptions:
 #
@@ -642,6 +696,7 @@
 # @vmnet-bridged: since 7.1
 # @stream: since 7.2
 # @dgram: since 7.2
+# @af-xdp: since 8.2
 #
 # Since: 2.7
 ##
@@ -649,6 +704,7 @@
   'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'stream',
             'dgram', 'vde', 'bridge', 'hubport', 'netmap', 'vhost-user',
             'vhost-vdpa',
+            { 'name': 'af-xdp', 'if': 'CONFIG_AF_XDP' },
             { 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' },
             { 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' },
             { 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] }
@@ -679,6 +735,8 @@
     'bridge':     'NetdevBridgeOptions',
     'hubport':    'NetdevHubPortOptions',
     'netmap':     'NetdevNetmapOptions',
+    'af-xdp':     { 'type': 'NetdevAFXDPOptions',
+                    'if': 'CONFIG_AF_XDP' },
     'vhost-user': 'NetdevVhostUserOptions',
     'vhost-vdpa': 'NetdevVhostVDPAOptions',
     'vmnet-host': { 'type': 'NetdevVmnetHostOptions',
qemu-options.hx
@@ -2882,6 +2882,19 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "                VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
     "                netmap device, defaults to '/dev/netmap')\n"
 #endif
+#ifdef CONFIG_AF_XDP
+    "-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off]\n"
+    "         [,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]\n"
+    "                attach to the existing network interface 'name' with AF_XDP socket\n"
+    "                use 'mode=MODE' to specify an XDP program attach mode\n"
+    "                use 'force-copy=on|off' to force XDP copy mode even if device supports zero-copy (default: off)\n"
+    "                use 'inhibit=on|off' to inhibit loading of a default XDP program (default: off)\n"
+    "                with inhibit=on,\n"
+    "                  use 'sock-fds' to provide file descriptors for already open AF_XDP sockets\n"
+    "                  added to a socket map in XDP program.  One socket per queue.\n"
+    "                use 'queues=n' to specify how many queues of a multiqueue interface should be used\n"
+    "                use 'start-queue=m' to specify the first queue that should be used\n"
+#endif
 #ifdef CONFIG_POSIX
     "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
     "                configure a vhost-user network, backed by a chardev 'dev'\n"
@@ -2927,6 +2940,9 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
 #ifdef CONFIG_POSIX
     "vhost-user|"
 #endif
@@ -2955,6 +2971,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+#ifdef CONFIG_AF_XDP
+    "af-xdp|"
+#endif
 #ifdef CONFIG_VMNET
     "vmnet-host|vmnet-shared|vmnet-bridged|"
 #endif
@@ -2962,7 +2981,7 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "                old way to initialize a host network interface\n"
     "                (use the -netdev option if possible instead)\n", QEMU_ARCH_ALL)
 SRST
-``-nic [tap|bridge|user|l2tpv3|vde|netmap|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
+``-nic [tap|bridge|user|l2tpv3|vde|netmap|af-xdp|vhost-user|socket][,...][,mac=macaddr][,model=mn]``
     This option is a shortcut for configuring both the on-board
     (default) guest NIC hardware and the host network backend in one go.
     The host backend options are the same as with the corresponding
@@ -3376,6 +3395,55 @@ SRST
         # launch QEMU instance
         |qemu_system| linux.img -nic vde,sock=/tmp/myswitch

+``-netdev af-xdp,id=str,ifname=name[,mode=native|skb][,force-copy=on|off][,queues=n][,start-queue=m][,inhibit=on|off][,sock-fds=x:y:...:z]``
+    Configure an AF_XDP backend attached to an existing network interface
+    'name' using an AF_XDP socket.  A specific attach mode for a default
+    XDP program can be forced with 'mode'; the default is best-effort,
+    where the likely most performant mode is chosen.  The number of
+    queues 'n' should generally match the number of queues in the
+    interface and defaults to 1.  Traffic arriving on non-configured
+    device queues will not be delivered to the network backend.
+
+    .. parsed-literal::
+
+        # set number of queues to 4
+        ethtool -L eth0 combined 4
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=4
+
+    The 'start-queue' option can be specified if a particular range of
+    queues [m, m + n) should be in use.  For example, this may be
+    necessary in order to use certain NICs in native mode.  The kernel
+    allows the driver to create a separate set of XDP queues on top of
+    the regular ones, and only these queues can be used for AF_XDP
+    sockets.  NICs that work this way may also require additional
+    traffic redirection with ethtool to these special queues.
+
+    .. parsed-literal::
+
+        # set number of queues to 1
+        ethtool -L eth0 combined 1
+        # redirect all the traffic to the second queue (id: 1)
+        # note: drivers may require non-empty key/mask pair.
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:00 m FF:FF:FF:FF:FF:FE action 1
+        ethtool -N eth0 flow-type ether \\
+            dst 00:00:00:00:00:01 m FF:FF:FF:FF:FF:FE action 1
+        # launch QEMU instance
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=1,start-queue=1
+
+    The XDP program can also be loaded externally.  In this case the
+    'inhibit' option should be set to 'on' and 'sock-fds' should provide
+    file descriptors for already open but not yet bound XDP sockets that
+    have been added to a socket map for the corresponding queues.  One
+    socket per queue.
+
+    .. parsed-literal::
+
+        |qemu_system| linux.img -device virtio-net-pci,netdev=n1 \\
+            -netdev af-xdp,id=n1,ifname=eth0,queues=3,inhibit=on,sock-fds=15:16:17
+
 ``-netdev vhost-user,chardev=id[,vhostforce=on|off][,queues=n]``
     Establish a vhost-user netdev, backed by a chardev id.  The chardev
     should be a unix domain socket backed one.  The vhost-user uses a
scripts/ci/org.centos/stream/8/x86_64/configure
@@ -35,6 +35,7 @@
     --block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
     --with-coroutine=ucontext \
     --tls-priority=@QEMU,SYSTEM \
+    --disable-af-xdp \
     --disable-attr \
     --disable-auth-pam \
     --disable-avx2 \
scripts/meson-buildoptions.sh
@@ -76,6 +76,7 @@ meson_options_help() {
   printf "%s\n" 'disabled with --disable-FEATURE, default is enabled if available'
   printf "%s\n" '(unless built with --without-default-features):'
   printf "%s\n" ''
+  printf "%s\n" '  af-xdp          AF_XDP network backend support'
   printf "%s\n" '  alsa            ALSA sound support'
   printf "%s\n" '  attr            attr/xattr support'
   printf "%s\n" '  auth-pam        PAM access control'
@@ -208,6 +209,8 @@ meson_options_help() {
 }
 _meson_option_parse() {
   case $1 in
+    --enable-af-xdp) printf "%s" -Daf_xdp=enabled ;;
+    --disable-af-xdp) printf "%s" -Daf_xdp=disabled ;;
     --enable-alsa) printf "%s" -Dalsa=enabled ;;
     --disable-alsa) printf "%s" -Dalsa=disabled ;;
     --enable-attr) printf "%s" -Dattr=enabled ;;
tests/docker/dockerfiles/alpine.docker
@@ -59,6 +59,7 @@ RUN apk update && \
         libtasn1-dev \
         liburing-dev \
         libusb-dev \
+        libxdp-dev \
         linux-pam-dev \
         llvm \
         lttng-ust-dev \
tests/docker/dockerfiles/centos8.docker
@@ -75,6 +75,7 @@ RUN dnf distro-sync -y && \
         libubsan \
         liburing-devel \
         libusbx-devel \
+        libxdp-devel \
         libzstd-devel \
         llvm \
         lttng-ust-devel \
tests/docker/dockerfiles/debian-amd64-cross.docker
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         g++-x86-64-linux-gnu \
         gcc-x86-64-linux-gnu \
         libaio-dev:amd64 \
-        libasan5:amd64 \
+        libasan6:amd64 \
         libasound2-dev:amd64 \
         libattr1-dev:amd64 \
         libbpf-dev:amd64 \
tests/docker/dockerfiles/debian-amd64.docker
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         git \
         hostname \
         libaio-dev \
-        libasan5 \
+        libasan6 \
         libasound2-dev \
         libattr1-dev \
         libbpf-dev \
tests/docker/dockerfiles/debian-arm64-cross.docker
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         g++-aarch64-linux-gnu \
         gcc-aarch64-linux-gnu \
         libaio-dev:arm64 \
-        libasan5:arm64 \
+        libasan6:arm64 \
         libasound2-dev:arm64 \
         libattr1-dev:arm64 \
         libbpf-dev:arm64 \
tests/docker/dockerfiles/debian-armel-cross.docker
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         g++-arm-linux-gnueabi \
         gcc-arm-linux-gnueabi \
         libaio-dev:armel \
-        libasan5:armel \
+        libasan6:armel \
         libasound2-dev:armel \
         libattr1-dev:armel \
         libbpf-dev:armel \
tests/docker/dockerfiles/debian-armhf-cross.docker
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         g++-arm-linux-gnueabihf \
         gcc-arm-linux-gnueabihf \
         libaio-dev:armhf \
-        libasan5:armhf \
+        libasan6:armhf \
         libasound2-dev:armhf \
         libattr1-dev:armhf \
         libbpf-dev:armhf \
tests/docker/dockerfiles/debian-ppc64el-cross.docker
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         g++-powerpc64le-linux-gnu \
         gcc-powerpc64le-linux-gnu \
         libaio-dev:ppc64el \
-        libasan5:ppc64el \
+        libasan6:ppc64el \
         libasound2-dev:ppc64el \
         libattr1-dev:ppc64el \
         libbpf-dev:ppc64el \
tests/docker/dockerfiles/debian-s390x-cross.docker
@@ -84,7 +84,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         g++-s390x-linux-gnu \
         gcc-s390x-linux-gnu \
         libaio-dev:s390x \
-        libasan5:s390x \
+        libasan6:s390x \
         libasound2-dev:s390x \
         libattr1-dev:s390x \
         libbpf-dev:s390x \
tests/docker/dockerfiles/fedora.docker
@@ -82,6 +82,7 @@ exec "$@"\n' > /usr/bin/nosync && \
         libubsan \
         liburing-devel \
         libusbx-devel \
+        libxdp-devel \
         libzstd-devel \
         llvm \
         lttng-ust-devel \
tests/docker/dockerfiles/opensuse-leap.docker
@@ -40,7 +40,7 @@ RUN zypper update -y && \
         libSDL2-devel \
         libSDL2_image-devel \
         libaio-devel \
-        libasan6 \
+        libasan8 \
         libattr-devel \
         libbpf-devel \
         libbz2-devel \
tests/docker/dockerfiles/ubuntu2004.docker
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         git \
         hostname \
         libaio-dev \
-        libasan5 \
+        libasan6 \
         libasound2-dev \
         libattr1-dev \
         libbrlapi-dev \
tests/docker/dockerfiles/ubuntu2204.docker
@@ -32,7 +32,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         git \
         hostname \
         libaio-dev \
-        libasan5 \
+        libasan6 \
         libasound2-dev \
         libattr1-dev \
         libbpf-dev \
tests/lcitool/libvirt-ci (submodule)
@@ -1 +1 @@
-Subproject commit bbd55b4d18cce8f89b5167675e434a6941315634
+Subproject commit 5f84a21881577a5fb56cc956f6fe4e2abd6fcff0
tests/lcitool/projects/qemu.yml
@@ -69,6 +69,7 @@ packages:
 - liburing
 - libusbx
 - libvdeplug
+- libxdp
 - libzstd
 - llvm
 - lttng-ust
tests/qtest/libqos/igb.c
@@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj)
                         E1000_RAH_AV | E1000_RAH_POOL_1 |
                         le16_to_cpu(*(uint16_t *)(address + 4)));

+    /* Set supported receive descriptor mode */
+    e1000e_macreg_write(&d->e1000e,
+                        E1000_SRRCTL(0),
+                        E1000_SRRCTL_DESCTYPE_ADV_ONEBUF);
+
     /* Enable receive */
     e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN);
     e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);