-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEIV1G9IJGaJ7HfzVi7wSWWzmNYhEFAmZewo4ACgkQ7wSWWzmN
 YhHhxgf/ZaECxru4fP8wi34XdSG/PR+BF+W5M9gZIRGrHg3vIf3/LRTpZTDccbRN
 Qpwtypr9O6/AWG9Os80rn7alsmMDxN8PDDNLa9T3wf5pJUQSyQ87Yy0MiuTNPSKD
 HKYUIfIlbFCM5WUW4huMmg98gKTgnzZMqOoRyMFZitbkR59qCm+Exws4HtXvCH68
 3k4lgvnFccmzO9iIzaOUIPs+Yf04Kw/FrY0Q/6nypvqbF2W80Md6w02JMQuTLwdF
 Guxeg/n6g0NLvCBbkjiM2VWfTaWJYbwFSwRTAMxM/geqh7qAgGsmD0N5lPlgqRDy
 uAy2GvFyrwzcD0lYqf0/fRK0Go0HPA==
 =J70K
 -----END PGP SIGNATURE-----

Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEEIV1G9IJGaJ7HfzVi7wSWWzmNYhEFAmZewo4ACgkQ7wSWWzmN
# YhHhxgf/ZaECxru4fP8wi34XdSG/PR+BF+W5M9gZIRGrHg3vIf3/LRTpZTDccbRN
# Qpwtypr9O6/AWG9Os80rn7alsmMDxN8PDDNLa9T3wf5pJUQSyQ87Yy0MiuTNPSKD
# HKYUIfIlbFCM5WUW4huMmg98gKTgnzZMqOoRyMFZitbkR59qCm+Exws4HtXvCH68
# 3k4lgvnFccmzO9iIzaOUIPs+Yf04Kw/FrY0Q/6nypvqbF2W80Md6w02JMQuTLwdF
# Guxeg/n6g0NLvCBbkjiM2VWfTaWJYbwFSwRTAMxM/geqh7qAgGsmD0N5lPlgqRDy
# uAy2GvFyrwzcD0lYqf0/fRK0Go0HPA==
# =J70K
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 04 Jun 2024 02:30:22 AM CDT
# gpg:                using RSA key 215D46F48246689EC77F3562EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* tag 'net-pull-request' of https://github.com/jasowang/qemu:
  ebpf: Added traces back. Changed source set for eBPF to 'system'.
  virtio-net: drop too short packets early
  ebpf: Add a separate target for skeleton
  ebpf: Refactor tun_rss_steering_prog()
  ebpf: Return 0 when configuration fails
  ebpf: Fix RSS error handling
  virtio-net: Do not write hashes to peer buffer
  virtio-net: Always set populate_hash
  virtio-net: Unify the logic to update NIC state for RSS
  virtio-net: Disable RSS on reset
  virtio-net: Shrink header byte swapping buffer
  virtio-net: Copy header only when necessary
  virtio-net: Add only one queue pair when realizing
  virtio-net: Do not propagate ebpf-rss-fds errors
  tap: Shrink zeroed virtio-net header
  tap: Call tap_receive_iov() from tap_receive()
  net: Remove receive_raw()
  net: Move virtio-net header length assertion
  tap: Remove qemu_using_vnet_hdr()
  tap: Remove tap_probe_vnet_hdr_len()

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2024-06-04 12:28:57 -05:00
commit 6e47f7cfcd
20 changed files with 974 additions and 1130 deletions

View File

@ -25,6 +25,8 @@
#include "ebpf/rss.bpf.skeleton.h"
#include "ebpf/ebpf.h"
#include "trace.h"
void ebpf_rss_init(struct EBPFRSSContext *ctx)
{
if (ctx != NULL) {
@ -55,18 +57,21 @@ static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
PROT_READ | PROT_WRITE, MAP_SHARED,
ctx->map_configuration, 0);
if (ctx->mmap_configuration == MAP_FAILED) {
trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
return false;
}
ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
PROT_READ | PROT_WRITE, MAP_SHARED,
ctx->map_toeplitz_key, 0);
if (ctx->mmap_toeplitz_key == MAP_FAILED) {
trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
goto toeplitz_fail;
}
ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
PROT_READ | PROT_WRITE, MAP_SHARED,
ctx->map_indirections_table, 0);
if (ctx->mmap_indirections_table == MAP_FAILED) {
trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
goto indirection_fail;
}
@ -108,12 +113,14 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
rss_bpf_ctx = rss_bpf__open();
if (rss_bpf_ctx == NULL) {
trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object");
goto error;
}
bpf_program__set_type(rss_bpf_ctx->progs.tun_rss_steering_prog, BPF_PROG_TYPE_SOCKET_FILTER);
if (rss_bpf__load(rss_bpf_ctx)) {
trace_ebpf_error("eBPF RSS", "can not load RSS program");
goto error;
}

File diff suppressed because it is too large Load Diff

1
ebpf/trace.h Normal file
View File

@ -0,0 +1 @@
#include "trace/trace-ebpf.h"

View File

@ -352,7 +352,6 @@ e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr)
for (i = 0; i < s->conf.peers.queues; i++) {
nc = qemu_get_subqueue(s->nic, i);
qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr));
qemu_using_vnet_hdr(nc->peer, true);
}
}

View File

@ -349,7 +349,6 @@ igb_init_net_peer(IGBState *s, PCIDevice *pci_dev, uint8_t *macaddr)
for (i = 0; i < s->conf.peers.queues; i++) {
nc = qemu_get_subqueue(s->nic, i);
qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr));
qemu_using_vnet_hdr(nc->peer, true);
}
}

View File

@ -582,7 +582,7 @@ static void net_tx_pkt_sendv(
{
NetClientState *nc = opaque;
if (qemu_get_using_vnet_hdr(nc->peer)) {
if (qemu_get_vnet_hdr_len(nc->peer)) {
qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
} else {
qemu_sendv_packet(nc, iov, iov_cnt);
@ -812,7 +812,7 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
{
bool offload = qemu_get_using_vnet_hdr(nc->peer);
bool offload = qemu_get_vnet_hdr_len(nc->peer);
return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
}

View File

@ -360,7 +360,8 @@ static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
* can't do it, we fallback onto fixing the headers in the core
* virtio-net code.
*/
n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
n->needs_vnet_hdr_swap = n->has_vnet_hdr &&
virtio_net_set_vnet_endian(vdev, n->nic->ncs,
queue_pairs, true);
} else if (virtio_net_started(n, vdev->status)) {
/* After using the device, we need to reset the network backend to
@ -599,40 +600,6 @@ static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
}
}
static void virtio_net_reset(VirtIODevice *vdev)
{
VirtIONet *n = VIRTIO_NET(vdev);
int i;
/* Reset back to compatibility mode */
n->promisc = 1;
n->allmulti = 0;
n->alluni = 0;
n->nomulti = 0;
n->nouni = 0;
n->nobcast = 0;
/* multiqueue is disabled by default */
n->curr_queue_pairs = 1;
timer_del(n->announce_timer.tm);
n->announce_timer.round = 0;
n->status &= ~VIRTIO_NET_S_ANNOUNCE;
/* Flush any MAC and VLAN filter table state */
n->mac_table.in_use = 0;
n->mac_table.first_multi = 0;
n->mac_table.multi_overflow = 0;
n->mac_table.uni_overflow = 0;
memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
memset(n->vlans, 0, MAX_VLAN >> 3);
/* Flush any async TX */
for (i = 0; i < n->max_queue_pairs; i++) {
flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
}
}
static void peer_test_vnet_hdr(VirtIONet *n)
{
NetClientState *nc = qemu_get_queue(n->nic);
@ -675,11 +642,6 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
n->mergeable_rx_bufs = mergeable_rx_bufs;
/*
* Note: when extending the vnet header, please make sure to
* change the vnet header copying logic in virtio_net_flush_tx()
* as well.
*/
if (version_1) {
n->guest_hdr_len = hash_report ?
sizeof(struct virtio_net_hdr_v1_hash) :
@ -689,6 +651,7 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
n->guest_hdr_len = n->mergeable_rx_bufs ?
sizeof(struct virtio_net_hdr_mrg_rxbuf) :
sizeof(struct virtio_net_hdr);
n->rss_data.populate_hash = false;
}
for (i = 0; i < n->max_queue_pairs; i++) {
@ -1270,18 +1233,6 @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
}
}
static void virtio_net_detach_epbf_rss(VirtIONet *n);
static void virtio_net_disable_rss(VirtIONet *n)
{
if (n->rss_data.enabled) {
trace_virtio_net_rss_disable();
}
n->rss_data.enabled = false;
virtio_net_detach_epbf_rss(n);
}
static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
@ -1329,24 +1280,56 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n)
virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
/*
 * Apply the current n->rss_data settings to the device.
 *
 * RSS disabled: detach the eBPF RSS program and emit the "disable" trace.
 *
 * RSS enabled:
 *  - enabled_software_rss starts out equal to populate_hash;
 *  - if populate_hash is set, the eBPF program is detached;
 *  - otherwise an eBPF attach is attempted.  When that fails and the peer
 *    has a vhost-net instance, only a warning is printed; when it fails
 *    without vhost, a warning is printed and software RSS is switched on;
 *  - the "enable" trace is emitted in every enabled case.
 */
static void virtio_net_commit_rss_config(VirtIONet *n)
{
    if (!n->rss_data.enabled) {
        virtio_net_detach_epbf_rss(n);
        trace_virtio_net_rss_disable();
        return;
    }

    n->rss_data.enabled_software_rss = n->rss_data.populate_hash;

    if (n->rss_data.populate_hash) {
        virtio_net_detach_epbf_rss(n);
    } else if (!virtio_net_attach_epbf_rss(n)) {
        /* eBPF could not be attached: the outcome depends on the backend. */
        if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
            warn_report("Can't load eBPF RSS for vhost");
        } else {
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                sizeof(n->rss_data.key));
}
/*
 * Turn RSS off.
 *
 * Does nothing when RSS is already disabled; otherwise clears the enabled
 * flag and lets virtio_net_commit_rss_config() apply the change.
 */
static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        n->rss_data.enabled = false;
        virtio_net_commit_rss_config(n);
    }
}
static bool virtio_net_load_ebpf_fds(VirtIONet *n)
{
int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
int ret = true;
int i = 0;
ERRP_GUARD();
if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
error_setg(errp,
"Expected %d file descriptors but got %d",
EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
warn_report("Expected %d file descriptors but got %d",
EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
return false;
}
for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
if (*errp) {
fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i],
&error_warn);
if (fds[i] < 0) {
ret = false;
goto exit;
}
@ -1355,7 +1338,7 @@ static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]);
exit:
if (!ret || *errp) {
if (!ret) {
for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
close(fds[i]);
}
@ -1364,13 +1347,12 @@ exit:
return ret;
}
static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
static bool virtio_net_load_ebpf(VirtIONet *n)
{
bool ret = false;
if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
if (!(n->ebpf_rss_fds
&& virtio_net_load_ebpf_fds(n, errp))) {
if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) {
ret = ebpf_rss_load(&n->ebpf_rss);
}
}
@ -1496,28 +1478,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n,
goto error;
}
n->rss_data.enabled = true;
if (!n->rss_data.populate_hash) {
if (!virtio_net_attach_epbf_rss(n)) {
/* EBPF must be loaded for vhost */
if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
warn_report("Can't load eBPF RSS for vhost");
goto error;
}
/* fallback to software RSS */
warn_report("Can't load eBPF RSS - fallback to software RSS");
n->rss_data.enabled_software_rss = true;
}
} else {
/* use software RSS for hash populating */
/* and detach eBPF if was loaded before */
virtio_net_detach_epbf_rss(n);
n->rss_data.enabled_software_rss = true;
}
trace_virtio_net_rss_enable(n->rss_data.hash_types,
n->rss_data.indirections_len,
temp.b);
virtio_net_commit_rss_config(n);
return queue_pairs;
error:
trace_virtio_net_rss_error(err_msg, err_value);
@ -1869,16 +1830,9 @@ static uint8_t virtio_net_get_hash_type(bool hasip4,
return 0xff;
}
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
uint32_t hash)
{
struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
hdr->hash_value = hash;
hdr->hash_report = report;
}
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
size_t size)
size_t size,
struct virtio_net_hdr_v1_hash *hdr)
{
VirtIONet *n = qemu_get_nic_opaque(nc);
unsigned int index = nc->queue_index, new_index = index;
@ -1909,7 +1863,8 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
n->rss_data.hash_types);
if (net_hash_type > NetPktRssIpV6UdpEx) {
if (n->rss_data.populate_hash) {
virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
hdr->hash_report = 0;
}
return n->rss_data.redirect ? n->rss_data.default_queue : -1;
}
@ -1917,7 +1872,8 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
if (n->rss_data.populate_hash) {
virtio_set_packet_hash(buf, reports[net_hash_type], hash);
hdr->hash_value = hash;
hdr->hash_report = reports[net_hash_type];
}
if (n->rss_data.redirect) {
@ -1937,7 +1893,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
size_t lens[VIRTQUEUE_MAX_SIZE];
struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
struct virtio_net_hdr_mrg_rxbuf mhdr;
struct virtio_net_hdr_v1_hash extra_hdr;
unsigned mhdr_cnt = 0;
size_t offset, i, guest_offset, j;
ssize_t err;
@ -1947,7 +1903,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
}
if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
int index = virtio_net_process_rss(nc, buf, size);
int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
if (index >= 0) {
NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
return virtio_net_receive_rcu(nc2, buf, size, true);
@ -2007,15 +1963,17 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
if (n->mergeable_rx_bufs) {
mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
sg, elem->in_num,
offsetof(typeof(mhdr), num_buffers),
sizeof(mhdr.num_buffers));
offsetof(typeof(extra_hdr), hdr.num_buffers),
sizeof(extra_hdr.hdr.num_buffers));
}
receive_header(n, sg, elem->in_num, buf, size);
if (n->rss_data.populate_hash) {
offset = sizeof(mhdr);
offset = offsetof(typeof(extra_hdr), hash_value);
iov_from_buf(sg, elem->in_num, offset,
buf + offset, n->host_hdr_len - sizeof(mhdr));
(char *)&extra_hdr + offset,
sizeof(extra_hdr.hash_value) +
sizeof(extra_hdr.hash_report));
}
offset = n->host_hdr_len;
total += n->guest_hdr_len;
@ -2045,10 +2003,11 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
}
if (mhdr_cnt) {
virtio_stw_p(vdev, &mhdr.num_buffers, i);
virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
iov_from_buf(mhdr_sg, mhdr_cnt,
0,
&mhdr.num_buffers, sizeof mhdr.num_buffers);
&extra_hdr.hdr.num_buffers,
sizeof extra_hdr.hdr.num_buffers);
}
for (j = 0; j < i; j++) {
@ -2738,7 +2697,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
ssize_t ret;
unsigned int out_num;
struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
struct virtio_net_hdr_v1_hash vhdr;
struct virtio_net_hdr vhdr;
elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
if (!elem) {
@ -2749,32 +2708,25 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
out_sg = elem->out_sg;
if (out_num < 1) {
virtio_error(vdev, "virtio-net header not in first element");
virtqueue_detach_element(q->tx_vq, elem, 0);
g_free(elem);
return -EINVAL;
goto detach;
}
if (n->has_vnet_hdr) {
if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) <
n->guest_hdr_len) {
if (n->needs_vnet_hdr_swap) {
if (iov_to_buf(out_sg, out_num, 0, &vhdr, sizeof(vhdr)) <
sizeof(vhdr)) {
virtio_error(vdev, "virtio-net header incorrect");
virtqueue_detach_element(q->tx_vq, elem, 0);
g_free(elem);
return -EINVAL;
goto detach;
}
if (n->needs_vnet_hdr_swap) {
virtio_net_hdr_swap(vdev, (void *) &vhdr);
sg2[0].iov_base = &vhdr;
sg2[0].iov_len = n->guest_hdr_len;
out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
out_sg, out_num,
n->guest_hdr_len, -1);
if (out_num == VIRTQUEUE_MAX_SIZE) {
goto drop;
}
out_num += 1;
out_sg = sg2;
virtio_net_hdr_swap(vdev, &vhdr);
sg2[0].iov_base = &vhdr;
sg2[0].iov_len = sizeof(vhdr);
out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, out_sg, out_num,
sizeof(vhdr), -1);
if (out_num == VIRTQUEUE_MAX_SIZE) {
goto drop;
}
out_num += 1;
out_sg = sg2;
}
/*
* If host wants to see the guest header as is, we can
@ -2791,6 +2743,11 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
n->guest_hdr_len, -1);
out_num = sg_num;
out_sg = sg;
if (out_num < 1) {
virtio_error(vdev, "virtio-net nothing to send");
goto detach;
}
}
ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
@ -2811,6 +2768,11 @@ drop:
}
}
return num_packets;
detach:
virtqueue_detach_element(q->tx_vq, elem, 0);
g_free(elem);
return -EINVAL;
}
static void virtio_net_tx_timer(void *opaque);
@ -3120,26 +3082,7 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
}
}
if (n->rss_data.enabled) {
n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
if (!n->rss_data.populate_hash) {
if (!virtio_net_attach_epbf_rss(n)) {
if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
warn_report("Can't post-load eBPF RSS for vhost");
} else {
warn_report("Can't post-load eBPF RSS - "
"fallback to software RSS");
n->rss_data.enabled_software_rss = true;
}
}
}
trace_virtio_net_rss_enable(n->rss_data.hash_types,
n->rss_data.indirections_len,
sizeof(n->rss_data.key));
} else {
trace_virtio_net_rss_disable();
}
virtio_net_commit_rss_config(n);
return 0;
}
@ -3746,9 +3689,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
n->net_conf.tx_queue_size);
for (i = 0; i < n->max_queue_pairs; i++) {
virtio_net_add_queue(n, i);
}
virtio_net_add_queue(n, 0);
n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
@ -3778,9 +3719,6 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
peer_test_vnet_hdr(n);
if (peer_has_vnet_hdr(n)) {
for (i = 0; i < n->max_queue_pairs; i++) {
qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
}
n->host_hdr_len = sizeof(struct virtio_net_hdr);
} else {
n->host_hdr_len = 0;
@ -3812,7 +3750,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
net_rx_pkt_init(&n->rx_pkt);
if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
virtio_net_load_ebpf(n, errp);
virtio_net_load_ebpf(n);
}
}
@ -3860,6 +3798,42 @@ static void virtio_net_device_unrealize(DeviceState *dev)
virtio_cleanup(vdev);
}
/*
 * Reset the virtio-net device state.
 *
 * Restores the receive-filter flags, drops back to a single queue pair,
 * cancels the announce timer and clears the announce status bit, empties the
 * MAC and VLAN filter tables, reloads the configured MAC address, calls
 * flush_or_purge_queued_packets() on every subqueue and finally disables RSS.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = n->alluni = 0;
    n->nomulti = n->nouni = n->nobcast = 0;

    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;

    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(n->mac, &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (int q = 0; q < n->max_queue_pairs; q++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, q));
    }

    virtio_net_disable_rss(n);
}
static void virtio_net_instance_init(Object *obj)
{
VirtIONet *n = VIRTIO_NET(obj);

View File

@ -2091,8 +2091,6 @@ static void vmxnet3_net_init(VMXNET3State *s)
if (s->peer_has_vhdr) {
qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
sizeof(struct virtio_net_hdr));
qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
}
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);

View File

@ -57,8 +57,6 @@ typedef bool (HasUfo)(NetClientState *);
typedef bool (HasUso)(NetClientState *);
typedef bool (HasVnetHdr)(NetClientState *);
typedef bool (HasVnetHdrLen)(NetClientState *, int);
typedef bool (GetUsingVnetHdr)(NetClientState *);
typedef void (UsingVnetHdr)(NetClientState *, bool);
typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int);
typedef int (GetVnetHdrLen)(NetClientState *);
typedef void (SetVnetHdrLen)(NetClientState *, int);
@ -74,7 +72,6 @@ typedef struct NetClientInfo {
NetClientDriver type;
size_t size;
NetReceive *receive;
NetReceive *receive_raw;
NetReceiveIOV *receive_iov;
NetCanReceive *can_receive;
NetStart *start;
@ -88,10 +85,7 @@ typedef struct NetClientInfo {
HasUso *has_uso;
HasVnetHdr *has_vnet_hdr;
HasVnetHdrLen *has_vnet_hdr_len;
GetUsingVnetHdr *get_using_vnet_hdr;
UsingVnetHdr *using_vnet_hdr;
SetOffload *set_offload;
GetVnetHdrLen *get_vnet_hdr_len;
SetVnetHdrLen *set_vnet_hdr_len;
SetVnetLE *set_vnet_le;
SetVnetBE *set_vnet_be;
@ -194,8 +188,6 @@ bool qemu_has_ufo(NetClientState *nc);
bool qemu_has_uso(NetClientState *nc);
bool qemu_has_vnet_hdr(NetClientState *nc);
bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
bool qemu_get_using_vnet_hdr(NetClientState *nc);
void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
int ecn, int ufo, int uso4, int uso6);
int qemu_get_vnet_hdr_len(NetClientState *nc);

View File

@ -154,10 +154,8 @@ static ssize_t filter_dump_receive_iov(NetFilterState *nf, NetClientState *sndr,
int iovcnt, NetPacketSent *sent_cb)
{
NetFilterDumpState *nfds = FILTER_DUMP(nf);
int offset = qemu_get_using_vnet_hdr(nf->netdev) ?
qemu_get_vnet_hdr_len(nf->netdev) : 0;
dump_receive_iov(&nfds->ds, iov, iovcnt, offset);
dump_receive_iov(&nfds->ds, iov, iovcnt, qemu_get_vnet_hdr_len(nf->netdev));
return 0;
}

View File

@ -56,6 +56,7 @@
#include "net/filter.h"
#include "qapi/string-output-visitor.h"
#include "qapi/qobject-input-visitor.h"
#include "standard-headers/linux/virtio_net.h"
/* Net bridge is currently not supported for W32. */
#if !defined(_WIN32)
@ -529,24 +530,6 @@ bool qemu_has_vnet_hdr_len(NetClientState *nc, int len)
return nc->info->has_vnet_hdr_len(nc, len);
}
bool qemu_get_using_vnet_hdr(NetClientState *nc)
{
if (!nc || !nc->info->get_using_vnet_hdr) {
return false;
}
return nc->info->get_using_vnet_hdr(nc);
}
void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
{
if (!nc || !nc->info->using_vnet_hdr) {
return;
}
nc->info->using_vnet_hdr(nc, enable);
}
void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
int ecn, int ufo, int uso4, int uso6)
{
@ -559,11 +542,7 @@ void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
int qemu_get_vnet_hdr_len(NetClientState *nc)
{
if (!nc || !nc->info->get_vnet_hdr_len) {
return 0;
}
return nc->info->get_vnet_hdr_len(nc);
return nc->vnet_hdr_len;
}
void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
@ -572,6 +551,10 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
return;
}
assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
len == sizeof(struct virtio_net_hdr) ||
len == sizeof(struct virtio_net_hdr_v1_hash));
nc->vnet_hdr_len = len;
nc->info->set_vnet_hdr_len(nc, len);
}
@ -804,11 +787,7 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
offset = iov_to_buf(iov, iovcnt, 0, buf, offset);
}
if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
ret = nc->info->receive_raw(nc, buffer, offset);
} else {
ret = nc->info->receive(nc, buffer, offset);
}
ret = nc->info->receive(nc, buffer, offset);
g_free(buf);
return ret;
@ -823,6 +802,8 @@ static ssize_t qemu_deliver_packet_iov(NetClientState *sender,
MemReentrancyGuard *owned_reentrancy_guard;
NetClientState *nc = opaque;
int ret;
struct virtio_net_hdr_v1_hash vnet_hdr = { };
g_autofree struct iovec *iov_copy = NULL;
if (nc->link_down) {
@ -841,7 +822,15 @@ static ssize_t qemu_deliver_packet_iov(NetClientState *sender,
owned_reentrancy_guard->engaged_in_io = true;
}
if (nc->info->receive_iov && !(flags & QEMU_NET_PACKET_FLAG_RAW)) {
if ((flags & QEMU_NET_PACKET_FLAG_RAW) && nc->vnet_hdr_len) {
iov_copy = g_new(struct iovec, iovcnt + 1);
iov_copy[0].iov_base = &vnet_hdr;
iov_copy[0].iov_len = nc->vnet_hdr_len;
memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
iov = iov_copy;
}
if (nc->info->receive_iov) {
ret = nc->info->receive_iov(nc, iov, iovcnt);
} else {
ret = nc_sendv_compat(nc, iov, iovcnt, flags);

View File

@ -351,10 +351,6 @@ static bool netmap_has_vnet_hdr(NetClientState *nc)
return netmap_has_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
}
static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
{
}
static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
{
NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
@ -393,7 +389,6 @@ static NetClientInfo net_netmap_info = {
.has_ufo = netmap_has_vnet_hdr,
.has_vnet_hdr = netmap_has_vnet_hdr,
.has_vnet_hdr_len = netmap_has_vnet_hdr_len,
.using_vnet_hdr = netmap_using_vnet_hdr,
.set_offload = netmap_set_offload,
.set_vnet_hdr_len = netmap_set_vnet_hdr_len,
};

View File

@ -217,11 +217,6 @@ int tap_probe_has_uso(int fd)
return 0;
}
int tap_probe_vnet_hdr_len(int fd, int len)
{
return 0;
}
void tap_fd_set_vnet_hdr_len(int fd, int len)
{
}

View File

@ -185,26 +185,6 @@ int tap_probe_has_uso(int fd)
return 1;
}
/* Verify that we can assign given length */
int tap_probe_vnet_hdr_len(int fd, int len)
{
int orig;
if (ioctl(fd, TUNGETVNETHDRSZ, &orig) == -1) {
return 0;
}
if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) {
return 0;
}
/* Restore original length: we can't handle failure. */
if (ioctl(fd, TUNSETVNETHDRSZ, &orig) == -1) {
fprintf(stderr, "TUNGETVNETHDRSZ ioctl() failed: %s. Exiting.\n",
strerror(errno));
abort();
return -errno;
}
return 1;
}
void tap_fd_set_vnet_hdr_len(int fd, int len)
{
if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) {

View File

@ -221,11 +221,6 @@ int tap_probe_has_uso(int fd)
return 0;
}
int tap_probe_vnet_hdr_len(int fd, int len)
{
return 0;
}
void tap_fd_set_vnet_hdr_len(int fd, int len)
{
}

View File

@ -52,11 +52,6 @@ int tap_probe_has_uso(int fd)
return 0;
}
int tap_probe_vnet_hdr_len(int fd, int len)
{
return 0;
}
void tap_fd_set_vnet_hdr_len(int fd, int len)
{
}

View File

@ -119,7 +119,7 @@ static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
TAPState *s = DO_UPCAST(TAPState, nc, nc);
const struct iovec *iovp = iov;
g_autofree struct iovec *iov_copy = NULL;
struct virtio_net_hdr_mrg_rxbuf hdr = { };
struct virtio_net_hdr hdr = { };
if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
iov_copy = g_new(struct iovec, iovcnt + 1);
@ -133,39 +133,14 @@ static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
return tap_write_packet(s, iovp, iovcnt);
}
static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
struct iovec iov[2];
int iovcnt = 0;
struct virtio_net_hdr_mrg_rxbuf hdr = { };
if (s->host_vnet_hdr_len) {
iov[iovcnt].iov_base = &hdr;
iov[iovcnt].iov_len = s->host_vnet_hdr_len;
iovcnt++;
}
iov[iovcnt].iov_base = (char *)buf;
iov[iovcnt].iov_len = size;
iovcnt++;
return tap_write_packet(s, iov, iovcnt);
}
static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
struct iovec iov[1];
struct iovec iov = {
.iov_base = (void *)buf,
.iov_len = size
};
if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
return tap_receive_raw(nc, buf, size);
}
iov[0].iov_base = (char *)buf;
iov[0].iov_len = size;
return tap_write_packet(s, iov, 1);
return tap_receive_iov(nc, &iov, 1);
}
#ifndef __sun__
@ -259,18 +234,7 @@ static bool tap_has_vnet_hdr(NetClientState *nc)
static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
return !!tap_probe_vnet_hdr_len(s->fd, len);
}
static int tap_get_vnet_hdr_len(NetClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
return s->host_vnet_hdr_len;
return tap_has_vnet_hdr(nc);
}
static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
@ -278,29 +242,10 @@ static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
TAPState *s = DO_UPCAST(TAPState, nc, nc);
assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
len == sizeof(struct virtio_net_hdr) ||
len == sizeof(struct virtio_net_hdr_v1_hash));
tap_fd_set_vnet_hdr_len(s->fd, len);
s->host_vnet_hdr_len = len;
}
static bool tap_get_using_vnet_hdr(NetClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
return s->using_vnet_hdr;
}
static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
s->using_vnet_hdr = using_vnet_hdr;
s->using_vnet_hdr = true;
}
static int tap_set_vnet_le(NetClientState *nc, bool is_le)
@ -390,7 +335,6 @@ static NetClientInfo net_tap_info = {
.type = NET_CLIENT_DRIVER_TAP,
.size = sizeof(TAPState),
.receive = tap_receive,
.receive_raw = tap_receive_raw,
.receive_iov = tap_receive_iov,
.poll = tap_poll,
.cleanup = tap_cleanup,
@ -398,10 +342,7 @@ static NetClientInfo net_tap_info = {
.has_uso = tap_has_uso,
.has_vnet_hdr = tap_has_vnet_hdr,
.has_vnet_hdr_len = tap_has_vnet_hdr_len,
.get_using_vnet_hdr = tap_get_using_vnet_hdr,
.using_vnet_hdr = tap_using_vnet_hdr,
.set_offload = tap_set_offload,
.get_vnet_hdr_len = tap_get_vnet_hdr_len,
.set_vnet_hdr_len = tap_set_vnet_hdr_len,
.set_vnet_le = tap_set_vnet_le,
.set_vnet_be = tap_set_vnet_be,
@ -432,7 +373,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
* Make sure host header length is set correctly in tap:
* it might have been modified by another instance of qemu.
*/
if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
if (vnet_hdr) {
tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
}
tap_read_poll(s, true);

View File

@ -35,7 +35,6 @@ ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
int tap_probe_vnet_hdr(int fd, Error **errp);
int tap_probe_vnet_hdr_len(int fd, int len);
int tap_probe_has_ufo(int fd);
int tap_probe_has_uso(int fd);
void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo,

View File

@ -1,23 +1,24 @@
OBJS = rss.bpf.o
SKELETONS = rss.bpf.skeleton.h
LLVM_STRIP ?= llvm-strip
CLANG ?= clang
INC_FLAGS = `$(CLANG) -print-file-name=include`
EXTRA_CFLAGS ?= -O2 -g -target bpf
all: $(OBJS)
all: $(SKELETONS)
.PHONY: clean
clean:
rm -f $(OBJS)
rm -f rss.bpf.skeleton.h
rm -f $(SKELETONS) $(SKELETONS:%.skeleton.h=%.o)
$(OBJS): %.o:%.c
%.o: %.c
$(CLANG) $(INC_FLAGS) \
-D__KERNEL__ -D__ASM_SYSREG_H \
-I../include $(LINUXINCLUDE) \
$(EXTRA_CFLAGS) -c $< -o $@
$(LLVM_STRIP) -g $@
bpftool gen skeleton rss.bpf.o > rss.bpf.skeleton.h
cp rss.bpf.skeleton.h ../../ebpf/
%.skeleton.h: %.o
bpftool gen skeleton $< > $@
cp $@ ../../ebpf/

View File

@ -380,18 +380,19 @@ error:
return err;
}
static inline __u32 calculate_rss_hash(struct __sk_buff *skb,
struct rss_config_t *config, struct toeplitz_key_data_t *toe)
static inline bool calculate_rss_hash(struct __sk_buff *skb,
struct rss_config_t *config,
struct toeplitz_key_data_t *toe,
__u32 *result)
{
__u8 rss_input[HASH_CALCULATION_BUFFER_SIZE] = {};
size_t bytes_written = 0;
__u32 result = 0;
int err = 0;
struct packet_hash_info_t packet_info = {};
err = parse_packet(skb, &packet_info);
if (err) {
return 0;
return false;
}
if (packet_info.is_ipv4) {
@ -524,11 +525,13 @@ static inline __u32 calculate_rss_hash(struct __sk_buff *skb,
}
}
if (bytes_written) {
net_toeplitz_add(&result, rss_input, bytes_written, toe);
if (!bytes_written) {
return false;
}
return result;
net_toeplitz_add(result, rss_input, bytes_written, toe);
return true;
}
SEC("socket")
@ -544,28 +547,23 @@ int tun_rss_steering_prog(struct __sk_buff *skb)
config = bpf_map_lookup_elem(&tap_rss_map_configurations, &key);
toe = bpf_map_lookup_elem(&tap_rss_map_toeplitz_key, &key);
if (config && toe) {
if (!config->redirect) {
return config->default_queue;
}
hash = calculate_rss_hash(skb, config, toe);
if (hash) {
__u32 table_idx = hash % config->indirections_len;
__u16 *queue = 0;
queue = bpf_map_lookup_elem(&tap_rss_map_indirection_table,
&table_idx);
if (queue) {
return *queue;
}
}
return config->default_queue;
if (!config || !toe) {
return 0;
}
return -1;
if (config->redirect && calculate_rss_hash(skb, config, toe, &hash)) {
__u32 table_idx = hash % config->indirections_len;
__u16 *queue = 0;
queue = bpf_map_lookup_elem(&tap_rss_map_indirection_table,
&table_idx);
if (queue) {
return *queue;
}
}
return config->default_queue;
}
char _license[] SEC("license") = "GPL v2";