qemu/hw/net/e1000e.c

740 lines
22 KiB
C
Raw Normal View History

net: Introduce e1000e device emulation This patch introduces emulation for the Intel 82574 adapter, AKA e1000e. This implementation is derived from the e1000 emulation code, and utilizes the TX/RX packet abstractions that were initially developed for the vmxnet3 device. Although some parts of the introduced code may be shared with e1000, the differences are substantial enough so that the only shared resources for the two devices are the definitions in hw/net/e1000_regs.h. Similarly to vmxnet3, the new device uses virtio headers for task offloads (for backends that support virtio extensions). Usage of virtio headers may be forcibly disabled via a boolean device property "vnet" (which is enabled by default). In such case task offloads will be performed in software, in the same way it is done on backends that do not support virtio headers. The device code is split into two parts: 1. hw/net/e1000e.c: QEMU-specific code for a network device; 2. hw/net/e1000e_core.[hc]: Device emulation according to the spec. The new device name is e1000e. Intel specifications for the 82574 controller are available at: http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf Throughput measurement results (iperf2): Fedora 22 guest, TCP, RX 4 ++------------------------------------------+ | | | X X X X X 3.5 ++ X X X X | | X | | | 3 ++ | G | X | b | | / 2.5 ++ | s | | | | 2 ++ | | | | | 1.5 X+ | | | + + + + + + + + + + + + 1 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, TCP, TX 18 ++-------------------------------------------+ | X | 16 ++ X X X X X | X | 14 ++ | | | 12 ++ | G | X | b 10 ++ | / | | s 8 ++ | | | 6 ++ X | | | 4 ++ | | X | 2 ++ X | X + + + + + + + + + + + 0 ++--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, UDP, RX 3 ++------------------------------------------+ | X | | 2.5 ++ | | | | | 2 ++ X | G | | b | | / 1.5 ++ | s | X | | | 1 ++ | | | | X | 0.5 ++ | | X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Fedora 22 guest, UDP, TX 1 ++------------------------------------------+ | X 0.9 ++ | | | 0.8 ++ | 0.7 ++ | | | G 0.6 ++ | b | | / 0.5 ++ | s | X | 0.4 ++ | | | 0.3 ++ | 0.2 ++ X | | | 0.1 ++ X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, TCP, RX 3.2 ++------------------------------------------+ | X | 3 ++ | | | 2.8 ++ | | | 2.6 ++ X | G | X X X X X b 2.4 ++ X X | / | | s 2.2 ++ | | | 2 ++ | | X X | 1.8 ++ | | | 1.6 X+ | + + + + + + + + + + + + 1.4 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, TCP, TX 14 ++-------------------------------------------+ | | | X X 12 ++ | | | 10 ++ | | | G | | b 8 ++ | / | X | s 6 ++ | | | | | 4 ++ X | | | 2 ++ | | X X X | + X X + + X X + + + + + 0 X+--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, UDP, RX 1.6 ++------------------------------------------X | | 1.4 ++ | | | 1.2 ++ | | X | | | G 1 ++ | b | | / 0.8 ++ | s | | 0.6 ++ X | | | 0.4 ++ | | X | | | 0.2 ++ X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, UDP, TX 0.6 ++------------------------------------------+ | X | | 0.5 ++ | | | | | 0.4 ++ | G | | b | | / 0.3 ++ X | s | | | | 0.2 ++ | | | | X | 0.1 ++ | | X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com> Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-06-01 11:23:45 +03:00
/*
* QEMU INTEL 82574 GbE NIC emulation
*
* Software developer's manuals:
* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
*
* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Dmitry Fleytman <dmitry@daynix.com>
* Leonid Bloch <leonid@daynix.com>
* Yan Vugenfirer <yan@daynix.com>
*
* Based on work done by:
* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
* Copyright (c) 2008 Qumranet
* Based on work done by:
* Copyright (c) 2007 Dan Aloni
* Copyright (c) 2004 Antony T Curtis
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
#include "qemu/range.h"
#include "sysemu/sysemu.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/net/e1000_regs.h"
#include "e1000x_common.h"
#include "e1000e_core.h"
#include "trace.h"
#define TYPE_E1000E "e1000e"
#define E1000E(obj) OBJECT_CHECK(E1000EState, (obj), TYPE_E1000E)
typedef struct E1000EState {
PCIDevice parent_obj;
NICState *nic;
NICConf conf;
MemoryRegion mmio;
MemoryRegion flash;
MemoryRegion io;
MemoryRegion msix;
uint32_t ioaddr;
uint16_t subsys_ven;
uint16_t subsys;
uint16_t subsys_ven_used;
uint16_t subsys_used;
uint32_t intr_state;
bool disable_vnet;
E1000ECore core;
} E1000EState;
#define E1000E_MMIO_IDX 0
#define E1000E_FLASH_IDX 1
#define E1000E_IO_IDX 2
#define E1000E_MSIX_IDX 3
#define E1000E_MMIO_SIZE (128 * 1024)
#define E1000E_FLASH_SIZE (128 * 1024)
#define E1000E_IO_SIZE (32)
#define E1000E_MSIX_SIZE (16 * 1024)
#define E1000E_MSIX_TABLE (0x0000)
#define E1000E_MSIX_PBA (0x2000)
#define E1000E_USE_MSI BIT(0)
#define E1000E_USE_MSIX BIT(1)
static uint64_t
e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
E1000EState *s = opaque;
return e1000e_core_read(&s->core, addr, size);
}
static void
e1000e_mmio_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
E1000EState *s = opaque;
e1000e_core_write(&s->core, addr, val, size);
}
static bool
e1000e_io_get_reg_index(E1000EState *s, uint32_t *idx)
{
if (s->ioaddr < 0x1FFFF) {
*idx = s->ioaddr;
return true;
}
if (s->ioaddr < 0x7FFFF) {
trace_e1000e_wrn_io_addr_undefined(s->ioaddr);
return false;
}
if (s->ioaddr < 0xFFFFF) {
trace_e1000e_wrn_io_addr_flash(s->ioaddr);
return false;
}
trace_e1000e_wrn_io_addr_unknown(s->ioaddr);
return false;
}
static uint64_t
e1000e_io_read(void *opaque, hwaddr addr, unsigned size)
{
E1000EState *s = opaque;
uint32_t idx = 0;
net: Introduce e1000e device emulation This patch introduces emulation for the Intel 82574 adapter, AKA e1000e. This implementation is derived from the e1000 emulation code, and utilizes the TX/RX packet abstractions that were initially developed for the vmxnet3 device. Although some parts of the introduced code may be shared with e1000, the differences are substantial enough so that the only shared resources for the two devices are the definitions in hw/net/e1000_regs.h. Similarly to vmxnet3, the new device uses virtio headers for task offloads (for backends that support virtio extensions). Usage of virtio headers may be forcibly disabled via a boolean device property "vnet" (which is enabled by default). In such case task offloads will be performed in software, in the same way it is done on backends that do not support virtio headers. The device code is split into two parts: 1. hw/net/e1000e.c: QEMU-specific code for a network device; 2. hw/net/e1000e_core.[hc]: Device emulation according to the spec. The new device name is e1000e. Intel specifications for the 82574 controller are available at: http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf Throughput measurement results (iperf2): Fedora 22 guest, TCP, RX 4 ++------------------------------------------+ | | | X X X X X 3.5 ++ X X X X | | X | | | 3 ++ | G | X | b | | / 2.5 ++ | s | | | | 2 ++ | | | | | 1.5 X+ | | | + + + + + + + + + + + + 1 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, TCP, TX 18 ++-------------------------------------------+ | X | 16 ++ X X X X X | X | 14 ++ | | | 12 ++ | G | X | b 10 ++ | / | | s 8 ++ | | | 6 ++ X | | | 4 ++ | | X | 2 ++ X | X + + + + + + + + + + + 0 ++--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, UDP, RX 3 ++------------------------------------------+ | X | | 2.5 ++ | | | | | 2 ++ X | G | | b | | / 1.5 ++ | s | X | | | 1 ++ | | | | X | 0.5 ++ | | X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Fedora 22 guest, UDP, TX 1 ++------------------------------------------+ | X 0.9 ++ | | | 0.8 ++ | 0.7 ++ | | | G 0.6 ++ | b | | / 0.5 ++ | s | X | 0.4 ++ | | | 0.3 ++ | 0.2 ++ X | | | 0.1 ++ X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, TCP, RX 3.2 ++------------------------------------------+ | X | 3 ++ | | | 2.8 ++ | | | 2.6 ++ X | G | X X X X X b 2.4 ++ X X | / | | s 2.2 ++ | | | 2 ++ | | X X | 1.8 ++ | | | 1.6 X+ | + + + + + + + + + + + + 1.4 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, TCP, TX 14 ++-------------------------------------------+ | | | X X 12 ++ | | | 10 ++ | | | G | | b 8 ++ | / | X | s 6 ++ | | | | | 4 ++ X | | | 2 ++ | | X X X | + X X + + X X + + + + + 0 X+--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, UDP, RX 1.6 ++------------------------------------------X | | 1.4 ++ | | | 1.2 ++ | | X | | | G 1 ++ | b | | / 0.8 ++ | s | | 0.6 ++ X | | | 0.4 ++ | | X | | | 0.2 ++ X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, UDP, TX 0.6 ++------------------------------------------+ | X | | 0.5 ++ | | | | | 0.4 ++ | G | | b | | / 0.3 ++ X | s | | | | 0.2 ++ | | | | X | 0.1 ++ | | X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com> Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-06-01 11:23:45 +03:00
uint64_t val;
switch (addr) {
case E1000_IOADDR:
trace_e1000e_io_read_addr(s->ioaddr);
return s->ioaddr;
case E1000_IODATA:
if (e1000e_io_get_reg_index(s, &idx)) {
val = e1000e_core_read(&s->core, idx, sizeof(val));
trace_e1000e_io_read_data(idx, val);
return val;
}
return 0;
default:
trace_e1000e_wrn_io_read_unknown(addr);
return 0;
}
}
static void
e1000e_io_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
E1000EState *s = opaque;
uint32_t idx = 0;
net: Introduce e1000e device emulation This patch introduces emulation for the Intel 82574 adapter, AKA e1000e. This implementation is derived from the e1000 emulation code, and utilizes the TX/RX packet abstractions that were initially developed for the vmxnet3 device. Although some parts of the introduced code may be shared with e1000, the differences are substantial enough so that the only shared resources for the two devices are the definitions in hw/net/e1000_regs.h. Similarly to vmxnet3, the new device uses virtio headers for task offloads (for backends that support virtio extensions). Usage of virtio headers may be forcibly disabled via a boolean device property "vnet" (which is enabled by default). In such case task offloads will be performed in software, in the same way it is done on backends that do not support virtio headers. The device code is split into two parts: 1. hw/net/e1000e.c: QEMU-specific code for a network device; 2. hw/net/e1000e_core.[hc]: Device emulation according to the spec. The new device name is e1000e. Intel specifications for the 82574 controller are available at: http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf Throughput measurement results (iperf2): Fedora 22 guest, TCP, RX 4 ++------------------------------------------+ | | | X X X X X 3.5 ++ X X X X | | X | | | 3 ++ | G | X | b | | / 2.5 ++ | s | | | | 2 ++ | | | | | 1.5 X+ | | | + + + + + + + + + + + + 1 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, TCP, TX 18 ++-------------------------------------------+ | X | 16 ++ X X X X X | X | 14 ++ | | | 12 ++ | G | X | b 10 ++ | / | | s 8 ++ | | | 6 ++ X | | | 4 ++ | | X | 2 ++ X | X + + + + + + + + + + + 0 ++--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, UDP, RX 3 ++------------------------------------------+ | X | | 2.5 ++ | | | | | 2 ++ X | G | | b | | / 1.5 ++ | s | X | | | 1 ++ | | | | X | 0.5 ++ | | X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Fedora 22 guest, UDP, TX 1 ++------------------------------------------+ | X 0.9 ++ | | | 0.8 ++ | 0.7 ++ | | | G 0.6 ++ | b | | / 0.5 ++ | s | X | 0.4 ++ | | | 0.3 ++ | 0.2 ++ X | | | 0.1 ++ X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, TCP, RX 3.2 ++------------------------------------------+ | X | 3 ++ | | | 2.8 ++ | | | 2.6 ++ X | G | X X X X X b 2.4 ++ X X | / | | s 2.2 ++ | | | 2 ++ | | X X | 1.8 ++ | | | 1.6 X+ | + + + + + + + + + + + + 1.4 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, TCP, TX 14 ++-------------------------------------------+ | | | X X 12 ++ | | | 10 ++ | | | G | | b 8 ++ | / | X | s 6 ++ | | | | | 4 ++ X | | | 2 ++ | | X X X | + X X + + X X + + + + + 0 X+--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, UDP, RX 1.6 ++------------------------------------------X | | 1.4 ++ | | | 1.2 ++ | | X | | | G 1 ++ | b | | / 0.8 ++ | s | | 0.6 ++ X | | | 0.4 ++ | | X | | | 0.2 ++ X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, UDP, TX 0.6 ++------------------------------------------+ | X | | 0.5 ++ | | | | | 0.4 ++ | G | | b | | / 0.3 ++ X | s | | | | 0.2 ++ | | | | X | 0.1 ++ | | X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com> Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2016-06-01 11:23:45 +03:00
switch (addr) {
case E1000_IOADDR:
trace_e1000e_io_write_addr(val);
s->ioaddr = (uint32_t) val;
return;
case E1000_IODATA:
if (e1000e_io_get_reg_index(s, &idx)) {
trace_e1000e_io_write_data(idx, val);
e1000e_core_write(&s->core, idx, val, sizeof(val));
}
return;
default:
trace_e1000e_wrn_io_write_unknown(addr);
return;
}
}
static const MemoryRegionOps mmio_ops = {
.read = e1000e_mmio_read,
.write = e1000e_mmio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.impl = {
.min_access_size = 4,
.max_access_size = 4,
},
};
static const MemoryRegionOps io_ops = {
.read = e1000e_io_read,
.write = e1000e_io_write,
.endianness = DEVICE_LITTLE_ENDIAN,
.impl = {
.min_access_size = 4,
.max_access_size = 4,
},
};
static int
e1000e_nc_can_receive(NetClientState *nc)
{
E1000EState *s = qemu_get_nic_opaque(nc);
return e1000e_can_receive(&s->core);
}
static ssize_t
e1000e_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
{
E1000EState *s = qemu_get_nic_opaque(nc);
return e1000e_receive_iov(&s->core, iov, iovcnt);
}
static ssize_t
e1000e_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
E1000EState *s = qemu_get_nic_opaque(nc);
return e1000e_receive(&s->core, buf, size);
}
static void
e1000e_set_link_status(NetClientState *nc)
{
E1000EState *s = qemu_get_nic_opaque(nc);
e1000e_core_set_link_status(&s->core);
}
static NetClientInfo net_e1000e_info = {
.type = NET_CLIENT_OPTIONS_KIND_NIC,
.size = sizeof(NICState),
.can_receive = e1000e_nc_can_receive,
.receive = e1000e_nc_receive,
.receive_iov = e1000e_nc_receive_iov,
.link_status_changed = e1000e_set_link_status,
};
/*
* EEPROM (NVM) contents documented in Table 36, section 6.1
* and generally 6.1.2 Software accessed words.
*/
static const uint16_t e1000e_eeprom_template[64] = {
/* Address | Compat. | ImVer | Compat. */
0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff,
/* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */
0x0000, 0x0000, 0x026b, 0x0000, 0x8086, 0x0000, 0x0000, 0x8058,
/* NVM words 1,2,3 |-------------------------------|PCI-EID*/
0x0000, 0x2001, 0x7e7c, 0xffff, 0x1000, 0x00c8, 0x0000, 0x2704,
/* PCIe Init. Conf 1,2,3 |PCICtrl|PHY|LD1|-------| RevID | LD0,2 */
0x6cc9, 0x3150, 0x070e, 0x460b, 0x2d84, 0x0100, 0xf000, 0x0706,
/* FLPAR |FLANADD|LAN-PWR|FlVndr |ICtrl3 |APTSMBA|APTRxEP|APTSMBC*/
0x6000, 0x0080, 0x0f04, 0x7fff, 0x4f01, 0xc600, 0x0000, 0x20ff,
/* APTIF | APTMC |APTuCP |LSWFWID|MSWFWID|NC-SIMC|NC-SIC | VPDP */
0x0028, 0x0003, 0x0000, 0x0000, 0x0000, 0x0003, 0x0000, 0xffff,
/* SW Section */
0x0100, 0xc000, 0x121c, 0xc007, 0xffff, 0xffff, 0xffff, 0xffff,
/* SW Section |CHKSUM */
0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0120, 0xffff, 0x0000,
};
static void e1000e_core_realize(E1000EState *s)
{
s->core.owner = &s->parent_obj;
s->core.owner_nic = s->nic;
}
static void
e1000e_init_msi(E1000EState *s)
{
int res;
res = msi_init(PCI_DEVICE(s),
0xD0, /* MSI capability offset */
1, /* MAC MSI interrupts */
true, /* 64-bit message addresses supported */
false); /* Per vector mask supported */
if (res > 0) {
s->intr_state |= E1000E_USE_MSI;
} else {
trace_e1000e_msi_init_fail(res);
}
}
static void
e1000e_cleanup_msi(E1000EState *s)
{
if (s->intr_state & E1000E_USE_MSI) {
msi_uninit(PCI_DEVICE(s));
}
}
static void
e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors)
{
int i;
for (i = 0; i < num_vectors; i++) {
msix_vector_unuse(PCI_DEVICE(s), i);
}
}
static bool
e1000e_use_msix_vectors(E1000EState *s, int num_vectors)
{
int i;
for (i = 0; i < num_vectors; i++) {
int res = msix_vector_use(PCI_DEVICE(s), i);
if (res < 0) {
trace_e1000e_msix_use_vector_fail(i, res);
e1000e_unuse_msix_vectors(s, i);
return false;
}
}
return true;
}
static void
e1000e_init_msix(E1000EState *s)
{
PCIDevice *d = PCI_DEVICE(s);
int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM,
&s->msix,
E1000E_MSIX_IDX, E1000E_MSIX_TABLE,
&s->msix,
E1000E_MSIX_IDX, E1000E_MSIX_PBA,
0xA0);
if (res < 0) {
trace_e1000e_msix_init_fail(res);
} else {
if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) {
msix_uninit(d, &s->msix, &s->msix);
} else {
s->intr_state |= E1000E_USE_MSIX;
}
}
}
static void
e1000e_cleanup_msix(E1000EState *s)
{
if (s->intr_state & E1000E_USE_MSIX) {
e1000e_unuse_msix_vectors(s, E1000E_MSIX_VEC_NUM);
msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix);
}
}
static void
e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr)
{
DeviceState *dev = DEVICE(pci_dev);
NetClientState *nc;
int i;
s->nic = qemu_new_nic(&net_e1000e_info, &s->conf,
object_get_typename(OBJECT(s)), dev->id, s);
s->core.max_queue_num = s->conf.peers.queues - 1;
trace_e1000e_mac_set_permanent(MAC_ARG(macaddr));
memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac));
qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr);
/* Setup virtio headers */
if (s->disable_vnet) {
s->core.has_vnet = false;
trace_e1000e_cfg_support_virtio(false);
return;
} else {
s->core.has_vnet = true;
}
for (i = 0; i < s->conf.peers.queues; i++) {
nc = qemu_get_subqueue(s->nic, i);
if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) {
s->core.has_vnet = false;
trace_e1000e_cfg_support_virtio(false);
return;
}
}
trace_e1000e_cfg_support_virtio(true);
for (i = 0; i < s->conf.peers.queues; i++) {
nc = qemu_get_subqueue(s->nic, i);
qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr));
qemu_using_vnet_hdr(nc->peer, true);
}
}
static inline uint64_t
e1000e_gen_dsn(uint8_t *mac)
{
return (uint64_t)(mac[5]) |
(uint64_t)(mac[4]) << 8 |
(uint64_t)(mac[3]) << 16 |
(uint64_t)(0x00FF) << 24 |
(uint64_t)(0x00FF) << 32 |
(uint64_t)(mac[2]) << 40 |
(uint64_t)(mac[1]) << 48 |
(uint64_t)(mac[0]) << 56;
}
static int
e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
{
int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF);
if (ret >= 0) {
pci_set_word(pdev->config + offset + PCI_PM_PMC,
PCI_PM_CAP_VER_1_1 |
pmc);
pci_set_word(pdev->wmask + offset + PCI_PM_CTRL,
PCI_PM_CTRL_STATE_MASK |
PCI_PM_CTRL_PME_ENABLE |
PCI_PM_CTRL_DATA_SEL_MASK);
pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL,
PCI_PM_CTRL_PME_STATUS);
}
return ret;
}
static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address,
uint32_t val, int len)
{
E1000EState *s = E1000E(pci_dev);
pci_default_write_config(pci_dev, address, val, len);
if (range_covers_byte(address, len, PCI_COMMAND) &&
(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
}
static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp)
{
static const uint16_t e1000e_pmrb_offset = 0x0C8;
static const uint16_t e1000e_pcie_offset = 0x0E0;
static const uint16_t e1000e_aer_offset = 0x100;
static const uint16_t e1000e_dsn_offset = 0x140;
E1000EState *s = E1000E(pci_dev);
uint8_t *macaddr;
trace_e1000e_cb_pci_realize();
pci_dev->config_write = e1000e_write_config;
pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
pci_dev->config[PCI_INTERRUPT_PIN] = 1;
pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, s->subsys_ven);
pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, s->subsys);
s->subsys_ven_used = s->subsys_ven;
s->subsys_used = s->subsys;
/* Define IO/MMIO regions */
memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s,
"e1000e-mmio", E1000E_MMIO_SIZE);
pci_register_bar(pci_dev, E1000E_MMIO_IDX,
PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
/*
* We provide a dummy implementation for the flash BAR
* for drivers that may theoretically probe for its presence.
*/
memory_region_init(&s->flash, OBJECT(s),
"e1000e-flash", E1000E_FLASH_SIZE);
pci_register_bar(pci_dev, E1000E_FLASH_IDX,
PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash);
memory_region_init_io(&s->io, OBJECT(s), &io_ops, s,
"e1000e-io", E1000E_IO_SIZE);
pci_register_bar(pci_dev, E1000E_IO_IDX,
PCI_BASE_ADDRESS_SPACE_IO, &s->io);
memory_region_init(&s->msix, OBJECT(s), "e1000e-msix",
E1000E_MSIX_SIZE);
pci_register_bar(pci_dev, E1000E_MSIX_IDX,
PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix);
/* Create networking backend */
qemu_macaddr_default_if_unset(&s->conf.macaddr);
macaddr = s->conf.macaddr.a;
e1000e_init_msix(s);
if (pcie_endpoint_cap_v1_init(pci_dev, e1000e_pcie_offset) < 0) {
hw_error("Failed to initialize PCIe capability");
}
e1000e_init_msi(s);
if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset,
PCI_PM_CAP_DSI) < 0) {
hw_error("Failed to initialize PM capability");
}
if (pcie_aer_init(pci_dev, e1000e_aer_offset, PCI_ERR_SIZEOF) < 0) {
hw_error("Failed to initialize AER capability");
}
pcie_dev_ser_num_init(pci_dev, e1000e_dsn_offset,
e1000e_gen_dsn(macaddr));
e1000e_init_net_peer(s, pci_dev, macaddr);
/* Initialize core */
e1000e_core_realize(s);
e1000e_core_pci_realize(&s->core,
e1000e_eeprom_template,
sizeof(e1000e_eeprom_template),
macaddr);
}
static void e1000e_pci_uninit(PCIDevice *pci_dev)
{
E1000EState *s = E1000E(pci_dev);
trace_e1000e_cb_pci_uninit();
e1000e_core_pci_uninit(&s->core);
pcie_aer_exit(pci_dev);
pcie_cap_exit(pci_dev);
qemu_del_nic(s->nic);
e1000e_cleanup_msix(s);
e1000e_cleanup_msi(s);
}
static void e1000e_qdev_reset(DeviceState *dev)
{
E1000EState *s = E1000E(dev);
trace_e1000e_cb_qdev_reset();
e1000e_core_reset(&s->core);
}
static void e1000e_pre_save(void *opaque)
{
E1000EState *s = opaque;
trace_e1000e_cb_pre_save();
e1000e_core_pre_save(&s->core);
}
static int e1000e_post_load(void *opaque, int version_id)
{
E1000EState *s = opaque;
trace_e1000e_cb_post_load();
if ((s->subsys != s->subsys_used) ||
(s->subsys_ven != s->subsys_ven_used)) {
fprintf(stderr,
"ERROR: Cannot migrate while device properties "
"(subsys/subsys_ven) differ");
return -1;
}
return e1000e_core_post_load(&s->core);
}
static const VMStateDescription e1000e_vmstate_tx = {
.name = "e1000e-tx",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT8(props.sum_needed, struct e1000e_tx),
VMSTATE_UINT8(props.ipcss, struct e1000e_tx),
VMSTATE_UINT8(props.ipcso, struct e1000e_tx),
VMSTATE_UINT16(props.ipcse, struct e1000e_tx),
VMSTATE_UINT8(props.tucss, struct e1000e_tx),
VMSTATE_UINT8(props.tucso, struct e1000e_tx),
VMSTATE_UINT16(props.tucse, struct e1000e_tx),
VMSTATE_UINT8(props.hdr_len, struct e1000e_tx),
VMSTATE_UINT16(props.mss, struct e1000e_tx),
VMSTATE_UINT32(props.paylen, struct e1000e_tx),
VMSTATE_INT8(props.ip, struct e1000e_tx),
VMSTATE_INT8(props.tcp, struct e1000e_tx),
VMSTATE_BOOL(props.tse, struct e1000e_tx),
VMSTATE_BOOL(props.cptse, struct e1000e_tx),
VMSTATE_BOOL(skip_cp, struct e1000e_tx),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription e1000e_vmstate_intr_timer = {
.name = "e1000e-intr-timer",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_TIMER_PTR(timer, E1000IntrDelayTimer),
VMSTATE_BOOL(running, E1000IntrDelayTimer),
VMSTATE_END_OF_LIST()
}
};
#define VMSTATE_E1000E_INTR_DELAY_TIMER(_f, _s) \
VMSTATE_STRUCT(_f, _s, 0, \
e1000e_vmstate_intr_timer, E1000IntrDelayTimer)
#define VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(_f, _s, _num) \
VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \
e1000e_vmstate_intr_timer, E1000IntrDelayTimer)
static const VMStateDescription e1000e_vmstate = {
.name = "e1000e",
.version_id = 1,
.minimum_version_id = 1,
.pre_save = e1000e_pre_save,
.post_load = e1000e_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCIE_DEVICE(parent_obj, E1000EState),
VMSTATE_MSIX(parent_obj, E1000EState),
VMSTATE_UINT32(ioaddr, E1000EState),
VMSTATE_UINT32(intr_state, E1000EState),
VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState),
VMSTATE_UINT8(core.rx_desc_len, E1000EState),
VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState,
E1000_PSRCTL_BUFFS_PER_DESC),
VMSTATE_UINT32(core.rx_desc_buf_size, E1000EState),
VMSTATE_UINT16_ARRAY(core.eeprom, E1000EState, E1000E_EEPROM_SIZE),
VMSTATE_UINT16_2DARRAY(core.phy, E1000EState,
E1000E_PHY_PAGES, E1000E_PHY_PAGE_SIZE),
VMSTATE_UINT32_ARRAY(core.mac, E1000EState, E1000E_MAC_SIZE),
VMSTATE_UINT8_ARRAY(core.permanent_mac, E1000EState, ETH_ALEN),
VMSTATE_UINT32(core.delayed_causes, E1000EState),
VMSTATE_UINT16(subsys, E1000EState),
VMSTATE_UINT16(subsys_ven, E1000EState),
VMSTATE_E1000E_INTR_DELAY_TIMER(core.rdtr, E1000EState),
VMSTATE_E1000E_INTR_DELAY_TIMER(core.radv, E1000EState),
VMSTATE_E1000E_INTR_DELAY_TIMER(core.raid, E1000EState),
VMSTATE_E1000E_INTR_DELAY_TIMER(core.tadv, E1000EState),
VMSTATE_E1000E_INTR_DELAY_TIMER(core.tidv, E1000EState),
VMSTATE_E1000E_INTR_DELAY_TIMER(core.itr, E1000EState),
VMSTATE_BOOL(core.itr_intr_pending, E1000EState),
VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(core.eitr, E1000EState,
E1000E_MSIX_VEC_NUM),
VMSTATE_BOOL_ARRAY(core.eitr_intr_pending, E1000EState,
E1000E_MSIX_VEC_NUM),
VMSTATE_UINT32(core.itr_guest_value, E1000EState),
VMSTATE_UINT32_ARRAY(core.eitr_guest_value, E1000EState,
E1000E_MSIX_VEC_NUM),
VMSTATE_UINT16(core.vet, E1000EState),
VMSTATE_STRUCT_ARRAY(core.tx, E1000EState, E1000E_NUM_QUEUES, 0,
e1000e_vmstate_tx, struct e1000e_tx),
VMSTATE_END_OF_LIST()
}
};
static PropertyInfo e1000e_prop_disable_vnet,
e1000e_prop_subsys_ven,
e1000e_prop_subsys;
static Property e1000e_properties[] = {
DEFINE_NIC_PROPERTIES(E1000EState, conf),
DEFINE_PROP_DEFAULT("disable_vnet_hdr", E1000EState, disable_vnet, false,
e1000e_prop_disable_vnet, bool),
DEFINE_PROP_DEFAULT("subsys_ven", E1000EState, subsys_ven,
PCI_VENDOR_ID_INTEL,
e1000e_prop_subsys_ven, uint16_t),
DEFINE_PROP_DEFAULT("subsys", E1000EState, subsys, 0,
e1000e_prop_subsys, uint16_t),
DEFINE_PROP_END_OF_LIST(),
};
static void e1000e_class_init(ObjectClass *class, void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
c->realize = e1000e_pci_realize;
c->exit = e1000e_pci_uninit;
c->vendor_id = PCI_VENDOR_ID_INTEL;
c->device_id = E1000_DEV_ID_82574L;
c->revision = 0;
c->class_id = PCI_CLASS_NETWORK_ETHERNET;
c->is_express = 1;
dc->desc = "Intel 82574L GbE Controller";
dc->reset = e1000e_qdev_reset;
dc->vmsd = &e1000e_vmstate;
dc->props = e1000e_properties;
e1000e_prop_disable_vnet = qdev_prop_uint8;
e1000e_prop_disable_vnet.description = "Do not use virtio headers, "
"perform SW offloads emulation "
"instead";
e1000e_prop_subsys_ven = qdev_prop_uint16;
e1000e_prop_subsys_ven.description = "PCI device Subsystem Vendor ID";
e1000e_prop_subsys = qdev_prop_uint16;
e1000e_prop_subsys.description = "PCI device Subsystem ID";
set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
}
static void e1000e_instance_init(Object *obj)
{
E1000EState *s = E1000E(obj);
device_add_bootindex_property(obj, &s->conf.bootindex,
"bootindex", "/ethernet-phy@0",
DEVICE(obj), NULL);
}
static const TypeInfo e1000e_info = {
.name = TYPE_E1000E,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(E1000EState),
.class_init = e1000e_class_init,
.instance_init = e1000e_instance_init,
};
static void e1000e_register_types(void)
{
type_register_static(&e1000e_info);
}
type_init(e1000e_register_types)