/* * QEMU rocker switch emulation - PCI device * * Copyright (c) 2014 Scott Feldman * Copyright (c) 2014 Jiri Pirko * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include "qemu/osdep.h" #include "hw/hw.h" #include "hw/pci/pci.h" #include "hw/pci/msix.h" #include "net/net.h" #include "net/eth.h" #include "qemu/iov.h" #include "qemu/bitops.h" #include "qmp-commands.h" #include "rocker.h" #include "rocker_hw.h" #include "rocker_fp.h" #include "rocker_desc.h" #include "rocker_tlv.h" #include "rocker_world.h" #include "rocker_of_dpa.h" struct rocker { /* private */ PCIDevice parent_obj; /* public */ MemoryRegion mmio; MemoryRegion msix_bar; /* switch configuration */ char *name; /* switch name */ uint32_t fp_ports; /* front-panel port count */ NICPeers *fp_ports_peers; MACAddr fp_start_macaddr; /* front-panel port 0 mac addr */ uint64_t switch_id; /* switch id */ /* front-panel ports */ FpPort *fp_port[ROCKER_FP_PORTS_MAX]; /* register backings */ uint32_t test_reg; uint64_t test_reg64; dma_addr_t test_dma_addr; uint32_t test_dma_size; uint64_t lower32; /* lower 32-bit val in 2-part 64-bit access */ /* desc rings */ DescRing **rings; /* switch worlds */ World *worlds[ROCKER_WORLD_TYPE_MAX]; World *world_dflt; QLIST_ENTRY(rocker) next; }; #define ROCKER "rocker" #define to_rocker(obj) \ OBJECT_CHECK(Rocker, (obj), ROCKER) static QLIST_HEAD(, rocker) rockers; Rocker *rocker_find(const char *name) { Rocker *r; QLIST_FOREACH(r, &rockers, next) if (strcmp(r->name, name) == 0) { return r; } return NULL; } World *rocker_get_world(Rocker *r, enum rocker_world_type type) { if (type < ROCKER_WORLD_TYPE_MAX) { return r->worlds[type]; } return NULL; } RockerSwitch *qmp_query_rocker(const char *name, Error **errp) { RockerSwitch *rocker; Rocker *r; r = rocker_find(name); if (!r) { error_setg(errp, "rocker %s not found", name); return NULL; } rocker = g_new0(RockerSwitch, 1); rocker->name = g_strdup(r->name); rocker->id = r->switch_id; rocker->ports = r->fp_ports; return rocker; } RockerPortList *qmp_query_rocker_ports(const char *name, Error **errp) { RockerPortList *list = NULL; Rocker *r; int i; r = rocker_find(name); if (!r) { error_setg(errp, "rocker %s not found", name); return NULL; } for (i = r->fp_ports - 1; i >= 0; i--) { RockerPortList *info = g_malloc0(sizeof(*info)); info->value = g_malloc0(sizeof(*info->value)); struct fp_port *port = r->fp_port[i]; fp_port_get_info(port, info); info->next = list; list = info; } return list; } uint32_t rocker_fp_ports(Rocker *r) { return r->fp_ports; } static uint32_t rocker_get_pport_by_tx_ring(Rocker *r, DescRing *ring) { return (desc_ring_index(ring) - 2) / 2 + 1; } static int tx_consume(Rocker *r, DescInfo *info) { PCIDevice *dev = PCI_DEVICE(r); char *buf = desc_get_buf(info, true); RockerTlv *tlv_frag; RockerTlv *tlvs[ROCKER_TLV_TX_MAX + 1]; struct iovec iov[ROCKER_TX_FRAGS_MAX] = { { 0, }, }; uint32_t pport; uint32_t port; uint16_t tx_offload = ROCKER_TX_OFFLOAD_NONE; uint16_t tx_l3_csum_off = 0; uint16_t tx_tso_mss = 0; uint16_t tx_tso_hdr_len = 0; int iovcnt = 0; int err = ROCKER_OK; int rem; int i; if (!buf) { return -ROCKER_ENXIO; } rocker_tlv_parse(tlvs, ROCKER_TLV_TX_MAX, buf, desc_tlv_size(info)); if (!tlvs[ROCKER_TLV_TX_FRAGS]) { return -ROCKER_EINVAL; } pport = rocker_get_pport_by_tx_ring(r, desc_get_ring(info)); if (!fp_port_from_pport(pport, &port)) { return -ROCKER_EINVAL; } if (tlvs[ROCKER_TLV_TX_OFFLOAD]) { tx_offload = rocker_tlv_get_u8(tlvs[ROCKER_TLV_TX_OFFLOAD]); } switch (tx_offload) { case ROCKER_TX_OFFLOAD_L3_CSUM: if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { return -ROCKER_EINVAL; } break; case ROCKER_TX_OFFLOAD_TSO: if (!tlvs[ROCKER_TLV_TX_TSO_MSS] || !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { return -ROCKER_EINVAL; } break; } if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) { tx_l3_csum_off = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]); } if (tlvs[ROCKER_TLV_TX_TSO_MSS]) { tx_tso_mss = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_MSS]); } if (tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) { tx_tso_hdr_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]); } rocker_tlv_for_each_nested(tlv_frag, tlvs[ROCKER_TLV_TX_FRAGS], rem) { hwaddr frag_addr; uint16_t frag_len; if (rocker_tlv_type(tlv_frag) != ROCKER_TLV_TX_FRAG) { err = -ROCKER_EINVAL; goto err_bad_attr; } rocker_tlv_parse_nested(tlvs, ROCKER_TLV_TX_FRAG_ATTR_MAX, tlv_frag); if (!tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] || !tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]) { err = -ROCKER_EINVAL; goto err_bad_attr; } frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]); frag_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_TX_FRAG_ATTR_LEN]); if (iovcnt >= ROCKER_TX_FRAGS_MAX) { goto err_too_many_frags; } iov[iovcnt].iov_len = frag_len; iov[iovcnt].iov_base = g_malloc(frag_len); if (!iov[iovcnt].iov_base) { err = -ROCKER_ENOMEM; goto err_no_mem; } if (pci_dma_read(dev, frag_addr, iov[iovcnt].iov_base, iov[iovcnt].iov_len)) { err = -ROCKER_ENXIO; goto err_bad_io; } iovcnt++; } if (iovcnt) { /* XXX perform Tx offloads */ /* XXX silence compiler for now */ tx_l3_csum_off += tx_tso_mss = tx_tso_hdr_len = 0; } err = fp_port_eg(r->fp_port[port], iov, iovcnt); err_too_many_frags: err_bad_io: err_no_mem: err_bad_attr: for (i = 0; i < ROCKER_TX_FRAGS_MAX; i++) { g_free(iov[i].iov_base); } return err; } static int cmd_get_port_settings(Rocker *r, DescInfo *info, char *buf, RockerTlv *cmd_info_tlv) { RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; RockerTlv *nest; FpPort *fp_port; uint32_t pport; uint32_t port; uint32_t speed; uint8_t duplex; uint8_t autoneg; uint8_t learning; char *phys_name; MACAddr macaddr; enum rocker_world_type mode; size_t tlv_size; int pos; int err; rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, cmd_info_tlv); if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) { return -ROCKER_EINVAL; } pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]); if (!fp_port_from_pport(pport, &port)) { return -ROCKER_EINVAL; } fp_port = r->fp_port[port]; err = fp_port_get_settings(fp_port, &speed, &duplex, &autoneg); if (err) { return err; } fp_port_get_macaddr(fp_port, &macaddr); mode = world_type(fp_port_get_world(fp_port)); learning = fp_port_get_learning(fp_port); phys_name = fp_port_get_name(fp_port); tlv_size = rocker_tlv_total_size(0) + /* nest */ rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ rocker_tlv_total_size(sizeof(uint32_t)) + /* speed */ rocker_tlv_total_size(sizeof(uint8_t)) + /* duplex */ rocker_tlv_total_size(sizeof(uint8_t)) + /* autoneg */ rocker_tlv_total_size(sizeof(macaddr.a)) + /* macaddr */ rocker_tlv_total_size(sizeof(uint8_t)) + /* mode */ rocker_tlv_total_size(sizeof(uint8_t)) + /* learning */ rocker_tlv_total_size(strlen(phys_name)); if (tlv_size > desc_buf_size(info)) { return -ROCKER_EMSGSIZE; } pos = 0; nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_CMD_INFO); rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT, pport); rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, speed); rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, duplex); rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, autoneg); rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, sizeof(macaddr.a), macaddr.a); rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_MODE, mode); rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING, learning); rocker_tlv_put(buf, &pos, ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME, strlen(phys_name), phys_name); rocker_tlv_nest_end(buf, &pos, nest); return desc_set_buf(info, tlv_size); } static int cmd_set_port_settings(Rocker *r, RockerTlv *cmd_info_tlv) { RockerTlv *tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1]; FpPort *fp_port; uint32_t pport; uint32_t port; uint32_t speed; uint8_t duplex; uint8_t autoneg; uint8_t learning; MACAddr macaddr; enum rocker_world_type mode; int err; rocker_tlv_parse_nested(tlvs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX, cmd_info_tlv); if (!tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]) { return -ROCKER_EINVAL; } pport = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_PPORT]); if (!fp_port_from_pport(pport, &port)) { return -ROCKER_EINVAL; } fp_port = r->fp_port[port]; if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] && tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] && tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]) { speed = rocker_tlv_get_le32(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]); duplex = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]); autoneg = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]); err = fp_port_set_settings(fp_port, speed, duplex, autoneg); if (err) { return err; } } if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) { if (rocker_tlv_len(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]) != sizeof(macaddr.a)) { return -ROCKER_EINVAL; } memcpy(macaddr.a, rocker_tlv_data(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR]), sizeof(macaddr.a)); fp_port_set_macaddr(fp_port, &macaddr); } if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) { mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]); if (mode >= ROCKER_WORLD_TYPE_MAX) { return -ROCKER_EINVAL; } /* We don't support world change. */ if (!fp_port_check_world(fp_port, r->worlds[mode])) { return -ROCKER_EINVAL; } } if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) { learning = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]); fp_port_set_learning(fp_port, learning); } return ROCKER_OK; } static int cmd_consume(Rocker *r, DescInfo *info) { char *buf = desc_get_buf(info, false); RockerTlv *tlvs[ROCKER_TLV_CMD_MAX + 1]; RockerTlv *info_tlv; World *world; uint16_t cmd; int err; if (!buf) { return -ROCKER_ENXIO; } rocker_tlv_parse(tlvs, ROCKER_TLV_CMD_MAX, buf, desc_tlv_size(info)); if (!tlvs[ROCKER_TLV_CMD_TYPE] || !tlvs[ROCKER_TLV_CMD_INFO]) { return -ROCKER_EINVAL; } cmd = rocker_tlv_get_le16(tlvs[ROCKER_TLV_CMD_TYPE]); info_tlv = tlvs[ROCKER_TLV_CMD_INFO]; /* This might be reworked to something like this: * Every world will have an array of command handlers from * ROCKER_TLV_CMD_TYPE_UNSPEC to ROCKER_TLV_CMD_TYPE_MAX. There is * up to each world to implement whatever command it want. * It can reference "generic" commands as cmd_set_port_settings or * cmd_get_port_settings */ switch (cmd) { case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD: case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD: case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL: case ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS: case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD: case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD: case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL: case ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS: world = r->worlds[ROCKER_WORLD_TYPE_OF_DPA]; err = world_do_cmd(world, info, buf, cmd, info_tlv); break; case ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS: err = cmd_get_port_settings(r, info, buf, info_tlv); break; case ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS: err = cmd_set_port_settings(r, info_tlv); break; default: err = -ROCKER_EINVAL; break; } return err; } static void rocker_msix_irq(Rocker *r, unsigned vector) { PCIDevice *dev = PCI_DEVICE(r); DPRINTF("MSI-X notify request for vector %d\n", vector); if (vector >= ROCKER_MSIX_VEC_COUNT(r->fp_ports)) { DPRINTF("incorrect vector %d\n", vector); return; } msix_notify(dev, vector); } int rocker_event_link_changed(Rocker *r, uint32_t pport, bool link_up) { DescRing *ring = r->rings[ROCKER_RING_EVENT]; DescInfo *info = desc_ring_fetch_desc(ring); RockerTlv *nest; char *buf; size_t tlv_size; int pos; int err; if (!info) { return -ROCKER_ENOBUFS; } tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */ rocker_tlv_total_size(0) + /* nest */ rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ rocker_tlv_total_size(sizeof(uint8_t)); /* link up */ if (tlv_size > desc_buf_size(info)) { err = -ROCKER_EMSGSIZE; goto err_too_big; } buf = desc_get_buf(info, false); if (!buf) { err = -ROCKER_ENOMEM; goto err_no_mem; } pos = 0; rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE, ROCKER_TLV_EVENT_TYPE_LINK_CHANGED); nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO); rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_PPORT, pport); rocker_tlv_put_u8(buf, &pos, ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, link_up ? 1 : 0); rocker_tlv_nest_end(buf, &pos, nest); err = desc_set_buf(info, tlv_size); err_too_big: err_no_mem: if (desc_ring_post_desc(ring, err)) { rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT); } return err; } int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr, uint16_t vlan_id) { DescRing *ring = r->rings[ROCKER_RING_EVENT]; DescInfo *info; FpPort *fp_port; uint32_t port; RockerTlv *nest; char *buf; size_t tlv_size; int pos; int err; if (!fp_port_from_pport(pport, &port)) { return -ROCKER_EINVAL; } fp_port = r->fp_port[port]; if (!fp_port_get_learning(fp_port)) { return ROCKER_OK; } info = desc_ring_fetch_desc(ring); if (!info) { return -ROCKER_ENOBUFS; } tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* event type */ rocker_tlv_total_size(0) + /* nest */ rocker_tlv_total_size(sizeof(uint32_t)) + /* pport */ rocker_tlv_total_size(ETH_ALEN) + /* mac addr */ rocker_tlv_total_size(sizeof(uint16_t)); /* vlan_id */ if (tlv_size > desc_buf_size(info)) { err = -ROCKER_EMSGSIZE; goto err_too_big; } buf = desc_get_buf(info, false); if (!buf) { err = -ROCKER_ENOMEM; goto err_no_mem; } pos = 0; rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_TYPE, ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN); nest = rocker_tlv_nest_start(buf, &pos, ROCKER_TLV_EVENT_INFO); rocker_tlv_put_le32(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_PPORT, pport); rocker_tlv_put(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_MAC, ETH_ALEN, addr); rocker_tlv_put_u16(buf, &pos, ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, vlan_id); rocker_tlv_nest_end(buf, &pos, nest); err = desc_set_buf(info, tlv_size); err_too_big: err_no_mem: if (desc_ring_post_desc(ring, err)) { rocker_msix_irq(r, ROCKER_MSIX_VEC_EVENT); } return err; } static DescRing *rocker_get_rx_ring_by_pport(Rocker *r, uint32_t pport) { return r->rings[(pport - 1) * 2 + 3]; } int rx_produce(World *world, uint32_t pport, const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu) { Rocker *r = world_rocker(world); PCIDevice *dev = (PCIDevice *)r; DescRing *ring = rocker_get_rx_ring_by_pport(r, pport); DescInfo *info = desc_ring_fetch_desc(ring); char *data; size_t data_size = iov_size(iov, iovcnt); char *buf; uint16_t rx_flags = 0; uint16_t rx_csum = 0; size_t tlv_size; RockerTlv *tlvs[ROCKER_TLV_RX_MAX + 1]; hwaddr frag_addr; uint16_t frag_max_len; int pos; int err; if (!info) { return -ROCKER_ENOBUFS; } buf = desc_get_buf(info, false); if (!buf) { err = -ROCKER_ENXIO; goto out; } rocker_tlv_parse(tlvs, ROCKER_TLV_RX_MAX, buf, desc_tlv_size(info)); if (!tlvs[ROCKER_TLV_RX_FRAG_ADDR] || !tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]) { err = -ROCKER_EINVAL; goto out; } frag_addr = rocker_tlv_get_le64(tlvs[ROCKER_TLV_RX_FRAG_ADDR]); frag_max_len = rocker_tlv_get_le16(tlvs[ROCKER_TLV_RX_FRAG_MAX_LEN]); if (data_size > frag_max_len) { err = -ROCKER_EMSGSIZE; goto out; } if (copy_to_cpu) { rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD; } /* XXX calc rx flags/csum */ tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */ rocker_tlv_total_size(sizeof(uint16_t)) + /* scum */ rocker_tlv_total_size(sizeof(uint64_t)) + /* frag addr */ rocker_tlv_total_size(sizeof(uint16_t)) + /* frag max len */ rocker_tlv_total_size(sizeof(uint16_t)); /* frag len */ if (tlv_size > desc_buf_size(info)) { err = -ROCKER_EMSGSIZE; goto out; } /* TODO: * iov dma write can be optimized in similar way e1000 does it in * e1000_receive_iov. But maybe if would make sense to introduce * generic helper iov_dma_write. */ data = g_malloc(data_size); if (!data) { err = -ROCKER_ENOMEM; goto out; } iov_to_buf(iov, iovcnt, 0, data, data_size); pci_dma_write(dev, frag_addr, data, data_size); g_free(data); pos = 0; rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FLAGS, rx_flags); rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_CSUM, rx_csum); rocker_tlv_put_le64(buf, &pos, ROCKER_TLV_RX_FRAG_ADDR, frag_addr); rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_MAX_LEN, frag_max_len); rocker_tlv_put_le16(buf, &pos, ROCKER_TLV_RX_FRAG_LEN, data_size); err = desc_set_buf(info, tlv_size); out: if (desc_ring_post_desc(ring, err)) { rocker_msix_irq(r, ROCKER_MSIX_VEC_RX(pport - 1)); } return err; } int rocker_port_eg(Rocker *r, uint32_t pport, const struct iovec *iov, int iovcnt) { FpPort *fp_port; uint32_t port; if (!fp_port_from_pport(pport, &port)) { return -ROCKER_EINVAL; } fp_port = r->fp_port[port]; return fp_port_eg(fp_port, iov, iovcnt); } static void rocker_test_dma_ctrl(Rocker *r, uint32_t val) { PCIDevice *dev = PCI_DEVICE(r); char *buf; int i; buf = g_malloc(r->test_dma_size); if (!buf) { DPRINTF("test dma buffer alloc failed"); return; } switch (val) { case ROCKER_TEST_DMA_CTRL_CLEAR: memset(buf, 0, r->test_dma_size); break; case ROCKER_TEST_DMA_CTRL_FILL: memset(buf, 0x96, r->test_dma_size); break; case ROCKER_TEST_DMA_CTRL_INVERT: pci_dma_read(dev, r->test_dma_addr, buf, r->test_dma_size); for (i = 0; i < r->test_dma_size; i++) { buf[i] = ~buf[i]; } break; default: DPRINTF("not test dma control val=0x%08x\n", val); goto err_out; } pci_dma_write(dev, r->test_dma_addr, buf, r->test_dma_size); rocker_msix_irq(r, ROCKER_MSIX_VEC_TEST); err_out: g_free(buf); } static void rocker_reset(DeviceState *dev); static void rocker_control(Rocker *r, uint32_t val) { if (val & ROCKER_CONTROL_RESET) { rocker_reset(DEVICE(r)); } } static int rocker_pci_ring_count(Rocker *r) { /* There are: * - command ring * - event ring * - tx and rx ring per each port */ return 2 + (2 * r->fp_ports); } static bool rocker_addr_is_desc_reg(Rocker *r, hwaddr addr) { hwaddr start = ROCKER_DMA_DESC_BASE; hwaddr end = start + (ROCKER_DMA_DESC_SIZE * rocker_pci_ring_count(r)); return addr >= start && addr < end; } static void rocker_port_phys_enable_write(Rocker *r, uint64_t new) { int i; bool old_enabled; bool new_enabled; FpPort *fp_port; for (i = 0; i < r->fp_ports; i++) { fp_port = r->fp_port[i]; old_enabled = fp_port_enabled(fp_port); new_enabled = (new >> (i + 1)) & 0x1; if (new_enabled == old_enabled) { continue; } if (new_enabled) { fp_port_enable(r->fp_port[i]); } else { fp_port_disable(r->fp_port[i]); } } } static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val) { Rocker *r = opaque; if (rocker_addr_is_desc_reg(r, addr)) { unsigned index = ROCKER_RING_INDEX(addr); unsigned offset = addr & ROCKER_DMA_DESC_MASK; switch (offset) { case ROCKER_DMA_DESC_ADDR_OFFSET: r->lower32 = (uint64_t)val; break; case ROCKER_DMA_DESC_ADDR_OFFSET + 4: desc_ring_set_base_addr(r->rings[index], ((uint64_t)val) << 32 | r->lower32); r->lower32 = 0; break; case ROCKER_DMA_DESC_SIZE_OFFSET: desc_ring_set_size(r->rings[index], val); break; case ROCKER_DMA_DESC_HEAD_OFFSET: if (desc_ring_set_head(r->rings[index], val)) { rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index])); } break; case ROCKER_DMA_DESC_CTRL_OFFSET: desc_ring_set_ctrl(r->rings[index], val); break; case ROCKER_DMA_DESC_CREDITS_OFFSET: if (desc_ring_ret_credits(r->rings[index], val)) { rocker_msix_irq(r, desc_ring_get_msix_vector(r->rings[index])); } break; default: DPRINTF("not implemented dma reg write(l) addr=0x" TARGET_FMT_plx " val=0x%08x (ring %d, addr=0x%02x)\n", addr, val, index, offset); break; } return; } switch (addr) { case ROCKER_TEST_REG: r->test_reg = val; break; case ROCKER_TEST_REG64: case ROCKER_TEST_DMA_ADDR: case ROCKER_PORT_PHYS_ENABLE: r->lower32 = (uint64_t)val; break; case ROCKER_TEST_REG64 + 4: r->test_reg64 = ((uint64_t)val) << 32 | r->lower32; r->lower32 = 0; break; case ROCKER_TEST_IRQ: rocker_msix_irq(r, val); break; case ROCKER_TEST_DMA_SIZE: r->test_dma_size = val; break; case ROCKER_TEST_DMA_ADDR + 4: r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32; r->lower32 = 0; break; case ROCKER_TEST_DMA_CTRL: rocker_test_dma_ctrl(r, val); break; case ROCKER_CONTROL: rocker_control(r, val); break; case ROCKER_PORT_PHYS_ENABLE + 4: rocker_port_phys_enable_write(r, ((uint64_t)val) << 32 | r->lower32); r->lower32 = 0; break; default: DPRINTF("not implemented write(l) addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val); break; } } static void rocker_io_writeq(void *opaque, hwaddr addr, uint64_t val) { Rocker *r = opaque; if (rocker_addr_is_desc_reg(r, addr)) { unsigned index = ROCKER_RING_INDEX(addr); unsigned offset = addr & ROCKER_DMA_DESC_MASK; switch (offset) { case ROCKER_DMA_DESC_ADDR_OFFSET: desc_ring_set_base_addr(r->rings[index], val); break; default: DPRINTF("not implemented dma reg write(q) addr=0x" TARGET_FMT_plx " val=0x" TARGET_FMT_plx " (ring %d, offset=0x%02x)\n", addr, val, index, offset); break; } return; } switch (addr) { case ROCKER_TEST_REG64: r->test_reg64 = val; break; case ROCKER_TEST_DMA_ADDR: r->test_dma_addr = val; break; case ROCKER_PORT_PHYS_ENABLE: rocker_port_phys_enable_write(r, val); break; default: DPRINTF("not implemented write(q) addr=0x" TARGET_FMT_plx " val=0x" TARGET_FMT_plx "\n", addr, val); break; } } #ifdef DEBUG_ROCKER #define regname(reg) case (reg): return #reg static const char *rocker_reg_name(void *opaque, hwaddr addr) { Rocker *r = opaque; if (rocker_addr_is_desc_reg(r, addr)) { unsigned index = ROCKER_RING_INDEX(addr); unsigned offset = addr & ROCKER_DMA_DESC_MASK; static char buf[100]; char ring_name[10]; switch (index) { case 0: sprintf(ring_name, "cmd"); break; case 1: sprintf(ring_name, "event"); break; default: sprintf(ring_name, "%s-%d", index % 2 ? "rx" : "tx", (index - 2) / 2); } switch (offset) { case ROCKER_DMA_DESC_ADDR_OFFSET: sprintf(buf, "Ring[%s] ADDR", ring_name); return buf; case ROCKER_DMA_DESC_ADDR_OFFSET+4: sprintf(buf, "Ring[%s] ADDR+4", ring_name); return buf; case ROCKER_DMA_DESC_SIZE_OFFSET: sprintf(buf, "Ring[%s] SIZE", ring_name); return buf; case ROCKER_DMA_DESC_HEAD_OFFSET: sprintf(buf, "Ring[%s] HEAD", ring_name); return buf; case ROCKER_DMA_DESC_TAIL_OFFSET: sprintf(buf, "Ring[%s] TAIL", ring_name); return buf; case ROCKER_DMA_DESC_CTRL_OFFSET: sprintf(buf, "Ring[%s] CTRL", ring_name); return buf; case ROCKER_DMA_DESC_CREDITS_OFFSET: sprintf(buf, "Ring[%s] CREDITS", ring_name); return buf; default: sprintf(buf, "Ring[%s] ???", ring_name); return buf; } } else { switch (addr) { regname(ROCKER_BOGUS_REG0); regname(ROCKER_BOGUS_REG1); regname(ROCKER_BOGUS_REG2); regname(ROCKER_BOGUS_REG3); regname(ROCKER_TEST_REG); regname(ROCKER_TEST_REG64); regname(ROCKER_TEST_REG64+4); regname(ROCKER_TEST_IRQ); regname(ROCKER_TEST_DMA_ADDR); regname(ROCKER_TEST_DMA_ADDR+4); regname(ROCKER_TEST_DMA_SIZE); regname(ROCKER_TEST_DMA_CTRL); regname(ROCKER_CONTROL); regname(ROCKER_PORT_PHYS_COUNT); regname(ROCKER_PORT_PHYS_LINK_STATUS); regname(ROCKER_PORT_PHYS_LINK_STATUS+4); regname(ROCKER_PORT_PHYS_ENABLE); regname(ROCKER_PORT_PHYS_ENABLE+4); regname(ROCKER_SWITCH_ID); regname(ROCKER_SWITCH_ID+4); } } return "???"; } #else static const char *rocker_reg_name(void *opaque, hwaddr addr) { return NULL; } #endif static void rocker_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { DPRINTF("Write %s addr " TARGET_FMT_plx ", size %u, val " TARGET_FMT_plx "\n", rocker_reg_name(opaque, addr), addr, size, val); switch (size) { case 4: rocker_io_writel(opaque, addr, val); break; case 8: rocker_io_writeq(opaque, addr, val); break; } } static uint64_t rocker_port_phys_link_status(Rocker *r) { int i; uint64_t status = 0; for (i = 0; i < r->fp_ports; i++) { FpPort *port = r->fp_port[i]; if (fp_port_get_link_up(port)) { status |= 1 << (i + 1); } } return status; } static uint64_t rocker_port_phys_enable_read(Rocker *r) { int i; uint64_t ret = 0; for (i = 0; i < r->fp_ports; i++) { FpPort *port = r->fp_port[i]; if (fp_port_enabled(port)) { ret |= 1 << (i + 1); } } return ret; } static uint32_t rocker_io_readl(void *opaque, hwaddr addr) { Rocker *r = opaque; uint32_t ret; if (rocker_addr_is_desc_reg(r, addr)) { unsigned index = ROCKER_RING_INDEX(addr); unsigned offset = addr & ROCKER_DMA_DESC_MASK; switch (offset) { case ROCKER_DMA_DESC_ADDR_OFFSET: ret = (uint32_t)desc_ring_get_base_addr(r->rings[index]); break; case ROCKER_DMA_DESC_ADDR_OFFSET + 4: ret = (uint32_t)(desc_ring_get_base_addr(r->rings[index]) >> 32); break; case ROCKER_DMA_DESC_SIZE_OFFSET: ret = desc_ring_get_size(r->rings[index]); break; case ROCKER_DMA_DESC_HEAD_OFFSET: ret = desc_ring_get_head(r->rings[index]); break; case ROCKER_DMA_DESC_TAIL_OFFSET: ret = desc_ring_get_tail(r->rings[index]); break; case ROCKER_DMA_DESC_CREDITS_OFFSET: ret = desc_ring_get_credits(r->rings[index]); break; default: DPRINTF("not implemented dma reg read(l) addr=0x" TARGET_FMT_plx " (ring %d, addr=0x%02x)\n", addr, index, offset); ret = 0; break; } return ret; } switch (addr) { case ROCKER_BOGUS_REG0: case ROCKER_BOGUS_REG1: case ROCKER_BOGUS_REG2: case ROCKER_BOGUS_REG3: ret = 0xDEADBABE; break; case ROCKER_TEST_REG: ret = r->test_reg * 2; break; case ROCKER_TEST_REG64: ret = (uint32_t)(r->test_reg64 * 2); break; case ROCKER_TEST_REG64 + 4: ret = (uint32_t)((r->test_reg64 * 2) >> 32); break; case ROCKER_TEST_DMA_SIZE: ret = r->test_dma_size; break; case ROCKER_TEST_DMA_ADDR: ret = (uint32_t)r->test_dma_addr; break; case ROCKER_TEST_DMA_ADDR + 4: ret = (uint32_t)(r->test_dma_addr >> 32); break; case ROCKER_PORT_PHYS_COUNT: ret = r->fp_ports; break; case ROCKER_PORT_PHYS_LINK_STATUS: ret = (uint32_t)rocker_port_phys_link_status(r); break; case ROCKER_PORT_PHYS_LINK_STATUS + 4: ret = (uint32_t)(rocker_port_phys_link_status(r) >> 32); break; case ROCKER_PORT_PHYS_ENABLE: ret = (uint32_t)rocker_port_phys_enable_read(r); break; case ROCKER_PORT_PHYS_ENABLE + 4: ret = (uint32_t)(rocker_port_phys_enable_read(r) >> 32); break; case ROCKER_SWITCH_ID: ret = (uint32_t)r->switch_id; break; case ROCKER_SWITCH_ID + 4: ret = (uint32_t)(r->switch_id >> 32); break; default: DPRINTF("not implemented read(l) addr=0x" TARGET_FMT_plx "\n", addr); ret = 0; break; } return ret; } static uint64_t rocker_io_readq(void *opaque, hwaddr addr) { Rocker *r = opaque; uint64_t ret; if (rocker_addr_is_desc_reg(r, addr)) { unsigned index = ROCKER_RING_INDEX(addr); unsigned offset = addr & ROCKER_DMA_DESC_MASK; switch (addr & ROCKER_DMA_DESC_MASK) { case ROCKER_DMA_DESC_ADDR_OFFSET: ret = desc_ring_get_base_addr(r->rings[index]); break; default: DPRINTF("not implemented dma reg read(q) addr=0x" TARGET_FMT_plx " (ring %d, addr=0x%02x)\n", addr, index, offset); ret = 0; break; } return ret; } switch (addr) { case ROCKER_BOGUS_REG0: case ROCKER_BOGUS_REG2: ret = 0xDEADBABEDEADBABEULL; break; case ROCKER_TEST_REG64: ret = r->test_reg64 * 2; break; case ROCKER_TEST_DMA_ADDR: ret = r->test_dma_addr; break; case ROCKER_PORT_PHYS_LINK_STATUS: ret = rocker_port_phys_link_status(r); break; case ROCKER_PORT_PHYS_ENABLE: ret = rocker_port_phys_enable_read(r); break; case ROCKER_SWITCH_ID: ret = r->switch_id; break; default: DPRINTF("not implemented read(q) addr=0x" TARGET_FMT_plx "\n", addr); ret = 0; break; } return ret; } static uint64_t rocker_mmio_read(void *opaque, hwaddr addr, unsigned size) { DPRINTF("Read %s addr " TARGET_FMT_plx ", size %u\n", rocker_reg_name(opaque, addr), addr, size); switch (size) { case 4: return rocker_io_readl(opaque, addr); case 8: return rocker_io_readq(opaque, addr); } return -1; } static const MemoryRegionOps rocker_mmio_ops = { .read = rocker_mmio_read, .write = rocker_mmio_write, .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, .max_access_size = 8, }, .impl = { .min_access_size = 4, .max_access_size = 8, }, }; static void rocker_msix_vectors_unuse(Rocker *r, unsigned int num_vectors) { PCIDevice *dev = PCI_DEVICE(r); int i; for (i = 0; i < num_vectors; i++) { msix_vector_unuse(dev, i); } } static int rocker_msix_vectors_use(Rocker *r, unsigned int num_vectors) { PCIDevice *dev = PCI_DEVICE(r); int err; int i; for (i = 0; i < num_vectors; i++) { err = msix_vector_use(dev, i); if (err) { goto rollback; } } return 0; rollback: rocker_msix_vectors_unuse(r, i); return err; } static int rocker_msix_init(Rocker *r) { PCIDevice *dev = PCI_DEVICE(r); int err; err = msix_init(dev, ROCKER_MSIX_VEC_COUNT(r->fp_ports), &r->msix_bar, ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_TABLE_OFFSET, &r->msix_bar, ROCKER_PCI_MSIX_BAR_IDX, ROCKER_PCI_MSIX_PBA_OFFSET, 0); if (err) { return err; } err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); if (err) { goto err_msix_vectors_use; } return 0; err_msix_vectors_use: msix_uninit(dev, &r->msix_bar, &r->msix_bar); return err; } static void rocker_msix_uninit(Rocker *r) { PCIDevice *dev = PCI_DEVICE(r); msix_uninit(dev, &r->msix_bar, &r->msix_bar); rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); } static int pci_rocker_init(PCIDevice *dev) { Rocker *r = to_rocker(dev); const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } }; const MACAddr dflt = { .a = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x01 } }; static int sw_index; int i, err = 0; /* allocate worlds */ r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r); r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA]; for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { if (!r->worlds[i]) { err = -ENOMEM; goto err_world_alloc; } } /* set up memory-mapped region at BAR0 */ memory_region_init_io(&r->mmio, OBJECT(r), &rocker_mmio_ops, r, "rocker-mmio", ROCKER_PCI_BAR0_SIZE); pci_register_bar(dev, ROCKER_PCI_BAR0_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, &r->mmio); /* set up memory-mapped region for MSI-X */ memory_region_init(&r->msix_bar, OBJECT(r), "rocker-msix-bar", ROCKER_PCI_MSIX_BAR_SIZE); pci_register_bar(dev, ROCKER_PCI_MSIX_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, &r->msix_bar); /* MSI-X init */ err = rocker_msix_init(r); if (err) { goto err_msix_init; } /* validate switch properties */ if (!r->name) { r->name = g_strdup(ROCKER); } if (rocker_find(r->name)) { err = -EEXIST; goto err_duplicate; } /* Rocker name is passed in port name requests to OS with the intention * that the name is used in interface names. Limit the length of the * rocker name to avoid naming problems in the OS. Also, adding the * port number as p# and unganged breakout b#, where # is at most 2 * digits, so leave room for it too (-1 for string terminator, -3 for * p# and -3 for b#) */ #define ROCKER_IFNAMSIZ 16 #define MAX_ROCKER_NAME_LEN (ROCKER_IFNAMSIZ - 1 - 3 - 3) if (strlen(r->name) > MAX_ROCKER_NAME_LEN) { fprintf(stderr, "rocker: name too long; please shorten to at most %d chars\n", MAX_ROCKER_NAME_LEN); return -EINVAL; } if (memcmp(&r->fp_start_macaddr, &zero, sizeof(zero)) == 0) { memcpy(&r->fp_start_macaddr, &dflt, sizeof(dflt)); r->fp_start_macaddr.a[4] += (sw_index++); } if (!r->switch_id) { memcpy(&r->switch_id, &r->fp_start_macaddr, sizeof(r->fp_start_macaddr)); } if (r->fp_ports > ROCKER_FP_PORTS_MAX) { r->fp_ports = ROCKER_FP_PORTS_MAX; } r->rings = g_new(DescRing *, rocker_pci_ring_count(r)); if (!r->rings) { goto err_rings_alloc; } /* Rings are ordered like this: * - command ring * - event ring * - port0 tx ring * - port0 rx ring * - port1 tx ring * - port1 rx ring * ..... */ err = -ENOMEM; for (i = 0; i < rocker_pci_ring_count(r); i++) { DescRing *ring = desc_ring_alloc(r, i); if (!ring) { goto err_ring_alloc; } if (i == ROCKER_RING_CMD) { desc_ring_set_consume(ring, cmd_consume, ROCKER_MSIX_VEC_CMD); } else if (i == ROCKER_RING_EVENT) { desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_EVENT); } else if (i % 2 == 0) { desc_ring_set_consume(ring, tx_consume, ROCKER_MSIX_VEC_TX((i - 2) / 2)); } else if (i % 2 == 1) { desc_ring_set_consume(ring, NULL, ROCKER_MSIX_VEC_RX((i - 3) / 2)); } r->rings[i] = ring; } for (i = 0; i < r->fp_ports; i++) { FpPort *port = fp_port_alloc(r, r->name, &r->fp_start_macaddr, i, &r->fp_ports_peers[i]); if (!port) { goto err_port_alloc; } r->fp_port[i] = port; fp_port_set_world(port, r->world_dflt); } QLIST_INSERT_HEAD(&rockers, r, next); return 0; err_port_alloc: for (--i; i >= 0; i--) { FpPort *port = r->fp_port[i]; fp_port_free(port); } i = rocker_pci_ring_count(r); err_ring_alloc: for (--i; i >= 0; i--) { desc_ring_free(r->rings[i]); } g_free(r->rings); err_rings_alloc: err_duplicate: rocker_msix_uninit(r); err_msix_init: object_unparent(OBJECT(&r->msix_bar)); object_unparent(OBJECT(&r->mmio)); err_world_alloc: for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { if (r->worlds[i]) { world_free(r->worlds[i]); } } return err; } static void pci_rocker_uninit(PCIDevice *dev) { Rocker *r = to_rocker(dev); int i; QLIST_REMOVE(r, next); for (i = 0; i < r->fp_ports; i++) { FpPort *port = r->fp_port[i]; fp_port_free(port); r->fp_port[i] = NULL; } for (i = 0; i < rocker_pci_ring_count(r); i++) { if (r->rings[i]) { desc_ring_free(r->rings[i]); } } g_free(r->rings); rocker_msix_uninit(r); object_unparent(OBJECT(&r->msix_bar)); object_unparent(OBJECT(&r->mmio)); for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { if (r->worlds[i]) { world_free(r->worlds[i]); } } g_free(r->fp_ports_peers); } static void rocker_reset(DeviceState *dev) { Rocker *r = to_rocker(dev); int i; for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) { if (r->worlds[i]) { world_reset(r->worlds[i]); } } for (i = 0; i < r->fp_ports; i++) { fp_port_reset(r->fp_port[i]); fp_port_set_world(r->fp_port[i], r->world_dflt); } r->test_reg = 0; r->test_reg64 = 0; r->test_dma_addr = 0; r->test_dma_size = 0; for (i = 0; i < rocker_pci_ring_count(r); i++) { desc_ring_reset(r->rings[i]); } DPRINTF("Reset done\n"); } static Property rocker_properties[] = { DEFINE_PROP_STRING("name", Rocker, name), DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker, fp_start_macaddr), DEFINE_PROP_UINT64("switch_id", Rocker, switch_id, 0), DEFINE_PROP_ARRAY("ports", Rocker, fp_ports, fp_ports_peers, qdev_prop_netdev, NICPeers), DEFINE_PROP_END_OF_LIST(), }; static const VMStateDescription rocker_vmsd = { .name = ROCKER, .unmigratable = 1, }; static void rocker_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); k->init = pci_rocker_init; k->exit = pci_rocker_uninit; k->vendor_id = PCI_VENDOR_ID_REDHAT; k->device_id = PCI_DEVICE_ID_REDHAT_ROCKER; k->revision = ROCKER_PCI_REVISION; k->class_id = PCI_CLASS_NETWORK_OTHER; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); dc->desc = "Rocker Switch"; dc->reset = rocker_reset; dc->props = rocker_properties; dc->vmsd = &rocker_vmsd; } static const TypeInfo rocker_info = { .name = ROCKER, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(Rocker), .class_init = rocker_class_init, }; static void rocker_register_types(void) { type_register_static(&rocker_info); } type_init(rocker_register_types)