From 6ad99cdddff87be7f08cd7c9274c02ede525c590 Mon Sep 17 00:00:00 2001 From: Augustin Cavalier Date: Sat, 21 Sep 2019 18:38:35 -0400 Subject: [PATCH] freebsd_iflib: Synchronize with FreeBSD trunk. Up through Sep. 20th (yesterday.) Includes some pretty substantial fixes around MSI interrupts, among other things. --- .../compat/freebsd_iflib/compat/net/iflib.h | 57 +- .../freebsd_iflib/compat/net/iflib_private.h | 12 +- .../compat/freebsd_iflib/compat/net/mp_ring.h | 10 +- src/libs/compat/freebsd_iflib/iflib.c | 891 ++++++++++++------ src/libs/compat/freebsd_iflib/mp_ring.c | 44 +- 5 files changed, 650 insertions(+), 364 deletions(-) diff --git a/src/libs/compat/freebsd_iflib/compat/net/iflib.h b/src/libs/compat/freebsd_iflib/compat/net/iflib.h index 2d42a82843..c73c458fce 100644 --- a/src/libs/compat/freebsd_iflib/compat/net/iflib.h +++ b/src/libs/compat/freebsd_iflib/compat/net/iflib.h @@ -78,7 +78,7 @@ typedef struct if_rxd_info { /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ qidx_t iri_cidx; /* consumer index of cq */ - struct ifnet *iri_ifp; /* some drivers >1 interface per softc */ + if_t iri_ifp; /* driver may have >1 iface per softc */ /* updated by driver */ if_rxd_frag_t iri_frags; @@ -131,12 +131,12 @@ typedef struct if_pkt_info { uint8_t ipi_mflags; /* packet mbuf flags */ uint32_t ipi_tcp_seq; /* tcp seqno */ - uint32_t ipi_tcp_sum; /* tcp csum */ + uint32_t __spare0__; } *if_pkt_info_t; typedef struct if_irq { struct resource *ii_res; - int ii_rid; + int __spare0__; void *ii_tag; } *if_irq_t; @@ -165,7 +165,7 @@ typedef struct pci_vendor_info { uint32_t pvi_subdevice_id; uint32_t pvi_rev_id; uint32_t pvi_class_mask; - caddr_t pvi_name; + const char *pvi_name; } pci_vendor_info_t; #define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name} @@ -193,9 +193,8 @@ typedef struct if_softc_ctx { int isc_vectors; int isc_nrxqsets; int isc_ntxqsets; - uint8_t isc_min_tx_latency; /* disable doorbell update batching */ - uint8_t isc_rx_mvec_enable; /* generate mvecs on rx */ - uint32_t isc_txrx_budget_bytes_max; + uint16_t __spare0__; + uint32_t __spare1__; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ int isc_ntxd[8]; @@ -217,16 +216,23 @@ typedef struct if_softc_ctx { int isc_rss_table_mask; int isc_nrxqsets_max; int isc_ntxqsets_max; - uint32_t isc_tx_qdepth; + uint32_t __spare2__; iflib_intr_mode_t isc_intr; uint16_t isc_max_frame_size; /* set at init time by driver */ uint16_t isc_min_frame_size; /* set at init time by driver, only used if IFLIB_NEED_ETHER_PAD is set. */ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */ - pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */ + uint32_t __spare3__; + uint32_t __spare4__; + uint32_t __spare5__; + uint32_t __spare6__; + uint32_t __spare7__; + uint32_t __spare8__; + caddr_t __spare9__; int isc_disable_msix; if_txrx_t isc_txrx; + struct ifmedia *isc_media; } *if_softc_ctx_t; /* @@ -246,7 +252,7 @@ struct if_shared_ctx { int isc_admin_intrcnt; /* # of admin/link interrupts */ /* fields necessary for probe */ - pci_vendor_info_t *isc_vendor_info; + const pci_vendor_info_t *isc_vendor_info; const char *isc_driver_version; /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, @@ -262,7 +268,7 @@ struct if_shared_ctx { int isc_nfl __aligned(CACHE_LINE_SIZE); int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ - int isc_rx_process_limit; + int __spare0__; int isc_tx_reclaim_thresh; int isc_flags; const char *isc_name; @@ -286,11 +292,6 @@ typedef enum { IFLIB_INTR_IOV, } iflib_intr_type_t; -#ifndef ETH_ADDR_LEN -#define ETH_ADDR_LEN 6 -#endif - - /* * Interface has a separate command queue for RX */ @@ -360,7 +361,10 @@ typedef enum { * Interface needs admin task to ignore interface up/down status */ #define IFLIB_ADMIN_ALWAYS_RUN 0x10000 - +/* + * Driver will pass the media + */ +#define IFLIB_DRIVER_MEDIA 0x20000 /* * field accessors @@ -392,6 +396,12 @@ int iflib_device_suspend(device_t); int iflib_device_resume(device_t); int iflib_device_shutdown(device_t); +/* + * Use this instead of iflib_device_probe if the driver should report + * BUS_PROBE_VENDOR instead of BUS_PROBE_DEFAULT. (For example, an out-of-tree + * driver based on iflib). + */ +int iflib_device_probe_vendor(device_t); int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *); void iflib_device_iov_uninit(device_t); @@ -404,8 +414,6 @@ int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *); int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp); int iflib_device_deregister(if_ctx_t); - - int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, const char *name); int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, @@ -414,34 +422,28 @@ void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t t void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); -void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name); +void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, + const char *name); void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name); - void iflib_config_gtask_deinit(struct grouptask *gtask); - - void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid); void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); - void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); - int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count); void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); - struct sx *iflib_ctx_lock_get(if_ctx_t); -struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); void iflib_led_create(if_ctx_t ctx); @@ -453,4 +455,5 @@ void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, */ if_pseudo_t iflib_clone_register(if_shared_ctx_t); void iflib_clone_deregister(if_pseudo_t); + #endif /* __IFLIB_H_ */ diff --git a/src/libs/compat/freebsd_iflib/compat/net/iflib_private.h b/src/libs/compat/freebsd_iflib/compat/net/iflib_private.h index 341deb43d7..eca6be6384 100644 --- a/src/libs/compat/freebsd_iflib/compat/net/iflib_private.h +++ b/src/libs/compat/freebsd_iflib/compat/net/iflib_private.h @@ -28,8 +28,7 @@ */ #ifndef __NET_IFLIB_PRIVATE_H_ -#define __NET_IFLIB_PRIVATE_H_ - +#define __NET_IFLIB_PRIVATE_H_ #define IFC_LEGACY 0x001 #define IFC_QFLUSH 0x002 @@ -40,19 +39,14 @@ #define IFC_PREFETCH 0x040 #define IFC_DO_RESET 0x080 #define IFC_DO_WATCHDOG 0x100 -#define IFC_CHECK_HUNG 0x200 +#define IFC_SPARE0 0x200 #define IFC_PSEUDO 0x400 #define IFC_IN_DETACH 0x800 -#define IFC_NETMAP_TX_IRQ 0x80000000 +#define IFC_NETMAP_TX_IRQ 0x80000000 MALLOC_DECLARE(M_IFLIB); -#define IFLIB_MAX_TX_BYTES (2*1024*1024) -#define IFLIB_MIN_TX_BYTES (8*1024) -#define IFLIB_DEFAULT_TX_QDEPTH 2048 - - struct iflib_cloneattach_ctx { struct if_clone *cc_ifc; caddr_t cc_params; diff --git a/src/libs/compat/freebsd_iflib/compat/net/mp_ring.h b/src/libs/compat/freebsd_iflib/compat/net/mp_ring.h index fe29d8acd5..03ac7b831c 100644 --- a/src/libs/compat/freebsd_iflib/compat/net/mp_ring.h +++ b/src/libs/compat/freebsd_iflib/compat/net/mp_ring.h @@ -35,15 +35,15 @@ #error "no user-serviceable parts inside" #endif -#if defined(__powerpc__) || defined(__mips__) || defined(__i386__) || defined(__HAIKU__) -#define NO_64BIT_ATOMICS -#endif - struct ifmp_ring; typedef u_int (*mp_ring_drain_t)(struct ifmp_ring *, u_int, u_int); typedef u_int (*mp_ring_can_drain_t)(struct ifmp_ring *); typedef void (*mp_ring_serial_t)(struct ifmp_ring *); +#if defined(__powerpc__) || defined(__mips__) || defined(__i386__) || defined(__HAIKU__) +#define MP_RING_NO_64BIT_ATOMICS +#endif + struct ifmp_ring { volatile uint64_t state __aligned(CACHE_LINE_SIZE); @@ -58,7 +58,7 @@ struct ifmp_ring { counter_u64_t stalls; counter_u64_t restarts; /* recovered after stalling */ counter_u64_t abdications; -#ifdef NO_64BIT_ATOMICS +#ifdef MP_RING_NO_64BIT_ATOMICS struct mtx lock; #endif void * volatile items[ diff --git a/src/libs/compat/freebsd_iflib/iflib.c b/src/libs/compat/freebsd_iflib/iflib.c index 7eafee7933..559ebe30b0 100644 --- a/src/libs/compat/freebsd_iflib/iflib.c +++ b/src/libs/compat/freebsd_iflib/iflib.c @@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$"); #endif #include #include -#include #include #include #include @@ -192,7 +191,6 @@ struct iflib_ctx { uint32_t ifc_rx_mbuf_sz; int ifc_link_state; - int ifc_link_irq; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; @@ -202,6 +200,7 @@ struct iflib_ctx { struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; + struct ifmedia *ifc_mediap; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; @@ -209,6 +208,9 @@ struct iflib_ctx { uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; uint16_t ifc_sysctl_tx_abdicate; + uint16_t ifc_sysctl_core_offset; +#define CORE_OFFSET_UNSPECIFIED 0xffff + uint8_t ifc_sysctl_separate_txrx; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -226,10 +228,8 @@ struct iflib_ctx { eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; struct ether_addr ifc_mac; - char ifc_mtx_name[16]; }; - void * iflib_get_softc(if_ctx_t ctx) { @@ -255,7 +255,7 @@ struct ifmedia * iflib_get_media(if_ctx_t ctx) { - return (&ctx->ifc_media); + return (ctx->ifc_mediap); } uint32_t @@ -305,7 +305,6 @@ typedef struct iflib_sw_tx_desc_array { struct mbuf **ifsd_m; /* pkthdr mbufs */ } if_txsd_vec_t; - /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_RX_COPY_THRESH 128 @@ -324,10 +323,10 @@ typedef struct iflib_sw_tx_desc_array { #define IFLIB_RESTART_BUDGET 8 - #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) + struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; @@ -377,7 +376,6 @@ struct iflib_txq { iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; - char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; @@ -397,7 +395,6 @@ struct iflib_fl { uint64_t ifl_cl_dequeued; #endif /* implicit pad */ - bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ @@ -442,24 +439,24 @@ get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { - /* If there is a separate completion queue - - * these are the cq cidx and pidx. Otherwise - * these are unused. - */ - qidx_t ifr_size; - qidx_t ifr_cq_cidx; - qidx_t ifr_cq_pidx; - uint8_t ifr_cq_gen; - uint8_t ifr_fl_offset; - if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; +#ifndef __HAIKU__ + struct pfil_head *pfil; +#else +#define PFIL_PASS 0 +#endif + /* + * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is + * the command queue consumer index. Otherwise it's unused. + */ + qidx_t ifr_cq_cidx; uint16_t ifr_id; - uint8_t ifr_lro_enabled; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; + uint8_t ifr_fl_offset; #ifndef __HAIKU__ struct lro_ctrl ifr_lc; #endif @@ -476,7 +473,6 @@ struct iflib_rxq { typedef struct if_rxsd { caddr_t *ifsd_cl; - struct mbuf **ifsd_m; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; @@ -516,7 +512,7 @@ pkt_info_zero(if_pkt_info_t pi) #ifndef __LP64__ pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; -#endif +#endif } #ifndef __HAIKU__ @@ -562,14 +558,11 @@ rxd_info_zero(if_rxd_info_t ri) #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) - #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) - - #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) @@ -613,7 +606,7 @@ static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* - * XXX need to ensure that this can't accidentally cause the head to be moved backwards + * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, @@ -634,15 +627,15 @@ static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, - &iflib_tx_seen, 0, "# tx mbufs seen"); + &iflib_tx_seen, 0, "# TX mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, - &iflib_tx_sent, 0, "# tx mbufs sent"); + &iflib_tx_sent, 0, "# TX mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, - &iflib_tx_encap, 0, "# tx mbufs encapped"); + &iflib_tx_encap, 0, "# TX mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, - &iflib_tx_frees, 0, "# tx frees"); + &iflib_tx_frees, 0, "# TX frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, - &iflib_rx_allocs, 0, "# rx allocations"); + &iflib_rx_allocs, 0, "# RX allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, @@ -681,7 +674,6 @@ static int iflib_fast_intrs; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_if_input; -static int iflib_rx_mbuf_null; static int iflib_rxd_flush; static int iflib_verbose_debug; @@ -689,7 +681,7 @@ static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, - &iflib_rx_intr_enables, 0, "# rx intr enables"); + &iflib_rx_intr_enables, 0, "# RX intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, @@ -698,8 +690,6 @@ SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); -SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, - &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, @@ -718,7 +708,7 @@ iflib_debug_reset(void) iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_if_input = - iflib_rx_mbuf_null = iflib_rxd_flush = 0; + iflib_rxd_flush = 0; } #else @@ -743,6 +733,7 @@ static void iflib_altq_if_start(if_t ifp); static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); #endif static int iflib_register(if_ctx_t); +static void iflib_deregister(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); @@ -754,6 +745,20 @@ static void iflib_free_intr_mem(if_ctx_t ctx); static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif +#ifndef __HAIKU__ +static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = + SLIST_HEAD_INITIALIZER(cpu_offsets); +struct cpu_offset { + SLIST_ENTRY(cpu_offset) entries; + cpuset_t set; + unsigned int refcount; + uint16_t offset; +}; +static struct mtx cpu_offset_mtx; +MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", + MTX_DEF); +#endif + NETDUMP_DEFINE(iflib); #ifdef DEV_NETMAP @@ -783,13 +788,13 @@ SYSCTL_DECL(_dev_netmap); int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, - CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); + CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, - CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); + CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, - CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); + CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs"); /* * Register/unregister. We are already under netmap lock. @@ -798,7 +803,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { - struct ifnet *ifp = na->ifp; + if_t ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; @@ -928,7 +933,7 @@ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; + if_t ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap kring */ u_int nic_i; /* index into the NIC ring */ @@ -1076,6 +1081,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; + if_t ifp = na->ifp; iflib_fl_t fl; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ @@ -1085,7 +1091,6 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; - struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; if (head > lim) @@ -1169,8 +1174,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { - struct ifnet *ifp = na->ifp; - if_ctx_t ctx = ifp->if_softc; + if_ctx_t ctx = na->ifp->if_softc; CTX_LOCK(ctx); if (onoff) { @@ -1211,13 +1215,11 @@ iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; - int i; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; - - for (i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { + for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. @@ -1281,10 +1283,9 @@ iflib_netmap_timer_adjust(if_ctx_t ctx, iflib_txq_t txq, uint32_t *reset_on) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) #define iflib_netmap_timer_adjust(ctx, txq, reset_on) - #endif -#if (defined(__i386__) || defined(__amd64__)) && !defined(__HAIKU__) +#if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { @@ -1492,6 +1493,7 @@ iflib_fast_intr_rxtx(void *arg) void *sc; int i, cidx, result; qidx_t txqid; + bool intr_enable, intr_legacy; if (!iflib_started) return (FILTER_STRAY); @@ -1505,6 +1507,8 @@ iflib_fast_intr_rxtx(void *arg) ctx = rxq->ifr_ctx; sc = ctx->ifc_softc; + intr_enable = false; + intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY); MPASS(rxq->ifr_ntxqirq); for (i = 0; i < rxq->ifr_ntxqirq; i++) { txqid = rxq->ifr_txqid[i]; @@ -1512,7 +1516,10 @@ iflib_fast_intr_rxtx(void *arg) bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if (!ctx->isc_txd_credits_update(sc, txqid, false)) { - IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); + if (intr_legacy) + intr_enable = true; + else + IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&txq->ift_task); @@ -1524,9 +1531,14 @@ iflib_fast_intr_rxtx(void *arg) if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else { - IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + if (intr_legacy) + intr_enable = true; + else + IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); DBG_COUNTER_INC(rx_intr_enables); } + if (intr_enable) + IFDI_INTR_ENABLE(ctx); return (FILTER_SCHEDULE_THREAD); } @@ -1557,17 +1569,17 @@ _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, const char *name) { - int rc, flags; struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; + int flags, i, rc; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); - irq->ii_rid = rid; - res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); + i = rid; + res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); @@ -1952,13 +1964,13 @@ _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) } /** - * rxq_refill - refill an rxq free-buffer list - * @ctx: the iflib context - * @rxq: the free-list to refill - * @n: the number of new buffers to allocate + * _iflib_fl_refill - refill an rxq free-buffer list + * @ctx: the iflib context + * @fl: the free list to refill + * @count: the number of new buffers to allocate * - * (Re)populate an rxq free-buffer list with up to @n new packet buffers. - * The caller must assure that @n does not exceed the queue's capacity. + * (Re)populate an rxq free-buffer list with up to @count new packet buffers. + * The caller must assure that @count does not exceed the queue's capacity. */ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) @@ -2041,11 +2053,12 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); - MPASS(sd_m[frag_idx] == NULL); - if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { - break; + if (sd_m[frag_idx] == NULL) { + if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { + break; + } + sd_m[frag_idx] = m; } - sd_m[frag_idx] = m; bit_set(fl->ifl_rx_bitmap, frag_idx); #if MEMORY_LOGGING fl->ifl_m_enqueued++; @@ -2265,13 +2278,12 @@ iflib_rx_sds_free(iflib_rxq_t rxq) } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; - rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; + rxq->ifr_cq_cidx = 0; } } /* - * MI independent logic - * + * Timer routine */ static void iflib_timer(void *arg) @@ -2284,6 +2296,7 @@ iflib_timer(void *arg) if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; + /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO @@ -2310,12 +2323,14 @@ iflib_timer(void *arg) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; - if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) + if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu); return; + hung: - device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", - txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); + device_printf(ctx->ifc_dev, + "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n", + txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); STATE_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); @@ -2341,6 +2356,7 @@ iflib_calc_rx_mbuf_sz(if_ctx_t ctx) uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx) { + return (ctx->ifc_rx_mbuf_sz); } @@ -2355,7 +2371,6 @@ iflib_init_locked(if_ctx_t ctx) iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; - if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); @@ -2400,7 +2415,9 @@ iflib_init_locked(if_ctx_t ctx) } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { - device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); + device_printf(ctx->ifc_dev, + "setting up free list %d failed - " + "check cluster settings\n", j); goto done; } } @@ -2484,7 +2501,7 @@ iflib_stop(if_ctx_t ctx) for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ - rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; + rxq->ifr_cq_cidx = 0; for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ @@ -2535,13 +2552,15 @@ prefetch_pkts(iflib_fl_t fl, int cidx) prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } -static void -rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) +static struct mbuf * +rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, + int *pf_rv, if_rxd_info_t ri) { - int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; - int next; + caddr_t payload; + struct mbuf *m; + int flid, cidx, len, next; map = NULL; flid = irf->irf_flid; @@ -2549,7 +2568,7 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; - sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx]; + m = fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING @@ -2565,39 +2584,97 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); + +#ifndef __HAIKU__ + if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL) { + payload = *sd->ifsd_cl; + payload += ri->iri_pad; + len = ri->iri_len - ri->iri_pad; + *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, + len | PFIL_MEMPTR | PFIL_IN, NULL); + switch (*pf_rv) { + case PFIL_DROPPED: + case PFIL_CONSUMED: + /* + * The filter ate it. Everything is recycled. + */ + m = NULL; + unload = 0; + break; + case PFIL_REALLOCED: + /* + * The filter copied it. Everything is recycled. + */ + m = pfil_mem2mbuf(payload); + unload = 0; + break; + case PFIL_PASS: + /* + * Filter said it was OK, so receive like + * normal + */ + fl->ifl_sds.ifsd_m[cidx] = NULL; + break; + default: + MPASS(0); + } + } else +#endif + { + fl->ifl_sds.ifsd_m[cidx] = NULL; + *pf_rv = PFIL_PASS; + } + if (unload) bus_dmamap_unload(fl->ifl_buf_tag, map); fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; bit_clear(fl->ifl_rx_bitmap, cidx); + return (m); } static struct mbuf * -assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) +assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) { - int i, padlen , flags; struct mbuf *m, *mh, *mt; caddr_t cl; + int *pf_rv_ptr, flags, i, padlen; + bool consumed; i = 0; mh = NULL; + consumed = false; + *pf_rv = PFIL_PASS; + pf_rv_ptr = pf_rv; do { - rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); + m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, + pf_rv_ptr, ri); MPASS(*sd->ifsd_cl != NULL); - MPASS(*sd->ifsd_m != NULL); - /* Don't include zero-length frags */ - if (ri->iri_frags[i].irf_len == 0) { + /* + * Exclude zero-length frags & frags from + * packets the filter has consumed or dropped + */ + if (ri->iri_frags[i].irf_len == 0 || consumed || +#ifndef __HAIKU__ + *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED +#else + 0 +#endif + ) { + if (mh == NULL) { + /* everything saved here */ + consumed = true; + pf_rv_ptr = NULL; + continue; + } /* XXX we can save the cluster here, but not the mbuf */ - m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); - m_free(*sd->ifsd_m); - *sd->ifsd_m = NULL; + m_init(m, M_NOWAIT, MT_DATA, 0); + m_free(m); continue; } - m = *sd->ifsd_m; - *sd->ifsd_m = NULL; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; @@ -2634,22 +2711,36 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; + int pf_rv; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { - rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); - m = *sd.ifsd_m; - *sd.ifsd_m = NULL; - m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); -#ifndef __NO_STRICT_ALIGNMENT - if (!IP_ALIGNED(m)) - m->m_data += 2; + m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, + &pf_rv, ri); + if (pf_rv != PFIL_PASS +#ifndef __HAIKU__ + && pf_rv != PFIL_REALLOCED #endif - memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); - m->m_len = ri->iri_frags[0].irf_len; - } else { - m = assemble_segments(rxq, ri, &sd); + ) + return (m); + if (pf_rv == PFIL_PASS) { + m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); +#ifndef __NO_STRICT_ALIGNMENT + if (!IP_ALIGNED(m)) + m->m_data += 2; +#endif + memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); + m->m_len = ri->iri_frags[0].irf_len; + } + } else { + m = assemble_segments(rxq, ri, &sd, &pf_rv); + if (pf_rv != PFIL_PASS +#ifndef __HAIKU__ + && pf_rv != PFIL_REALLOCED +#endif + ) + return (m); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; @@ -2686,18 +2777,16 @@ iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { #ifndef __HAIKU__ struct ether_header *eh; - uint16_t eh_type; eh = mtod(m, struct ether_header *); - eh_type = ntohs(eh->ether_type); - switch (eh_type) { + switch (eh->ether_type) { #if defined(INET6) - case ETHERTYPE_IPV6: - return !v6_forwarding; + case htons(ETHERTYPE_IPV6): + return (!v6_forwarding); #endif #if defined (INET) - case ETHERTYPE_IP: - return !v4_forwarding; + case htons(ETHERTYPE_IP): + return (!v4_forwarding); #endif } #endif @@ -2714,6 +2803,7 @@ iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v static bool iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { + if_t ifp; if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; @@ -2722,7 +2812,6 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; - struct ifnet *ifp; int lro_enabled; bool v4_forwarding, v6_forwarding, lro_possible; @@ -2748,6 +2837,8 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) return (false); } + /* pfil needs the vnet to be set */ + CURVNET_SET_QUIET(ifp->if_vnet); for (budget_left = budget; budget_left > 0 && avail > 0;) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); @@ -2765,14 +2856,14 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) if (err) goto err; + rx_pkts += 1; + rx_bytes += ri.iri_len; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ - while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { + while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; - rxq->ifr_cq_gen = 0; - } /* was this only a completion queue message? */ if (__predict_false(ri.iri_nfrags == 0)) continue; @@ -2787,10 +2878,9 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); - if (__predict_false(m == NULL)) { - DBG_COUNTER_INC(rx_mbuf_null); + if (__predict_false(m == NULL)) continue; - } + /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; @@ -2799,6 +2889,7 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) mt = m; } } + CURVNET_RESTORE(); /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); @@ -3303,7 +3394,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; - pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; + pi.ipi_vtag = M_HAS_VLANTAG(m_head) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { @@ -3404,7 +3495,7 @@ defrag: txq->ift_gen = 1; } /* - * drivers can need as many as + * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); @@ -3576,11 +3667,11 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; - struct ifnet *ifp = ctx->ifc_ifp; - struct mbuf **mp, *m; - int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; - int reclaimed, err, in_use_prev, desc_used; - bool do_prefetch, ring, rang; + if_t ifp = ctx->ifc_ifp; + struct mbuf *m, **mp; + int avail, bytes_sent, consumed, count, err, i, in_use_prev; + int mcast_sent, pkt_sent, reclaimed, txq_avail; + bool do_prefetch, rang, ring; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { @@ -3618,16 +3709,15 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); - avail = TXQ_AVAIL(txq); + txq_avail = TXQ_AVAIL(txq); err = 0; - for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { + for (i = 0; i < count && txq_avail > MAX_TX_DESC(ctx) + 2; i++) { int rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; - reclaimed++; continue; } in_use_prev = txq->ift_in_use; @@ -3646,10 +3736,9 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); - avail = TXQ_AVAIL(txq); + txq_avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); - desc_used += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; @@ -3738,7 +3827,10 @@ _task_fn_tx(void *context) BUS_DMASYNC_POSTREAD); if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); - IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); + if (ctx->ifc_flags & IFC_LEGACY) + IFDI_INTR_ENABLE(ctx); + else + IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } #endif @@ -3757,13 +3849,8 @@ _task_fn_tx(void *context) ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); - else { -#ifdef INVARIANTS - int rc = -#endif - IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); - KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); - } + else + IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); } static void @@ -3795,14 +3882,9 @@ _task_fn_rx(void *context) if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); - else { -#ifdef INVARIANTS - int rc = -#endif - IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); - KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); - DBG_COUNTER_INC(rx_intr_enables); - } + else + IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + DBG_COUNTER_INC(rx_intr_enables); } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; @@ -4027,7 +4109,7 @@ iflib_altq_if_start(if_t ifp) { struct ifaltq *ifq = &ifp->if_snd; struct mbuf *m; - + IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { @@ -4091,18 +4173,18 @@ iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif - bool avoid_reset = FALSE; + bool avoid_reset = false; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; + avoid_reset = true; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; + avoid_reset = true; #endif /* ** Calling init results in link renegotiation, @@ -4174,12 +4256,12 @@ iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); - /* falls thru */ + /* FALLTHROUGH */ case SIOCGIFMEDIA: #ifndef __HAIKU__ case SIOCGIFXMEDIA: #endif - err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); + err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); break; #ifndef __HAIKU__ case SIOCGI2C: @@ -4335,12 +4417,10 @@ iflib_led_func(void *arg, int onoff) int iflib_device_probe(device_t dev) { - pci_vendor_info_t *ent; - - uint16_t pci_vendor_id, pci_device_id; - uint16_t pci_subvendor_id, pci_subdevice_id; - uint16_t pci_rev_id; + const pci_vendor_info_t *ent; if_shared_ctx_t sctx; + uint16_t pci_device_id, pci_rev_id, pci_subdevice_id, pci_subvendor_id; + uint16_t pci_vendor_id; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); @@ -4380,6 +4460,20 @@ iflib_device_probe(device_t dev) return (ENXIO); } +int +iflib_device_probe_vendor(device_t dev) +{ + int probe; + + probe = iflib_device_probe(dev); +#ifndef __HAIKU__ + if (probe == BUS_PROBE_DEFAULT) + return (BUS_PROBE_VENDOR); + else +#endif + return (probe); +} + static void iflib_reset_qvalues(if_ctx_t ctx) { @@ -4388,11 +4482,6 @@ iflib_reset_qvalues(if_ctx_t ctx) device_t dev = ctx->ifc_dev; int i; - scctx->isc_txrx_budget_bytes_max = IFLIB_MAX_TX_BYTES; - scctx->isc_tx_qdepth = IFLIB_DEFAULT_TX_QDEPTH; - /* - * XXX sanity check that ntxd & nrxd are a power of 2 - */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) @@ -4423,6 +4512,11 @@ iflib_reset_qvalues(if_ctx_t ctx) i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } + if (!powerof2(scctx->isc_nrxd[i])) { + device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n", + i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]); + scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; + } } for (i = 0; i < sctx->isc_ntxqs; i++) { @@ -4436,20 +4530,133 @@ iflib_reset_qvalues(if_ctx_t ctx) i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } + if (!powerof2(scctx->isc_ntxd[i])) { + device_printf(dev, "ntxd%d: %d is not a power of 2 - using default value of %d\n", + i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]); + scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; + } } } +static void +iflib_add_pfil(if_ctx_t ctx) +{ +#ifndef __HAIKU__ + struct pfil_head *pfil; + struct pfil_head_args pa; + iflib_rxq_t rxq; + int i; + + pa.pa_version = PFIL_VERSION; + pa.pa_flags = PFIL_IN; + pa.pa_type = PFIL_TYPE_ETHERNET; + pa.pa_headname = ctx->ifc_ifp->if_xname; + pfil = pfil_head_register(&pa); + + for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { + rxq->pfil = pfil; + } +#endif +} + +static void +iflib_rem_pfil(if_ctx_t ctx) +{ +#ifndef __HAIKU__ + struct pfil_head *pfil; + iflib_rxq_t rxq; + int i; + + rxq = ctx->ifc_rxqs; + pfil = rxq->pfil; + for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { + rxq->pfil = NULL; + } + pfil_head_unregister(pfil); +#endif +} + +static uint16_t +get_ctx_core_offset(if_ctx_t ctx) +{ +#ifndef __HAIKU__ + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; + struct cpu_offset *op; + uint16_t qc; + uint16_t ret = ctx->ifc_sysctl_core_offset; + + if (ret != CORE_OFFSET_UNSPECIFIED) + return (ret); + + if (ctx->ifc_sysctl_separate_txrx) + qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets; + else + qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets); + + mtx_lock(&cpu_offset_mtx); + SLIST_FOREACH(op, &cpu_offsets, entries) { + if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { + ret = op->offset; + op->offset += qc; + MPASS(op->refcount < UINT_MAX); + op->refcount++; + break; + } + } + if (ret == CORE_OFFSET_UNSPECIFIED) { + ret = 0; + op = malloc(sizeof(struct cpu_offset), M_IFLIB, + M_NOWAIT | M_ZERO); + if (op == NULL) { + device_printf(ctx->ifc_dev, + "allocation for cpu offset failed.\n"); + } else { + op->offset = qc; + op->refcount = 1; + CPU_COPY(&ctx->ifc_cpus, &op->set); + SLIST_INSERT_HEAD(&cpu_offsets, op, entries); + } + } + mtx_unlock(&cpu_offset_mtx); + + return (ret); +#else + return 0; +#endif +} + +static void +unref_ctx_core_offset(if_ctx_t ctx) +{ +#ifndef __HAIKU__ + struct cpu_offset *op, *top; + + mtx_lock(&cpu_offset_mtx); + SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { + if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { + MPASS(op->refcount > 0); + op->refcount--; + if (op->refcount == 0) { + SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); + free(op, M_IFLIB); + } + break; + } + } + mtx_unlock(&cpu_offset_mtx); +#endif +} + int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { - int err, rid, msix; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; - int i; - uint16_t main_txq; - uint16_t main_rxq; - + kobjop_desc_t kobj_desc; + kobj_method_t *kobj_method; + int err, msix, rid; + uint16_t main_rxq, main_txq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); @@ -4481,8 +4688,10 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; + if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) + ctx->ifc_mediap = scctx->isc_media; + #ifdef INVARIANTS - MPASS(scctx->isc_capabilities); if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif @@ -4499,24 +4708,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ - device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", + device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); - for (i = 0; i < sctx->isc_nrxqs; i++) { - if (!powerof2(scctx->isc_nrxd[i])) { - /* round down instead? */ - device_printf(dev, "# rx descriptors must be a power of 2\n"); - err = EINVAL; - goto fail_iflib_detach; - } - } - for (i = 0; i < sctx->isc_ntxqs; i++) { - if (!powerof2(scctx->isc_ntxd[i])) { - device_printf(dev, - "# tx descriptors must be a power of 2"); - err = EINVAL; - goto fail_iflib_detach; - } - } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) @@ -4594,6 +4787,11 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if ((err = iflib_qset_structures_setup(ctx))) goto fail_queues; + /* + * Now that we know how many queues there are, get the core offset. + */ + ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); + /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post @@ -4605,11 +4803,43 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct * interrupt storm. */ IFDI_INTR_DISABLE(ctx); - if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { - device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); - goto fail_queues; - } - if (msix <= 1) { + + if (msix > 1) { + /* + * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable + * aren't the default NULL implementation. + */ + kobj_desc = &ifdi_rx_queue_intr_enable_desc; + kobj_method = kobj_lookup_method(((kobj_t)ctx), NULL, + kobj_desc); + if (kobj_method == &kobj_desc->deflt) { + device_printf(dev, + "MSI-X requires ifdi_rx_queue_intr_enable method"); + err = EOPNOTSUPP; + goto fail_queues; + } + kobj_desc = &ifdi_tx_queue_intr_enable_desc; + kobj_method = kobj_lookup_method(((kobj_t)ctx), NULL, + kobj_desc); + if (kobj_method == &kobj_desc->deflt) { + device_printf(dev, + "MSI-X requires ifdi_tx_queue_intr_enable method"); + err = EOPNOTSUPP; + goto fail_queues; + } + + /* + * Assign the MSI-X vectors. + * Note that the default NULL ifdi_msix_intr_assign method will + * fail here, too. + */ + err = IFDI_MSIX_INTR_ASSIGN(ctx, msix); + if (err != 0) { + device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", + err); + goto fail_queues; + } + } else if (scctx->isc_intr != IFLIB_INTR_MSIX) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); @@ -4619,6 +4849,11 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_queues; } + } else { + device_printf(dev, + "Cannot use iflib with only 1 MSI-X interrupt!\n"); + err = ENODEV; + goto fail_intr_free; } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); @@ -4646,8 +4881,10 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); + iflib_add_pfil(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); + return (0); fail_detach: @@ -4657,11 +4894,13 @@ fail_intr_free: fail_queues: iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); -fail_iflib_detach: + taskqgroup_detach(qgroup_if_config_tqg, &ctx->ifc_admin_task); IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); + iflib_deregister(ctx); fail_ctx_free: + device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); @@ -4700,9 +4939,6 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; - /* - * XXX sanity check that ntxd & nrxd are a power of 2 - */ iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { @@ -4718,12 +4954,11 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); goto fail_ctx_free; } - ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&ctx->ifc_media, IFM_ETHER | IFM_AUTO); + ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); #ifdef INVARIANTS - MPASS(scctx->isc_capabilities); if (scctx->isc_capabilities & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif @@ -4767,24 +5002,8 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ - device_printf(dev, "Using %d tx descriptors and %d rx descriptors\n", + device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); - for (i = 0; i < sctx->isc_nrxqs; i++) { - if (!powerof2(scctx->isc_nrxd[i])) { - /* round down instead? */ - device_printf(dev, "# rx descriptors must be a power of 2\n"); - err = EINVAL; - goto fail_iflib_detach; - } - } - for (i = 0; i < sctx->isc_ntxqs; i++) { - if (!powerof2(scctx->isc_ntxd[i])) { - device_printf(dev, - "# tx descriptors must be a power of 2"); - err = EINVAL; - goto fail_iflib_detach; - } - } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) @@ -4866,6 +5085,7 @@ iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; CTX_UNLOCK(ctx); + return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); @@ -4876,6 +5096,7 @@ fail_iflib_detach: IFDI_DETACH(ctx); fail_unlock: CTX_UNLOCK(ctx); + iflib_deregister(ctx); fail_ctx_free: free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); @@ -4892,15 +5113,7 @@ iflib_pseudo_deregister(if_ctx_t ctx) struct taskqgroup *tqg; iflib_fl_t fl; - /* Unregister VLAN events */ - if (ctx->ifc_vlan_attach_event != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); - if (ctx->ifc_vlan_detach_event != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); - ether_ifdetach(ifp); - /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ - CTX_LOCK_DESTROY(ctx); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { @@ -4921,10 +5134,11 @@ iflib_pseudo_deregister(if_ctx_t ctx) if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); - if_free(ifp); - iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); + + iflib_deregister(ctx); + if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); @@ -4984,6 +5198,7 @@ iflib_device_deregister(if_ctx_t ctx) iflib_netmap_detach(ifp); ether_ifdetach(ifp); + iflib_rem_pfil(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ @@ -5010,18 +5225,19 @@ iflib_device_deregister(if_ctx_t ctx) CTX_UNLOCK(ctx); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ - CTX_LOCK_DESTROY(ctx); - device_set_softc(ctx->ifc_dev, NULL); iflib_free_intr_mem(ctx); bus_generic_detach(dev); - if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); + + iflib_deregister(ctx); + + device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); - STATE_LOCK_DESTROY(ctx); + unref_ctx_core_offset(ctx); free(ctx, M_IFLIB); return (0); } @@ -5174,6 +5390,8 @@ iflib_module_event_handler(module_t mod, int what, void *arg) static void _iflib_assert(if_shared_ctx_t sctx) { + int i; + MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); @@ -5181,12 +5399,25 @@ _iflib_assert(if_shared_ctx_t sctx) MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); - MPASS(sctx->isc_nrxd_min[0]); - MPASS(sctx->isc_nrxd_max[0]); - MPASS(sctx->isc_nrxd_default[0]); - MPASS(sctx->isc_ntxd_min[0]); - MPASS(sctx->isc_ntxd_max[0]); - MPASS(sctx->isc_ntxd_default[0]); + MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8); + for (i = 0; i < sctx->isc_nrxqs; i++) { + MPASS(sctx->isc_nrxd_min[i]); + MPASS(powerof2(sctx->isc_nrxd_min[i])); + MPASS(sctx->isc_nrxd_max[i]); + MPASS(powerof2(sctx->isc_nrxd_max[i])); + MPASS(sctx->isc_nrxd_default[i]); + MPASS(powerof2(sctx->isc_nrxd_default[i])); + } + + MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8); + for (i = 0; i < sctx->isc_ntxqs; i++) { + MPASS(sctx->isc_ntxd_min[i]); + MPASS(powerof2(sctx->isc_ntxd_min[i])); + MPASS(sctx->isc_ntxd_max[i]); + MPASS(powerof2(sctx->isc_ntxd_max[i])); + MPASS(sctx->isc_ntxd_default[i]); + MPASS(powerof2(sctx->isc_ntxd_default[i])); + } } static void @@ -5251,12 +5482,46 @@ iflib_register(if_ctx_t ctx) EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); - ifmedia_init(&ctx->ifc_media, IFM_IMASK, - iflib_media_change, iflib_media_status); - + if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) { + dprintf("DRVMEDia"); + ctx->ifc_mediap = &ctx->ifc_media; + ifmedia_init(ctx->ifc_mediap, IFM_IMASK, + iflib_media_change, iflib_media_status); + } return (0); } +static void +iflib_deregister(if_ctx_t ctx) +{ + if_t ifp = ctx->ifc_ifp; + + /* Remove all media */ + ifmedia_removeall(&ctx->ifc_media); + + /* Unregister VLAN events */ + if (ctx->ifc_vlan_attach_event != NULL) { + EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); + ctx->ifc_vlan_attach_event = NULL; + } + if (ctx->ifc_vlan_detach_event != NULL) { + EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); + ctx->ifc_vlan_detach_event = NULL; + } + +#ifndef __HAIKU__ + /* Release kobject reference */ + kobj_delete((kobj_t) ctx, NULL); +#endif + + /* Free the ifnet structure */ + if_free(ifp); + + STATE_LOCK_DESTROY(ctx); + + /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ + CTX_LOCK_DESTROY(ctx); +} static int iflib_queues_alloc(if_ctx_t ctx) @@ -5346,14 +5611,11 @@ iflib_queues_alloc(if_ctx_t ctx) } /* Initialize the TX lock */ - snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", + snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); - snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", - device_get_nameunit(dev), txq->ift_id); - err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { @@ -5416,7 +5678,7 @@ iflib_queues_alloc(if_ctx_t ctx) goto err_rx_desc; } - for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) + for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK); } @@ -5526,16 +5788,16 @@ iflib_rx_structures_setup(if_ctx_t ctx) for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) -#ifndef __HAIKU__ - tcp_lro_free(&rxq->ifr_lc); - if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, - TCP_LRO_ENTRIES, min(1024, - ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { - device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); - goto fail; + if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) { + err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, + TCP_LRO_ENTRIES, min(1024, + ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset])); + if (err != 0) { + device_printf(ctx->ifc_dev, + "LRO Initialization failed!\n"); + goto fail; + } } - rxq->ifr_lro_enabled = TRUE; -#endif #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } @@ -5543,14 +5805,14 @@ iflib_rx_structures_setup(if_ctx_t ctx) #if defined(INET6) || defined(INET) fail: /* - * Free RX software descriptors allocated so far, we will only handle + * Free LRO resources allocated so far, we will only handle * the rings that completed, the failing case will have - * cleaned up for itself. 'q' failed, so its the terminus. + * cleaned up for itself. 'q' failed, so its the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { - iflib_rx_sds_free(rxq); - rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; + if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) + tcp_lro_free(&rxq->ifr_lc); } return (err); #endif @@ -5564,11 +5826,15 @@ fail: static void iflib_rx_structures_free(if_ctx_t ctx) { - int i; iflib_rxq_t rxq = ctx->ifc_rxqs; + int i; for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); +#if defined(INET6) || defined(INET) + if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) + tcp_lro_free(&rxq->ifr_lc); +#endif } free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; @@ -5647,7 +5913,7 @@ find_child_with_core(int cpu, struct cpu_group *grp) * Find the nth "close" core to the specified core * "close" is defined as the deepest level that shares * at least an L2 cache. With threads, this will be - * threads on the same core. If the sahred cache is L3 + * threads on the same core. If the shared cache is L3 * or higher, simply returns the same core. */ static int @@ -5731,12 +5997,18 @@ iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, const char *name) { device_t dev; - int err, cpuid, tid; + int co, cpuid, err, tid; dev = ctx->ifc_dev; - cpuid = find_nth(ctx, qid); + co = ctx->ifc_sysctl_core_offset; + if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX) + co += ctx->ifc_softc_ctx.isc_nrxqsets; + cpuid = find_nth(ctx, qid + co); tid = get_core_offset(ctx, type, qid); - MPASS(tid >= 0); + if (tid < 0) { + device_printf(dev, "get_core_offset failed\n"); + return (EOPNOTSUPP); + } cpuid = find_close_core(cpuid, tid); err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, irq->ii_res, name); @@ -5748,7 +6020,7 @@ iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, if (cpuid > ctx->ifc_cpuid_highest) ctx->ifc_cpuid_highest = cpuid; #endif - return 0; + return (0); } int @@ -5808,7 +6080,9 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, intr_fast = iflib_fast_intr_ctx; break; default: - panic("unknown net intr type"); + device_printf(ctx->ifc_dev, "%s: unknown net intr type\n", + __func__); + return (EINVAL); } info->ifi_filter = filter; @@ -5904,26 +6178,26 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * struct resource *res; struct taskqgroup *tqg; gtask_fn_t *fn; - int tqrid; void *q; - int err; + int err, tqrid; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; - tqrid = irq->ii_rid = *rid; + tqrid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; - info->ifi_ctx = ctx; + info->ifi_ctx = q; dev = ctx->ifc_dev; /* We allocate a single interrupt resource */ - if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) + if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_rxtx, + NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); res = irq->ii_res; @@ -5978,7 +6252,7 @@ iflib_iov_intr_deferred(if_ctx_t ctx) } void -iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) +iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, @@ -5999,7 +6273,7 @@ void iflib_config_gtask_deinit(struct grouptask *gtask) { - taskqgroup_detach(qgroup_if_config_tqg, gtask); + taskqgroup_detach(qgroup_if_config_tqg, gtask); } void @@ -6032,9 +6306,6 @@ iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) int credits_pre = txq->ift_cidx_processed; #endif - if (ctx->isc_txd_credits_update == NULL) - return (0); - bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) @@ -6089,9 +6360,8 @@ iflib_msix_init(if_ctx_t ctx) device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; - int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; - int iflib_num_tx_queues, iflib_num_rx_queues; - int err, admincnt, bar; + int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues; + int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; @@ -6100,8 +6370,6 @@ iflib_msix_init(if_ctx_t ctx) device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); - bar = ctx->ifc_softc_ctx.isc_msix_bar; - admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; @@ -6112,6 +6380,8 @@ iflib_msix_init(if_ctx_t ctx) device_printf(dev, "MSI-X not supported or disabled\n"); goto msi; } + + bar = ctx->ifc_softc_ctx.isc_msix_bar; /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily @@ -6127,6 +6397,8 @@ iflib_msix_init(if_ctx_t ctx) goto msi; } } + + admincnt = sctx->isc_admin_intrcnt; #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); @@ -6180,11 +6452,30 @@ iflib_msix_init(if_ctx_t ctx) rx_queues = min(rx_queues, tx_queues); } - device_printf(dev, "Using %d rx queues %d tx queues\n", - rx_queues, tx_queues); - vectors = rx_queues + admincnt; + if (msgs < vectors) { + device_printf(dev, + "insufficient number of MSI-X vectors " + "(supported %d, need %d)\n", msgs, vectors); + goto msi; + } + + device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues, + tx_queues); + msgs = vectors; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { + if (vectors != msgs) { + device_printf(dev, + "Unable to allocate sufficient MSI-X vectors " + "(got %d, need %d)\n", vectors, msgs); + pci_release_msi(dev); + if (bar != -1) { + bus_release_resource(dev, SYS_RES_MEMORY, bar, + ctx->ifc_msix_mem); + ctx->ifc_msix_mem = NULL; + } + goto msi; + } device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; @@ -6195,12 +6486,15 @@ iflib_msix_init(if_ctx_t ctx) return (vectors); } else { device_printf(dev, - "failed to allocate %d MSI-X vectors, err: %d - using MSI\n", - vectors, err); - bus_release_resource(dev, SYS_RES_MEMORY, bar, - ctx->ifc_msix_mem); - ctx->ifc_msix_mem = NULL; + "failed to allocate %d MSI-X vectors, err: %d\n", vectors, + err); + if (bar != -1) { + bus_release_resource(dev, SYS_RES_MEMORY, bar, + ctx->ifc_msix_mem); + ctx->ifc_msix_mem = NULL; + } } + msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; @@ -6264,8 +6558,6 @@ mp_ndesc_handler(SYSCTL_HANDLER_ARGS) char *p, *next; int nqs, rc, i; - MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); - nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: @@ -6279,7 +6571,8 @@ mp_ndesc_handler(SYSCTL_HANDLER_ARGS) nqs = ctx->ifc_sctx->isc_nrxqs; break; default: - panic("unhandled type"); + printf("%s: unhandled type\n", __func__); + return (EINVAL); } if (nqs == 0) nqs = 8; @@ -6338,20 +6631,27 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) "disable MSI-X (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, - "set the rx budget"); + "set the RX budget"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, - "cause tx to abdicate instead of running to completion"); + "cause TX to abdicate instead of running to completion"); + ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", + CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, + "offset to start using cores at"); + SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", + CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, + "use separate cores for TX and RX"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", - "list of # of tx descriptors to use, 0 = use default #"); + "list of # of TX descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", - "list of # of rx descriptors to use, 0 = use default #"); + "list of # of RX descriptors to use, 0 = use default #"); #endif } @@ -6410,7 +6710,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, - &txq->ift_map_failed, "# of times dma map failed"); + &txq->ift_map_failed, "# of times DMA map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); @@ -6470,9 +6770,6 @@ iflib_add_device_sysctl_post(if_ctx_t ctx) CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { - SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", - CTLFLAG_RD, - &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); @@ -6550,7 +6847,7 @@ iflib_fixup_rx(struct mbuf *m) #ifdef NETDUMP static void -iflib_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +iflib_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize) { if_ctx_t ctx; @@ -6563,7 +6860,7 @@ iflib_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) } static void -iflib_netdump_event(struct ifnet *ifp, enum netdump_ev event) +iflib_netdump_event(if_t ifp, enum netdump_ev event) { if_ctx_t ctx; if_softc_ctx_t scctx; @@ -6593,7 +6890,7 @@ iflib_netdump_event(struct ifnet *ifp, enum netdump_ev event) } static int -iflib_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +iflib_netdump_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx; iflib_txq_t txq; @@ -6612,7 +6909,7 @@ iflib_netdump_transmit(struct ifnet *ifp, struct mbuf *m) } static int -iflib_netdump_poll(struct ifnet *ifp, int count) +iflib_netdump_poll(if_t ifp, int count) { if_ctx_t ctx; if_softc_ctx_t scctx; diff --git a/src/libs/compat/freebsd_iflib/mp_ring.c b/src/libs/compat/freebsd_iflib/mp_ring.c index 4700c60de7..26e7922495 100644 --- a/src/libs/compat/freebsd_iflib/mp_ring.c +++ b/src/libs/compat/freebsd_iflib/mp_ring.c @@ -36,12 +36,6 @@ __FBSDID("$FreeBSD$"); #include #include #include - -#if defined(__i386__) -#define atomic_cmpset_acq_64 atomic_cmpset_64 -#define atomic_cmpset_rel_64 atomic_cmpset_64 -#endif - #include union ring_state { @@ -97,7 +91,7 @@ state_to_flags(union ring_state s, int abdicate) return (BUSY); } -#ifdef NO_64BIT_ATOMICS +#ifdef MP_RING_NO_64BIT_ATOMICS static void drain_ring_locked(struct ifmp_ring *r, union ring_state os, uint16_t prev, int budget) { @@ -195,11 +189,12 @@ drain_ring_lockless(struct ifmp_ring *r, union ring_state os, uint16_t prev, int n = r->drain(r, cidx, pidx); if (n == 0) { critical_enter(); + os.state = r->state; do { - os.state = ns.state = r->state; + ns.state = os.state; ns.cidx = cidx; ns.flags = STALLED; - } while (atomic_cmpset_64(&r->state, os.state, + } while (atomic_fcmpset_64(&r->state, &os.state, ns.state) == 0); critical_exit(); if (prev != STALLED) @@ -222,11 +217,13 @@ drain_ring_lockless(struct ifmp_ring *r, union ring_state os, uint16_t prev, int if (cidx != pidx && pending < 64 && total < budget) continue; critical_enter(); + os.state = r->state; do { - os.state = ns.state = r->state; + ns.state = os.state; ns.cidx = cidx; ns.flags = state_to_flags(ns, total >= budget); - } while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0); + } while (atomic_fcmpset_acq_64(&r->state, &os.state, + ns.state) == 0); critical_exit(); if (ns.flags == ABDICATED) @@ -287,7 +284,7 @@ ifmp_ring_alloc(struct ifmp_ring **pr, int size, void *cookie, mp_ring_drain_t d } *pr = r; -#ifdef NO_64BIT_ATOMICS +#ifdef MP_RING_NO_64BIT_ATOMICS mtx_init(&r->lock, "mp_ring lock", NULL, MTX_DEF); #endif return (0); @@ -321,7 +318,7 @@ ifmp_ring_free(struct ifmp_ring *r) * * Returns an errno. */ -#ifdef NO_64BIT_ATOMICS +#ifdef MP_RING_NO_64BIT_ATOMICS int ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdicate) { @@ -366,7 +363,6 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdi i = pidx_start; do { r->items[i] = *items++; - /*HAIKU*/KASSERT((r->items[i] == NULL) || ((uintptr_t)(r->items[i]) > 1024UL), ("is %p", r->items[i])); if (__predict_false(++i == r->size)) i = 0; } while (i != pidx_stop); @@ -380,10 +376,8 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdi if (abdicate) { if (os.flags == IDLE) ns.flags = ABDICATED; - } - else { + } else ns.flags = BUSY; - } r->state = ns.state; counter_u64_add(r->enqueues, n); @@ -399,7 +393,6 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdi mtx_unlock(&r->lock); return (0); } - #else int ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdicate) @@ -415,8 +408,8 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdi * Reserve room for the new items. Our reservation, if successful, is * from 'pidx_start' to 'pidx_stop'. */ + os.state = r->state; for (;;) { - os.state = r->state; if (n >= space_available(r, os)) { counter_u64_add(r->drops, n); MPASS(os.flags != IDLE); @@ -427,7 +420,7 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdi ns.state = os.state; ns.pidx_head = increment_idx(r, os.pidx_head, n); critical_enter(); - if (atomic_cmpset_64(&r->state, os.state, ns.state)) + if (atomic_fcmpset_64(&r->state, &os.state, ns.state)) break; critical_exit(); cpu_spinwait(); @@ -457,17 +450,16 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget, int abdi * Update the ring's pidx_tail. The release style atomic guarantees * that the items are visible to any thread that sees the updated pidx. */ + os.state = r->state; do { - os.state = ns.state = r->state; + ns.state = os.state; ns.pidx_tail = pidx_stop; if (abdicate) { if (os.flags == IDLE) ns.flags = ABDICATED; - } - else { + } else ns.flags = BUSY; - } - } while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0); + } while (atomic_fcmpset_rel_64(&r->state, &os.state, ns.state) == 0); critical_exit(); counter_u64_add(r->enqueues, n); @@ -500,7 +492,7 @@ ifmp_ring_check_drainage(struct ifmp_ring *r, int budget) ns.flags = BUSY; -#ifdef NO_64BIT_ATOMICS +#ifdef MP_RING_NO_64BIT_ATOMICS mtx_lock(&r->lock); if (r->state != os.state) { mtx_unlock(&r->lock);