Diffstat (limited to 'freebsd/sys/dev/e1000/if_igb.c')
 freebsd/sys/dev/e1000/if_igb.c | 1466 ++++++++++++++++++++++------------
 1 file changed, 994 insertions(+), 472 deletions(-)
diff --git a/freebsd/sys/dev/e1000/if_igb.c b/freebsd/sys/dev/e1000/if_igb.c
index 27d9c92b..bf88f60f 100644
--- a/freebsd/sys/dev/e1000/if_igb.c
+++ b/freebsd/sys/dev/e1000/if_igb.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2010, Intel Corporation
+ Copyright (c) 2001-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -38,6 +38,7 @@
#ifdef HAVE_KERNEL_OPTION_HEADERS
#include <rtems/bsd/local/opt_device_polling.h>
#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_altq.h>
#endif
@@ -107,7 +108,7 @@ int igb_display_debug_stats = 0;
/*********************************************************************
* Driver version:
*********************************************************************/
-char igb_driver_version[] = "version - 2.0.7";
+char igb_driver_version[] = "version - 2.3.9 - 8";
/*********************************************************************
@@ -149,6 +150,22 @@ static igb_vendor_info_t igb_vendor_info_array[] =
PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_DH89XXCC_SFP, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I350_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I350_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I350_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I350_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I350_VF, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I210_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I210_COPPER_IT, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I210_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I210_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I210_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_I211_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
/* required last entry */
{ 0, 0, 0, 0, 0}
};
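
The table is terminated by the all-zero sentinel entry. A minimal sketch of the lookup igb_probe performs over it (field names per igb_vendor_info_t in if_igb.h; the helper itself is illustrative, not driver code):

static int
igb_lookup_device(device_t dev)
{
	igb_vendor_info_t *ent = igb_vendor_info_array;
	u16 vid = pci_get_vendor(dev);	/* 0x8086 for every entry above */
	u16 did = pci_get_device(dev);

	for (; ent->vendor_id != 0; ent++)	/* stop at the sentinel */
		if (ent->vendor_id == vid && ent->device_id == did)
			return (BUS_PROBE_DEFAULT);
	return (ENXIO);
}
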
@@ -170,13 +187,14 @@ static int igb_detach(device_t);
static int igb_shutdown(device_t);
static int igb_suspend(device_t);
static int igb_resume(device_t);
-static void igb_start(struct ifnet *);
-static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int igb_mq_start(struct ifnet *, struct mbuf *);
-static int igb_mq_start_locked(struct ifnet *,
- struct tx_ring *, struct mbuf *);
+static int igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void igb_qflush(struct ifnet *);
+static void igb_deferred_mq_start(void *, int);
+#else
+static void igb_start(struct ifnet *);
+static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int igb_ioctl(struct ifnet *, u_long, caddr_t);
static void igb_init(void *);
@@ -220,19 +238,11 @@ static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
struct ifnet *, struct mbuf *, u32);
-#ifdef __rtems__
-/* XXX report inconsistent proto/body to FreeBSD */
static bool igb_rxeof(struct igb_queue *, int, int *);
-#else
-static boolean_t igb_rxeof(struct igb_queue *, int, int *);
-#endif
static void igb_rx_checksum(u32, struct mbuf *, u32);
-#ifdef __rtems__
-/* XXX report inconsistent proto/body to FreeBSD */
static bool igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
-#else
-#endif
-static boolean_t igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
+static bool igb_tso_setup(struct tx_ring *, struct mbuf *, int,
+ struct ip *, struct tcphdr *);
static void igb_set_promisc(struct adapter *);
static void igb_disable_promisc(struct adapter *);
static void igb_set_multi(struct adapter *);
@@ -264,14 +274,17 @@ static void igb_enable_wakeup(device_t);
static void igb_led_func(void *, int);
static int igb_irq_fast(void *);
-static void igb_add_rx_process_limit(struct adapter *, const char *,
- const char *, int *, int);
+static void igb_msix_que(void *);
+static void igb_msix_link(void *);
static void igb_handle_que(void *context, int pending);
static void igb_handle_link(void *context, int pending);
+static void igb_handle_link_locked(struct adapter *);
-/* These are MSIX only irq handlers */
-static void igb_msix_que(void *);
-static void igb_msix_link(void *);
+static void igb_set_sysctl_value(struct adapter *, const char *,
+ const char *, int *, int);
+static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
+static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
+static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
@@ -289,7 +302,7 @@ static device_method_t igb_methods[] = {
DEVMETHOD(device_shutdown, igb_shutdown),
DEVMETHOD(device_suspend, igb_suspend),
DEVMETHOD(device_resume, igb_resume),
- {0, 0}
+ DEVMETHOD_END
};
static driver_t igb_driver = {
@@ -305,11 +318,17 @@ MODULE_DEPEND(igb, ether, 1, 1, 1);
* Tunable default values.
*********************************************************************/
+static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
+
/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
+SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
+ "Number of receive descriptors per queue");
+SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
+ "Number of transmit descriptors per queue");
/*
** AIM: Adaptive Interrupt Moderation
@@ -319,6 +338,8 @@ TUNABLE_INT("hw.igb.txd", &igb_txd);
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
+SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
+ "Enable adaptive interrupt moderation");
/*
* MSIX should be the default for best performance,
@@ -326,12 +347,26 @@ TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
*/
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
+SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
+ "Enable MSI-X interrupts");
/*
** Tuneable Interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
+SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
+ &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
+
+#if __FreeBSD_version >= 800000
+/*
+** Tuneable number of buffers in the buf-ring (drbr_xxx)
+*/
+static int igb_buf_ring_size = IGB_BR_SIZE;
+TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
+SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
+ &igb_buf_ring_size, 0, "Size of the bufring");
+#endif
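
The new knob feeds buf_ring_alloc() in igb_allocate_queues (further down in this diff). A hypothetical loader.conf override, sized to a power of two as is customary for buf_ring (IGB_BR_SIZE is the built-in default):

hw.igb.buf_ring_size="4096"
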
/*
** Header split causes the packet header to
@@ -341,8 +376,10 @@ TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
** into the header and thus use no cluster. Its
** a very workload dependent type feature.
*/
-static bool igb_header_split = FALSE;
+static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
+SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
+ "Enable receive mbuf header split");
/*
** This will autoconfigure based on
@@ -350,15 +387,26 @@ TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
+SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
+ "Number of queues to configure, 0 indicates autoconfigure");
+
+/*
+** Global variable to store last used CPU when binding queues
+** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and advances via
+** CPU_NEXT each time a queue is bound to a CPU.
+*/
+static int igb_last_bind_cpu = -1;
/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
+SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
+ &igb_rx_process_limit, 0,
+ "Maximum number of received packets to process at a time, -1 means unlimited");
-/* Flow control setting - default to FULL */
-static int igb_fc_setting = e1000_fc_full;
-TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
-
+#ifdef DEV_NETMAP /* see ixgbe.c for details */
+#include <dev/netmap/if_igb_netmap.h>
+#endif /* DEV_NETMAP */
/*********************************************************************
* Device identification routine
*
@@ -429,6 +477,11 @@ igb_attach(device_t dev)
INIT_DEBUGOUT("igb_attach: begin");
+ if (resource_disabled("igb", device_get_unit(dev))) {
+ device_printf(dev, "Disabled by device hint\n");
+ return (ENXIO);
+ }
+
adapter = device_get_softc(dev);
adapter->dev = adapter->osdep.dev = dev;
IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
@@ -439,15 +492,14 @@ igb_attach(device_t dev)
OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
igb_sysctl_nvm_info, "I", "NVM Information");
- SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
- SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
- OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
- &igb_fc_setting, 0, "Flow Control");
+ igb_set_sysctl_value(adapter, "enable_aim",
+ "Interrupt Moderation", &adapter->enable_aim,
+ igb_enable_aim);
- SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
- OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
- &igb_enable_aim, 1, "Interrupt Moderation");
+ OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
+ adapter, 0, igb_set_flowcntl, "I", "Flow Control");
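
The old global hw.igb.fc_setting tunable becomes this per-device proc handler. An example of runtime use, assuming unit 0 and the e1000_fc_mode numbering (0 none, 1 rx_pause, 2 tx_pause, 3 full):

sysctl dev.igb.0.fc=3
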
callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
@@ -470,10 +522,10 @@ igb_attach(device_t dev)
e1000_get_bus_info(&adapter->hw);
- /* Sysctls for limiting the amount of work done in the taskqueue */
- igb_add_rx_process_limit(adapter, "rx_processing_limit",
- "max number of rx packets to process", &adapter->rx_process_limit,
- igb_rx_process_limit);
+ /* Sysctl for limiting the amount of work done in the taskqueue */
+ igb_set_sysctl_value(adapter, "rx_processing_limit",
+ "max number of rx packets to process",
+ &adapter->rx_process_limit, igb_rx_process_limit);
/*
* Validate number of transmit and receive descriptors. It
@@ -522,7 +574,7 @@ igb_attach(device_t dev)
}
/* Allocate the appropriate stats memory */
- if (adapter->hw.mac.type == e1000_vfadapt) {
+ if (adapter->vf_ifp) {
adapter->stats =
(struct e1000_vf_stats *)malloc(sizeof \
(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
@@ -546,6 +598,21 @@ igb_attach(device_t dev)
goto err_late;
}
+ /* Some adapter-specific advanced features */
+ if (adapter->hw.mac.type >= e1000_i350) {
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
+ adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
+ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
+ adapter, 0, igb_sysctl_eee, "I",
+ "Disable Energy Efficient Ethernet");
+ if (adapter->hw.phy.media_type == e1000_media_type_copper)
+ e1000_set_eee_i350(&adapter->hw);
+ }
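
Example runtime use of the two i350-class knobs just added, assuming unit 0 (the dmac value is a coalescing timer in microseconds, 0 disables it; igb_sysctl_dmac validates the accepted range):

sysctl dev.igb.0.dmac=1000
sysctl dev.igb.0.eee_disabled=1
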
+
/*
** Start from a known state, this is
** important in reading the nvm and
@@ -554,7 +621,9 @@ igb_attach(device_t dev)
e1000_reset_hw(&adapter->hw);
/* Make sure we have a good EEPROM before we read from it */
- if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
+ if (((adapter->hw.mac.type != e1000_i210) &&
+ (adapter->hw.mac.type != e1000_i211)) &&
+ (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
/*
** Some PCI-E parts fail the first check due to
** the link being in sleep state, call it again,
@@ -584,16 +653,6 @@ igb_attach(device_t dev)
goto err_late;
}
- /*
- ** Configure Interrupts
- */
- if ((adapter->msix > 1) && (igb_enable_msix))
- error = igb_allocate_msix(adapter);
- else /* MSI or Legacy */
- error = igb_allocate_legacy(adapter);
- if (error)
- goto err_late;
-
/* Setup OS specific network interface */
if (igb_setup_interface(dev, adapter) != 0)
goto err_late;
@@ -632,23 +691,38 @@ igb_attach(device_t dev)
igb_add_hw_stats(adapter);
/* Tell the stack that the interface is not active */
- adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
adapter->led_dev = led_create(igb_led_func, adapter,
device_get_nameunit(dev));
+ /*
+ ** Configure Interrupts
+ */
+ if ((adapter->msix > 1) && (igb_enable_msix))
+ error = igb_allocate_msix(adapter);
+ else /* MSI or Legacy */
+ error = igb_allocate_legacy(adapter);
+ if (error)
+ goto err_late;
+
+#ifdef DEV_NETMAP
+ igb_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
INIT_DEBUGOUT("igb_attach: end");
return (0);
err_late:
+ igb_detach(dev);
igb_free_transmit_structures(adapter);
igb_free_receive_structures(adapter);
igb_release_hw_control(adapter);
- if (adapter->ifp != NULL)
- if_free(adapter->ifp);
err_pci:
igb_free_pci_resources(adapter);
+ if (adapter->ifp != NULL)
+ if_free(adapter->ifp);
free(adapter->mta, M_DEVBUF);
IGB_CORE_LOCK_DESTROY(adapter);
@@ -679,6 +753,8 @@ igb_detach(device_t dev)
return (EBUSY);
}
+ ether_ifdetach(adapter->ifp);
+
if (adapter->led_dev != NULL)
led_destroy(adapter->led_dev);
@@ -710,17 +786,19 @@ igb_detach(device_t dev)
if (adapter->vlan_detach != NULL)
EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
- ether_ifdetach(adapter->ifp);
-
callout_drain(&adapter->timer);
+#ifdef DEV_NETMAP
+ netmap_detach(adapter->ifp);
+#endif /* DEV_NETMAP */
igb_free_pci_resources(adapter);
bus_generic_detach(dev);
if_free(ifp);
igb_free_transmit_structures(adapter);
igb_free_receive_structures(adapter);
- free(adapter->mta, M_DEVBUF);
+ if (adapter->mta != NULL)
+ free(adapter->mta, M_DEVBUF);
IGB_CORE_LOCK_DESTROY(adapter);
@@ -769,6 +847,7 @@ static int
igb_resume(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
+ struct tx_ring *txr = adapter->tx_rings;
struct ifnet *ifp = adapter->ifp;
IGB_CORE_LOCK(adapter);
@@ -776,15 +855,29 @@ igb_resume(device_t dev)
igb_init_manageability(adapter);
if ((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))
- igb_start(ifp);
-
+ (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
+ for (int i = 0; i < adapter->num_queues; i++, txr++) {
+ IGB_TX_LOCK(txr);
+#if __FreeBSD_version >= 800000
+ /* Process the stack queue only if not depleted */
+ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
+ !drbr_empty(ifp, txr->br))
+ igb_mq_start_locked(ifp, txr);
+#else
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ igb_start_locked(txr, ifp);
+#endif
+ IGB_TX_UNLOCK(txr);
+ }
+ }
IGB_CORE_UNLOCK(adapter);
return bus_generic_resume(dev);
}
+#if __FreeBSD_version < 800000
+
/*********************************************************************
* Transmit entry point
*
@@ -814,8 +907,8 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
igb_txeof(txr);
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if (txr->tx_avail <= IGB_MAX_SCATTER) {
+ txr->queue_status |= IGB_QUEUE_DEPLETED;
break;
}
IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
@@ -826,10 +919,10 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
* NULL on failure. In that event, we can't requeue.
*/
if (igb_xmit(txr, &m_head)) {
- if (m_head == NULL)
- break;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ if (m_head != NULL)
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ if (txr->tx_avail <= IGB_MAX_SCATTER)
+ txr->queue_status |= IGB_QUEUE_DEPLETED;
break;
}
@@ -838,7 +931,7 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
/* Set watchdog on */
txr->watchdog_time = ticks;
- txr->queue_status = IGB_QUEUE_WORKING;
+ txr->queue_status |= IGB_QUEUE_WORKING;
}
}
@@ -861,9 +954,11 @@ igb_start(struct ifnet *ifp)
return;
}
-#if __FreeBSD_version >= 800000
+#else /* __FreeBSD_version >= 800000 */
+
/*
-** Multiqueue Transmit driver
+** Multiqueue Transmit Entry:
+** quick turnaround to the stack
**
*/
static int
@@ -872,83 +967,81 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m)
struct adapter *adapter = ifp->if_softc;
struct igb_queue *que;
struct tx_ring *txr;
- int i = 0, err = 0;
+ int i, err = 0;
/* Which queue to use */
if ((m->m_flags & M_FLOWID) != 0)
i = m->m_pkthdr.flowid % adapter->num_queues;
-
+ else
+ i = curcpu % adapter->num_queues;
txr = &adapter->tx_rings[i];
que = &adapter->queues[i];
- if (IGB_TX_TRYLOCK(txr)) {
- err = igb_mq_start_locked(ifp, txr, m);
- IGB_TX_UNLOCK(txr);
- } else {
- err = drbr_enqueue(ifp, txr->br, m);
- taskqueue_enqueue(que->tq, &que->que_task);
- }
+ err = drbr_enqueue(ifp, txr->br, m);
+ taskqueue_enqueue(que->tq, &txr->txq_task);
return (err);
}
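
The dispatch rule above in isolation, as a sketch with illustrative values: with four queues, a stack-supplied flowid of 0x2a pins that flow to queue 0x2a % 4 = 2, while packets without M_FLOWID follow the sending CPU, so per-CPU submissions stay ring-local (the helper name is hypothetical):

static inline int
igb_queue_for(struct mbuf *m, int num_queues)
{
	if (m->m_flags & M_FLOWID)
		return (m->m_pkthdr.flowid % num_queues);
	return (curcpu % num_queues);	/* fall back to the sending CPU */
}
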
static int
-igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
+igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
- struct mbuf *next;
- int err = 0, enq;
+ struct mbuf *buf;
+ int err = 0, enq = 0;
IGB_TX_LOCK_ASSERT(txr);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING || adapter->link_active == 0) {
- if (m != NULL)
- err = drbr_enqueue(ifp, txr->br, m);
- return (err);
- }
-
- /* Call cleanup if number of TX descriptors low */
- if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
- igb_txeof(txr);
-
- enq = 0;
- if (m == NULL) {
- next = drbr_dequeue(ifp, txr->br);
- } else if (drbr_needs_enqueue(ifp, txr->br)) {
- if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
- return (err);
- next = drbr_dequeue(ifp, txr->br);
- } else
- next = m;
+ if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
+ adapter->link_active == 0)
+ return (ENETDOWN);
/* Process the queue */
- while (next != NULL) {
- if ((err = igb_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
+ buf = drbr_dequeue(ifp, txr->br);
+ while (buf != NULL) {
+ if ((err = igb_xmit(txr, &buf)) != 0) {
+ if (buf != NULL)
+ err = drbr_enqueue(ifp, txr->br, buf);
break;
}
enq++;
- drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
- ETHER_BPF_MTAP(ifp, next);
+ ifp->if_obytes += buf->m_pkthdr.len;
+ if (buf->m_flags & M_MCAST)
+ ifp->if_omcasts++;
+ ETHER_BPF_MTAP(ifp, buf);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- break;
- }
- next = drbr_dequeue(ifp, txr->br);
+ buf = drbr_dequeue(ifp, txr->br);
}
if (enq > 0) {
/* Set the watchdog */
- txr->queue_status = IGB_QUEUE_WORKING;
+ txr->queue_status |= IGB_QUEUE_WORKING;
txr->watchdog_time = ticks;
}
+ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
+ igb_txeof(txr);
+ if (txr->tx_avail <= IGB_MAX_SCATTER)
+ txr->queue_status |= IGB_QUEUE_DEPLETED;
return (err);
}
/*
+ * Called from a taskqueue to drain queued transmit packets.
+ */
+static void
+igb_deferred_mq_start(void *arg, int pending)
+{
+ struct tx_ring *txr = arg;
+ struct adapter *adapter = txr->adapter;
+ struct ifnet *ifp = adapter->ifp;
+
+ IGB_TX_LOCK(txr);
+ if (!drbr_empty(ifp, txr->br))
+ igb_mq_start_locked(ifp, txr);
+ IGB_TX_UNLOCK(txr);
+}
+
+/*
** Flush all ring buffers
*/
static void
@@ -981,11 +1074,12 @@ static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
struct adapter *adapter = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq *)data;
-#ifdef INET
- struct ifaddr *ifa = (struct ifaddr *)data;
+ struct ifreq *ifr = (struct ifreq *)data;
+#if defined(INET) || defined(INET6)
+ struct ifaddr *ifa = (struct ifaddr *)data;
#endif
- int error = 0;
+ bool avoid_reset = FALSE;
+ int error = 0;
if (adapter->in_detach)
return (error);
@@ -993,24 +1087,26 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
switch (command) {
case SIOCSIFADDR:
#ifdef INET
- if (ifa->ifa_addr->sa_family == AF_INET) {
- /*
- * XXX
- * Since resetting hardware takes a very long time
- * and results in link renegotiation we only
- * initialize the hardware only when it is absolutely
- * required.
- */
+ if (ifa->ifa_addr->sa_family == AF_INET)
+ avoid_reset = TRUE;
+#endif
+#ifdef INET6
+ if (ifa->ifa_addr->sa_family == AF_INET6)
+ avoid_reset = TRUE;
+#endif
+ /*
+ ** Calling init results in link renegotiation,
+ ** so we avoid doing it when possible.
+ */
+ if (avoid_reset) {
ifp->if_flags |= IFF_UP;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- IGB_CORE_LOCK(adapter);
- igb_init_locked(adapter);
- IGB_CORE_UNLOCK(adapter);
- }
+ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ igb_init(adapter);
+#ifdef INET
if (!(ifp->if_flags & IFF_NOARP))
arp_ifinit(ifp, ifa);
- } else
#endif
+ } else
error = ether_ioctl(ifp, command, data);
break;
case SIOCSIFMTU:
@@ -1069,11 +1165,6 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
break;
case SIOCSIFMEDIA:
- /*
- ** As the speed/duplex settings are being
- ** changed, we need toreset the PHY.
- */
- adapter->hw.phy.reset_disable = FALSE;
/* Check SOL/IDER usage */
IGB_CORE_LOCK(adapter);
if (e1000_check_reset_block(&adapter->hw)) {
@@ -1131,6 +1222,10 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
reinit = 1;
}
+ if (mask & IFCAP_VLAN_HWTSO) {
+ ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
+ reinit = 1;
+ }
if (mask & IFCAP_LRO) {
ifp->if_capenable ^= IFCAP_LRO;
reinit = 1;
@@ -1227,18 +1322,9 @@ igb_init_locked(struct adapter *adapter)
}
igb_initialize_receive_units(adapter);
- /* Use real VLAN Filter support? */
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
- /* Use real VLAN Filter support */
- igb_setup_vlan_hw_support(adapter);
- else {
- u32 ctrl;
- ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
- ctrl |= E1000_CTRL_VME;
- E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
- }
- }
+ /* Enable VLAN support */
+ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
+ igb_setup_vlan_hw_support(adapter);
/* Don't lose promiscuous settings */
igb_set_promisc(adapter);
@@ -1264,12 +1350,13 @@ igb_init_locked(struct adapter *adapter)
else
#endif /* DEVICE_POLLING */
{
- igb_enable_intr(adapter);
- E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
+ igb_enable_intr(adapter);
+ E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
}
- /* Don't reset the phy next time init gets called */
- adapter->hw.phy.reset_disable = TRUE;
+ /* Set Energy Efficient Ethernet */
+ if (adapter->hw.phy.media_type == e1000_media_type_copper)
+ e1000_set_eee_i350(&adapter->hw);
}
static void
@@ -1294,19 +1381,21 @@ igb_handle_que(void *context, int pending)
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
bool more;
- more = igb_rxeof(que, -1, NULL);
+ more = igb_rxeof(que, adapter->rx_process_limit, NULL);
IGB_TX_LOCK(txr);
- if (igb_txeof(txr))
- more = TRUE;
+ igb_txeof(txr);
#if __FreeBSD_version >= 800000
- if (!drbr_empty(ifp, txr->br))
- igb_mq_start_locked(ifp, txr, NULL);
+ /* Process the stack queue only if not depleted */
+ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
+ !drbr_empty(ifp, txr->br))
+ igb_mq_start_locked(ifp, txr);
#else
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
igb_start_locked(txr, ifp);
#endif
IGB_TX_UNLOCK(txr);
+ /* Do we need another? */
if (more) {
taskqueue_enqueue(que->tq, &que->que_task);
return;
@@ -1330,8 +1419,35 @@ igb_handle_link(void *context, int pending)
{
struct adapter *adapter = context;
+ IGB_CORE_LOCK(adapter);
+ igb_handle_link_locked(adapter);
+ IGB_CORE_UNLOCK(adapter);
+}
+
+static void
+igb_handle_link_locked(struct adapter *adapter)
+{
+ struct tx_ring *txr = adapter->tx_rings;
+ struct ifnet *ifp = adapter->ifp;
+
+ IGB_CORE_LOCK_ASSERT(adapter);
adapter->hw.mac.get_link_status = 1;
igb_update_link_status(adapter);
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
+ for (int i = 0; i < adapter->num_queues; i++, txr++) {
+ IGB_TX_LOCK(txr);
+#if __FreeBSD_version >= 800000
+ /* Process the stack queue only if not depleted */
+ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
+ !drbr_empty(ifp, txr->br))
+ igb_mq_start_locked(ifp, txr);
+#else
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ igb_start_locked(txr, ifp);
+#endif
+ IGB_TX_UNLOCK(txr);
+ }
+ }
}
/*********************************************************************
@@ -1379,12 +1495,6 @@ igb_irq_fast(void *arg)
}
#ifdef DEVICE_POLLING
-/*********************************************************************
- *
- * Legacy polling routine : if using this code you MUST be sure that
- * multiqueue is not defined, ie, set igb_num_queues to 1.
- *
- *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
@@ -1395,8 +1505,8 @@ static void
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
struct adapter *adapter = ifp->if_softc;
- struct igb_queue *que = adapter->queues;
- struct tx_ring *txr = adapter->tx_rings;
+ struct igb_queue *que;
+ struct tx_ring *txr;
u32 reg_icr, rx_done = 0;
u32 loop = IGB_MAX_LOOP;
bool more;
@@ -1411,34 +1521,40 @@ igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* Link status change */
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
- igb_handle_link(adapter, 0);
+ igb_handle_link_locked(adapter);
if (reg_icr & E1000_ICR_RXO)
adapter->rx_overruns++;
}
IGB_CORE_UNLOCK(adapter);
- igb_rxeof(que, count, &rx_done);
+ for (int i = 0; i < adapter->num_queues; i++) {
+ que = &adapter->queues[i];
+ txr = que->txr;
- IGB_TX_LOCK(txr);
- do {
- more = igb_txeof(txr);
- } while (loop-- && more);
+ igb_rxeof(que, count, &rx_done);
+
+ IGB_TX_LOCK(txr);
+ do {
+ more = igb_txeof(txr);
+ } while (loop-- && more);
#if __FreeBSD_version >= 800000
- if (!drbr_empty(ifp, txr->br))
- igb_mq_start_locked(ifp, txr, NULL);
+ if (!drbr_empty(ifp, txr->br))
+ igb_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
- igb_start_locked(txr, ifp);
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ igb_start_locked(txr, ifp);
#endif
- IGB_TX_UNLOCK(txr);
+ IGB_TX_UNLOCK(txr);
+ }
+
return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */
/*********************************************************************
*
- * MSIX TX Interrupt Service routine
+ * MSIX Que Interrupt Service routine
*
**********************************************************************/
static void
@@ -1446,21 +1562,31 @@ igb_msix_que(void *arg)
{
struct igb_queue *que = arg;
struct adapter *adapter = que->adapter;
+ struct ifnet *ifp = adapter->ifp;
struct tx_ring *txr = que->txr;
struct rx_ring *rxr = que->rxr;
u32 newitr = 0;
- bool more_tx, more_rx;
+ bool more_rx;
E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
++que->irqs;
IGB_TX_LOCK(txr);
- more_tx = igb_txeof(txr);
+ igb_txeof(txr);
+#if __FreeBSD_version >= 800000
+ /* Process the stack queue only if not depleted */
+ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
+ !drbr_empty(ifp, txr->br))
+ igb_mq_start_locked(ifp, txr);
+#else
+ if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ igb_start_locked(txr, ifp);
+#endif
IGB_TX_UNLOCK(txr);
more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
- if (igb_enable_aim == FALSE)
+ if (adapter->enable_aim == FALSE)
goto no_calc;
/*
** Do Adaptive Interrupt Moderation:
@@ -1513,7 +1639,7 @@ igb_msix_que(void *arg)
no_calc:
/* Schedule a clean task if needed*/
- if (more_tx || more_rx)
+ if (more_rx)
taskqueue_enqueue(que->tq, &que->que_task);
else
/* Reenable this interrupt */
@@ -1560,7 +1686,6 @@ static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
struct adapter *adapter = ifp->if_softc;
- u_char fiber_type = IFM_1000_SX;
INIT_DEBUGOUT("igb_media_status: begin");
@@ -1577,26 +1702,31 @@ igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
ifmr->ifm_status |= IFM_ACTIVE;
- if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
- (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
- ifmr->ifm_active |= fiber_type | IFM_FDX;
- else {
- switch (adapter->link_speed) {
- case 10:
- ifmr->ifm_active |= IFM_10_T;
- break;
- case 100:
- ifmr->ifm_active |= IFM_100_TX;
- break;
- case 1000:
- ifmr->ifm_active |= IFM_1000_T;
- break;
- }
- if (adapter->link_duplex == FULL_DUPLEX)
- ifmr->ifm_active |= IFM_FDX;
+ switch (adapter->link_speed) {
+ case 10:
+ ifmr->ifm_active |= IFM_10_T;
+ break;
+ case 100:
+ /*
+ ** Support for 100Mb SFP - these are Fiber
+ ** but the media type appears as serdes
+ */
+ if (adapter->hw.phy.media_type ==
+ e1000_media_type_internal_serdes)
+ ifmr->ifm_active |= IFM_100_FX;
else
- ifmr->ifm_active |= IFM_HDX;
+ ifmr->ifm_active |= IFM_100_TX;
+ break;
+ case 1000:
+ ifmr->ifm_active |= IFM_1000_T;
+ break;
}
+
+ if (adapter->link_duplex == FULL_DUPLEX)
+ ifmr->ifm_active |= IFM_FDX;
+ else
+ ifmr->ifm_active |= IFM_HDX;
+
IGB_CORE_UNLOCK(adapter);
}
@@ -1661,10 +1791,8 @@ igb_media_change(struct ifnet *ifp)
/*********************************************************************
*
* This routine maps the mbufs to Advanced TX descriptors.
- * used by the 82575 adapter.
*
**********************************************************************/
-
static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
@@ -1673,35 +1801,123 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
bus_dmamap_t map;
struct igb_tx_buffer *tx_buffer, *tx_buffer_mapped;
union e1000_adv_tx_desc *txd = NULL;
- struct mbuf *m_head;
- u32 olinfo_status = 0, cmd_type_len = 0;
- int nsegs, i, j, error, first, last = 0;
- u32 hdrlen = 0;
-
- m_head = *m_headp;
-
+ struct mbuf *m_head = *m_headp;
+ struct ether_vlan_header *eh = NULL;
+ struct ip *ip = NULL;
+ struct tcphdr *th = NULL;
+ u32 hdrlen, cmd_type_len, olinfo_status = 0;
+ int ehdrlen, poff;
+ int nsegs, i, first, last = 0;
+ int error, do_tso, remap = 1;
/* Set basic descriptor constants */
- cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
+ cmd_type_len = E1000_ADVTXD_DTYP_DATA;
cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
if (m_head->m_flags & M_VLANTAG)
cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
- /*
- * Force a cleanup if number of TX descriptors
- * available hits the threshold
- */
- if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
- igb_txeof(txr);
- /* Now do we at least have a minimal? */
- if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
- txr->no_desc_avail++;
+retry:
+ m_head = *m_headp;
+ do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
+ hdrlen = ehdrlen = poff = 0;
+
+ /*
+ * Intel recommends entire IP/TCP header length reside in a single
+ * buffer. If multiple descriptors are used to describe the IP and
+ * TCP header, each descriptor should describe one or more
+ * complete headers; descriptors referencing only parts of headers
+ * are not supported. If all layer headers are not coalesced into
+ * a single buffer, each buffer should not cross a 4KB boundary,
+ * or be larger than the maximum read request size.
+ * The controller also requires modifying the IP/TCP header to make
+ * TSO work, so we first get a writable mbuf chain, then coalesce the
+ * ethernet/IP/TCP headers into a single buffer to meet the
+ * controller's requirement. This also simplifies IP/TCP/UDP checksum
+ * offloading, which has similar restrictions.
+ */
+ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
+ if (do_tso || (m_head->m_next != NULL &&
+ m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
+ if (M_WRITABLE(*m_headp) == 0) {
+ m_head = m_dup(*m_headp, M_NOWAIT);
+ m_freem(*m_headp);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ *m_headp = m_head;
+ }
+ }
+ /*
+ * Assume IPv4, we don't have TSO/checksum offload support
+ * for IPv6 yet.
+ */
+ ehdrlen = sizeof(struct ether_header);
+ m_head = m_pullup(m_head, ehdrlen);
+ if (m_head == NULL) {
+ *m_headp = NULL;
return (ENOBUFS);
}
+ eh = mtod(m_head, struct ether_vlan_header *);
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ ehdrlen = sizeof(struct ether_vlan_header);
+ m_head = m_pullup(m_head, ehdrlen);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ }
+ m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
+ poff = ehdrlen + (ip->ip_hl << 2);
+ if (do_tso) {
+ m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ /*
+ * The pseudo TCP checksum does not include the TCP payload
+ * length, so the driver must recompute the checksum here as
+ * the value the hardware expects to see. This adheres to
+ * Microsoft's Large Send specification.
+ */
+ th = (struct tcphdr *)(mtod(m_head, char *) + poff);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
+ /* Keep track of the full header length */
+ hdrlen = poff + (th->th_off << 2);
+ } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
+ m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ th = (struct tcphdr *)(mtod(m_head, char *) + poff);
+ m_head = m_pullup(m_head, poff + (th->th_off << 2));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
+ th = (struct tcphdr *)(mtod(m_head, char *) + poff);
+ } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
+ m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
+ }
+ *m_headp = m_head;
}
/*
- * Map the packet for DMA.
+ * Map the packet for DMA
*
* Capture the first descriptor index,
* this descriptor will have the index
@@ -1716,10 +1932,19 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
*m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
- if (error == EFBIG) {
+ /*
+ * There are two types of errors we can (try) to handle:
+ * - EFBIG means the mbuf chain was too long and bus_dma ran
+ * out of segments. Defragment the mbuf chain and try again.
+ * - ENOMEM means bus_dma could not obtain enough bounce buffers
+ * at this point in time. Defer sending and try again later.
+ * All other errors, in particular EINVAL, are fatal and prevent the
+ * mbuf chain from ever going through. Drop it and report error.
+ */
+ if (error == EFBIG && remap) {
struct mbuf *m;
- m = m_defrag(*m_headp, M_DONTWAIT);
+ m = m_defrag(*m_headp, M_NOWAIT);
if (m == NULL) {
adapter->mbuf_defrag_failed++;
m_freem(*m_headp);
@@ -1728,19 +1953,9 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
}
*m_headp = m;
- /* Try it again */
- error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
- *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
-
- if (error == ENOMEM) {
- adapter->no_tx_dma_setup++;
- return (error);
- } else if (error != 0) {
- adapter->no_tx_dma_setup++;
- m_freem(*m_headp);
- *m_headp = NULL;
- return (error);
- }
+ /* Try it again, but only once */
+ remap = 0;
+ goto retry;
} else if (error == ENOMEM) {
adapter->no_tx_dma_setup++;
return (error);
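
The EFBIG path above implements a defrag-once retry. The same pattern in isolation, as a self-contained sketch (the tag/map ownership rules and the helper name are assumptions of this sketch, not driver code):

static int
igb_load_with_one_defrag(bus_dma_tag_t tag, bus_dmamap_t map,
    struct mbuf **mp, bus_dma_segment_t *segs, int *nsegs)
{
	int error, remap = 1;
retry:
	error = bus_dmamap_load_mbuf_sg(tag, map, *mp, segs, nsegs,
	    BUS_DMA_NOWAIT);
	if (error == EFBIG && remap) {
		struct mbuf *m = m_defrag(*mp, M_NOWAIT);
		if (m == NULL) {
			m_freem(*mp);	/* chain is unusable, drop it */
			*mp = NULL;
			return (ENOBUFS);
		}
		*mp = m;
		remap = 0;		/* retry exactly once */
		goto retry;
	}
	return (error);
}
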
@@ -1751,29 +1966,35 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
return (error);
}
- /* Check again to be sure we have enough descriptors */
- if (nsegs > (txr->tx_avail - 2)) {
+ /*
+ ** Make sure we don't overrun the ring:
+ ** we need nsegs descriptors plus one for
+ ** the context descriptor used for the
+ ** offloads.
+ */
+ if ((nsegs + 1) > (txr->tx_avail - 2)) {
txr->no_desc_avail++;
bus_dmamap_unload(txr->txtag, map);
return (ENOBUFS);
}
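
Spelled out, the check above budgets descriptors per packet as follows (a sketch of the arithmetic, not new code):

/*
 *   nsegs   data descriptors (one per DMA segment)
 * + 1       context descriptor (CSUM/VLAN/TSO setup)
 * + 2       slack the driver always keeps in reserve
 * so a packet is accepted only when tx_avail >= nsegs + 3.
 */
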
m_head = *m_headp;
- /*
- * Set up the context descriptor:
- * used when any hardware offload is done.
- * This includes CSUM, VLAN, and TSO. It
- * will use the first descriptor.
+ /* Do hardware assists:
+ * Set up the context descriptor, used
+ * when any hardware offload is done.
+ * This includes CSUM, VLAN, and TSO.
+ * It will use the first descriptor.
*/
- if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
- if (igb_tso_setup(txr, m_head, &hdrlen)) {
+
+ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
+ if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
} else
- return (ENXIO);
+ return (ENXIO);
} else if (igb_tx_ctx_setup(txr, m_head))
- olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+ olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
/* Calculate payload length */
olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
@@ -1785,7 +2006,7 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
/* Set up our transmit descriptors */
i = txr->next_avail_desc;
- for (j = 0; j < nsegs; j++) {
+ for (int j = 0; j < nsegs; j++) {
bus_size_t seg_len;
bus_addr_t seg_addr;
@@ -1806,8 +2027,14 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
txr->next_avail_desc = i;
txr->tx_avail -= nsegs;
-
tx_buffer->m_head = m_head;
+
+ /*
+ ** Here we swap the map so the last descriptor,
+ ** which gets the completion interrupt, has the
+ ** real map, and the first descriptor gets the
+ ** unused map from this descriptor.
+ */
tx_buffer_mapped->map = tx_buffer->map;
tx_buffer->map = map;
bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
@@ -1825,6 +2052,7 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
*/
tx_buffer = &txr->tx_buffers[first];
tx_buffer->next_eop = last;
+ /* Update the watchdog time early and often */
txr->watchdog_time = ticks;
/*
@@ -1837,9 +2065,7 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
++txr->tx_packets;
return (0);
-
}
-
static void
igb_set_promisc(struct adapter *adapter)
{
@@ -1847,7 +2073,7 @@ igb_set_promisc(struct adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
u32 reg;
- if (hw->mac.type == e1000_vfadapt) {
+ if (adapter->vf_ifp) {
e1000_promisc_set_vf(hw, e1000_promisc_enabled);
return;
}
@@ -1869,7 +2095,7 @@ igb_disable_promisc(struct adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
u32 reg;
- if (hw->mac.type == e1000_vfadapt) {
+ if (adapter->vf_ifp) {
e1000_promisc_set_vf(hw, e1000_promisc_disabled);
return;
}
@@ -1946,7 +2172,10 @@ igb_local_timer(void *arg)
{
struct adapter *adapter = arg;
device_t dev = adapter->dev;
+ struct ifnet *ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
+ struct igb_queue *que = adapter->queues;
+ int hung = 0, busy = 0;
IGB_CORE_LOCK_ASSERT(adapter);
@@ -1954,23 +2183,34 @@ igb_local_timer(void *arg)
igb_update_link_status(adapter);
igb_update_stats_counters(adapter);
- /*
- ** If flow control has paused us since last checking
- ** it invalidates the watchdog timing, so dont run it.
- */
- if (adapter->pause_frames) {
- adapter->pause_frames = 0;
- goto out;
- }
-
/*
- ** Watchdog: check for time since any descriptor was cleaned
+ ** Check the TX queues status
+ ** - central locked handling of OACTIVE
+ ** - watchdog only if all queues show hung
*/
- for (int i = 0; i < adapter->num_queues; i++, txr++)
- if (txr->queue_status == IGB_QUEUE_HUNG)
- goto timeout;
-out:
+ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
+ if ((txr->queue_status & IGB_QUEUE_HUNG) &&
+ (adapter->pause_frames == 0))
+ ++hung;
+ if (txr->queue_status & IGB_QUEUE_DEPLETED)
+ ++busy;
+ if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
+ taskqueue_enqueue(que->tq, &que->que_task);
+ }
+ if (hung == adapter->num_queues)
+ goto timeout;
+ if (busy == adapter->num_queues)
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
+ (busy < adapter->num_queues))
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ adapter->pause_frames = 0;
callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
+#ifndef DEVICE_POLLING
+ /* Schedule all queue interrupts - deadlock protection */
+ E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
+#endif
return;
timeout:
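
The hung/busy accounting above treats queue_status as a bit mask rather than a single state. A sketch of the flags it assumes (the authoritative definitions live in if_igb.h):

#define IGB_QUEUE_IDLE		1	/* nothing outstanding */
#define IGB_QUEUE_WORKING	2	/* descriptors in flight */
#define IGB_QUEUE_HUNG		4	/* no progress for IGB_WATCHDOG ticks */
#define IGB_QUEUE_DEPLETED	8	/* too few free descriptors */
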
@@ -1989,11 +2229,15 @@ timeout:
static void
igb_update_link_status(struct adapter *adapter)
{
- struct e1000_hw *hw = &adapter->hw;
- struct ifnet *ifp = adapter->ifp;
- device_t dev = adapter->dev;
- struct tx_ring *txr = adapter->tx_rings;
- u32 link_check = 0;
+ struct e1000_hw *hw = &adapter->hw;
+ struct e1000_fc_info *fc = &hw->fc;
+ struct ifnet *ifp = adapter->ifp;
+ device_t dev = adapter->dev;
+ struct tx_ring *txr = adapter->tx_rings;
+ u32 link_check, thstat, ctrl;
+ char *flowctl = NULL;
+
+ link_check = thstat = ctrl = 0;
/* Get the cached link value or read for real */
switch (hw->phy.media_type) {
@@ -2023,17 +2267,44 @@ igb_update_link_status(struct adapter *adapter)
break;
}
+ /* Check for thermal downshift or shutdown */
+ if (hw->mac.type == e1000_i350) {
+ thstat = E1000_READ_REG(hw, E1000_THSTAT);
+ ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
+ }
+
+ /* Get the flow control for display */
+ switch (fc->current_mode) {
+ case e1000_fc_rx_pause:
+ flowctl = "RX";
+ break;
+ case e1000_fc_tx_pause:
+ flowctl = "TX";
+ break;
+ case e1000_fc_full:
+ flowctl = "Full";
+ break;
+ case e1000_fc_none:
+ default:
+ flowctl = "None";
+ break;
+ }
+
/* Now we check if a transition has happened */
if (link_check && (adapter->link_active == 0)) {
e1000_get_speed_and_duplex(&adapter->hw,
&adapter->link_speed, &adapter->link_duplex);
if (bootverbose)
- device_printf(dev, "Link is up %d Mbps %s\n",
+ device_printf(dev, "Link is up %d Mbps %s,"
+ " Flow Control: %s\n",
adapter->link_speed,
((adapter->link_duplex == FULL_DUPLEX) ?
- "Full Duplex" : "Half Duplex"));
+ "Full Duplex" : "Half Duplex"), flowctl);
adapter->link_active = 1;
ifp->if_baudrate = adapter->link_speed * 1000000;
+ if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
+ (thstat & E1000_THSTAT_LINK_THROTTLE))
+ device_printf(dev, "Link: thermal downshift\n");
/* This can sleep */
if_link_state_change(ifp, LINK_STATE_UP);
} else if (!link_check && (adapter->link_active == 1)) {
@@ -2041,10 +2312,13 @@ igb_update_link_status(struct adapter *adapter)
adapter->link_duplex = 0;
if (bootverbose)
device_printf(dev, "Link is Down\n");
+ if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
+ (thstat & E1000_THSTAT_PWR_DOWN))
+ device_printf(dev, "Link: thermal shutdown\n");
adapter->link_active = 0;
/* This can sleep */
if_link_state_change(ifp, LINK_STATE_DOWN);
- /* Turn off watchdogs */
+ /* Reset queue state */
for (int i = 0; i < adapter->num_queues; i++, txr++)
txr->queue_status = IGB_QUEUE_IDLE;
}
@@ -2073,9 +2347,10 @@ igb_stop(void *arg)
callout_stop(&adapter->timer);
/* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- /* Unarm watchdog timer. */
+ /* Disarm watchdog timer. */
for (int i = 0; i < adapter->num_queues; i++, txr++) {
IGB_TX_LOCK(txr);
txr->queue_status = IGB_QUEUE_IDLE;
@@ -2123,6 +2398,13 @@ igb_identify_hardware(struct adapter *adapter)
/* Set MAC type early for PCI setup */
e1000_set_mac_type(&adapter->hw);
+
+ /* Are we a VF device? */
+ if ((adapter->hw.mac.type == e1000_vfadapt) ||
+ (adapter->hw.mac.type == e1000_vfadapt_i350))
+ adapter->vf_ifp = 1;
+ else
+ adapter->vf_ifp = 0;
}
static int
@@ -2163,6 +2445,7 @@ igb_allocate_legacy(struct adapter *adapter)
{
device_t dev = adapter->dev;
struct igb_queue *que = adapter->queues;
+ struct tx_ring *txr = adapter->tx_rings;
int error, rid = 0;
/* Turn off all interrupts */
@@ -2181,6 +2464,10 @@ igb_allocate_legacy(struct adapter *adapter)
return (ENXIO);
}
+#if __FreeBSD_version >= 800000
+ TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
+#endif
+
/*
* Try allocating a fast interrupt and the associated deferred
* processing contexts.
@@ -2218,6 +2505,9 @@ igb_allocate_msix(struct adapter *adapter)
struct igb_queue *que = adapter->queues;
int error, rid, vector = 0;
+ /* Be sure to start with all interrupts disabled */
+ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
+ E1000_WRITE_FLUSH(&adapter->hw);
for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
rid = vector +1;
@@ -2249,11 +2539,22 @@ igb_allocate_msix(struct adapter *adapter)
** Bind the msix vector, and thus the
** rings to the corresponding cpu.
*/
- if (adapter->num_queues > 1)
- bus_bind_intr(dev, que->res, i);
+ if (adapter->num_queues > 1) {
+ if (igb_last_bind_cpu < 0)
+ igb_last_bind_cpu = CPU_FIRST();
+ bus_bind_intr(dev, que->res, igb_last_bind_cpu);
+ device_printf(dev,
+ "Bound queue %d to cpu %d\n",
+ i, igb_last_bind_cpu);
+ igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
+ }
+#if __FreeBSD_version >= 800000
+ TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
+ que->txr);
+#endif
/* Make tasklet for deferred handling */
TASK_INIT(&que->que_task, 0, igb_handle_que, que);
- que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
+ que->tq = taskqueue_create("igb_que", M_NOWAIT,
taskqueue_thread_enqueue, &que->tq);
taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
device_get_nameunit(adapter->dev));
@@ -2292,7 +2593,7 @@ igb_configure_queues(struct adapter *adapter)
u32 tmp, ivar = 0, newitr = 0;
/* First turn on RSS capability */
- if (adapter->hw.mac.type > e1000_82575)
+ if (adapter->hw.mac.type != e1000_82575)
E1000_WRITE_REG(hw, E1000_GPIE,
E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
E1000_GPIE_PBA | E1000_GPIE_NSICR);
@@ -2300,7 +2601,11 @@ igb_configure_queues(struct adapter *adapter)
/* Turn on MSIX */
switch (adapter->hw.mac.type) {
case e1000_82580:
+ case e1000_i350:
+ case e1000_i210:
+ case e1000_i211:
case e1000_vfadapt:
+ case e1000_vfadapt_i350:
/* RX entries */
for (int i = 0; i < adapter->num_queues; i++) {
u32 index = i >> 1;
@@ -2328,13 +2633,12 @@ igb_configure_queues(struct adapter *adapter)
ivar |= (que->msix | E1000_IVAR_VALID) << 8;
}
E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
- adapter->eims_mask |= que->eims;
+ adapter->que_mask |= que->eims;
}
/* And for the link interrupt */
ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
adapter->link_mask = 1 << adapter->linkvec;
- adapter->eims_mask |= adapter->link_mask;
E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
break;
case e1000_82576:
@@ -2351,7 +2655,7 @@ igb_configure_queues(struct adapter *adapter)
ivar |= (que->msix | E1000_IVAR_VALID) << 16;
}
E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
- adapter->eims_mask |= que->eims;
+ adapter->que_mask |= que->eims;
}
/* TX entries */
for (int i = 0; i < adapter->num_queues; i++) {
@@ -2366,13 +2670,12 @@ igb_configure_queues(struct adapter *adapter)
ivar |= (que->msix | E1000_IVAR_VALID) << 24;
}
E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
- adapter->eims_mask |= que->eims;
+ adapter->que_mask |= que->eims;
}
/* And for the link interrupt */
ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
adapter->link_mask = 1 << adapter->linkvec;
- adapter->eims_mask |= adapter->link_mask;
E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
break;
@@ -2393,14 +2696,13 @@ igb_configure_queues(struct adapter *adapter)
que->eims = tmp;
E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
i, que->eims);
- adapter->eims_mask |= que->eims;
+ adapter->que_mask |= que->eims;
}
/* Link */
E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
E1000_EIMS_OTHER);
adapter->link_mask |= E1000_EIMS_OTHER;
- adapter->eims_mask |= adapter->link_mask;
default:
break;
}
@@ -2461,13 +2763,24 @@ igb_free_pci_resources(struct adapter *adapter)
else
(adapter->msix != 0) ? (rid = 1):(rid = 0);
+ que = adapter->queues;
if (adapter->tag != NULL) {
+ taskqueue_drain(que->tq, &adapter->link_task);
bus_teardown_intr(dev, adapter->res, adapter->tag);
adapter->tag = NULL;
}
if (adapter->res != NULL)
bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
+ for (int i = 0; i < adapter->num_queues; i++, que++) {
+ if (que->tq != NULL) {
+#if __FreeBSD_version >= 800000
+ taskqueue_drain(que->tq, &que->txr->txq_task);
+#endif
+ taskqueue_drain(que->tq, &que->que_task);
+ taskqueue_free(que->tq);
+ }
+ }
mem:
if (adapter->msix)
pci_release_msi(dev);
@@ -2489,7 +2802,7 @@ static int
igb_setup_msix(struct adapter *adapter)
{
device_t dev = adapter->dev;
- int rid, want, queues, msgs;
+ int rid, want, queues, msgs, maxqueues;
/* tuneable override */
if (igb_enable_msix == 0)
@@ -2520,16 +2833,29 @@ igb_setup_msix(struct adapter *adapter)
/* Manual override */
if (igb_num_queues != 0)
queues = igb_num_queues;
- if (queues > 8) /* max queues */
- queues = 8;
- /* Can have max of 4 queues on 82575 */
- if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
- queues = 4;
-
- /* Limit the VF adapter to one queue */
- if (adapter->hw.mac.type == e1000_vfadapt)
- queues = 1;
+ /* Sanity check based on HW */
+ switch (adapter->hw.mac.type) {
+ case e1000_82575:
+ maxqueues = 4;
+ break;
+ case e1000_82576:
+ case e1000_82580:
+ case e1000_i350:
+ maxqueues = 8;
+ break;
+ case e1000_i210:
+ maxqueues = 4;
+ break;
+ case e1000_i211:
+ maxqueues = 2;
+ break;
+ default: /* VF interfaces */
+ maxqueues = 1;
+ break;
+ }
+ if (queues > maxqueues)
+ queues = maxqueues;
/*
** One vector (RX/TX pair) per queue
@@ -2543,7 +2869,7 @@ igb_setup_msix(struct adapter *adapter)
"MSIX Configuration Problem, "
"%d vectors configured, but %d queues wanted!\n",
msgs, want);
- return (ENXIO);
+ return (0);
}
if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
device_printf(adapter->dev,
@@ -2553,9 +2879,11 @@ igb_setup_msix(struct adapter *adapter)
}
msi:
msgs = pci_msi_count(dev);
- if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
- device_printf(adapter->dev,"Using MSI interrupt\n");
- return (msgs);
+ if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
+ device_printf(adapter->dev," Using MSI interrupt\n");
+ return (msgs);
+ }
+ return (0);
}
/*********************************************************************
@@ -2589,10 +2917,18 @@ igb_reset(struct adapter *adapter)
break;
case e1000_82576:
case e1000_vfadapt:
- pba = E1000_PBA_64K;
+ pba = E1000_READ_REG(hw, E1000_RXPBS);
+ pba &= E1000_RXPBS_SIZE_MASK_82576;
break;
case e1000_82580:
- pba = E1000_PBA_35K;
+ case e1000_i350:
+ case e1000_vfadapt_i350:
+ pba = E1000_READ_REG(hw, E1000_RXPBS);
+ pba = e1000_rxpbs_adjust_82580(pba);
+ break;
+ case e1000_i210:
+ case e1000_i211:
+ pba = E1000_PBA_34K;
default:
break;
}
@@ -2647,14 +2983,10 @@ igb_reset(struct adapter *adapter)
fc->pause_time = IGB_FC_PAUSE_TIME;
fc->send_xon = TRUE;
-
- /* Set Flow control, use the tunable location if sane */
- if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
- fc->requested_mode = igb_fc_setting;
+ if (adapter->fc)
+ fc->requested_mode = adapter->fc;
else
- fc->requested_mode = e1000_fc_none;
-
- fc->current_mode = fc->requested_mode;
+ fc->requested_mode = e1000_fc_default;
/* Issue a global reset */
e1000_reset_hw(hw);
@@ -2663,28 +2995,48 @@ igb_reset(struct adapter *adapter)
if (e1000_init_hw(hw) < 0)
device_printf(dev, "Hardware Initialization Failed\n");
- if (hw->mac.type == e1000_82580) {
- u32 reg;
+ /* Setup DMA Coalescing */
+ if ((hw->mac.type > e1000_82580) &&
+ (hw->mac.type != e1000_i211)) {
+ u32 dmac;
+ u32 reg = ~E1000_DMACR_DMAC_EN;
- hwm = (pba << 10) - (2 * adapter->max_frame_size);
- /*
- * 0x80000000 - enable DMA COAL
- * 0x10000000 - use L0s as low power
- * 0x20000000 - use L1 as low power
- * X << 16 - exit dma coal when rx data exceeds X kB
- * Y - upper limit to stay in dma coal in units of 32usecs
- */
- E1000_WRITE_REG(hw, E1000_DMACR,
- 0xA0000006 | ((hwm << 6) & 0x00FF0000));
+ if (adapter->dmac == 0) { /* Disabling it */
+ E1000_WRITE_REG(hw, E1000_DMACR, reg);
+ goto reset_out;
+ }
- /* set hwm to PBA - 2 * max frame size */
- E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
- /*
- * This sets the time to wait before requesting transition to
- * low power state to number of usecs needed to receive 1 512
- * byte frame at gigabit line rate
- */
- E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
+ /* Set starting thresholds */
+ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
+ E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
+
+ hwm = 64 * pba - adapter->max_frame_size / 16;
+ if (hwm < 64 * (pba - 6))
+ hwm = 64 * (pba - 6);
+ reg = E1000_READ_REG(hw, E1000_FCRTC);
+ reg &= ~E1000_FCRTC_RTH_COAL_MASK;
+ reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
+ & E1000_FCRTC_RTH_COAL_MASK);
+ E1000_WRITE_REG(hw, E1000_FCRTC, reg);
+
+
+ dmac = pba - adapter->max_frame_size / 512;
+ if (dmac < pba - 10)
+ dmac = pba - 10;
+ reg = E1000_READ_REG(hw, E1000_DMACR);
+ reg &= ~E1000_DMACR_DMACTHR_MASK;
+ reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
+ & E1000_DMACR_DMACTHR_MASK);
+ /* transition to L0x or L1 if available..*/
+ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
+ /* timer = value in adapter->dmac in 32usec intervals */
+ reg |= (adapter->dmac >> 5);
+ E1000_WRITE_REG(hw, E1000_DMACR, reg);
+
+ /* Set the interval before transition */
+ reg = E1000_READ_REG(hw, E1000_DMCTLX);
+ reg |= 0x80000004;
+ E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
/* free space in tx packet buffer to wake from DMA coal */
E1000_WRITE_REG(hw, E1000_DMCTXTH,
@@ -2692,10 +3044,18 @@ igb_reset(struct adapter *adapter)
/* make low power state decision controlled by DMA coal */
reg = E1000_READ_REG(hw, E1000_PCIEMISC);
+ reg &= ~E1000_PCIEMISC_LX_DECISION;
+ E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
+ device_printf(dev, "DMA Coalescing enabled\n");
+
+ } else if (hw->mac.type == e1000_82580) {
+ u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
+ E1000_WRITE_REG(hw, E1000_DMACR, 0);
E1000_WRITE_REG(hw, E1000_PCIEMISC,
- reg | E1000_PCIEMISC_LX_DECISION);
+ reg & ~E1000_PCIEMISC_LX_DECISION);
}
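
Worked numbers for the thresholds above, under assumed inputs (pba = 32 KB, max_frame_size = 1518; integer division throughout):

/*
 * hwm  = 64*32 - 1518/16 = 2048 - 94 = 1954   (floor: 64*(32-6) = 1664)
 * dmac = 32 - 1518/512   = 32 - 2    = 30     (floor: 32 - 10   = 22)
 * adapter->dmac >> 5 then converts the sysctl's microseconds into the
 * register's 32-usec timer units.
 */
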
+reset_out:
E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
e1000_get_phy_info(hw);
e1000_check_for_link(hw);
@@ -2720,19 +3080,19 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
return (-1);
}
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_mtu = ETHERMTU;
ifp->if_init = igb_init;
ifp->if_softc = adapter;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = igb_ioctl;
- ifp->if_start = igb_start;
#if __FreeBSD_version >= 800000
ifp->if_transmit = igb_mq_start;
ifp->if_qflush = igb_qflush;
-#endif
+#else
+ ifp->if_start = igb_start;
IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
IFQ_SET_READY(&ifp->if_snd);
+#endif
ether_ifattach(ifp, adapter->hw.mac.addr);
@@ -2755,15 +3115,19 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
* support full VLAN capability.
*/
ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
- ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
+ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
+ | IFCAP_VLAN_HWTSO
+ | IFCAP_VLAN_MTU;
+ ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
+ | IFCAP_VLAN_HWTSO
+ | IFCAP_VLAN_MTU;
/*
- ** Dont turn this on by default, if vlans are
+ ** Don't turn this on by default, if vlans are
** created on another pseudo device (eg. lagg)
** then vlan events are not passed thru, breaking
** operation, but with HW FILTER off it works. If
- ** using vlans directly on the em driver you can
+ ** using vlans directly on the igb driver you can
** enable this and get full hardware tag filtering.
*/
ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
@@ -2837,7 +3201,7 @@ igb_dma_malloc(struct adapter *adapter, bus_size_t size,
}
error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
- BUS_DMA_NOWAIT, &dma->dma_map);
+ BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
if (error) {
device_printf(adapter->dev,
"%s: bus_dmamem_alloc(%ju) failed: %d\n",
@@ -2966,7 +3330,7 @@ igb_allocate_queues(struct adapter *adapter)
}
#if __FreeBSD_version >= 800000
/* Allocate a buf ring */
- txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
+ txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
M_WAITOK, &txr->tx_mtx);
#endif
}
@@ -3105,9 +3469,16 @@ igb_setup_transmit_ring(struct tx_ring *txr)
struct adapter *adapter = txr->adapter;
struct igb_tx_buffer *txbuf;
int i;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
/* Clear the old descriptor contents */
IGB_TX_LOCK(txr);
+#ifdef DEV_NETMAP
+ slot = netmap_reset(na, NR_TX, txr->me, 0);
+#endif /* DEV_NETMAP */
bzero((void *)txr->tx_base,
(sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
/* Reset indices */
@@ -3124,6 +3495,13 @@ igb_setup_transmit_ring(struct tx_ring *txr)
m_freem(txbuf->m_head);
txbuf->m_head = NULL;
}
+#ifdef DEV_NETMAP
+ if (slot) {
+ int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
+ /* no need to set the address */
+ netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
+ }
+#endif /* DEV_NETMAP */
/* clear the watch index */
txbuf->next_eop = -1;
}
@@ -3195,7 +3573,7 @@ igb_initialize_transmit_units(struct adapter *adapter)
E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
}
- if (adapter->hw.mac.type == e1000_vfadapt)
+ if (adapter->vf_ifp)
return;
e1000_config_collision_dist(hw);
@@ -3289,8 +3667,9 @@ igb_free_transmit_buffers(struct tx_ring *txr)
* Setup work for hardware segmentation offload (TSO)
*
**********************************************************************/
-static boolean_t
-igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
+static bool
+igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
+ struct ip *ip, struct tcphdr *th)
{
struct adapter *adapter = txr->adapter;
struct e1000_adv_tx_context_desc *TXD;
@@ -3298,45 +3677,15 @@ igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
u32 mss_l4len_idx = 0;
u16 vtag = 0;
- int ctxd, ehdrlen, ip_hlen, tcp_hlen;
- struct ether_vlan_header *eh;
- struct ip *ip;
- struct tcphdr *th;
-
-
- /*
- * Determine where frame payload starts.
- * Jump over vlan headers if already present
- */
- eh = mtod(mp, struct ether_vlan_header *);
- if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
- ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
- else
- ehdrlen = ETHER_HDR_LEN;
-
- /* Ensure we have at least the IP+TCP header in the first mbuf. */
- if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
- return FALSE;
+ int ctxd, ip_hlen, tcp_hlen;
- /* Only supports IPV4 for now */
ctxd = txr->next_avail_desc;
tx_buffer = &txr->tx_buffers[ctxd];
TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
- ip = (struct ip *)(mp->m_data + ehdrlen);
- if (ip->ip_p != IPPROTO_TCP)
- return FALSE; /* 0 */
ip->ip_sum = 0;
ip_hlen = ip->ip_hl << 2;
- th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
- th->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr, htons(IPPROTO_TCP));
tcp_hlen = th->th_off << 2;
- /*
- * Calculate header length, this is used
- * in the transmit desc in igb_xmit
- */
- *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
/* VLAN MACLEN IPLEN */
if (mp->m_flags & M_VLANTAG) {
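The Ethernet/IP/TCP parsing removed here now happens once in the caller, which passes ehdrlen, ip and th in. A condensed sketch of the precondition the caller must establish, reconstructed from the removed lines (the pseudo-header checksum priming moves with it):

	struct ether_vlan_header *eh = mtod(mp, struct ether_vlan_header *);
	int ehdrlen = (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ?
	    ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN : ETHER_HDR_LEN;
	struct ip *ip = (struct ip *)(mp->m_data + ehdrlen);
	struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));

	if (ip->ip_p == IPPROTO_TCP) {
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		(void)igb_tso_setup(txr, mp, ehdrlen, ip, th);
	}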
@@ -3519,6 +3868,19 @@ igb_txeof(struct tx_ring *txr)
IGB_TX_LOCK_ASSERT(txr);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(ifp);
+
+ selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+ IGB_TX_UNLOCK(txr);
+ IGB_CORE_LOCK(adapter);
+ selwakeuppri(&na->tx_si, PI_NET);
+ IGB_CORE_UNLOCK(adapter);
+ IGB_TX_LOCK(txr);
+ return FALSE;
+ }
+#endif /* DEV_NETMAP */
if (txr->tx_avail == adapter->num_tx_desc) {
txr->queue_status = IGB_QUEUE_IDLE;
return FALSE;
@@ -3598,25 +3960,23 @@ igb_txeof(struct tx_ring *txr)
** for too long indicates a hang.
*/
if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
- txr->queue_status = IGB_QUEUE_HUNG;
-
+ txr->queue_status |= IGB_QUEUE_HUNG;
/*
- * If we have enough room, clear IFF_DRV_OACTIVE
- * to tell the stack that it is OK to send packets.
+ * If we have a minimum free,
+	 * clear the depleted state bit
*/
- if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- /* All clean, turn off the watchdog */
- if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = IGB_QUEUE_IDLE;
- return (FALSE);
- }
+ if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
+ txr->queue_status &= ~IGB_QUEUE_DEPLETED;
+
+ /* All clean, turn off the watchdog */
+ if (txr->tx_avail == adapter->num_tx_desc) {
+ txr->queue_status = IGB_QUEUE_IDLE;
+ return (FALSE);
}
return (TRUE);
}
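queue_status is now a bit mask rather than a single state: HUNG and DEPLETED are independent bits OR'ed in and cleared individually, while plain assignment still performs a wholesale reset. A standalone sketch, with flag values that are assumptions (the real definitions live in if_igb.h):

	#define IGB_QUEUE_IDLE		1	/* hypothetical values */
	#define IGB_QUEUE_WORKING	2
	#define IGB_QUEUE_HUNG		4
	#define IGB_QUEUE_DEPLETED	8

	txr->queue_status |= IGB_QUEUE_HUNG;		/* set, keep other bits */
	txr->queue_status &= ~IGB_QUEUE_DEPLETED;	/* clear just one bit */
	txr->queue_status = IGB_QUEUE_IDLE;		/* wholesale reset */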
-
/*********************************************************************
*
* Refresh mbuf buffers for RX descriptor rings
@@ -3634,17 +3994,25 @@ igb_refresh_mbufs(struct rx_ring *rxr, int limit)
bus_dma_segment_t pseg[1];
struct igb_rx_buf *rxbuf;
struct mbuf *mh, *mp;
- int i, nsegs, error, cleaned;
+ int i, j, nsegs, error;
+ bool refreshed = FALSE;
- i = rxr->next_to_refresh;
- cleaned = -1; /* Signify no completions */
- while (i != limit) {
+ i = j = rxr->next_to_refresh;
+ /*
+ ** Get one descriptor beyond
+ ** our work mark to control
+ ** the loop.
+ */
+ if (++j == adapter->num_rx_desc)
+ j = 0;
+
+ while (j != limit) {
rxbuf = &rxr->rx_buffers[i];
/* No hdr mbuf used with header split off */
if (rxr->hdr_split == FALSE)
goto no_split;
if (rxbuf->m_head == NULL) {
- mh = m_gethdr(M_DONTWAIT, MT_DATA);
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL)
goto update;
} else
@@ -3670,7 +4038,7 @@ igb_refresh_mbufs(struct rx_ring *rxr, int limit)
htole64(hseg[0].ds_addr);
no_split:
if (rxbuf->m_pack == NULL) {
- mp = m_getjcl(M_DONTWAIT, MT_DATA,
+ mp = m_getjcl(M_NOWAIT, MT_DATA,
M_PKTHDR, adapter->rx_mbuf_sz);
if (mp == NULL)
goto update;
@@ -3693,18 +4061,17 @@ no_split:
BUS_DMASYNC_PREREAD);
rxr->rx_base[i].read.pkt_addr =
htole64(pseg[0].ds_addr);
+ refreshed = TRUE; /* I feel wefreshed :) */
- cleaned = i;
- /* Calculate next index */
- if (++i == adapter->num_rx_desc)
- i = 0;
- /* This is the work marker for refresh */
+ i = j; /* our next is precalculated */
rxr->next_to_refresh = i;
+ if (++j == adapter->num_rx_desc)
+ j = 0;
}
update:
- if (cleaned != -1) /* If we refreshed some, bump tail */
+ if (refreshed) /* update tail */
E1000_WRITE_REG(&adapter->hw,
- E1000_RDT(rxr->me), cleaned);
+ E1000_RDT(rxr->me), rxr->next_to_refresh);
return;
}
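The rewritten loop keeps j one slot ahead of the work mark i, so the ring can be refilled all the way to limit without the old 'cleaned' sentinel. A standalone sketch of the index discipline on an N-entry ring:

	int i, j;

	i = j = next_to_refresh;
	if (++j == N)			/* j leads i by exactly one */
		j = 0;
	while (j != limit) {
		refresh_slot(i);	/* hypothetical per-slot refill */
		i = j;			/* advance the work mark */
		next_to_refresh = i;
		if (++j == N)
			j = 0;
	}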
@@ -3796,12 +4163,11 @@ fail:
static void
igb_free_receive_ring(struct rx_ring *rxr)
{
- struct adapter *adapter;
+ struct adapter *adapter = rxr->adapter;
struct igb_rx_buf *rxbuf;
- int i;
- adapter = rxr->adapter;
- for (i = 0; i < adapter->num_rx_desc; i++) {
+
+ for (int i = 0; i < adapter->num_rx_desc; i++) {
rxbuf = &rxr->rx_buffers[i];
if (rxbuf->m_head != NULL) {
bus_dmamap_sync(rxr->htag, rxbuf->hmap,
@@ -3838,6 +4204,10 @@ igb_setup_receive_ring(struct rx_ring *rxr)
bus_dma_segment_t pseg[1], hseg[1];
struct lro_ctrl *lro = &rxr->lro;
int rsize, nsegs, error = 0;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(rxr->adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
adapter = rxr->adapter;
dev = adapter->dev;
@@ -3845,6 +4215,9 @@ igb_setup_receive_ring(struct rx_ring *rxr)
/* Clear the ring contents */
IGB_RX_LOCK(rxr);
+#ifdef DEV_NETMAP
+ slot = netmap_reset(na, NR_RX, rxr->me, 0);
+#endif /* DEV_NETMAP */
rsize = roundup2(adapter->num_rx_desc *
sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
bzero((void *)rxr->rx_base, rsize);
@@ -3863,11 +4236,25 @@ igb_setup_receive_ring(struct rx_ring *rxr)
struct mbuf *mh, *mp;
rxbuf = &rxr->rx_buffers[j];
+#ifdef DEV_NETMAP
+ if (slot) {
+			/* slot sj is mapped to the j-th NIC-ring entry */
+ int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
+ uint64_t paddr;
+ void *addr;
+
+ addr = PNMB(slot + sj, &paddr);
+ netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
+ /* Update descriptor */
+ rxr->rx_base[j].read.pkt_addr = htole64(paddr);
+ continue;
+ }
+#endif /* DEV_NETMAP */
if (rxr->hdr_split == FALSE)
goto skip_head;
/* First the header */
- rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
+ rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
if (rxbuf->m_head == NULL) {
error = ENOBUFS;
goto fail;
@@ -3889,7 +4276,7 @@ igb_setup_receive_ring(struct rx_ring *rxr)
skip_head:
/* Now the payload cluster */
- rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
+ rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
M_PKTHDR, adapter->rx_mbuf_sz);
if (rxbuf->m_pack == NULL) {
error = ENOBUFS;
@@ -3911,7 +4298,7 @@ skip_head:
/* Setup our descriptor indices */
rxr->next_to_check = 0;
- rxr->next_to_refresh = 0;
+ rxr->next_to_refresh = adapter->num_rx_desc - 1;
rxr->lro_enabled = FALSE;
rxr->rx_split_packets = 0;
rxr->rx_bytes = 0;
@@ -3949,6 +4336,7 @@ fail:
return (error);
}
+
/*********************************************************************
*
* Initialize all receive rings.
@@ -3971,8 +4359,8 @@ fail:
* the rings that completed, the failing case will have
* cleaned up for itself. 'i' is the endpoint.
*/
- for (int j = 0; j > i; ++j) {
- rxr = &adapter->rx_rings[i];
+ for (int j = 0; j < i; ++j) {
+ rxr = &adapter->rx_rings[j];
IGB_RX_LOCK(rxr);
igb_free_receive_ring(rxr);
IGB_RX_UNLOCK(rxr);
@@ -4006,7 +4394,7 @@ igb_initialize_receive_units(struct adapter *adapter)
/*
** Set up for header split
*/
- if (rxr->hdr_split) {
+ if (igb_header_split) {
/* Use a standard mbuf for the header */
srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
@@ -4144,9 +4532,29 @@ igb_initialize_receive_units(struct adapter *adapter)
* - needs to be after enable
*/
for (int i = 0; i < adapter->num_queues; i++) {
- E1000_WRITE_REG(hw, E1000_RDH(i), 0);
- E1000_WRITE_REG(hw, E1000_RDT(i),
- adapter->num_rx_desc - 1);
+ rxr = &adapter->rx_rings[i];
+ E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
+#ifdef DEV_NETMAP
+ /*
+ * an init() while a netmap client is active must
+ * preserve the rx buffers passed to userspace.
+ * In this driver it means we adjust RDT to
+		 * something different from next_to_refresh
+ * (which is not used in netmap mode).
+ */
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_kring *kring = &na->rx_rings[i];
+ int t = rxr->next_to_refresh - kring->nr_hwavail;
+
+ if (t >= adapter->num_rx_desc)
+ t -= adapter->num_rx_desc;
+ else if (t < 0)
+ t += adapter->num_rx_desc;
+ E1000_WRITE_REG(hw, E1000_RDT(i), t);
+ } else
+#endif /* DEV_NETMAP */
+ E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
}
return;
}
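A worked example of the wraparound fix-up, assuming a 1024-descriptor ring:

	int t = 5 - 10;		/* next_to_refresh = 5, nr_hwavail = 10 */
	if (t < 0)
		t += 1024;	/* t = 1019: RDT lands ten slots behind
				   the refresh mark, modulo the ring */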
@@ -4325,6 +4733,20 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(ifp);
+
+ na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
+ selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
+ IGB_RX_UNLOCK(rxr);
+ IGB_CORE_LOCK(adapter);
+ selwakeuppri(&na->rx_si, PI_NET);
+ IGB_CORE_UNLOCK(adapter);
+ return (0);
+ }
+#endif /* DEV_NETMAP */
+
/* Main clean loop */
for (i = rxr->next_to_check; count != 0;) {
struct mbuf *sendmp, *mh, *mp;
@@ -4344,14 +4766,18 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
rxbuf = &rxr->rx_buffers[i];
plen = le16toh(cur->wb.upper.length);
ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
- vtag = le16toh(cur->wb.upper.vlan);
+ if ((adapter->hw.mac.type == e1000_i350) &&
+ (staterr & E1000_RXDEXT_STATERR_LB))
+ vtag = be16toh(cur->wb.upper.vlan);
+ else
+ vtag = le16toh(cur->wb.upper.vlan);
hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
/* Make sure all segments of a bad packet are discarded */
if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
(rxr->discard)) {
- ifp->if_ierrors++;
+ adapter->dropped_pkts++;
++rxr->rx_discarded;
if (!eop) /* Catch subsequent segs */
rxr->discard = TRUE;
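The be16toh() branch works around an i350 quirk: descriptors for looped-back packets (E1000_RXDEXT_STATERR_LB set) carry the VLAN field in network byte order. On a little-endian host the two decodes of the same field differ:

	uint16_t raw   = 0x6400;	/* field as read from the descriptor */
	uint16_t as_le = le16toh(raw);	/* 0x6400: wrong for i350 loopback */
	uint16_t as_be = be16toh(raw);	/* 0x0064 = VLAN 100: correct */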
@@ -4479,10 +4905,8 @@ next_desc:
}
/* Catch any remainders */
- if (processed != 0) {
+ if (igb_rx_unrefreshed(rxr))
igb_refresh_mbufs(rxr, i);
- processed = 0;
- }
rxr->next_to_check = i;
@@ -4494,19 +4918,11 @@ next_desc:
tcp_lro_flush(lro, queued);
}
- IGB_RX_UNLOCK(rxr);
-
if (done != NULL)
- *done = rxdone;
+ *done += rxdone;
- /*
- ** We still have cleaning to do?
- ** Schedule another interrupt if so.
- */
- if ((staterr & E1000_RXD_STAT_DD) != 0)
- return (TRUE);
-
- return (FALSE);
+ IGB_RX_UNLOCK(rxr);
+ return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
/*********************************************************************
@@ -4581,9 +4997,9 @@ igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
bit = vtag & 0x1F;
adapter->shadow_vfta[index] |= (1 << bit);
++adapter->num_vlans;
- /* Re-init to load the changes */
+ /* Change hw filter setting */
if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
- igb_init_locked(adapter);
+ igb_setup_vlan_hw_support(adapter);
IGB_CORE_UNLOCK(adapter);
}
@@ -4608,9 +5024,9 @@ igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
bit = vtag & 0x1F;
adapter->shadow_vfta[index] &= ~(1 << bit);
--adapter->num_vlans;
- /* Re-init to load the changes */
+ /* Change hw filter setting */
if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
- igb_init_locked(adapter);
+ igb_setup_vlan_hw_support(adapter);
IGB_CORE_UNLOCK(adapter);
}
@@ -4618,49 +5034,48 @@ static void
igb_setup_vlan_hw_support(struct adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
+ struct ifnet *ifp = adapter->ifp;
u32 reg;
- /*
- ** We get here thru init_locked, meaning
- ** a soft reset, this has already cleared
- ** the VFTA and other state, so if there
- ** have been no vlan's registered do nothing.
- */
- if (adapter->num_vlans == 0)
- return;
+ if (adapter->vf_ifp) {
+ e1000_rlpml_set_vf(hw,
+ adapter->max_frame_size + VLAN_TAG_SIZE);
+ return;
+ }
+
+ reg = E1000_READ_REG(hw, E1000_CTRL);
+ reg |= E1000_CTRL_VME;
+ E1000_WRITE_REG(hw, E1000_CTRL, reg);
+
+ /* Enable the Filter Table */
+ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
+ reg = E1000_READ_REG(hw, E1000_RCTL);
+ reg &= ~E1000_RCTL_CFIEN;
+ reg |= E1000_RCTL_VFE;
+ E1000_WRITE_REG(hw, E1000_RCTL, reg);
+ }
+ /* Update the frame size */
+ E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
+ adapter->max_frame_size + VLAN_TAG_SIZE);
+
+ /* Don't bother with table if no vlans */
+ if ((adapter->num_vlans == 0) ||
+ ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
+ return;
/*
** A soft reset zero's out the VFTA, so
** we need to repopulate it now.
*/
for (int i = 0; i < IGB_VFTA_SIZE; i++)
if (adapter->shadow_vfta[i] != 0) {
- if (hw->mac.type == e1000_vfadapt)
+ if (adapter->vf_ifp)
e1000_vfta_set_vf(hw,
adapter->shadow_vfta[i], TRUE);
else
- E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
- i, adapter->shadow_vfta[i]);
+ e1000_write_vfta(hw,
+ i, adapter->shadow_vfta[i]);
}
-
- if (hw->mac.type == e1000_vfadapt)
- e1000_rlpml_set_vf(hw,
- adapter->max_frame_size + VLAN_TAG_SIZE);
- else {
- reg = E1000_READ_REG(hw, E1000_CTRL);
- reg |= E1000_CTRL_VME;
- E1000_WRITE_REG(hw, E1000_CTRL, reg);
-
- /* Enable the Filter Table */
- reg = E1000_READ_REG(hw, E1000_RCTL);
- reg &= ~E1000_RCTL_CFIEN;
- reg |= E1000_RCTL_VFE;
- E1000_WRITE_REG(hw, E1000_RCTL, reg);
-
- /* Update the frame size */
- E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
- adapter->max_frame_size + VLAN_TAG_SIZE);
- }
}
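The register/unregister handlers and the repopulation loop above all share the same shadow-VFTA bit math: 4096 possible VLAN IDs packed into 32-bit words. A standalone sketch (the 128-word table size is an assumption about IGB_VFTA_SIZE):

	#include <stdbool.h>
	#include <stdint.h>

	#define VFTA_WORDS 128			/* 4096 bits / 32 */
	static uint32_t shadow_vfta[VFTA_WORDS];

	static void
	vfta_update(uint16_t vtag, bool add)
	{
		uint16_t index = (vtag >> 5) & 0x7F;	/* which word */
		uint16_t bit = vtag & 0x1F;		/* which bit in it */

		if (add)
			shadow_vfta[index] |= (1u << bit);
		else
			shadow_vfta[index] &= ~(1u << bit);
	}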
static void
@@ -4668,12 +5083,10 @@ igb_enable_intr(struct adapter *adapter)
{
/* With RSS set up what to auto clear */
if (adapter->msix_mem) {
- E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
- adapter->eims_mask);
- E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
- adapter->eims_mask);
- E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
- adapter->eims_mask);
+ u32 mask = (adapter->que_mask | adapter->link_mask);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
E1000_WRITE_REG(&adapter->hw, E1000_IMS,
E1000_IMS_LSC);
} else {
@@ -4750,7 +5163,7 @@ igb_get_hw_control(struct adapter *adapter)
{
u32 ctrl_ext;
- if (adapter->hw.mac.type == e1000_vfadapt)
+ if (adapter->vf_ifp)
return;
/* Let firmware know the driver has taken over */
@@ -4770,7 +5183,7 @@ igb_release_hw_control(struct adapter *adapter)
{
u32 ctrl_ext;
- if (adapter->hw.mac.type == e1000_vfadapt)
+ if (adapter->vf_ifp)
return;
/* Let firmware taken over control of h/w */
@@ -4849,7 +5262,7 @@ igb_update_stats_counters(struct adapter *adapter)
** small controlled set of stats, do only
** those and return.
*/
- if (adapter->hw.mac.type == e1000_vfadapt) {
+ if (adapter->vf_ifp) {
igb_update_vf_stats_counters(adapter);
return;
}
@@ -5190,7 +5603,7 @@ igb_add_hw_stats(struct adapter *adapter)
** VF adapter has a very limited set of stats
	** since it's not managing the metal, so to speak.
*/
- if (adapter->hw.mac.type == e1000_vfadapt) {
+ if (adapter->vf_ifp) {
SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
CTLFLAG_RD, &stats->gprc,
"Good Packets Received");
@@ -5507,7 +5920,7 @@ igb_print_nvm_info(struct adapter *adapter)
}
static void
-igb_add_rx_process_limit(struct adapter *adapter, const char *name,
+igb_set_sysctl_value(struct adapter *adapter, const char *name,
const char *description, int *limit, int value)
{
*limit = value;
@@ -5515,3 +5928,112 @@ igb_add_rx_process_limit(struct adapter *adapter, const char *name,
SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
+
+/*
+** Set flow control using sysctl:
+** Flow control values:
+** 0 - off
+** 1 - rx pause
+** 2 - tx pause
+** 3 - full
+*/
+static int
+igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ static int input = 3; /* default is full */
+ struct adapter *adapter = (struct adapter *) arg1;
+
+ error = sysctl_handle_int(oidp, &input, 0, req);
+
+ if ((error) || (req->newptr == NULL))
+ return (error);
+
+ switch (input) {
+ case e1000_fc_rx_pause:
+ case e1000_fc_tx_pause:
+ case e1000_fc_full:
+ case e1000_fc_none:
+ adapter->hw.fc.requested_mode = input;
+ adapter->fc = input;
+ break;
+ default:
+ /* Do nothing */
+ return (error);
+ }
+
+ adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
+ e1000_force_mac_fc(&adapter->hw);
+ return (error);
+}
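The accepted inputs line up with the e1000_fc_* enumeration (none = 0, rx_pause = 1, tx_pause = 2, full = 3), which is why the switch compares input against the enumerators directly. Assuming the handler is attached to the device sysctl tree under the name 'fc' (an assumption; the attach code is not shown here), runtime use would be:

	sysctl dev.igb.0.fc=3	# request full flow control
	sysctl dev.igb.0.fc=0	# turn pause frames off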
+
+/*
+** Manage DMA Coalescing:
+** Control values:
+** 	0/1 - off/on
+** Legal timer values are:
+** 	250, 500, or 1000-10000 in steps of 1000
+*/
+static int
+igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
+
+ if ((error) || (req->newptr == NULL))
+ return (error);
+
+ switch (adapter->dmac) {
+ case 0:
+			/* Disabling */
+ break;
+ case 1: /* Just enable and use default */
+ adapter->dmac = 1000;
+ break;
+ case 250:
+ case 500:
+ case 1000:
+ case 2000:
+ case 3000:
+ case 4000:
+ case 5000:
+ case 6000:
+ case 7000:
+ case 8000:
+ case 9000:
+ case 10000:
+ /* Legal values - allow */
+ break;
+ default:
+ /* Do nothing, illegal value */
+ adapter->dmac = 0;
+ return (error);
+ }
+ /* Reinit the interface */
+ igb_init(adapter);
+ return (error);
+}
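Assuming an OID named 'dmac' on the device sysctl tree (again an assumption), enabling coalescing is one write; anything outside the legal set falls through to the default case and leaves the feature off:

	sysctl dev.igb.0.dmac=1		# enable with the 1000 default
	sysctl dev.igb.0.dmac=250	# enable with the shortest legal timer

Every accepted write re-runs igb_init(), so the new value reaches the DMA-coalescing block in igb_reset().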
+
+/*
+** Manage Energy Efficient Ethernet:
+** Control values:
+** 0/1 - enabled/disabled
+*/
+static int
+igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *adapter = (struct adapter *) arg1;
+ int error, value;
+
+ value = adapter->hw.dev_spec._82575.eee_disable;
+ error = sysctl_handle_int(oidp, &value, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+ IGB_CORE_LOCK(adapter);
+ adapter->hw.dev_spec._82575.eee_disable = (value != 0);
+ igb_init_locked(adapter);
+ IGB_CORE_UNLOCK(adapter);
+ return (0);
+}
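Note the inverted sense: the sysctl value is stored into eee_disable, so writing 1 turns Energy Efficient Ethernet off, and each accepted change reinitializes the interface under the core lock. Assuming the OID is attached as 'eee_disabled' (an assumption), usage would be:

	sysctl dev.igb.0.eee_disabled=1	# disable EEE, port reinitializes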