path: root/freebsd/sys/dev/e1000/if_em.c
Diffstat (limited to 'freebsd/sys/dev/e1000/if_em.c')
-rw-r--r--  freebsd/sys/dev/e1000/if_em.c | 1759
1 file changed, 1097 insertions(+), 662 deletions(-)
diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c
index 05d7fbc2..fa34dd62 100644
--- a/freebsd/sys/dev/e1000/if_em.c
+++ b/freebsd/sys/dev/e1000/if_em.c
@@ -2,7 +2,7 @@
/******************************************************************************
- Copyright (c) 2001-2014, Intel Corporation
+ Copyright (c) 2001-2015, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -34,6 +34,8 @@
******************************************************************************/
/*$FreeBSD$*/
+#include <rtems/bsd/local/opt_em.h>
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
@@ -43,6 +45,10 @@
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#ifdef DDB
+#include <sys/types.h>
+#include <ddb/ddb.h>
+#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
@@ -54,6 +60,7 @@
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -65,6 +72,7 @@
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
@@ -90,14 +98,9 @@
#include "if_em.h"
/*********************************************************************
- * Set this to one to display debug statistics
- *********************************************************************/
-int em_display_debug_stats = 0;
-
-/*********************************************************************
* Driver version:
*********************************************************************/
-char em_driver_version[] = "7.4.2";
+char em_driver_version[] = "7.6.1-k";
/*********************************************************************
* PCI Device ID Table
@@ -185,6 +188,19 @@ static em_vendor_info_t em_vendor_info_array[] =
{ 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
/* required last entry */
{ 0, 0, 0, 0, 0}
};
@@ -207,20 +223,21 @@ static int em_shutdown(device_t);
static int em_suspend(device_t);
static int em_resume(device_t);
#ifdef EM_MULTIQUEUE
-static int em_mq_start(struct ifnet *, struct mbuf *);
-static int em_mq_start_locked(struct ifnet *,
- struct tx_ring *, struct mbuf *);
-static void em_qflush(struct ifnet *);
+static int em_mq_start(if_t, struct mbuf *);
+static int em_mq_start_locked(if_t,
+ struct tx_ring *);
+static void em_qflush(if_t);
#else
-static void em_start(struct ifnet *);
-static void em_start_locked(struct ifnet *, struct tx_ring *);
+static void em_start(if_t);
+static void em_start_locked(if_t, struct tx_ring *);
#endif
-static int em_ioctl(struct ifnet *, u_long, caddr_t);
+static int em_ioctl(if_t, u_long, caddr_t);
+static uint64_t em_get_counter(if_t, ift_counter);
static void em_init(void *);
static void em_init_locked(struct adapter *);
static void em_stop(void *);
-static void em_media_status(struct ifnet *, struct ifmediareq *);
-static int em_media_change(struct ifnet *);
+static void em_media_status(if_t, struct ifmediareq *);
+static int em_media_change(if_t);
static void em_identify_hardware(struct adapter *);
static int em_allocate_pci_resources(struct adapter *);
static int em_allocate_legacy(struct adapter *);
@@ -231,6 +248,7 @@ static void em_free_pci_resources(struct adapter *);
static void em_local_timer(void *);
static void em_reset(struct adapter *);
static int em_setup_interface(device_t, struct adapter *);
+static void em_flush_desc_rings(struct adapter *);
static void em_setup_transmit_structures(struct adapter *);
static void em_initialize_transmit_unit(struct adapter *);
@@ -253,7 +271,9 @@ static bool em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int em_fixup_rx(struct rx_ring *);
#endif
-static void em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
+static void em_setup_rxdesc(union e1000_rx_desc_extended *,
+ const struct em_rxbuffer *rxbuf);
+static void em_receive_checksum(uint32_t status, struct mbuf *);
static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
struct ip *, u32 *, u32 *);
static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
@@ -263,8 +283,8 @@ static void em_disable_promisc(struct adapter *);
static void em_set_multi(struct adapter *);
static void em_update_link_status(struct adapter *);
static void em_refresh_mbufs(struct rx_ring *, int);
-static void em_register_vlan(void *, struct ifnet *, u16);
-static void em_unregister_vlan(void *, struct ifnet *, u16);
+static void em_register_vlan(void *, if_t, u16);
+static void em_unregister_vlan(void *, if_t, u16);
static void em_setup_vlan_hw_support(struct adapter *);
static int em_xmit(struct tx_ring *, struct mbuf **);
static int em_dma_malloc(struct adapter *, bus_size_t,
@@ -299,6 +319,10 @@ static void em_handle_tx(void *context, int pending);
static void em_handle_rx(void *context, int pending);
static void em_handle_link(void *context, int pending);
+#ifdef EM_MULTIQUEUE
+static void em_enable_vectors_82574(struct adapter *);
+#endif
+
static void em_set_sysctl_value(struct adapter *, const char *,
const char *, int *, int);
static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
@@ -333,6 +357,9 @@ devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(em, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.
@@ -350,12 +377,16 @@ MODULE_DEPEND(em, ether, 1, 1, 1);
#define CSUM_TSO 0
#endif
+#define TSO_WORKAROUND 4
+
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
+static int em_disable_crc_stripping = 0;
+SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
+ &em_disable_crc_stripping, 0, "Disable CRC Stripping");
+
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
-TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
-TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
@@ -363,8 +394,6 @@ SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
-TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
-TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
&em_tx_abs_int_delay_dflt, 0,
"Default transmit interrupt delay limit in usecs");
@@ -374,32 +403,39 @@ SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
-TUNABLE_INT("hw.em.rxd", &em_rxd);
-TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
"Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
"Number of transmit descriptors per queue");
static int em_smart_pwr_down = FALSE;
-TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
0, "Set to true to leave smart power down enabled on newer adapters");
/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
-TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
"Show bad packets in promiscuous mode");
static int em_enable_msix = TRUE;
-TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
"Enable MSI-X interrupts");
+#ifdef EM_MULTIQUEUE
+static int em_num_queues = 1;
+SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
+ "82574 only: Number of queues to configure, 0 indicates autoconfigure");
+#endif
+
+/*
+** Global variable to store last used CPU when binding queues
+** to CPUs in em_allocate_msix. Starts at CPU_FIRST and increments when a
+** queue is bound to a cpu.
+*/
+static int em_last_bind_cpu = -1;
+
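A minimal sketch of the round-robin pattern this variable supports, using the same CPU_FIRST/CPU_NEXT/bus_bind_intr primitives that em_allocate_msix applies further down in this patch (the helper itself is hypothetical, not part of the driver):

/* Hypothetical helper: bind an interrupt resource to the next CPU
 * in round-robin order, advancing the driver-global cursor. */
static int
em_next_bind_cpu(device_t dev, struct resource *res)
{
	int cpu;

	if (em_last_bind_cpu < 0)
		em_last_bind_cpu = CPU_FIRST();
	cpu = em_last_bind_cpu;
	bus_bind_intr(dev, res, cpu);
	em_last_bind_cpu = CPU_NEXT(cpu);
	return (cpu);
}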
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
-TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
&em_rx_process_limit, 0,
"Maximum number of received packets to process "
@@ -407,7 +443,6 @@ SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
/* Energy efficient ethernet - default to OFF */
static int eee_setting = 1;
-TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
"Enable Energy Efficient Ethernet");
@@ -431,10 +466,10 @@ static int
em_probe(device_t dev)
{
char adapter_name[60];
- u16 pci_vendor_id = 0;
- u16 pci_device_id = 0;
- u16 pci_subvendor_id = 0;
- u16 pci_subdevice_id = 0;
+ uint16_t pci_vendor_id = 0;
+ uint16_t pci_device_id = 0;
+ uint16_t pci_subvendor_id = 0;
+ uint16_t pci_subdevice_id = 0;
em_vendor_info_t *ent;
INIT_DEBUGOUT("em_probe: begin");
@@ -553,14 +588,34 @@ em_attach(device_t dev)
adapter->osdep.flash_bus_space_handle =
rman_get_bushandle(adapter->flash);
}
+ /*
+ ** In the new SPT device flash is not a
+ ** separate BAR, rather it is also in BAR0,
+ ** so use the same tag and an offset handle for the
+ ** FLASH read/write macros in the shared code.
+ */
+ else if (hw->mac.type == e1000_pch_spt) {
+ adapter->osdep.flash_bus_space_tag =
+ adapter->osdep.mem_bus_space_tag;
+ adapter->osdep.flash_bus_space_handle =
+ adapter->osdep.mem_bus_space_handle
+ + E1000_FLASH_BASE_ADDR;
+ }
/* Do Shared Code initialization */
- if (e1000_setup_init_funcs(hw, TRUE)) {
- device_printf(dev, "Setup of Shared code failed\n");
+ error = e1000_setup_init_funcs(hw, TRUE);
+ if (error) {
+ device_printf(dev, "Setup of Shared code failed, error %d\n",
+ error);
error = ENXIO;
goto err_pci;
}
+ /*
+ * Setup MSI/X or MSI if PCI Express
+ */
+ adapter->msix = em_setup_msix(adapter);
+
e1000_get_bus_info(hw);
/* Set up some sysctls for the tunable interrupt delays */
@@ -604,7 +659,7 @@ em_attach(device_t dev)
} else
adapter->num_tx_desc = em_txd;
- if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
+ if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
(em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
device_printf(dev, "Using %d RX descriptors instead of %d!\n",
EM_DEFAULT_RXD, em_rxd);
@@ -746,8 +801,7 @@ em_attach(device_t dev)
em_get_hw_control(adapter);
/* Tell the stack that the interface is not active */
- adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
adapter->led_dev = led_create(em_led_func, adapter,
device_get_nameunit(dev));
@@ -763,7 +817,7 @@ err_late:
em_free_transmit_structures(adapter);
em_free_receive_structures(adapter);
em_release_hw_control(adapter);
- if (adapter->ifp != NULL)
+ if (adapter->ifp != (void *)NULL)
if_free(adapter->ifp);
err_pci:
em_free_pci_resources(adapter);
@@ -787,18 +841,18 @@ static int
em_detach(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
INIT_DEBUGOUT("em_detach: begin");
/* Make sure VLANS are not using driver */
- if (adapter->ifp->if_vlantrunk != NULL) {
+ if (if_vlantrunkinuse(ifp)) {
device_printf(dev,"Vlan in use, detach first\n");
return (EBUSY);
}
#ifdef DEVICE_POLLING
- if (ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(ifp) & IFCAP_POLLING)
ether_poll_deregister(ifp);
#endif
@@ -878,7 +932,7 @@ em_resume(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
struct tx_ring *txr = adapter->tx_rings;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_CORE_LOCK(adapter);
if (adapter->hw.mac.type == e1000_pch2lan)
@@ -886,15 +940,15 @@ em_resume(device_t dev)
em_init_locked(adapter);
em_init_manageability(adapter);
- if ((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
+ if ((if_getflags(ifp) & IFF_UP) &&
+ (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
for (int i = 0; i < adapter->num_queues; i++, txr++) {
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -906,7 +960,70 @@ em_resume(device_t dev)
}
-#ifdef EM_MULTIQUEUE
+#ifndef EM_MULTIQUEUE
+static void
+em_start_locked(if_t ifp, struct tx_ring *txr)
+{
+ struct adapter *adapter = if_getsoftc(ifp);
+ struct mbuf *m_head;
+
+ EM_TX_LOCK_ASSERT(txr);
+
+ if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
+ IFF_DRV_RUNNING)
+ return;
+
+ if (!adapter->link_active)
+ return;
+
+ while (!if_sendq_empty(ifp)) {
+ /* Call cleanup if number of TX descriptors low */
+ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
+ em_txeof(txr);
+ if (txr->tx_avail < EM_MAX_SCATTER) {
+ if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
+ break;
+ }
+ m_head = if_dequeue(ifp);
+ if (m_head == NULL)
+ break;
+ /*
+ * Encapsulation can modify our pointer, and or make it
+ * NULL on failure. In that event, we can't requeue.
+ */
+ if (em_xmit(txr, &m_head)) {
+ if (m_head == NULL)
+ break;
+ if_sendq_prepend(ifp, m_head);
+ break;
+ }
+
+ /* Mark the queue as having work */
+ if (txr->busy == EM_TX_IDLE)
+ txr->busy = EM_TX_BUSY;
+
+ /* Send a copy of the frame to the BPF listener */
+ ETHER_BPF_MTAP(ifp, m_head);
+
+ }
+
+ return;
+}
+
+static void
+em_start(if_t ifp)
+{
+ struct adapter *adapter = if_getsoftc(ifp);
+ struct tx_ring *txr = adapter->tx_rings;
+
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
+ EM_TX_LOCK(txr);
+ em_start_locked(ifp, txr);
+ EM_TX_UNLOCK(txr);
+ }
+ return;
+}
+#else /* EM_MULTIQUEUE */
/*********************************************************************
* Multiqueue Transmit routines
*
@@ -915,85 +1032,95 @@ em_resume(device_t dev)
* than do an immediate send. It is this that is an advantage
* in this driver, rather than also having multiple tx queues.
**********************************************************************/
+/*
+** Multiqueue capable stack interface
+*/
+static int
+em_mq_start(if_t ifp, struct mbuf *m)
+{
+ struct adapter *adapter = if_getsoftc(ifp);
+ struct tx_ring *txr = adapter->tx_rings;
+ unsigned int i, error;
+
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ i = m->m_pkthdr.flowid % adapter->num_queues;
+ else
+ i = curcpu % adapter->num_queues;
+
+ txr = &adapter->tx_rings[i];
+
+ error = drbr_enqueue(ifp, txr->br, m);
+ if (error)
+ return (error);
+
+ if (EM_TX_TRYLOCK(txr)) {
+ em_mq_start_locked(ifp, txr);
+ EM_TX_UNLOCK(txr);
+ } else
+ taskqueue_enqueue(txr->tq, &txr->tx_task);
+
+ return (0);
+}
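The ring selection above boils down to a one-line mapping: a packet carrying a valid flow hash stays on one ring, which keeps a TCP flow ordered, while unhashed traffic is spread by the submitting CPU. As a standalone illustration (hypothetical helper, not part of the driver):

/* Hypothetical restatement of em_mq_start()'s ring selection. */
static inline unsigned int
em_pick_txq(uint32_t flowid, bool have_hash, unsigned int cpu,
    unsigned int nqueues)
{
	return ((have_hash ? flowid : cpu) % nqueues);
}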
+
static int
-em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
+em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
struct mbuf *next;
int err = 0, enq = 0;
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING || adapter->link_active == 0) {
- if (m != NULL)
- err = drbr_enqueue(ifp, txr->br, m);
- return (err);
- }
+ EM_TX_LOCK_ASSERT(txr);
- enq = 0;
- if (m != NULL) {
- err = drbr_enqueue(ifp, txr->br, m);
- if (err)
- return (err);
- }
+ if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
+ adapter->link_active == 0) {
+ return (ENETDOWN);
+ }
/* Process the queue */
while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = em_xmit(txr, &next)) != 0) {
- if (next == NULL)
+ if (next == NULL) {
+ /* It was freed, move forward */
drbr_advance(ifp, txr->br);
- else
+ } else {
+ /*
+ * Still have one left, it may not be
+ * the same since the transmit function
+ * may have changed it.
+ */
drbr_putback(ifp, txr->br, next);
+ }
break;
}
drbr_advance(ifp, txr->br);
enq++;
- ifp->if_obytes += next->m_pkthdr.len;
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
if (next->m_flags & M_MCAST)
- ifp->if_omcasts++;
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
ETHER_BPF_MTAP(ifp, next);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
break;
}
- if (enq > 0) {
- /* Set the watchdog */
- txr->queue_status = EM_QUEUE_WORKING;
- txr->watchdog_time = ticks;
- }
+ /* Mark the queue as having work */
+ if ((enq > 0) && (txr->busy == EM_TX_IDLE))
+ txr->busy = EM_TX_BUSY;
if (txr->tx_avail < EM_MAX_SCATTER)
em_txeof(txr);
- if (txr->tx_avail < EM_MAX_SCATTER)
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if (txr->tx_avail < EM_MAX_SCATTER) {
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
+ }
return (err);
}
/*
-** Multiqueue capable stack interface
-*/
-static int
-em_mq_start(struct ifnet *ifp, struct mbuf *m)
-{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = adapter->tx_rings;
- int error;
-
- if (EM_TX_TRYLOCK(txr)) {
- error = em_mq_start_locked(ifp, txr, m);
- EM_TX_UNLOCK(txr);
- } else
- error = drbr_enqueue(ifp, txr->br, m);
-
- return (error);
-}
-
-/*
** Flush all ring buffers
*/
static void
-em_qflush(struct ifnet *ifp)
+em_qflush(if_t ifp)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct tx_ring *txr = adapter->tx_rings;
struct mbuf *m;
@@ -1005,69 +1132,6 @@ em_qflush(struct ifnet *ifp)
}
if_qflush(ifp);
}
-#else /* !EM_MULTIQUEUE */
-
-static void
-em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
-{
- struct adapter *adapter = ifp->if_softc;
- struct mbuf *m_head;
-
- EM_TX_LOCK_ASSERT(txr);
-
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING)
- return;
-
- if (!adapter->link_active)
- return;
-
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- /* Call cleanup if number of TX descriptors low */
- if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
- em_txeof(txr);
- if (txr->tx_avail < EM_MAX_SCATTER) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- break;
- }
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
- if (m_head == NULL)
- break;
- /*
- * Encapsulation can modify our pointer, and or make it
- * NULL on failure. In that event, we can't requeue.
- */
- if (em_xmit(txr, &m_head)) {
- if (m_head == NULL)
- break;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- break;
- }
-
- /* Send a copy of the frame to the BPF listener */
- ETHER_BPF_MTAP(ifp, m_head);
-
- /* Set timeout in case hardware has problems transmitting. */
- txr->watchdog_time = ticks;
- txr->queue_status = EM_QUEUE_WORKING;
- }
-
- return;
-}
-
-static void
-em_start(struct ifnet *ifp)
-{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = adapter->tx_rings;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- EM_TX_LOCK(txr);
- em_start_locked(ifp, txr);
- EM_TX_UNLOCK(txr);
- }
- return;
-}
#endif /* EM_MULTIQUEUE */
/*********************************************************************
@@ -1080,9 +1144,9 @@ em_start(struct ifnet *ifp)
**********************************************************************/
static int
-em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+em_ioctl(if_t ifp, u_long command, caddr_t data)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
struct ifaddr *ifa = (struct ifaddr *)data;
@@ -1108,11 +1172,11 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
** so we avoid doing it when possible.
*/
if (avoid_reset) {
- ifp->if_flags |= IFF_UP;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if_setflagbits(ifp,IFF_UP,0);
+ if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
em_init(adapter);
#ifdef INET
- if (!(ifp->if_flags & IFF_NOARP))
+ if (!(if_getflags(ifp) & IFF_NOARP))
arp_ifinit(ifp, ifa);
#endif
} else
@@ -1132,6 +1196,7 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
case e1000_ich10lan:
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
case e1000_82574:
case e1000_82583:
case e1000_80003es2lan: /* 9K Jumbo Frame size */
@@ -1154,10 +1219,11 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
break;
}
- ifp->if_mtu = ifr->ifr_mtu;
+ if_setmtu(ifp, ifr->ifr_mtu);
adapter->hw.mac.max_frame_size =
- ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
- em_init_locked(adapter);
+ if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
+ em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
break;
}
@@ -1165,9 +1231,9 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
IOCTL_DEBUGOUT("ioctl rcv'd:\
SIOCSIFFLAGS (Set Interface Flags)");
EM_CORE_LOCK(adapter);
- if (ifp->if_flags & IFF_UP) {
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
- if ((ifp->if_flags ^ adapter->if_flags) &
+ if (if_getflags(ifp) & IFF_UP) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
+ if ((if_getflags(ifp) ^ adapter->if_flags) &
(IFF_PROMISC | IFF_ALLMULTI)) {
em_disable_promisc(adapter);
em_set_promisc(adapter);
@@ -1175,20 +1241,20 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
} else
em_init_locked(adapter);
} else
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
em_stop(adapter);
- adapter->if_flags = ifp->if_flags;
+ adapter->if_flags = if_getflags(ifp);
EM_CORE_UNLOCK(adapter);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
em_set_multi(adapter);
#ifdef DEVICE_POLLING
- if (!(ifp->if_capenable & IFCAP_POLLING))
+ if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
em_enable_intr(adapter);
EM_CORE_UNLOCK(adapter);
@@ -1216,7 +1282,7 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
reinit = 0;
- mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+ mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
if (mask & IFCAP_POLLING) {
if (ifr->ifr_reqcap & IFCAP_POLLING) {
@@ -1225,48 +1291,48 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
return (error);
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
- ifp->if_capenable |= IFCAP_POLLING;
+ if_setcapenablebit(ifp, IFCAP_POLLING, 0);
EM_CORE_UNLOCK(adapter);
} else {
error = ether_poll_deregister(ifp);
/* Enable interrupt even in error case */
EM_CORE_LOCK(adapter);
em_enable_intr(adapter);
- ifp->if_capenable &= ~IFCAP_POLLING;
+ if_setcapenablebit(ifp, 0, IFCAP_POLLING);
EM_CORE_UNLOCK(adapter);
}
}
#endif
if (mask & IFCAP_HWCSUM) {
- ifp->if_capenable ^= IFCAP_HWCSUM;
+ if_togglecapenable(ifp,IFCAP_HWCSUM);
reinit = 1;
}
if (mask & IFCAP_TSO4) {
- ifp->if_capenable ^= IFCAP_TSO4;
+ if_togglecapenable(ifp,IFCAP_TSO4);
reinit = 1;
}
if (mask & IFCAP_VLAN_HWTAGGING) {
- ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+ if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
reinit = 1;
}
if (mask & IFCAP_VLAN_HWFILTER) {
- ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
+ if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
reinit = 1;
}
if (mask & IFCAP_VLAN_HWTSO) {
- ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
+ if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
reinit = 1;
}
if ((mask & IFCAP_WOL) &&
- (ifp->if_capabilities & IFCAP_WOL) != 0) {
+ (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
if (mask & IFCAP_WOL_MCAST)
- ifp->if_capenable ^= IFCAP_WOL_MCAST;
+ if_togglecapenable(ifp, IFCAP_WOL_MCAST);
if (mask & IFCAP_WOL_MAGIC)
- ifp->if_capenable ^= IFCAP_WOL_MAGIC;
+ if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
}
- if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
em_init(adapter);
- VLAN_CAPABILITIES(ifp);
+ if_vlancap(ifp);
break;
}
@@ -1293,7 +1359,7 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
static void
em_init_locked(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
device_t dev = adapter->dev;
INIT_DEBUGOUT("em_init: begin");
@@ -1304,7 +1370,7 @@ em_init_locked(struct adapter *adapter)
callout_stop(&adapter->timer);
/* Get the latest mac address, User can use a LAA */
- bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
+ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
ETHER_ADDR_LEN);
/* Put the address into the Receive Address Array */
@@ -1330,11 +1396,18 @@ em_init_locked(struct adapter *adapter)
E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
/* Set hardware offload abilities */
- ifp->if_hwassist = 0;
- if (ifp->if_capenable & IFCAP_TXCSUM)
- ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
- if (ifp->if_capenable & IFCAP_TSO4)
- ifp->if_hwassist |= CSUM_TSO;
+ if_clearhwassist(ifp);
+ if (if_getcapenable(ifp) & IFCAP_TXCSUM)
+ if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
+ /*
+ ** There have proven to be problems with TSO when not
+ ** at full gigabit speed, so disable the assist automatically
+ ** when at lower speeds. -jfv
+ */
+ if (if_getcapenable(ifp) & IFCAP_TSO4) {
+ if (adapter->link_speed == SPEED_1000)
+ if_sethwassistbits(ifp, CSUM_TSO, 0);
+ }
/* Configure for OS presence */
em_init_manageability(adapter);
@@ -1366,8 +1439,8 @@ em_init_locked(struct adapter *adapter)
em_initialize_receive_unit(adapter);
/* Use real VLAN Filter support? */
- if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
/* Use real VLAN Filter support */
em_setup_vlan_hw_support(adapter);
else {
@@ -1382,8 +1455,7 @@ em_init_locked(struct adapter *adapter)
em_set_promisc(adapter);
/* Set the interface as ACTIVE */
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
e1000_clear_hw_cntrs_base_generic(&adapter->hw);
@@ -1403,7 +1475,7 @@ em_init_locked(struct adapter *adapter)
* Only enable interrupts if we are not polling, make sure
* they are off otherwise.
*/
- if (ifp->if_capenable & IFCAP_POLLING)
+ if (if_getcapenable(ifp) & IFCAP_POLLING)
em_disable_intr(adapter);
else
#endif /* DEVICE_POLLING */
@@ -1432,16 +1504,16 @@ em_init(void *arg)
*
*********************************************************************/
static int
-em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
+em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
u32 reg_icr;
int rx_done;
EM_CORE_LOCK(adapter);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
EM_CORE_UNLOCK(adapter);
return (0);
}
@@ -1464,9 +1536,9 @@ em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -1485,7 +1557,7 @@ static int
em_irq_fast(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp;
+ if_t ifp;
u32 reg_icr;
ifp = adapter->ifp;
@@ -1527,20 +1599,20 @@ static void
em_handle_que(void *context, int pending)
{
struct adapter *adapter = context;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
+
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -1565,18 +1637,19 @@ em_msix_tx(void *arg)
{
struct tx_ring *txr = arg;
struct adapter *adapter = txr->adapter;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
++txr->tx_irq;
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
+
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
EM_TX_UNLOCK(txr);
@@ -1597,14 +1670,15 @@ em_msix_rx(void *arg)
bool more;
++rxr->rx_irq;
- if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
return;
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
return;
}
@@ -1622,12 +1696,25 @@ em_msix_link(void *arg)
++adapter->link_irq;
reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
+ if (reg_icr & E1000_ICR_RXO)
+ adapter->rx_overruns++;
+
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
adapter->hw.mac.get_link_status = 1;
em_handle_link(adapter, 0);
} else
E1000_WRITE_REG(&adapter->hw, E1000_IMS,
EM_MSIX_LINK | E1000_IMS_LSC);
+ /*
+ ** Because we must read the ICR for this interrupt,
+ ** it may clear other causes using autoclear; for
+ ** this reason we simply create a soft interrupt
+ ** for all these vectors.
+ */
+ if (reg_icr) {
+ E1000_WRITE_REG(&adapter->hw,
+ E1000_ICS, adapter->ims);
+ }
return;
}
@@ -1641,9 +1728,10 @@ em_handle_rx(void *context, int pending)
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
}
static void
@@ -1651,15 +1739,15 @@ em_handle_tx(void *context, int pending)
{
struct tx_ring *txr = context;
struct adapter *adapter = txr->adapter;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
@@ -1671,9 +1759,9 @@ em_handle_link(void *context, int pending)
{
struct adapter *adapter = context;
struct tx_ring *txr = adapter->tx_rings;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
return;
EM_CORE_LOCK(adapter);
@@ -1687,9 +1775,9 @@ em_handle_link(void *context, int pending)
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
- if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+ if (!if_sendq_empty(ifp))
em_start_locked(ifp, txr);
#endif
EM_TX_UNLOCK(txr);
@@ -1708,9 +1796,9 @@ em_handle_link(void *context, int pending)
*
**********************************************************************/
static void
-em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+em_media_status(if_t ifp, struct ifmediareq *ifmr)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u_char fiber_type = IFM_1000_SX;
INIT_DEBUGOUT("em_media_status: begin");
@@ -1760,9 +1848,9 @@ em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
*
**********************************************************************/
static int
-em_media_change(struct ifnet *ifp)
+em_media_change(if_t ifp)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
struct ifmedia *ifm = &adapter->media;
INIT_DEBUGOUT("em_media_change: begin");
@@ -1821,20 +1909,21 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
struct adapter *adapter = txr->adapter;
bus_dma_segment_t segs[EM_MAX_SCATTER];
bus_dmamap_t map;
- struct em_buffer *tx_buffer, *tx_buffer_mapped;
+ struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
struct e1000_tx_desc *ctxd = NULL;
struct mbuf *m_head;
struct ether_header *eh;
struct ip *ip = NULL;
struct tcphdr *tp = NULL;
- u32 txd_upper, txd_lower, txd_used, txd_saved;
+ u32 txd_upper = 0, txd_lower = 0;
int ip_off, poff;
int nsegs, i, j, first, last = 0;
- int error, do_tso, tso_desc = 0, remap = 1;
+ int error;
+ bool do_tso, tso_desc, remap = TRUE;
m_head = *m_headp;
- txd_upper = txd_lower = txd_used = txd_saved = 0;
- do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
+ do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
+ tso_desc = FALSE;
ip_off = poff = 0;
/*
@@ -1849,7 +1938,7 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
* so we firstly get a writable mbuf chain then coalesce ethernet/
* IP/TCP header into a single buffer to meet the requirement of
* controller. This also simplifies IP/TCP/UDP checksum offloading
- * which also has similiar restrictions.
+ * which also has similar restrictions.
*/
if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
if (do_tso || (m_head->m_next != NULL &&
@@ -1870,74 +1959,82 @@ em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
* for IPv6 yet.
*/
ip_off = sizeof(struct ether_header);
- m_head = m_pullup(m_head, ip_off);
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (m_head->m_len < ip_off) {
+ m_head = m_pullup(m_head, ip_off);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
eh = mtod(m_head, struct ether_header *);
if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
ip_off = sizeof(struct ether_vlan_header);
- m_head = m_pullup(m_head, ip_off);
+ if (m_head->m_len < ip_off) {
+ m_head = m_pullup(m_head, ip_off);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ }
+ }
+ if (m_head->m_len < ip_off + sizeof(struct ip)) {
+ m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
if (m_head == NULL) {
*m_headp = NULL;
return (ENOBUFS);
}
}
- m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
- }
ip = (struct ip *)(mtod(m_head, char *) + ip_off);
poff = ip_off + (ip->ip_hl << 2);
- if (do_tso) {
- m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+
+ if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
+ if (m_head->m_len < poff + sizeof(struct tcphdr)) {
+ m_head = m_pullup(m_head, poff +
+ sizeof(struct tcphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
/*
* TSO workaround:
* pull 4 more bytes of data into it.
*/
- m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (m_head->m_len < poff + (tp->th_off << 2)) {
+ m_head = m_pullup(m_head, poff +
+ (tp->th_off << 2) +
+ TSO_WORKAROUND);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
ip = (struct ip *)(mtod(m_head, char *) + ip_off);
- ip->ip_len = 0;
- ip->ip_sum = 0;
- /*
- * The pseudo TCP checksum does not include TCP payload
- * length so driver should recompute the checksum here
- * what hardware expect to see. This is adherence of
- * Microsoft's Large Send specification.
- */
tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
- tp->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr, htons(IPPROTO_TCP));
- } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
- m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
- }
- tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
- m_head = m_pullup(m_head, poff + (tp->th_off << 2));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (do_tso) {
+ ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
+ (ip->ip_hl << 2) +
+ (tp->th_off << 2));
+ ip->ip_sum = 0;
+ /*
+ * The pseudo TCP checksum does not include the TCP
+ * payload length, so the driver recomputes here the
+ * checksum that the hardware expects to see. This
+ * follows Microsoft's Large Send specification.
+ */
+ tp->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
}
- ip = (struct ip *)(mtod(m_head, char *) + ip_off);
- tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
- m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
- if (m_head == NULL) {
- *m_headp = NULL;
- return (ENOBUFS);
+ if (m_head->m_len < poff + sizeof(struct udphdr)) {
+ m_head = m_pullup(m_head, poff +
+ sizeof(struct udphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
}
ip = (struct ip *)(mtod(m_head, char *) + ip_off);
}
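For reference, in_pseudo() used above produces a ones-complement sum of its three arguments with the carries folded back in; the hardware later adds the per-segment payload length that it generates. A user-space sketch of the same arithmetic (illustrative only, not the kernel implementation):

#include <stdint.h>

/* Ones-complement sum of src addr, dst addr and htons(proto),
 * carries folded to 16 bits -- the pseudo-header seed that the
 * hardware completes once it knows each segment's payload length. */
static uint16_t
pseudo_hdr_sum(uint32_t src, uint32_t dst, uint32_t proto_be)
{
	uint64_t sum = (uint64_t)src + dst + proto_be;

	sum = (sum & 0xffffffff) + (sum >> 32);	/* fold 64 -> 32 */
	sum = (sum & 0xffff) + (sum >> 16);	/* fold 32 -> 16 */
	sum = (sum & 0xffff) + (sum >> 16);	/* absorb last carry */
	return ((uint16_t)sum);
}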
@@ -1973,9 +2070,9 @@ retry:
if (error == EFBIG && remap) {
struct mbuf *m;
- m = m_defrag(*m_headp, M_NOWAIT);
+ m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
if (m == NULL) {
- adapter->mbuf_alloc_failed++;
+ adapter->mbuf_defrag_failed++;
m_freem(*m_headp);
*m_headp = NULL;
return (ENOBUFS);
@@ -1983,11 +2080,8 @@ retry:
*m_headp = m;
/* Try it again, but only once */
- remap = 0;
+ remap = FALSE;
goto retry;
- } else if (error == ENOMEM) {
- adapter->no_tx_dma_setup++;
- return (error);
} else if (error != 0) {
adapter->no_tx_dma_setup++;
m_freem(*m_headp);
@@ -2001,13 +2095,13 @@ retry:
* it follows a TSO burst, then we need to add a
* sentinel descriptor to prevent premature writeback.
*/
- if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
+ if ((!do_tso) && (txr->tx_tso == TRUE)) {
if (nsegs == 1)
tso_desc = TRUE;
txr->tx_tso = FALSE;
}
- if (nsegs > (txr->tx_avail - 2)) {
+ if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
txr->no_desc_avail++;
bus_dmamap_unload(txr->txtag, map);
return (ENOBUFS);
@@ -2026,8 +2120,7 @@ retry:
if (m_head->m_flags & M_VLANTAG) {
/* Set the vlan id. */
- txd_upper |=
- (htole16(m_head->m_pkthdr.ether_vtag) << 16);
+ txd_upper |= htole16(if_getvtag(m_head)) << 16;
/* Tell hardware to add tag */
txd_lower |= htole32(E1000_TXD_CMD_VLE);
}
@@ -2048,23 +2141,23 @@ retry:
** If this is the last descriptor, we want to
** split it so we have a small final sentinel
*/
- if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
- seg_len -= 4;
+ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
+ seg_len -= TSO_WORKAROUND;
ctxd->buffer_addr = htole64(seg_addr);
ctxd->lower.data = htole32(
- adapter->txd_cmd | txd_lower | seg_len);
- ctxd->upper.data =
- htole32(txd_upper);
+ adapter->txd_cmd | txd_lower | seg_len);
+ ctxd->upper.data = htole32(txd_upper);
if (++i == adapter->num_tx_desc)
i = 0;
+
/* Now make the sentinel */
- ++txd_used; /* using an extra txd */
+ txr->tx_avail--;
ctxd = &txr->tx_base[i];
tx_buffer = &txr->tx_buffers[i];
ctxd->buffer_addr =
htole64(seg_addr + seg_len);
ctxd->lower.data = htole32(
- adapter->txd_cmd | txd_lower | 4);
+ adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
ctxd->upper.data =
htole32(txd_upper);
last = i;
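The split performed above can be stated compactly: the last segment is shortened by TSO_WORKAROUND (4) bytes and those bytes are emitted as a tiny trailing descriptor, so the final write-back cannot complete prematurely. A hypothetical helper showing just the arithmetic:

struct em_seg { uint64_t addr; uint32_t len; };

/* Split the final TSO segment (len > 8 assumed, as checked above). */
static void
em_split_sentinel(uint64_t addr, uint32_t len, struct em_seg out[2])
{
	out[0].addr = addr;
	out[0].len = len - TSO_WORKAROUND;
	out[1].addr = addr + out[0].len;	/* the 4-byte sentinel */
	out[1].len = TSO_WORKAROUND;
}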
@@ -2074,8 +2167,7 @@ retry:
ctxd->buffer_addr = htole64(seg_addr);
ctxd->lower.data = htole32(
adapter->txd_cmd | txd_lower | seg_len);
- ctxd->upper.data =
- htole32(txd_upper);
+ ctxd->upper.data = htole32(txd_upper);
last = i;
if (++i == adapter->num_tx_desc)
i = 0;
@@ -2086,8 +2178,6 @@ retry:
txr->next_avail_desc = i;
txr->tx_avail -= nsegs;
- if (tso_desc) /* TSO used an extra for sentinel */
- txr->tx_avail -= txd_used;
tx_buffer->m_head = m_head;
/*
@@ -2113,8 +2203,6 @@ retry:
*/
tx_buffer = &txr->tx_buffers[first];
tx_buffer->next_eop = last;
- /* Update the watchdog time early and often */
- txr->watchdog_time = ticks;
/*
* Advance the Transmit Descriptor Tail (TDT), this tells the E1000
@@ -2130,18 +2218,18 @@ retry:
static void
em_set_promisc(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 reg_rctl;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
- if (ifp->if_flags & IFF_PROMISC) {
+ if (if_getflags(ifp) & IFF_PROMISC) {
reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
/* Turn this on if you want to see bad packets */
if (em_debug_sbp)
reg_rctl |= E1000_RCTL_SBP;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
- } else if (ifp->if_flags & IFF_ALLMULTI) {
+ } else if (if_getflags(ifp) & IFF_ALLMULTI) {
reg_rctl |= E1000_RCTL_MPE;
reg_rctl &= ~E1000_RCTL_UPE;
E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
@@ -2151,34 +2239,16 @@ em_set_promisc(struct adapter *adapter)
static void
em_disable_promisc(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 reg_rctl;
int mcnt = 0;
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl &= (~E1000_RCTL_UPE);
- if (ifp->if_flags & IFF_ALLMULTI)
+ if (if_getflags(ifp) & IFF_ALLMULTI)
mcnt = MAX_NUM_MULTICAST_ADDRESSES;
- else {
- struct ifmultiaddr *ifma;
-#if __FreeBSD_version < 800000
- IF_ADDR_LOCK(ifp);
-#else
- if_maddr_rlock(ifp);
-#endif
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
- if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
- break;
- mcnt++;
- }
-#if __FreeBSD_version < 800000
- IF_ADDR_UNLOCK(ifp);
-#else
- if_maddr_runlock(ifp);
-#endif
- }
+ else
+ mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
/* Don't disable if in MAX groups */
if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
reg_rctl &= (~E1000_RCTL_MPE);
@@ -2197,8 +2267,7 @@ em_disable_promisc(struct adapter *adapter)
static void
em_set_multi(struct adapter *adapter)
{
- struct ifnet *ifp = adapter->ifp;
- struct ifmultiaddr *ifma;
+ if_t ifp = adapter->ifp;
u32 reg_rctl = 0;
u8 *mta; /* Multicast array memory */
int mcnt = 0;
@@ -2218,27 +2287,8 @@ em_set_multi(struct adapter *adapter)
msec_delay(5);
}
-#if __FreeBSD_version < 800000
- IF_ADDR_LOCK(ifp);
-#else
- if_maddr_rlock(ifp);
-#endif
- TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
- if (ifma->ifma_addr->sa_family != AF_LINK)
- continue;
-
- if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
- break;
+ if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
- bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
- &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
- mcnt++;
- }
-#if __FreeBSD_version < 800000
- IF_ADDR_UNLOCK(ifp);
-#else
- if_maddr_runlock(ifp);
-#endif
if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
reg_rctl |= E1000_RCTL_MPE;
@@ -2269,10 +2319,10 @@ static void
em_local_timer(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
- u32 trigger;
+ u32 trigger = 0;
EM_CORE_LOCK_ASSERT(adapter);
@@ -2285,9 +2335,11 @@ em_local_timer(void *arg)
e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
/* Mask to use in the irq trigger */
- if (adapter->msix_mem)
- trigger = rxr->ims;
- else
+ if (adapter->msix_mem) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++)
+ trigger |= rxr->ims;
+ rxr = adapter->rx_rings;
+ } else
trigger = E1000_ICS_RXDMT0;
/*
@@ -2296,15 +2348,15 @@ em_local_timer(void *arg)
** and the HUNG state will be static if set.
*/
for (int i = 0; i < adapter->num_queues; i++, txr++) {
- if ((txr->queue_status == EM_QUEUE_HUNG) &&
- (adapter->pause_frames == 0))
+ if (txr->busy == EM_TX_HUNG)
goto hung;
+ if (txr->busy >= EM_TX_MAXTRIES)
+ txr->busy = EM_TX_HUNG;
/* Schedule a TX tasklet if needed */
if (txr->tx_avail <= EM_MAX_SCATTER)
taskqueue_enqueue(txr->tq, &txr->tx_task);
}
- adapter->pause_frames = 0;
callout_reset(&adapter->timer, hz, em_local_timer, adapter);
#ifndef DEVICE_POLLING
/* Trigger an RX interrupt to guarantee mbuf refresh */
@@ -2313,17 +2365,11 @@ em_local_timer(void *arg)
return;
hung:
/* Looks like we're hung */
- device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
- device_printf(adapter->dev,
- "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
- E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
- device_printf(adapter->dev,"TX(%d) desc avail = %d,"
- "Next TX to Clean = %d\n",
- txr->me, txr->tx_avail, txr->next_to_clean);
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
+ txr->me);
+ em_print_debug_info(adapter);
+ if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
adapter->watchdog_events++;
- adapter->pause_frames = 0;
em_init_locked(adapter);
}
@@ -2332,7 +2378,7 @@ static void
em_update_link_status(struct adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
device_t dev = adapter->dev;
struct tx_ring *txr = adapter->tx_rings;
u32 link_check = 0;
@@ -2341,6 +2387,8 @@ em_update_link_status(struct adapter *adapter)
switch (hw->phy.media_type) {
case e1000_media_type_copper:
if (hw->mac.get_link_status) {
+ if (hw->mac.type == e1000_pch_spt)
+ msec_delay(50);
/* Do the work to read phy */
e1000_check_for_link(hw);
link_check = !hw->mac.get_link_status;
@@ -2373,7 +2421,7 @@ em_update_link_status(struct adapter *adapter)
(hw->mac.type == e1000_82572))) {
int tarc0;
tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
- tarc0 &= ~SPEED_MODE_BIT;
+ tarc0 &= ~TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
}
if (bootverbose)
@@ -2383,17 +2431,18 @@ em_update_link_status(struct adapter *adapter)
"Full Duplex" : "Half Duplex"));
adapter->link_active = 1;
adapter->smartspeed = 0;
- ifp->if_baudrate = adapter->link_speed * 1000000;
+ if_setbaudrate(ifp, adapter->link_speed * 1000000);
if_link_state_change(ifp, LINK_STATE_UP);
} else if (!link_check && (adapter->link_active == 1)) {
- ifp->if_baudrate = adapter->link_speed = 0;
+ if_setbaudrate(ifp, 0);
+ adapter->link_speed = 0;
adapter->link_duplex = 0;
if (bootverbose)
device_printf(dev, "Link is Down\n");
adapter->link_active = 0;
- /* Link down, disable watchdog */
+ /* Link down, disable hang detection */
for (int i = 0; i < adapter->num_queues; i++, txr++)
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
if_link_state_change(ifp, LINK_STATE_DOWN);
}
}
@@ -2411,7 +2460,7 @@ static void
em_stop(void *arg)
{
struct adapter *adapter = arg;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
EM_CORE_LOCK_ASSERT(adapter);
@@ -2422,16 +2471,19 @@ em_stop(void *arg)
callout_stop(&adapter->timer);
/* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
- /* Unarm watchdog timer. */
+ /* Disarm Hang Detection. */
for (int i = 0; i < adapter->num_queues; i++, txr++) {
EM_TX_LOCK(txr);
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
EM_TX_UNLOCK(txr);
}
+ /* I219 needs some special flushing to avoid hangs */
+ if (adapter->hw.mac.type == e1000_pch_spt)
+ em_flush_desc_rings(adapter);
+
e1000_reset_hw(&adapter->hw);
E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
@@ -2489,14 +2541,6 @@ em_allocate_pci_resources(struct adapter *adapter)
rman_get_bushandle(adapter->memory);
adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
- /* Default to a single queue */
- adapter->num_queues = 1;
-
- /*
- * Setup MSI/X or MSI if PCI Express
- */
- adapter->msix = em_setup_msix(adapter);
-
adapter->hw.back = &adapter->osdep;
return (0);
@@ -2560,7 +2604,7 @@ em_allocate_legacy(struct adapter *adapter)
*
* Setup the MSIX Interrupt handlers
* This is not really Multiqueue, rather
- * its just seperate interrupt vectors
+ * its just separate interrupt vectors
* for TX, RX, and Link.
*
**********************************************************************/
@@ -2571,13 +2615,14 @@ em_allocate_msix(struct adapter *adapter)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
int error, rid, vector = 0;
+ int cpu_id = 0;
/* Make sure all interrupts are disabled */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* First set up ring resources */
- for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
/* RX ring */
rid = vector + 1;
@@ -2597,14 +2642,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
+ bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
#endif
- rxr->msix = vector++; /* NOTE increment vector for TX */
+ rxr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, rxr->res, cpu_id);
+
TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
taskqueue_thread_enqueue, &rxr->tq);
- taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 20 and 21
@@ -2612,8 +2663,13 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
rxr->ims = 1 << (20 + i);
+ adapter->ims |= rxr->ims;
adapter->ivars |= (8 | rxr->msix) << (i * 4);
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
+ }
+
+ for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
/* TX ring */
rid = vector + 1;
txr->res = bus_alloc_resource_any(dev,
@@ -2631,14 +2687,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
+ bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
#endif
- txr->msix = vector++; /* Increment vector for next pass */
+ txr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, txr->res, cpu_id);
+
TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
taskqueue_thread_enqueue, &txr->tq);
- taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 22 and 23
@@ -2646,13 +2708,16 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
txr->ims = 1 << (22 + i);
+ adapter->ims |= txr->ims;
adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
+
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
}
/* Link interrupt */
- ++rid;
+ rid = vector + 1;
adapter->res = bus_alloc_resource_any(dev,
- SYS_RES_IRQ, &rid, RF_ACTIVE);
+ SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
if (!adapter->res) {
device_printf(dev,"Unable to allocate "
"bus resource: Link interrupt [%d]\n", rid);
@@ -2668,7 +2733,7 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, adapter->res, adapter->tag, "link");
+ bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
adapter->linkvec = vector;
adapter->ivars |= (8 | vector) << 16;
@@ -2692,9 +2757,8 @@ em_free_pci_resources(struct adapter *adapter)
*/
for (int i = 0; i < adapter->num_queues; i++) {
txr = &adapter->tx_rings[i];
- rxr = &adapter->rx_rings[i];
/* an early abort? */
- if ((txr == NULL) || (rxr == NULL))
+ if (txr == NULL)
break;
rid = txr->msix +1;
if (txr->tag != NULL) {
@@ -2704,6 +2768,11 @@ em_free_pci_resources(struct adapter *adapter)
if (txr->res != NULL)
bus_release_resource(dev, SYS_RES_IRQ,
rid, txr->res);
+
+ rxr = &adapter->rx_rings[i];
+ /* an early abort? */
+ if (rxr == NULL)
+ break;
rid = rxr->msix +1;
if (rxr->tag != NULL) {
bus_teardown_intr(dev, rxr->res, rxr->tag);
@@ -2753,14 +2822,19 @@ em_setup_msix(struct adapter *adapter)
device_t dev = adapter->dev;
int val;
+ /* Nearly always going to use one queue */
+ adapter->num_queues = 1;
+
/*
- ** Setup MSI/X for Hartwell: tests have shown
- ** use of two queues to be unstable, and to
- ** provide no great gain anyway, so we simply
- ** seperate the interrupts and use a single queue.
+ ** Try using MSI-X for Hartwell adapters
*/
if ((adapter->hw.mac.type == e1000_82574) &&
(em_enable_msix == TRUE)) {
+#ifdef EM_MULTIQUEUE
+ adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
+ if (adapter->num_queues > 1)
+ em_enable_vectors_82574(adapter);
+#endif
/* Map the MSIX BAR */
int rid = PCIR_BAR(EM_MSIX_BAR);
adapter->msix_mem = bus_alloc_resource_any(dev,
@@ -2772,16 +2846,34 @@ em_setup_msix(struct adapter *adapter)
goto msi;
}
val = pci_msix_count(dev);
- /* We only need/want 3 vectors */
- if (val >= 3)
- val = 3;
- else {
- device_printf(adapter->dev,
- "MSIX: insufficient vectors, using MSI\n");
- goto msi;
+
+#ifdef EM_MULTIQUEUE
+ /* We need 5 vectors in the multiqueue case */
+ if (adapter->num_queues > 1 ) {
+ if (val >= 5)
+ val = 5;
+ else {
+ adapter->num_queues = 1;
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors for >1 queue, "
+ "using single queue...\n");
+ goto msix_one;
+ }
+ } else {
+msix_one:
+#endif
+ if (val >= 3)
+ val = 3;
+ else {
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors, using MSI\n");
+ goto msi;
+ }
+#ifdef EM_MULTIQUEUE
}
+#endif
- if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
+ if ((pci_alloc_msix(dev, &val) == 0)) {
device_printf(adapter->dev,
"Using MSIX interrupts "
"with %d vectors\n", val);
@@ -2802,7 +2894,7 @@ msi:
}
val = 1;
if (pci_alloc_msi(dev, &val) == 0) {
- device_printf(adapter->dev,"Using an MSI interrupt\n");
+ device_printf(adapter->dev, "Using an MSI interrupt\n");
return (val);
}
/* Should only happen due to manual configuration */
@@ -2811,6 +2903,116 @@ msi:
}
+/*
+** The 3 following flush routines are used as a workaround in the
+** I219 client parts and only for them.
+**
+** em_flush_tx_ring - remove all descriptors from the tx_ring
+**
+** We want to clear all pending descriptors from the TX ring.
+** Zeroing happens when the HW reads the regs. We assign the ring itself as
+** the data of the next descriptor; we don't care about the data, since we
+** are about to reset the HW.
+*/
+static void
+em_flush_tx_ring(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ struct tx_ring *txr = adapter->tx_rings;
+ struct e1000_tx_desc *txd;
+ u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
+ u16 size = 512;
+
+ tctl = E1000_READ_REG(hw, E1000_TCTL);
+ E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
+
+ txd = &txr->tx_base[txr->next_avail_desc++];
+ if (txr->next_avail_desc == adapter->num_tx_desc)
+ txr->next_avail_desc = 0;
+
+ /* Just use the ring as a dummy buffer addr */
+ txd->buffer_addr = txr->txdma.dma_paddr;
+ txd->lower.data = htole32(txd_lower | size);
+ txd->upper.data = 0;
+
+ /* flush descriptors to memory before notifying the HW */
+ wmb();
+
+ E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
+ mb();
+ usec_delay(250);
+}
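The ordering inside em_flush_tx_ring() follows the generic descriptor-doorbell pattern; an annotated restatement of the tail of the function (same names as above, with the rationale spelled out in comments):

/* 1. Fill the dummy descriptor in host memory. */
txd->buffer_addr = txr->txdma.dma_paddr;
txd->lower.data = htole32(E1000_TXD_CMD_IFCS | 512);
txd->upper.data = 0;
/* 2. wmb(): the HW must never fetch a half-written descriptor
 *    once the tail register moves. */
wmb();
/* 3. Writing TDT hands the descriptor to the hardware. */
E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
/* 4. mb() plus a short delay lets the posted write and the HW
 *    fetch finish before the caller resets the device. */
mb();
usec_delay(250);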
+
+/*
+** em_flush_rx_ring - remove all descriptors from the rx_ring
+**
+** Mark all descriptors in the RX ring as consumed and disable the rx ring
+*/
+static void
+em_flush_rx_ring(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 rctl, rxdctl;
+
+ rctl = E1000_READ_REG(hw, E1000_RCTL);
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(150);
+
+ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
+ /* zero the lower 14 bits (prefetch and host thresholds) */
+ rxdctl &= 0xffffc000;
+ /*
+ * update thresholds: prefetch threshold to 31, host threshold to 1
+ * and make sure the granularity is "descriptors" and not "cache lines"
+ */
+ rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+ E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
+
+ /* momentarily enable the RX ring for the changes to take effect */
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
+ E1000_WRITE_FLUSH(hw);
+ usec_delay(150);
+ E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/*
+** em_flush_desc_rings - remove all descriptors from the descriptor rings
+**
+** On the I219, the descriptor rings must be emptied before resetting the HW
+** or before changing the device state to D3 during runtime (runtime PM).
+**
+** Failure to do this will cause the HW to enter a unit hang state which can
+** only be released by a PCI reset of the device.
+*/
+static void
+em_flush_desc_rings(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ device_t dev = adapter->dev;
+ u16 hang_state;
+ u32 fext_nvm11, tdlen;
+
+ /* First, disable MULR fix in FEXTNVM11 */
+ fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
+ fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
+ E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
+
+	/* do nothing if we're not in a faulty state, or if the queue is empty */
+ tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
+ hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
+ if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
+ return;
+ em_flush_tx_ring(adapter);
+
+ /* recheck, maybe the fault is caused by the rx ring */
+ hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
+ if (hang_state & FLUSH_DESC_REQUIRED)
+ em_flush_rx_ring(adapter);
+}
+
+
/*********************************************************************
*
* Initialize the hardware to a configuration
@@ -2821,7 +3023,7 @@ static void
em_reset(struct adapter *adapter)
{
device_t dev = adapter->dev;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct e1000_hw *hw = &adapter->hw;
u16 rx_buffer_size;
u32 pba;
@@ -2872,6 +3074,7 @@ em_reset(struct adapter *adapter)
case e1000_pchlan:
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
pba = E1000_PBA_26K;
break;
default:
@@ -2889,7 +3092,7 @@ em_reset(struct adapter *adapter)
* received after sending an XOFF.
* - Low water mark works best when it is very near the high water mark.
* This allows the receiver to restart by sending XON when it has
- * drained a bit. Here we use an arbitary value of 1500 which will
+ * drained a bit. Here we use an arbitrary value of 1500 which will
* restart after one full frame is pulled from the buffer. There
* could be several smaller frames in the buffer and if so they will
* not trigger the XON until their total number reduces the buffer
@@ -2919,7 +3122,7 @@ em_reset(struct adapter *adapter)
/* Workaround: no TX flow ctrl for PCH */
hw->fc.requested_mode = e1000_fc_rx_pause;
hw->fc.pause_time = 0xFFFF; /* override */
- if (ifp->if_mtu > ETHERMTU) {
+ if (if_getmtu(ifp) > ETHERMTU) {
hw->fc.high_water = 0x3500;
hw->fc.low_water = 0x1500;
} else {
@@ -2930,19 +3133,20 @@ em_reset(struct adapter *adapter)
break;
case e1000_pch2lan:
case e1000_pch_lpt:
+ case e1000_pch_spt:
hw->fc.high_water = 0x5C20;
hw->fc.low_water = 0x5048;
hw->fc.pause_time = 0x0650;
hw->fc.refresh_time = 0x0400;
/* Jumbos need adjusted PBA */
- if (ifp->if_mtu > ETHERMTU)
+ if (if_getmtu(ifp) > ETHERMTU)
E1000_WRITE_REG(hw, E1000_PBA, 12);
else
E1000_WRITE_REG(hw, E1000_PBA, 26);
break;
case e1000_ich9lan:
case e1000_ich10lan:
- if (ifp->if_mtu > ETHERMTU) {
+ if (if_getmtu(ifp) > ETHERMTU) {
hw->fc.high_water = 0x2800;
hw->fc.low_water = hw->fc.high_water - 8;
break;
@@ -2954,6 +3158,10 @@ em_reset(struct adapter *adapter)
break;
}
+ /* I219 needs some special flushing to avoid hangs */
+ if (hw->mac.type == e1000_pch_spt)
+ em_flush_desc_rings(adapter);
+
/* Issue a global reset */
e1000_reset_hw(hw);
E1000_WRITE_REG(hw, E1000_WUC, 0);
@@ -2978,47 +3186,55 @@ em_reset(struct adapter *adapter)
static int
em_setup_interface(device_t dev, struct adapter *adapter)
{
- struct ifnet *ifp;
+ if_t ifp;
INIT_DEBUGOUT("em_setup_interface: begin");
- ifp = adapter->ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
+ ifp = adapter->ifp = if_gethandle(IFT_ETHER);
+	if (ifp == NULL) {
device_printf(dev, "can not allocate ifnet structure\n");
return (-1);
}
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_init = em_init;
- ifp->if_softc = adapter;
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_ioctl = em_ioctl;
+ if_setdev(ifp, dev);
+ if_setinitfn(ifp, em_init);
+ if_setsoftc(ifp, adapter);
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setioctlfn(ifp, em_ioctl);
+ if_setgetcounterfn(ifp, em_get_counter);
+
+ /* TSO parameters */
+ ifp->if_hw_tsomax = IP_MAXPACKET;
+	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
+ ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
+ ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
+
#ifdef EM_MULTIQUEUE
/* Multiqueue stack interface */
- ifp->if_transmit = em_mq_start;
- ifp->if_qflush = em_qflush;
+ if_settransmitfn(ifp, em_mq_start);
+ if_setqflushfn(ifp, em_qflush);
#else
- ifp->if_start = em_start;
- IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
- ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
- IFQ_SET_READY(&ifp->if_snd);
+ if_setstartfn(ifp, em_start);
+ if_setsendqlen(ifp, adapter->num_tx_desc - 1);
+ if_setsendqready(ifp);
#endif
ether_ifattach(ifp, adapter->hw.mac.addr);
- ifp->if_capabilities = ifp->if_capenable = 0;
+ if_setcapabilities(ifp, 0);
+ if_setcapenable(ifp, 0);
- ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
- ifp->if_capabilities |= IFCAP_TSO4;
+ if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
+ IFCAP_TSO4, 0);
/*
* Tell the upper layer(s) we
* support full VLAN capability
*/
- ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
- ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
- | IFCAP_VLAN_HWTSO
- | IFCAP_VLAN_MTU;
- ifp->if_capenable = ifp->if_capabilities;
+ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
+ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
+ IFCAP_VLAN_MTU, 0);
+ if_setcapenable(ifp, if_getcapabilities(ifp));
/*
** Don't turn this on by default, if vlans are
@@ -3028,16 +3244,16 @@ em_setup_interface(device_t dev, struct adapter *adapter)
** using vlans directly on the em driver you can
** enable this and get full hardware tag filtering.
*/
- ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
+	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
#ifdef DEVICE_POLLING
- ifp->if_capabilities |= IFCAP_POLLING;
+	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
#endif
/* Enable only WOL MAGIC by default */
if (adapter->wol) {
- ifp->if_capabilities |= IFCAP_WOL;
- ifp->if_capenable |= IFCAP_WOL_MAGIC;
+ if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
+ if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
}
/*
@@ -3137,7 +3353,6 @@ fail_2:
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
bus_dma_tag_destroy(dma->dma_tag);
fail_0:
- dma->dma_map = NULL;
dma->dma_tag = NULL;
return (error);
@@ -3148,12 +3363,15 @@ em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
if (dma->dma_tag == NULL)
return;
- if (dma->dma_map != NULL) {
+ if (dma->dma_paddr != 0) {
bus_dmamap_sync(dma->dma_tag, dma->dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(dma->dma_tag, dma->dma_map);
+ dma->dma_paddr = 0;
+ }
+ if (dma->dma_vaddr != NULL) {
bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
- dma->dma_map = NULL;
+ dma->dma_vaddr = NULL;
}
bus_dma_tag_destroy(dma->dma_tag);
dma->dma_tag = NULL;
@@ -3239,7 +3457,7 @@ em_allocate_queues(struct adapter *adapter)
* Next the RX queues...
*/
rsize = roundup2(adapter->num_rx_desc *
- sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
+ sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
rxr = &adapter->rx_rings[i];
rxr->adapter = adapter;
@@ -3257,7 +3475,7 @@ em_allocate_queues(struct adapter *adapter)
error = ENOMEM;
goto err_rx_desc;
}
- rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
+ rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
bzero((void *)rxr->rx_base, rsize);
/* Allocate receive buffers for the ring*/
@@ -3300,7 +3518,7 @@ em_allocate_transmit_buffers(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
device_t dev = adapter->dev;
- struct em_buffer *txbuf;
+ struct em_txbuffer *txbuf;
int error, i;
/*
@@ -3323,7 +3541,7 @@ em_allocate_transmit_buffers(struct tx_ring *txr)
}
if (!(txr->tx_buffers =
- (struct em_buffer *) malloc(sizeof(struct em_buffer) *
+ (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
error = ENOMEM;
@@ -3356,11 +3574,11 @@ static void
em_setup_transmit_ring(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
- struct em_buffer *txbuf;
+ struct em_txbuffer *txbuf;
int i;
#ifdef DEV_NETMAP
- struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_slot *slot;
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif /* DEV_NETMAP */
/* Clear the old descriptor contents */
@@ -3391,10 +3609,10 @@ em_setup_transmit_ring(struct tx_ring *txr)
uint64_t paddr;
void *addr;
- addr = PNMB(slot + si, &paddr);
+ addr = PNMB(na, slot + si, &paddr);
txr->tx_base[i].buffer_addr = htole64(paddr);
/* reload the map for netmap mode */
- netmap_load_map(txr->txtag, txbuf->map, addr);
+ netmap_load_map(na, txr->txtag, txbuf->map, addr);
}
#endif /* DEV_NETMAP */
@@ -3404,7 +3622,7 @@ em_setup_transmit_ring(struct tx_ring *txr)
/* Set number of descriptors available */
txr->tx_avail = adapter->num_tx_desc;
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
/* Clear checksum offload context. */
txr->last_hw_offload = 0;
@@ -3444,7 +3662,7 @@ em_initialize_transmit_unit(struct adapter *adapter)
{
struct tx_ring *txr = adapter->tx_rings;
struct e1000_hw *hw = &adapter->hw;
- u32 tctl, tarc, tipg = 0;
+ u32 tctl, txdctl = 0, tarc, tipg = 0;
INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
@@ -3465,7 +3683,16 @@ em_initialize_transmit_unit(struct adapter *adapter)
E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
+ txdctl = 0; /* clear txdctl */
+ txdctl |= 0x1f; /* PTHRESH */
+ txdctl |= 1 << 8; /* HTHRESH */
+		txdctl |= 1 << 16; /* WTHRESH */
+ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
+ txdctl |= E1000_TXDCTL_GRAN;
+ txdctl |= 1 << 25; /* LWTHRESH */
+
+ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
}
/* Set the default values for the Tx Inter Packet Gap timer */
@@ -3496,15 +3723,25 @@ em_initialize_transmit_unit(struct adapter *adapter)
if ((adapter->hw.mac.type == e1000_82571) ||
(adapter->hw.mac.type == e1000_82572)) {
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
- tarc |= SPEED_MODE_BIT;
+ tarc |= TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
} else if (adapter->hw.mac.type == e1000_80003es2lan) {
+ /* errata: program both queues to unweighted RR */
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else if (adapter->hw.mac.type == e1000_82574) {
+ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
+ tarc |= TARC_ERRATA_BIT;
+		if (adapter->num_queues > 1) {
+ tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
}
adapter->txd_cmd = E1000_TXD_CMD_IFCS;
@@ -3523,6 +3760,15 @@ em_initialize_transmit_unit(struct adapter *adapter)
/* This write will effectively turn on the transmit unit. */
E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
+ if (hw->mac.type == e1000_pch_spt) {
+ u32 reg;
+ reg = E1000_READ_REG(hw, E1000_IOSFPC);
+ reg |= E1000_RCTL_RDMTS_HEX;
+ E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
+ reg = E1000_READ_REG(hw, E1000_TARC(0));
+ reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
+ E1000_WRITE_REG(hw, E1000_TARC(0), reg);
+ }
}
@@ -3556,7 +3802,7 @@ static void
em_free_transmit_buffers(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
- struct em_buffer *txbuf;
+ struct em_txbuffer *txbuf;
INIT_DEBUGOUT("free_transmit_ring: begin");
@@ -3623,7 +3869,7 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
{
struct adapter *adapter = txr->adapter;
struct e1000_context_desc *TXD = NULL;
- struct em_buffer *tx_buffer;
+ struct em_txbuffer *tx_buffer;
int cur, hdr_len;
u32 cmd = 0;
u16 offload = 0;
@@ -3657,29 +3903,38 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
offload |= CSUM_TCP;
tucss = hdr_len;
tucso = hdr_len + offsetof(struct tcphdr, th_sum);
- /*
- * Setting up new checksum offload context for every frames
- * takes a lot of processing time for hardware. This also
- * reduces performance a lot for small sized frames so avoid
- * it if driver can use previously configured checksum
- * offload context.
- */
- if (txr->last_hw_offload == offload) {
- if (offload & CSUM_IP) {
- if (txr->last_hw_ipcss == ipcss &&
- txr->last_hw_ipcso == ipcso &&
- txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
- } else {
- if (txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
- }
- }
- txr->last_hw_offload = offload;
- txr->last_hw_tucss = tucss;
- txr->last_hw_tucso = tucso;
+ /*
+ * The 82574L can only remember the *last* context used
+		 * regardless of the queue it was used for. We cannot reuse
+ * contexts on this hardware platform and must generate a new
+ * context every time. 82574L hardware spec, section 7.2.6,
+ * second note.
+ */
+ if (adapter->num_queues < 2) {
+ /*
+			 * Setting up a new checksum offload context for every
+			 * frame takes a lot of processing time for hardware.
+			 * This also reduces performance a lot for small sized
+			 * frames, so avoid it if the driver can use a previously
+			 * configured checksum offload context.
+ */
+ if (txr->last_hw_offload == offload) {
+ if (offload & CSUM_IP) {
+ if (txr->last_hw_ipcss == ipcss &&
+ txr->last_hw_ipcso == ipcso &&
+ txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ } else {
+ if (txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ }
+ }
+ txr->last_hw_offload = offload;
+ txr->last_hw_tucss = tucss;
+ txr->last_hw_tucso = tucso;
+ }
/*
* Start offset for payload checksum calculation.
* End offset for payload checksum calculation.
@@ -3695,29 +3950,38 @@ em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
tucss = hdr_len;
tucso = hdr_len + offsetof(struct udphdr, uh_sum);
- /*
- * Setting up new checksum offload context for every frames
- * takes a lot of processing time for hardware. This also
- * reduces performance a lot for small sized frames so avoid
- * it if driver can use previously configured checksum
- * offload context.
- */
- if (txr->last_hw_offload == offload) {
- if (offload & CSUM_IP) {
- if (txr->last_hw_ipcss == ipcss &&
- txr->last_hw_ipcso == ipcso &&
- txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
- } else {
- if (txr->last_hw_tucss == tucss &&
- txr->last_hw_tucso == tucso)
- return;
+ /*
+ * The 82574L can only remember the *last* context used
+		 * regardless of the queue it was used for. We cannot reuse
+ * contexts on this hardware platform and must generate a new
+ * context every time. 82574L hardware spec, section 7.2.6,
+ * second note.
+ */
+ if (adapter->num_queues < 2) {
+ /*
+			 * Setting up a new checksum offload context for every
+			 * frame takes a lot of processing time for hardware.
+			 * This also reduces performance a lot for small sized
+			 * frames, so avoid it if the driver can use a previously
+			 * configured checksum offload context.
+ */
+ if (txr->last_hw_offload == offload) {
+ if (offload & CSUM_IP) {
+ if (txr->last_hw_ipcss == ipcss &&
+ txr->last_hw_ipcso == ipcso &&
+ txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ } else {
+ if (txr->last_hw_tucss == tucss &&
+ txr->last_hw_tucso == tucso)
+ return;
+ }
}
- }
- txr->last_hw_offload = offload;
- txr->last_hw_tucss = tucss;
- txr->last_hw_tucso = tucso;
+ txr->last_hw_offload = offload;
+ txr->last_hw_tucss = tucss;
+ txr->last_hw_tucso = tucso;
+ }
/*
* Start offset for header checksum calculation.
* End offset for header checksum calculation.
@@ -3760,7 +4024,7 @@ em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
{
struct adapter *adapter = txr->adapter;
struct e1000_context_desc *TXD;
- struct em_buffer *tx_buffer;
+ struct em_txbuffer *tx_buffer;
int cur, hdr_len;
/*
@@ -3838,9 +4102,9 @@ em_txeof(struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
int first, last, done, processed;
- struct em_buffer *tx_buffer;
+ struct em_txbuffer *tx_buffer;
struct e1000_tx_desc *tx_desc, *eop_desc;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
EM_TX_LOCK_ASSERT(txr);
#ifdef DEV_NETMAP
@@ -3848,9 +4112,9 @@ em_txeof(struct tx_ring *txr)
return;
#endif /* DEV_NETMAP */
- /* No work, make sure watchdog is off */
+ /* No work, make sure hang detection is disabled */
if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = EM_QUEUE_IDLE;
+ txr->busy = EM_TX_IDLE;
return;
}
@@ -3893,7 +4157,6 @@ em_txeof(struct tx_ring *txr)
tx_buffer->m_head = NULL;
}
tx_buffer->next_eop = -1;
- txr->watchdog_time = ticks;
if (++first == adapter->num_tx_desc)
first = 0;
@@ -3901,7 +4164,7 @@ em_txeof(struct tx_ring *txr)
tx_buffer = &txr->tx_buffers[first];
tx_desc = &txr->tx_base[first];
}
- ++ifp->if_opackets;
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
/* See if we can continue to the next packet */
last = tx_buffer->next_eop;
if (last != -1) {
@@ -3918,14 +4181,16 @@ em_txeof(struct tx_ring *txr)
txr->next_to_clean = first;
/*
- ** Watchdog calculation, we know there's
- ** work outstanding or the first return
- ** would have been taken, so none processed
- ** for too long indicates a hang. local timer
- ** will examine this and do a reset if needed.
+ ** Hang detection: we know there's work outstanding
+	** or the early return at entry would have been taken, so no
+ ** descriptor processed here indicates a potential hang.
+ ** The local timer will examine this and do a reset if needed.
*/
- if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
- txr->queue_status = EM_QUEUE_HUNG;
+ if (processed == 0) {
+ if (txr->busy != EM_TX_HUNG)
+ ++txr->busy;
+ } else /* At least one descriptor was cleaned */
+ txr->busy = EM_TX_BUSY; /* note this clears HUNG */
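+	/*
+	 * busy thus counts consecutive clean passes with no progress;
+	 * once it reaches EM_TX_HUNG the local timer treats the queue
+	 * as hung and resets the adapter.
+	 */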
/*
* If we have a minimum free, clear IFF_DRV_OACTIVE
@@ -3934,16 +4199,15 @@ em_txeof(struct tx_ring *txr)
* TX lock which, with a single queue, guarantees
* sanity.
*/
- if (txr->tx_avail >= EM_MAX_SCATTER)
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ if (txr->tx_avail >= EM_MAX_SCATTER) {
+ if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
+ }
- /* Disable watchdog if all clean */
- if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = EM_QUEUE_IDLE;
- }
+ /* Disable hang detection if all clean */
+ if (txr->tx_avail == adapter->num_tx_desc)
+ txr->busy = EM_TX_IDLE;
}
-
/*********************************************************************
*
* Refresh RX descriptor mbufs from system mbuf buffer pool.
@@ -3954,8 +4218,8 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit)
{
struct adapter *adapter = rxr->adapter;
struct mbuf *m;
- bus_dma_segment_t segs[1];
- struct em_buffer *rxbuf;
+ bus_dma_segment_t segs;
+ struct em_rxbuffer *rxbuf;
int i, j, error, nsegs;
bool cleaned = FALSE;
@@ -3990,7 +4254,7 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit)
/* Use bus_dma machinery to setup the memory mapping */
error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
- m, segs, &nsegs, BUS_DMA_NOWAIT);
+ m, &segs, &nsegs, BUS_DMA_NOWAIT);
if (error != 0) {
printf("Refresh mbufs: hdr dmamap load"
" failure - %d\n", error);
@@ -3999,9 +4263,10 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit)
goto update;
}
rxbuf->m_head = m;
+ rxbuf->paddr = segs.ds_addr;
bus_dmamap_sync(rxr->rxtag,
rxbuf->map, BUS_DMASYNC_PREREAD);
- rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
+ em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
cleaned = TRUE;
i = j; /* Next is precalculated for us */
@@ -4036,10 +4301,10 @@ em_allocate_receive_buffers(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
device_t dev = adapter->dev;
- struct em_buffer *rxbuf;
+ struct em_rxbuffer *rxbuf;
int error;
- rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
+ rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
if (rxr->rx_buffers == NULL) {
device_printf(dev, "Unable to allocate rx_buffer memory\n");
@@ -4092,22 +4357,22 @@ static int
em_setup_receive_ring(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
- struct em_buffer *rxbuf;
+ struct em_rxbuffer *rxbuf;
bus_dma_segment_t seg[1];
int rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
- struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_slot *slot;
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
#endif
/* Clear the ring contents */
EM_RX_LOCK(rxr);
rsize = roundup2(adapter->num_rx_desc *
- sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
+ sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
bzero((void *)rxr->rx_base, rsize);
#ifdef DEV_NETMAP
- slot = netmap_reset(na, NR_RX, 0, 0);
+ slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif
/*
@@ -4133,10 +4398,10 @@ em_setup_receive_ring(struct rx_ring *rxr)
uint64_t paddr;
void *addr;
- addr = PNMB(slot + si, &paddr);
- netmap_load_map(rxr->rxtag, rxbuf->map, addr);
- /* Update descriptor */
- rxr->rx_base[j].buffer_addr = htole64(paddr);
+ addr = PNMB(na, slot + si, &paddr);
+ netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
+ rxbuf->paddr = paddr;
+ em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
continue;
}
#endif /* DEV_NETMAP */
@@ -4162,8 +4427,8 @@ em_setup_receive_ring(struct rx_ring *rxr)
bus_dmamap_sync(rxr->rxtag,
rxbuf->map, BUS_DMASYNC_PREREAD);
- /* Update descriptor */
- rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
+ rxbuf->paddr = seg[0].ds_addr;
+ em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
}
rxr->next_to_check = 0;
rxr->next_to_refresh = 0;
@@ -4200,7 +4465,7 @@ fail:
for (int i = 0; i < q; ++i) {
rxr = &adapter->rx_rings[i];
for (int n = 0; n < adapter->num_rx_desc; n++) {
- struct em_buffer *rxbuf;
+ struct em_rxbuffer *rxbuf;
rxbuf = &rxr->rx_buffers[n];
if (rxbuf->m_head != NULL) {
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
@@ -4247,7 +4512,7 @@ static void
em_free_receive_buffers(struct rx_ring *rxr)
{
struct adapter *adapter = rxr->adapter;
- struct em_buffer *rxbuf = NULL;
+ struct em_rxbuffer *rxbuf = NULL;
INIT_DEBUGOUT("free_receive_buffers: begin");
@@ -4289,11 +4554,10 @@ em_free_receive_buffers(struct rx_ring *rxr)
static void
em_initialize_receive_unit(struct adapter *adapter)
{
- struct rx_ring *rxr = adapter->rx_rings;
- struct ifnet *ifp = adapter->ifp;
+ struct rx_ring *rxr = adapter->rx_rings;
+ if_t ifp = adapter->ifp;
struct e1000_hw *hw = &adapter->hw;
- u64 bus_addr;
- u32 rctl, rxcsum;
+ u32 rctl, rxcsum, rfctl;
INIT_DEBUGOUT("em_initialize_receive_units: begin");
@@ -4306,14 +4570,39 @@ em_initialize_receive_unit(struct adapter *adapter)
if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
+ /* Setup the Receive Control Register */
+ rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+ rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
+ E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+ (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+ /* Do not store bad packets */
+ rctl &= ~E1000_RCTL_SBP;
+
+ /* Enable Long Packet receive */
+ if (if_getmtu(ifp) > ETHERMTU)
+ rctl |= E1000_RCTL_LPE;
+ else
+ rctl &= ~E1000_RCTL_LPE;
+
+ /* Strip the CRC */
+ if (!em_disable_crc_stripping)
+ rctl |= E1000_RCTL_SECRC;
+
E1000_WRITE_REG(&adapter->hw, E1000_RADV,
adapter->rx_abs_int_delay.value);
+
+ E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
+ adapter->rx_int_delay.value);
/*
* Set the interrupt throttling rate. Value is calculated
* as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
*/
E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
+ /* Use extended rx descriptor formats */
+ rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+ rfctl |= E1000_RFCTL_EXTEN;
/*
** When using MSIX interrupts we need to throttle
** using the EITR register (82574 only)
@@ -4323,16 +4612,65 @@ em_initialize_receive_unit(struct adapter *adapter)
E1000_WRITE_REG(hw, E1000_EITR_82574(i),
DEFAULT_ITR);
/* Disable accelerated acknowledge */
- E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
+ rfctl |= E1000_RFCTL_ACK_DIS;
}
+ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
- if (ifp->if_capenable & IFCAP_RXCSUM)
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
+#ifdef EM_MULTIQUEUE
+ rxcsum |= E1000_RXCSUM_TUOFL |
+ E1000_RXCSUM_IPOFL |
+ E1000_RXCSUM_PCSD;
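+		/* PCSD replaces the raw packet checksum field in the
+		 * descriptor with the RSS hash; it is required for the
+		 * RSS setup below. */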
+#else
rxcsum |= E1000_RXCSUM_TUOFL;
- else
+#endif
+ } else
rxcsum &= ~E1000_RXCSUM_TUOFL;
+
E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
+#ifdef EM_MULTIQUEUE
+#define RSSKEYLEN 10
+ if (adapter->num_queues > 1) {
+ uint8_t rss_key[4 * RSSKEYLEN];
+ uint32_t reta = 0;
+ int i;
+
+ /*
+ * Configure RSS key
+ */
+ arc4rand(rss_key, sizeof(rss_key), 0);
+ for (i = 0; i < RSSKEYLEN; ++i) {
+ uint32_t rssrk = 0;
+
+ rssrk = EM_RSSRK_VAL(rss_key, i);
+			E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
+ }
+
+ /*
+		 * Configure the RSS redirect table in the following fashion:
+ * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
+ */
+ for (i = 0; i < sizeof(reta); ++i) {
+ uint32_t q;
+
+ q = (i % adapter->num_queues) << 7;
+ reta |= q << (8 * i);
+ }
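+		/*
+		 * Example: with two queues, byte i of reta gets (i % 2) << 7,
+		 * so reta == 0x80008000; the same 32-bit pattern is then
+		 * replicated into all 32 RETA registers below.
+		 */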
+
+ for (i = 0; i < 32; ++i) {
+ E1000_WRITE_REG(hw, E1000_RETA(i), reta);
+ }
+
+ E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
+ E1000_MRQC_RSS_FIELD_IPV4_TCP |
+ E1000_MRQC_RSS_FIELD_IPV4 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
+ E1000_MRQC_RSS_FIELD_IPV6_EX |
+ E1000_MRQC_RSS_FIELD_IPV6);
+ }
+#endif
/*
** XXX TEMPORARY WORKAROUND: on some systems with 82573
** long latencies are observed, like Lenovo X60. This
@@ -4345,11 +4683,11 @@ em_initialize_receive_unit(struct adapter *adapter)
for (int i = 0; i < adapter->num_queues; i++, rxr++) {
/* Setup the Base and Length of the Rx Descriptor Ring */
+ u64 bus_addr = rxr->rxdma.dma_paddr;
u32 rdt = adapter->num_rx_desc - 1; /* default */
- bus_addr = rxr->rxdma.dma_paddr;
E1000_WRITE_REG(hw, E1000_RDLEN(i),
- adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
+ adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
/* Setup the Head and Tail Descriptor Pointers */
@@ -4359,40 +4697,48 @@ em_initialize_receive_unit(struct adapter *adapter)
* an init() while a netmap client is active must
* preserve the rx buffers passed to userspace.
*/
- if (ifp->if_capenable & IFCAP_NETMAP)
- rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
+ if (if_getcapenable(ifp) & IFCAP_NETMAP) {
+ struct netmap_adapter *na = netmap_getna(adapter->ifp);
+ rdt -= nm_kr_rxspace(&na->rx_rings[i]);
+ }
#endif /* DEV_NETMAP */
E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
}
- /* Set PTHRESH for improved jumbo performance */
+ /*
+	 * Set PTHRESH for improved jumbo performance.
+ * According to 10.2.5.11 of Intel 82574 Datasheet,
+ * RXDCTL(1) is written whenever RXDCTL(0) is written.
+ * Only write to RXDCTL(1) if there is a need for different
+ * settings.
+ */
if (((adapter->hw.mac.type == e1000_ich9lan) ||
(adapter->hw.mac.type == e1000_pch2lan) ||
(adapter->hw.mac.type == e1000_ich10lan)) &&
- (ifp->if_mtu > ETHERMTU)) {
+ (if_getmtu(ifp) > ETHERMTU)) {
u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
+ } else if (adapter->hw.mac.type == e1000_82574) {
+ for (int i = 0; i < adapter->num_queues; i++) {
+ u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+
+ rxdctl |= 0x20; /* PTHRESH */
+ rxdctl |= 4 << 8; /* HTHRESH */
+			rxdctl |= 4 << 16; /* WTHRESH */
+ rxdctl |= 1 << 24; /* Switch to granularity */
+ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+ }
}
if (adapter->hw.mac.type >= e1000_pch2lan) {
- if (ifp->if_mtu > ETHERMTU)
+ if (if_getmtu(ifp) > ETHERMTU)
e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
else
e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
}
- /* Setup the Receive Control Register */
- rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
- rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
- E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
- (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
-
- /* Strip the CRC */
- rctl |= E1000_RCTL_SECRC;
-
/* Make sure VLAN Filters are off */
rctl &= ~E1000_RCTL_VFE;
- rctl &= ~E1000_RCTL_SBP;
if (adapter->rx_mbuf_sz == MCLBYTES)
rctl |= E1000_RCTL_SZ_2048;
@@ -4401,11 +4747,8 @@ em_initialize_receive_unit(struct adapter *adapter)
else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
- if (ifp->if_mtu > ETHERMTU)
- rctl |= E1000_RCTL_LPE;
- else
- rctl &= ~E1000_RCTL_LPE;
-
+	/* Clear the RCTL.DTYP bits to ensure a descriptor type of 00 */
+ rctl &= ~0x00000C00;
/* Write out the settings */
E1000_WRITE_REG(hw, E1000_RCTL, rctl);
@@ -4428,16 +4771,21 @@ static bool
em_rxeof(struct rx_ring *rxr, int count, int *done)
{
struct adapter *adapter = rxr->adapter;
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
struct mbuf *mp, *sendmp;
- u8 status = 0;
+ u32 status = 0;
u16 len;
int i, processed, rxdone = 0;
bool eop;
- struct e1000_rx_desc *cur;
+ union e1000_rx_desc_extended *cur;
EM_RX_LOCK(rxr);
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
#ifdef DEV_NETMAP
if (netmap_rx_irq(ifp, rxr->me, &processed)) {
EM_RX_UNLOCK(rxr);
@@ -4446,24 +4794,20 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
#endif /* DEV_NETMAP */
for (i = rxr->next_to_check, processed = 0; count != 0;) {
-
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
break;
- bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
cur = &rxr->rx_base[i];
- status = cur->status;
+ status = le32toh(cur->wb.upper.status_error);
mp = sendmp = NULL;
if ((status & E1000_RXD_STAT_DD) == 0)
break;
- len = le16toh(cur->length);
+ len = le16toh(cur->wb.upper.length);
eop = (status & E1000_RXD_STAT_EOP) != 0;
- if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
+ if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
(rxr->discard == TRUE)) {
adapter->dropped_pkts++;
++rxr->rx_discarded;
@@ -4498,9 +4842,9 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
if (eop) {
--count;
sendmp = rxr->fmp;
- sendmp->m_pkthdr.rcvif = ifp;
- ifp->if_ipackets++;
- em_receive_checksum(cur, sendmp);
+ if_setrcvif(sendmp, ifp);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ em_receive_checksum(status, sendmp);
#ifndef __NO_STRICT_ALIGNMENT
if (adapter->hw.mac.max_frame_size >
(MCLBYTES - ETHER_ALIGN) &&
@@ -4508,8 +4852,8 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
goto skip;
#endif
if (status & E1000_RXD_STAT_VP) {
- sendmp->m_pkthdr.ether_vtag =
- le16toh(cur->special);
+ if_setvtag(sendmp,
+ le16toh(cur->wb.upper.vlan));
sendmp->m_flags |= M_VLANTAG;
}
#ifndef __NO_STRICT_ALIGNMENT
@@ -4518,8 +4862,12 @@ skip:
rxr->fmp = rxr->lmp = NULL;
}
next_desc:
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
/* Zero out the receive descriptors status. */
- cur->status = 0;
+ cur->wb.upper.status_error &= htole32(~0xFF);
++rxdone; /* cumulative for POLL */
++processed;
@@ -4531,7 +4879,7 @@ next_desc:
if (sendmp != NULL) {
rxr->next_to_check = i;
EM_RX_UNLOCK(rxr);
- (*ifp->if_input)(ifp, sendmp);
+ if_input(ifp, sendmp);
EM_RX_LOCK(rxr);
i = rxr->next_to_check;
}
@@ -4558,7 +4906,7 @@ next_desc:
static __inline void
em_rx_discard(struct rx_ring *rxr, int i)
{
- struct em_buffer *rbuf;
+ struct em_rxbuffer *rbuf;
rbuf = &rxr->rx_buffers[i];
bus_dmamap_unload(rxr->rxtag, rbuf->map);
@@ -4630,6 +4978,14 @@ em_fixup_rx(struct rx_ring *rxr)
}
#endif
+static void
+em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
+{
+ rxd->read.buffer_addr = htole64(rxbuf->paddr);
+ /* DD bits must be cleared */
+	rxd->wb.upper.status_error = 0;
+}
+
/*********************************************************************
*
* Verify that the hardware indicated that the checksum is valid.
@@ -4638,23 +4994,27 @@ em_fixup_rx(struct rx_ring *rxr)
*
*********************************************************************/
static void
-em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
+em_receive_checksum(uint32_t status, struct mbuf *mp)
{
mp->m_pkthdr.csum_flags = 0;
/* Ignore Checksum bit is set */
- if (rx_desc->status & E1000_RXD_STAT_IXSM)
+ if (status & E1000_RXD_STAT_IXSM)
return;
- if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
- return;
-
- /* IP Checksum Good? */
- if (rx_desc->status & E1000_RXD_STAT_IPCS)
+ /* If the IP checksum exists and there is no IP Checksum error */
+ if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
+ E1000_RXD_STAT_IPCS) {
mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
+ }
/* TCP or UDP checksum */
- if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
+ if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
+ E1000_RXD_STAT_TCPCS) {
+ mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ mp->m_pkthdr.csum_data = htons(0xffff);
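+		/* csum_data of 0xffff tells the stack the pseudo-header
+		 * checksum has already been verified in hardware */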
+ }
+ if (status & E1000_RXD_STAT_UDPCS) {
mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
mp->m_pkthdr.csum_data = htons(0xffff);
}
@@ -4665,12 +5025,12 @@ em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
* config EVENT
*/
static void
-em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+em_register_vlan(void *arg, if_t ifp, u16 vtag)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u32 index, bit;
- if (ifp->if_softc != arg) /* Not our event */
+	if ((void *)adapter != arg) /* Not our event */
return;
if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
@@ -4682,7 +5042,7 @@ em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
adapter->shadow_vfta[index] |= (1 << bit);
++adapter->num_vlans;
/* Re-init to load the changes */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
}
@@ -4692,12 +5052,12 @@ em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
* unconfig EVENT
*/
static void
-em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
{
- struct adapter *adapter = ifp->if_softc;
+ struct adapter *adapter = if_getsoftc(ifp);
u32 index, bit;
- if (ifp->if_softc != arg)
+ if (adapter != arg)
return;
if ((vtag == 0) || (vtag > 4095)) /* Invalid */
@@ -4709,7 +5069,7 @@ em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
adapter->shadow_vfta[index] &= ~(1 << bit);
--adapter->num_vlans;
/* Re-init to load the changes */
- if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
em_init_locked(adapter);
EM_CORE_UNLOCK(adapter);
}
@@ -4756,8 +5116,8 @@ em_enable_intr(struct adapter *adapter)
u32 ims_mask = IMS_ENABLE_MASK;
if (hw->mac.type == e1000_82574) {
- E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
- ims_mask |= EM_MSIX_MASK;
+ E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
+ ims_mask |= adapter->ims;
}
E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
}
@@ -4963,7 +5323,7 @@ static void
em_enable_wakeup(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
- struct ifnet *ifp = adapter->ifp;
+ if_t ifp = adapter->ifp;
u32 pmc, ctrl, ctrl_ext, rctl;
u16 status;
@@ -4994,10 +5354,10 @@ em_enable_wakeup(device_t dev)
** Determine type of Wakeup: note that wol
** is set with all bits on by default.
*/
- if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
+ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
adapter->wol &= ~E1000_WUFC_MAG;
- if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
+ if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
adapter->wol &= ~E1000_WUFC_MC;
else {
rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
@@ -5020,7 +5380,7 @@ em_enable_wakeup(device_t dev)
/* Request PME */
status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
- if (ifp->if_capenable & IFCAP_WOL)
+ if (if_getcapenable(ifp) & IFCAP_WOL)
status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
@@ -5156,7 +5516,6 @@ em_disable_aspm(struct adapter *adapter)
static void
em_update_stats_counters(struct adapter *adapter)
{
- struct ifnet *ifp;
if(adapter->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
@@ -5175,12 +5534,7 @@ em_update_stats_counters(struct adapter *adapter)
adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
- /*
- ** For watchdog management we need to know if we have been
- ** paused during the last interval, so capture that here.
- */
- adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
- adapter->stats.xoffrxc += adapter->pause_frames;
+ adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
@@ -5248,19 +5602,29 @@ em_update_stats_counters(struct adapter *adapter)
adapter->stats.tsctfc +=
E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
}
- ifp = adapter->ifp;
-
- ifp->if_collisions = adapter->stats.colc;
+}
- /* Rx Errors */
- ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
- adapter->stats.crcerrs + adapter->stats.algnerrc +
- adapter->stats.ruc + adapter->stats.roc +
- adapter->stats.mpc + adapter->stats.cexterr;
+static uint64_t
+em_get_counter(if_t ifp, ift_counter cnt)
+{
+ struct adapter *adapter;
- /* Tx Errors */
- ifp->if_oerrors = adapter->stats.ecol +
- adapter->stats.latecol + adapter->watchdog_events;
+ adapter = if_getsoftc(ifp);
+
+ switch (cnt) {
+ case IFCOUNTER_COLLISIONS:
+ return (adapter->stats.colc);
+ case IFCOUNTER_IERRORS:
+ return (adapter->dropped_pkts + adapter->stats.rxerrc +
+ adapter->stats.crcerrs + adapter->stats.algnerrc +
+ adapter->stats.ruc + adapter->stats.roc +
+ adapter->stats.mpc + adapter->stats.cexterr);
+ case IFCOUNTER_OERRORS:
+ return (adapter->stats.ecol + adapter->stats.latecol +
+ adapter->watchdog_events);
+ default:
+ return (if_get_counter_default(ifp, cnt));
+ }
}
/* Export a single 32-bit register via a read-only sysctl. */
@@ -5298,18 +5662,15 @@ em_add_hw_stats(struct adapter *adapter)
char namebuf[QUEUE_NAME_LEN];
/* Driver Statistics */
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
- CTLFLAG_RD, &adapter->link_irq,
- "Link MSIX IRQ Handled");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
- CTLFLAG_RD, &adapter->mbuf_alloc_failed,
- "Std mbuf failed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
- CTLFLAG_RD, &adapter->mbuf_cluster_failed,
- "Std mbuf cluster failed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
CTLFLAG_RD, &adapter->dropped_pkts,
"Driver dropped packets");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
+ CTLFLAG_RD, &adapter->link_irq,
+ "Link MSIX IRQ Handled");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
+ CTLFLAG_RD, &adapter->mbuf_defrag_failed,
+ "Defragmenting mbuf chain failed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
CTLFLAG_RD, &adapter->no_tx_dma_setup,
"Driver tx dma failure in xmit");
@@ -5335,10 +5696,10 @@ em_add_hw_stats(struct adapter *adapter)
CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
"Flow Control Low Watermark");
- for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
- snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
- CTLFLAG_RD, NULL, "Queue Name");
+ CTLFLAG_RD, NULL, "TX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
@@ -5357,7 +5718,12 @@ em_add_hw_stats(struct adapter *adapter)
SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
CTLFLAG_RD, &txr->no_desc_avail,
"Queue No Descriptor Available");
-
+
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
+ queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "RX Queue Name");
+ queue_list = SYSCTL_CHILDREN(queue_node);
+
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
CTLTYPE_UINT | CTLFLAG_RD, adapter,
E1000_RDH(rxr->me),
@@ -5781,29 +6147,98 @@ em_print_debug_info(struct adapter *adapter)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
- if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
+ if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
printf("Interface is RUNNING ");
else
printf("Interface is NOT RUNNING\n");
- if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
+ if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
printf("and INACTIVE\n");
else
printf("and ACTIVE\n");
- device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
- device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
- device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
- device_printf(dev, "TX descriptors avail = %d\n",
- txr->tx_avail);
- device_printf(dev, "Tx Descriptors avail failure = %ld\n",
- txr->no_desc_avail);
- device_printf(dev, "RX discarded packets = %ld\n",
- rxr->rx_discarded);
- device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
- device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ device_printf(dev, "TX Queue %d ------\n", i);
+ device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
+ device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
+ device_printf(dev, "TX descriptors avail = %d\n",
+ txr->tx_avail);
+ device_printf(dev, "Tx Descriptors avail failure = %ld\n",
+ txr->no_desc_avail);
+ device_printf(dev, "RX Queue %d ------\n", i);
+ device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
+ device_printf(dev, "RX discarded packets = %ld\n",
+ rxr->rx_discarded);
+ device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
+ device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ }
+}
+
+#ifdef EM_MULTIQUEUE
+/*
+ * 82574 only:
+ * Write a new value to the EEPROM increasing the number of MSIX
+ * vectors from 3 to 5, for proper multiqueue support.
+ */
+static void
+em_enable_vectors_82574(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ device_t dev = adapter->dev;
+ u16 edata;
+
+ e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
+ printf("Current cap: %#06x\n", edata);
+ if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
+ device_printf(dev, "Writing to eeprom: increasing "
+ "reported MSIX vectors from 3 to 5...\n");
+ edata &= ~(EM_NVM_MSIX_N_MASK);
+ edata |= 4 << EM_NVM_MSIX_N_SHIFT;
+ e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
+ e1000_update_nvm_checksum(hw);
+ device_printf(dev, "Writing to eeprom: done\n");
+ }
+}
+#endif
+
+#ifdef DDB
+DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
+{
+ devclass_t dc;
+ int max_em;
+
+ dc = devclass_find("em");
+ max_em = devclass_get_maxunit(dc);
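+	/*
+	 * devclass_get_maxunit() returns one past the highest allocated
+	 * unit, so iterate [0, max_em) and skip holes (NULL devices).
+	 */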
+
+	for (int index = 0; index < max_em; index++) {
+		device_t dev;
+		dev = devclass_get_device(dc, index);
+		if (dev != NULL && device_get_driver(dev) == &em_driver) {
+ struct adapter *adapter = device_get_softc(dev);
+ EM_CORE_LOCK(adapter);
+ em_init_locked(adapter);
+ EM_CORE_UNLOCK(adapter);
+ }
+ }
}
+DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
+{
+ devclass_t dc;
+ int max_em;
+
+ dc = devclass_find("em");
+ max_em = devclass_get_maxunit(dc);
+
+	for (int index = 0; index < max_em; index++) {
+		device_t dev;
+		dev = devclass_get_device(dc, index);
+		if (dev != NULL && device_get_driver(dev) == &em_driver)
+ em_print_debug_info(device_get_softc(dev));
+ }
+}
+#endif