summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/net/ieee8023ad_lacp.c
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/net/ieee8023ad_lacp.c')
-rw-r--r--freebsd/sys/net/ieee8023ad_lacp.c201
1 files changed, 142 insertions, 59 deletions
diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c
index 5172ad54..619db8af 100644
--- a/freebsd/sys/net/ieee8023ad_lacp.c
+++ b/freebsd/sys/net/ieee8023ad_lacp.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/callout.h>
+#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/malloc.h>
@@ -44,8 +45,10 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
+#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/ethernet.h>
#include <net/if_media.h>
@@ -189,30 +192,37 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *,
static void lacp_dprintf(const struct lacp_port *, const char *, ...)
__attribute__((__format__(__printf__, 2, 3)));
-static int lacp_debug = 0;
-SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN,
- &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)");
-TUNABLE_INT("net.lacp_debug", &lacp_debug);
+static VNET_DEFINE(int, lacp_debug);
+#define V_lacp_debug VNET(lacp_debug)
+SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad");
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET,
+ &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)");
-#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; }
-#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); }
+static VNET_DEFINE(int, lacp_default_strict_mode) = 1;
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN,
+ &VNET_NAME(lacp_default_strict_mode), 0,
+ "LACP strict protocol compliance default");
+
+#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; }
+#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); }
+#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; }
/*
* partner administration variables.
* XXX should be configurable.
*/
-static const struct lacp_peerinfo lacp_partner_admin = {
+static const struct lacp_peerinfo lacp_partner_admin_optimistic = {
.lip_systemid = { .lsi_prio = 0xffff },
.lip_portid = { .lpi_prio = 0xffff },
-#if 1
- /* optimistic */
.lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION |
LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING,
-#else
- /* pessimistic */
+};
+
+static const struct lacp_peerinfo lacp_partner_admin_strict = {
+ .lip_systemid = { .lsi_prio = 0xffff },
+ .lip_portid = { .lpi_prio = 0xffff },
.lip_state = 0,
-#endif
};
static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = {
@@ -298,11 +308,16 @@ lacp_pdu_input(struct lacp_port *lp, struct mbuf *m)
goto bad;
}
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu receive\n");
lacp_dump_lacpdu(du);
}
+ if ((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_rx_test) {
+ LACP_TPRINTF((lp, "Dropping RX PDU\n"));
+ goto bad;
+ }
+
LACP_LOCK(lsc);
lacp_sm_rx(lp, du);
LACP_UNLOCK(lsc);
@@ -350,7 +365,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp)
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -378,7 +393,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp)
sizeof(du->ldu_collector));
du->ldu_collector.lci_maxdelay = 0;
- if (lacp_debug > 0) {
+ if (V_lacp_debug > 0) {
lacp_dprintf(lp, "lacpdu transmit\n");
lacp_dump_lacpdu(du);
}
@@ -404,7 +419,7 @@ lacp_xmit_marker(struct lacp_port *lp)
LACP_LOCK_ASSERT(lp->lp_lsc);
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
return (ENOMEM);
}
@@ -490,12 +505,14 @@ lacp_tick(void *arg)
if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0)
continue;
+ CURVNET_SET(lp->lp_ifp->if_vnet);
lacp_run_timers(lp);
lacp_select(lp);
lacp_sm_mux(lp);
lacp_sm_tx(lp);
lacp_sm_ptx_tx_schedule(lp);
+ CURVNET_RESTORE();
}
callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc);
}
@@ -512,20 +529,17 @@ lacp_port_create(struct lagg_port *lgp)
int error;
boolean_t active = TRUE; /* XXX should be configurable */
- boolean_t fast = FALSE; /* XXX should be configurable */
+ boolean_t fast = FALSE; /* Configurable via ioctl */
- bzero((char *)&sdl, sizeof(sdl));
- sdl.sdl_len = sizeof(sdl);
- sdl.sdl_family = AF_LINK;
- sdl.sdl_index = ifp->if_index;
- sdl.sdl_type = IFT_ETHER;
+ link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER);
sdl.sdl_alen = ETHER_ADDR_LEN;
bcopy(&ethermulticastaddr_slowprotocols,
LLADDR(&sdl), ETHER_ADDR_LEN);
error = if_addmulti(ifp, (struct sockaddr *)&sdl, &rifma);
if (error) {
- printf("%s: ADDMULTI failed on %s\n", __func__, lgp->lp_ifname);
+ printf("%s: ADDMULTI failed on %s\n", __func__,
+ lgp->lp_ifp->if_xname);
return (error);
}
@@ -535,7 +549,7 @@ lacp_port_create(struct lagg_port *lgp)
return (ENOMEM);
LACP_LOCK(lsc);
- lgp->lp_psc = (caddr_t)lp;
+ lgp->lp_psc = lp;
lp->lp_ifp = ifp;
lp->lp_lagg = lgp;
lp->lp_lsc = lsc;
@@ -572,17 +586,18 @@ lacp_port_destroy(struct lagg_port *lgp)
lacp_disable_distributing(lp);
lacp_unselect(lp);
+ LIST_REMOVE(lp, lp_next);
+ LACP_UNLOCK(lsc);
+
/* The address may have already been removed by if_purgemaddrs() */
if (!lgp->lp_detaching)
if_delmulti_ifma(lp->lp_ifma);
- LIST_REMOVE(lp, lp_next);
- LACP_UNLOCK(lsc);
free(lp, M_DEVBUF);
}
void
-lacp_req(struct lagg_softc *sc, caddr_t data)
+lacp_req(struct lagg_softc *sc, void *data)
{
struct lacp_opreq *req = (struct lacp_opreq *)data;
struct lacp_softc *lsc = LACP_SOFTC(sc);
@@ -590,7 +605,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data)
bzero(req, sizeof(struct lacp_opreq));
- /*
+ /*
* If the LACP softc is NULL, return with the opreq structure full of
* zeros. It is normal for the softc to be NULL while the lagg is
* being destroyed.
@@ -621,7 +636,7 @@ lacp_req(struct lagg_softc *sc, caddr_t data)
}
void
-lacp_portreq(struct lagg_port *lgp, caddr_t data)
+lacp_portreq(struct lagg_port *lgp, void *data)
{
struct lacp_opreq *req = (struct lacp_opreq *)data;
struct lacp_port *lp = LACP_PORT(lgp);
@@ -665,6 +680,7 @@ lacp_disable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -684,6 +700,7 @@ lacp_disable_distributing(struct lacp_port *lp)
TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q);
la->la_nports--;
+ sc->sc_active = la->la_nports;
if (lsc->lsc_active_aggregator == la) {
lacp_suppress_distributing(lsc, la);
@@ -700,6 +717,7 @@ lacp_enable_distributing(struct lacp_port *lp)
{
struct lacp_aggregator *la = lp->lp_aggregator;
struct lacp_softc *lsc = lp->lp_lsc;
+ struct lagg_softc *sc = lsc->lsc_softc;
char buf[LACP_LAGIDSTR_MAX+1];
LACP_LOCK_ASSERT(lsc);
@@ -716,6 +734,7 @@ lacp_enable_distributing(struct lacp_port *lp)
KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid"));
TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q);
la->la_nports++;
+ sc->sc_active = la->la_nports;
lp->lp_state |= LACP_STATE_DISTRIBUTING;
@@ -734,26 +753,26 @@ lacp_transit_expire(void *vp)
LACP_LOCK_ASSERT(lsc);
+ CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet);
LACP_TRACE(NULL);
+ CURVNET_RESTORE();
lsc->lsc_suppress_distributing = FALSE;
}
-int
+void
lacp_attach(struct lagg_softc *sc)
{
struct lacp_softc *lsc;
- lsc = malloc(sizeof(struct lacp_softc),
- M_DEVBUF, M_NOWAIT|M_ZERO);
- if (lsc == NULL)
- return (ENOMEM);
+ lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO);
- sc->sc_psc = (caddr_t)lsc;
+ sc->sc_psc = lsc;
lsc->lsc_softc = sc;
- lsc->lsc_hashkey = arc4random();
+ lsc->lsc_hashkey = m_ether_tcpip_hash_init();
lsc->lsc_active_aggregator = NULL;
+ lsc->lsc_strict_mode = VNET(lacp_default_strict_mode);
LACP_LOCK_INIT(lsc);
TAILQ_INIT(&lsc->lsc_aggregators);
LIST_INIT(&lsc->lsc_ports);
@@ -764,27 +783,23 @@ lacp_attach(struct lagg_softc *sc)
/* if the lagg is already up then do the same */
if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING)
lacp_init(sc);
-
- return (0);
}
-int
-lacp_detach(struct lagg_softc *sc)
+void
+lacp_detach(void *psc)
{
- struct lacp_softc *lsc = LACP_SOFTC(sc);
+ struct lacp_softc *lsc = (struct lacp_softc *)psc;
KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators),
("aggregators still active"));
KASSERT(lsc->lsc_active_aggregator == NULL,
("aggregator still attached"));
- sc->sc_psc = NULL;
callout_drain(&lsc->lsc_transit_callout);
callout_drain(&lsc->lsc_callout);
LACP_LOCK_DESTROY(lsc);
free(lsc, M_DEVBUF);
- return (0);
}
void
@@ -827,10 +842,11 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
return (NULL);
}
- if (sc->use_flowid && (m->m_flags & M_FLOWID))
- hash = m->m_pkthdr.flowid;
+ if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+ M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ hash = m->m_pkthdr.flowid >> sc->flowid_shift;
else
- hash = lagg_hashmbuf(sc, m, lsc->lsc_hashkey);
+ hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
hash %= pm->pm_count;
lp = pm->pm_map[hash];
@@ -920,7 +936,6 @@ lacp_aggregator_bandwidth(struct lacp_aggregator *la)
static void
lacp_select_active_aggregator(struct lacp_softc *lsc)
{
- struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_aggregator *best_la = NULL;
uint64_t best_speed = 0;
@@ -940,13 +955,13 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
lacp_format_lagid_aggregator(la, buf, sizeof(buf)),
speed, la->la_nports));
- /* This aggregator is chosen if
- * the partner has a better system priority
- * or, the total aggregated speed is higher
- * or, it is already the chosen aggregator
+ /*
+ * This aggregator is chosen if the partner has a better
+ * system priority or, the total aggregated speed is higher
+ * or, it is already the chosen aggregator
*/
if ((best_la != NULL && LACP_SYS_PRI(la->la_partner) <
- LACP_SYS_PRI(best_la->la_partner)) ||
+ LACP_SYS_PRI(best_la->la_partner)) ||
speed > best_speed ||
(speed == best_speed &&
la == lsc->lsc_active_aggregator)) {
@@ -972,7 +987,6 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
lacp_format_lagid_aggregator(best_la, buf, sizeof(buf))));
if (lsc->lsc_active_aggregator != best_la) {
- sc->sc_ifp->if_baudrate = best_speed;
lsc->lsc_active_aggregator = best_la;
lacp_update_portmap(lsc);
if (best_la) {
@@ -988,15 +1002,18 @@ lacp_select_active_aggregator(struct lacp_softc *lsc)
static void
lacp_update_portmap(struct lacp_softc *lsc)
{
+ struct lagg_softc *sc = lsc->lsc_softc;
struct lacp_aggregator *la;
struct lacp_portmap *p;
struct lacp_port *lp;
+ uint64_t speed;
u_int newmap;
int i;
newmap = lsc->lsc_activemap == 0 ? 1 : 0;
p = &lsc->lsc_pmap[newmap];
la = lsc->lsc_active_aggregator;
+ speed = 0;
bzero(p, sizeof(struct lacp_portmap));
if (la != NULL && la->la_nports > 0) {
@@ -1005,7 +1022,9 @@ lacp_update_portmap(struct lacp_softc *lsc)
TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q)
p->pm_map[i++] = lp;
KASSERT(i == p->pm_count, ("Invalid port count"));
+ speed = lacp_aggregator_bandwidth(la);
}
+ sc->sc_ifp->if_baudrate = speed;
/* switch the active portmap over */
atomic_store_rel_int(&lsc->lsc_activemap, newmap);
@@ -1054,12 +1073,16 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_100_T4:
case IFM_100_VG:
case IFM_100_T2:
+ case IFM_100_T:
key = IFM_100_TX;
break;
case IFM_1000_SX:
case IFM_1000_LX:
case IFM_1000_CX:
case IFM_1000_T:
+ case IFM_1000_KX:
+ case IFM_1000_SGMII:
+ case IFM_1000_CX_SGMII:
key = IFM_1000_SX;
break;
case IFM_10G_LR:
@@ -1069,15 +1092,53 @@ lacp_compose_key(struct lacp_port *lp)
case IFM_10G_TWINAX_LONG:
case IFM_10G_LRM:
case IFM_10G_T:
+ case IFM_10G_KX4:
+ case IFM_10G_KR:
+ case IFM_10G_CR1:
+ case IFM_10G_ER:
+ case IFM_10G_SFI:
key = IFM_10G_LR;
break;
+ case IFM_20G_KR2:
+ key = IFM_20G_KR2;
+ break;
+ case IFM_2500_KX:
+ case IFM_2500_T:
+ key = IFM_2500_KX;
+ break;
+ case IFM_5000_T:
+ key = IFM_5000_T;
+ break;
+ case IFM_50G_PCIE:
+ case IFM_50G_CR2:
+ case IFM_50G_KR2:
+ key = IFM_50G_PCIE;
+ break;
+ case IFM_56G_R4:
+ key = IFM_56G_R4;
+ break;
+ case IFM_25G_PCIE:
+ case IFM_25G_CR:
+ case IFM_25G_KR:
+ case IFM_25G_SR:
+ key = IFM_25G_PCIE;
+ break;
case IFM_40G_CR4:
case IFM_40G_SR4:
case IFM_40G_LR4:
+ case IFM_40G_XLPPI:
+ case IFM_40G_KR4:
key = IFM_40G_CR4;
break;
+ case IFM_100G_CR4:
+ case IFM_100G_SR4:
+ case IFM_100G_KR4:
+ case IFM_100G_LR4:
+ key = IFM_100G_CR4;
+ break;
default:
key = subtype;
+ break;
}
/* bit 5..14: (some bits of) if_index of lagg device */
key |= 0x7fe0 & ((sc->sc_ifp->if_index) << 5);
@@ -1313,6 +1374,8 @@ lacp_unselect(struct lacp_port *lp)
static void
lacp_sm_mux(struct lacp_port *lp)
{
+ struct lagg_port *lgp = lp->lp_lagg;
+ struct lagg_softc *sc = lgp->lp_softc;
enum lacp_mux_state new_state;
boolean_t p_sync =
(lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0;
@@ -1321,8 +1384,10 @@ lacp_sm_mux(struct lacp_port *lp)
enum lacp_selected selected = lp->lp_selected;
struct lacp_aggregator *la;
- if (lacp_debug > 1)
- lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state);
+ if (V_lacp_debug > 1)
+ lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, "
+ "p_sync= 0x%x, p_collecting= 0x%x\n", __func__,
+ lp->lp_mux_state, selected, p_sync, p_collecting);
re_eval:
la = lp->lp_aggregator;
@@ -1362,6 +1427,8 @@ re_eval:
case LACP_MUX_DISTRIBUTING:
if (selected != LACP_SELECTED || !p_sync || !p_collecting) {
new_state = LACP_MUX_COLLECTING;
+ lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flapping\n");
+ sc->sc_flapping++;
}
break;
default:
@@ -1610,6 +1677,10 @@ lacp_sm_rx_record_pdu(struct lacp_port *lp, const struct lacpdu *du)
sizeof(buf))));
}
+ /* XXX Hack, still need to implement 5.4.9 para 2,3,4 */
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner.lip_state |= LACP_STATE_SYNC;
+
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1635,7 +1706,10 @@ lacp_sm_rx_record_default(struct lacp_port *lp)
LACP_TRACE(lp);
oldpstate = lp->lp_partner.lip_state;
- lp->lp_partner = lacp_partner_admin;
+ if (lp->lp_lsc->lsc_strict_mode)
+ lp->lp_partner = lacp_partner_admin_strict;
+ else
+ lp->lp_partner = lacp_partner_admin_optimistic;
lp->lp_state |= LACP_STATE_DEFAULTED;
lacp_sm_ptx_update_timeout(lp, oldpstate);
}
@@ -1670,7 +1744,12 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp)
LACP_TRACE(lp);
- lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin);
+ if (lp->lp_lsc->lsc_strict_mode)
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_strict);
+ else
+ lacp_sm_rx_update_selected_from_peerinfo(lp,
+ &lacp_partner_admin_optimistic);
}
/* transmit machine */
@@ -1678,7 +1757,7 @@ lacp_sm_rx_update_default_selected(struct lacp_port *lp)
static void
lacp_sm_tx(struct lacp_port *lp)
{
- int error;
+ int error = 0;
if (!(lp->lp_state & LACP_STATE_AGGREGATION)
#if 1
@@ -1700,7 +1779,11 @@ lacp_sm_tx(struct lacp_port *lp)
return;
}
- error = lacp_xmit_lacpdu(lp);
+ if (((1 << lp->lp_ifp->if_dunit) & lp->lp_lsc->lsc_debug.lsc_tx_test) == 0) {
+ error = lacp_xmit_lacpdu(lp);
+ } else {
+ LACP_TPRINTF((lp, "Dropping TX PDU\n"));
+ }
if (error == 0) {
lp->lp_flags &= ~LACP_PORT_NTT;