summary refs log tree commit diff stats
path: root/freebsd/sys/net/if_lagg.c
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/net/if_lagg.c')
-rw-r--r-- freebsd/sys/net/if_lagg.c 242
1 files changed, 191 insertions, 51 deletions
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index 85099115..b82313eb 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -25,6 +25,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_ratelimit.h>
#include <sys/param.h>
@@ -97,6 +98,11 @@ static struct {
{0, NULL}
};
+struct lagg_snd_tag { /* lagg-level send tag wrapping a send tag obtained from a port */
+ struct m_snd_tag com; /* embedded tag handed back to callers of lagg_snd_tag_alloc() */
+ struct m_snd_tag *tag; /* tag filled in by the selected port's if_snd_tag_alloc method */
+};
+
VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
#define V_lagg_list VNET(lagg_list)
VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
@@ -113,6 +119,7 @@ static void lagg_clone_destroy(struct ifnet *);
VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner);
#define V_lagg_cloner VNET(lagg_cloner)
static const char laggname[] = "lagg";
+static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface");
static void lagg_capabilities(struct lagg_softc *);
static int lagg_port_create(struct lagg_softc *, struct ifnet *);
@@ -131,10 +138,17 @@ static void lagg_port2req(struct lagg_port *, struct lagg_reqport *);
static void lagg_init(void *);
static void lagg_stop(struct lagg_softc *);
static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
static int lagg_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *,
struct m_snd_tag **);
+static int lagg_snd_tag_modify(struct m_snd_tag *,
+ union if_snd_tag_modify_params *);
+static int lagg_snd_tag_query(struct m_snd_tag *,
+ union if_snd_tag_query_params *);
+static void lagg_snd_tag_free(struct m_snd_tag *);
+static void lagg_ratelimit_query(struct ifnet *,
+ struct if_ratelimit_query_results *);
#endif
static int lagg_setmulti(struct lagg_port *);
static int lagg_clrmulti(struct lagg_port *);
@@ -264,6 +278,13 @@ SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN,
&VNET_NAME(def_use_flowid), 0,
"Default setting for using flow id for load sharing");
+/* Default value for using numa: when non-zero, newly created laggs get LAGG_OPT_USE_NUMA set (initially 1) */
+VNET_DEFINE_STATIC(int, def_use_numa) = 1;
+#define V_def_use_numa VNET(def_use_numa)
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN,
+ &VNET_NAME(def_use_numa), 0,
+ "Use numa to steer flows");
+
/* Default value for flowid shift */
VNET_DEFINE_STATIC(int, def_flowid_shift) = 16;
#define V_def_flowid_shift VNET(def_flowid_shift)
@@ -480,10 +501,10 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
struct ifnet *ifp;
static const u_char eaddr[6]; /* 00:00:00:00:00:00 */
- sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+ sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
if (ifp == NULL) {
- free(sc, M_DEVBUF);
+ free(sc, M_LAGG);
return (ENOSPC);
}
LAGG_SX_INIT(sc);
@@ -491,6 +512,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
LAGG_XLOCK(sc);
if (V_def_use_flowid)
sc->sc_opts |= LAGG_OPT_USE_FLOWID;
+ if (V_def_use_numa)
+ sc->sc_opts |= LAGG_OPT_USE_NUMA;
sc->flowid_shift = V_def_flowid_shift;
/* Hash all layers by default */
@@ -514,12 +537,14 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_ioctl = lagg_ioctl;
ifp->if_get_counter = lagg_get_counter;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
-#ifdef RATELIMIT
+#if defined(KERN_TLS) || defined(RATELIMIT)
ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
- ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT;
-#else
- ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
+ ifp->if_snd_tag_modify = lagg_snd_tag_modify;
+ ifp->if_snd_tag_query = lagg_snd_tag_query;
+ ifp->if_snd_tag_free = lagg_snd_tag_free;
+ ifp->if_ratelimit_query = lagg_ratelimit_query;
#endif
+ ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
/*
* Attach as an ordinary ethernet device, children will be attached
@@ -572,7 +597,7 @@ lagg_clone_destroy(struct ifnet *ifp)
LAGG_LIST_UNLOCK();
LAGG_SX_DESTROY(sc);
- free(sc, M_DEVBUF);
+ free(sc, M_LAGG);
}
static void
@@ -686,7 +711,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
ifr.ifr_mtu = oldmtu;
}
- lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_WAITOK|M_ZERO);
+ lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO);
lp->lp_softc = sc;
/* Check if port is a stacked lagg */
@@ -694,7 +719,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) {
if (ifp == sc_ptr->sc_ifp) {
LAGG_LIST_UNLOCK();
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
if (oldmtu != -1)
(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
(caddr_t)&ifr);
@@ -705,7 +730,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
if (lagg_port_checkstacking(sc_ptr) >=
LAGG_MAX_STACKING) {
LAGG_LIST_UNLOCK();
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
if (oldmtu != -1)
(*ifp->if_ioctl)(ifp, SIOCSIFMTU,
(caddr_t)&ifr);
@@ -753,7 +778,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
* is predictable and `ifconfig laggN create ...` command
* will lead to the same result each time.
*/
- LAGG_RLOCK();
CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) {
if (tlp->lp_ifp->if_index < ifp->if_index && (
CK_SLIST_NEXT(tlp, lp_entries) == NULL ||
@@ -761,7 +785,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp)
ifp->if_index))
break;
}
- LAGG_RUNLOCK();
if (tlp != NULL)
CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries);
else
@@ -816,7 +839,7 @@ lagg_port_destroy_cb(epoch_context_t ec)
ifp = lp->lp_ifp;
if_rele(ifp);
- free(lp, M_DEVBUF);
+ free(lp, M_LAGG);
}
static int
@@ -1250,6 +1273,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
switch (ro->ro_opts) {
case LAGG_OPT_USE_FLOWID:
case -LAGG_OPT_USE_FLOWID:
+ case LAGG_OPT_USE_NUMA:
+ case -LAGG_OPT_USE_NUMA:
case LAGG_OPT_FLOWIDSHIFT:
valid = 1;
lacp = 0;
@@ -1528,49 +1553,142 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
return (error);
}
-#ifdef RATELIMIT
-static int
-lagg_snd_tag_alloc(struct ifnet *ifp,
- union if_snd_tag_alloc_params *params,
- struct m_snd_tag **ppmt)
+#if defined(KERN_TLS) || defined(RATELIMIT)
+static inline struct lagg_snd_tag *
+mst_to_lst(struct m_snd_tag *mst)
{
- struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+ /* Map a pointer to the embedded 'com' member back to its enclosing struct lagg_snd_tag. */
+ return (__containerof(mst, struct lagg_snd_tag, com));
+}
+
+/*
+ * Look up the port used by a specific flow. This only works for lagg
+ * protocols with deterministic port mappings (e.g. not roundrobin).
+ * In addition protocols which use a hash to map flows to ports must
+ * be configured to use the mbuf flowid rather than hashing packet
+ * contents.
+ *
+ * Returns NULL for protocols other than failover, loadbalance and
+ * LACP, and for loadbalance/LACP when LAGG_OPT_USE_FLOWID is not set
+ * or flowtype is M_HASHTYPE_NONE. Otherwise the result of the
+ * per-protocol port selection is returned as-is.
+ */
+static struct lagg_port *
+lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
+{
+ struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t p;
+ sc = ifp->if_softc;
+
switch (sc->sc_proto) {
case LAGG_PROTO_FAILOVER:
- lp = lagg_link_active(sc, sc->sc_primary);
- break;
+ return (lagg_link_active(sc, sc->sc_primary));
case LAGG_PROTO_LOADBALANCE:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE)
- return (EOPNOTSUPP);
- p = params->hdr.flowid >> sc->flowid_shift;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL); /* no flowid-based mapping available */
+ p = flowid >> sc->flowid_shift; /* NOTE(review): the modulo that follows divides by sc->sc_count -- confirm it cannot be 0 here */
p %= sc->sc_count;
lb = (struct lagg_lb *)sc->sc_psc;
lp = lb->lb_ports[p];
- lp = lagg_link_active(sc, lp);
- break;
+ return (lagg_link_active(sc, lp));
case LAGG_PROTO_LACP:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
- params->hdr.flowtype == M_HASHTYPE_NONE)
- return (EOPNOTSUPP);
- lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
- break;
+ flowtype == M_HASHTYPE_NONE)
+ return (NULL); /* no flowid-based mapping available */
+ return (lacp_select_tx_port_by_hash(sc, flowid));
default:
- return (EOPNOTSUPP);
+ return (NULL); /* any other protocol is not handled here */
}
- if (lp == NULL)
+}
+
+static int
+lagg_snd_tag_alloc(struct ifnet *ifp,
+ union if_snd_tag_alloc_params *params,
+ struct m_snd_tag **ppmt)
+{
+ struct lagg_snd_tag *lst;
+ struct lagg_softc *sc;
+ struct lagg_port *lp;
+ struct ifnet *lp_ifp;
+ int error;
+ /*
+ * Allocate a lagg-level send tag that wraps a tag obtained from the
+ * port selected for the flow described by params->hdr.
+ *
+ * On success returns 0 and stores a pointer to the embedded 'com'
+ * tag in *ppmt. Returns EOPNOTSUPP when no port is found or the
+ * port has no ifnet or no if_snd_tag_alloc method, ENOMEM when the
+ * wrapper cannot be allocated, or the error from the port's
+ * if_snd_tag_alloc method.
+ */
+ sc = ifp->if_softc; /* NOTE(review): sc is not used again in the lines shown here */
+ /* Pick the port under LAGG_RLOCK() and take an ifnet reference before unlocking. */
+ LAGG_RLOCK();
+ lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
+ if (lp == NULL) {
+ LAGG_RUNLOCK();
return (EOPNOTSUPP);
- ifp = lp->lp_ifp;
- if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
- (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
+ }
+ if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) {
+ LAGG_RUNLOCK();
return (EOPNOTSUPP);
+ }
+ lp_ifp = lp->lp_ifp;
+ if_ref(lp_ifp);
+ LAGG_RUNLOCK();
+ /* M_NOWAIT allocation: failure is handled right below. */
+ lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
+ if (lst == NULL) {
+ if_rele(lp_ifp);
+ return (ENOMEM);
+ }
+ /* Ask the port for the underlying tag; the ifnet reference is dropped right after the call. */
+ error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag);
+ if_rele(lp_ifp);
+ if (error) {
+ free(lst, M_LAGG);
+ return (error);
+ }
+ /* Initialize the embedded tag against the lagg ifp and hand it to the caller. */
+ m_snd_tag_init(&lst->com, ifp);
- /* forward allocation request */
- return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+ *ppmt = &lst->com;
+ return (0);
+}
+
+static int
+lagg_snd_tag_modify(struct m_snd_tag *mst,
+ union if_snd_tag_modify_params *params)
+{
+ struct lagg_snd_tag *lst;
+ /*
+ * Forward a modify request made on a lagg send tag to the interface
+ * that owns the wrapped port tag, and return that method's result.
+ * NOTE(review): if_snd_tag_modify is called without a NULL check;
+ * confirm every port reaching here provides it.
+ */
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params));
+}
+
+static int
+lagg_snd_tag_query(struct m_snd_tag *mst,
+ union if_snd_tag_query_params *params)
+{
+ struct lagg_snd_tag *lst;
+ /*
+ * Forward a query made on a lagg send tag to the interface that
+ * owns the wrapped port tag, and return that method's result.
+ * NOTE(review): if_snd_tag_query is called without a NULL check;
+ * confirm every port reaching here provides it.
+ */
+ lst = mst_to_lst(mst);
+ return (lst->tag->ifp->if_snd_tag_query(lst->tag, params));
+}
+
+static void
+lagg_snd_tag_free(struct m_snd_tag *mst)
+{
+ struct lagg_snd_tag *lst;
+ /*
+ * Tear down a lagg send tag: release the reference on the wrapped
+ * port tag first, then free the wrapper back to M_LAGG.
+ */
+ lst = mst_to_lst(mst);
+ m_snd_tag_rele(lst->tag);
+ free(lst, M_LAGG);
+}
+
+static void
+lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q)
+{
+ /*
+ * Report lagg as an indirect interface: no rate table,
+ * zero flows and zero rates, with RT_IS_INDIRECT set.
+ * The caller needs to get a ratelimit tag on the actual
+ * interface the flow will go on.
+ */
+ q->rate_table = NULL;
+ q->flags = RT_IS_INDIRECT;
+ q->max_flows = 0;
+ q->number_of_rates = 0;
+}
#endif
@@ -1588,7 +1706,7 @@ lagg_setmulti(struct lagg_port *lp)
CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
continue;
- mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT);
+ mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT);
if (mc == NULL) {
IF_ADDR_WUNLOCK(scifp);
return (ENOMEM);
@@ -1619,7 +1737,7 @@ lagg_clrmulti(struct lagg_port *lp)
SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries);
if (mc->mc_ifma && lp->lp_detaching == 0)
if_delmulti_ifma(mc->mc_ifma);
- free(mc, M_DEVBUF);
+ free(mc, M_LAGG);
}
return (0);
}
@@ -1696,6 +1814,10 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
LAGG_RLOCK();
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
@@ -1848,12 +1970,20 @@ struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
struct lagg_port *lp_next, *rval = NULL;
- struct epoch_tracker net_et;
/*
* Search a port which reports an active link state.
*/
+#ifdef INVARIANTS
+ /*
+ * This is called with either LAGG_RLOCK() held or
+ * LAGG_XLOCK(sc) held.
+ */
+ if (!in_epoch(net_epoch_preempt))
+ LAGG_XLOCK_ASSERT(sc);
+#endif
+
if (lp == NULL)
goto search;
if (LAGG_PORTACTIVE(lp)) {
@@ -1866,15 +1996,12 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
goto found;
}
- search:
- epoch_enter_preempt(net_epoch_preempt, &net_et);
+search:
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp_next)) {
- epoch_exit_preempt(net_epoch_preempt, &net_et);
return (lp_next);
}
}
- epoch_exit_preempt(net_epoch_preempt, &net_et);
found:
return (rval);
}
@@ -1883,6 +2010,21 @@ int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
+#if defined(KERN_TLS) || defined(RATELIMIT)
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ struct lagg_snd_tag *lst;
+ struct m_snd_tag *mst;
+ /*
+ * The mbuf's send tag is treated as the 'com' member of a
+ * lagg_snd_tag. If the wrapped tag belongs to an interface
+ * other than ifp, the mbuf is freed and EAGAIN is returned.
+ * Otherwise the mbuf is switched over to the wrapped tag:
+ * a new reference is taken on it and the reference held on
+ * the lagg-level tag is released.
+ */
+ mst = m->m_pkthdr.snd_tag;
+ lst = mst_to_lst(mst);
+ if (lst->tag->ifp != ifp) {
+ m_freem(m);
+ return (EAGAIN);
+ }
+ m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
+ m_snd_tag_rele(mst);
+ }
+#endif
return (ifp->if_transmit)(ifp, m);
}
@@ -1956,7 +2098,7 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp, *last = NULL;
struct mbuf *m0;
- LAGG_RLOCK();
+ LAGG_RLOCK_ASSERT();
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
if (!LAGG_PORTACTIVE(lp))
continue;
@@ -1977,7 +2119,6 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m)
}
last = lp;
}
- LAGG_RUNLOCK();
if (last == NULL) {
m_freem(m);
@@ -2063,7 +2204,7 @@ lagg_lb_attach(struct lagg_softc *sc)
struct lagg_lb *lb;
LAGG_XLOCK_ASSERT(sc);
- lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO);
+ lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO);
lb->lb_key = m_ether_tcpip_hash_init();
sc->sc_psc = lb;
@@ -2078,7 +2219,7 @@ lagg_lb_detach(struct lagg_softc *sc)
lb = (struct lagg_lb *)sc->sc_psc;
if (lb != NULL)
- free(lb, M_DEVBUF);
+ free(lb, M_LAGG);
}
static int
@@ -2090,7 +2231,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
rv = 0;
bzero(&lb->lb_ports, sizeof(lb->lb_ports));
- LAGG_RLOCK();
+ LAGG_XLOCK_ASSERT(sc);
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (lp_next == lp)
continue;
@@ -2103,7 +2244,6 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp)
sc->sc_ifname, lp_next->lp_ifp->if_xname, i);
lb->lb_ports[i++] = lp_next;
}
- LAGG_RUNLOCK();
return (rv);
}