diff options
Diffstat (limited to 'freebsd/sys/net/if_lagg.c')
-rw-r--r-- | freebsd/sys/net/if_lagg.c | 242 |
1 files changed, 191 insertions, 51 deletions
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c index 85099115..b82313eb 100644 --- a/freebsd/sys/net/if_lagg.c +++ b/freebsd/sys/net/if_lagg.c @@ -25,6 +25,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_kern_tls.h> #include <rtems/bsd/local/opt_ratelimit.h> #include <sys/param.h> @@ -97,6 +98,11 @@ static struct { {0, NULL} }; +struct lagg_snd_tag { + struct m_snd_tag com; + struct m_snd_tag *tag; +}; + VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ #define V_lagg_list VNET(lagg_list) VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx); @@ -113,6 +119,7 @@ static void lagg_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner); #define V_lagg_cloner VNET(lagg_cloner) static const char laggname[] = "lagg"; +static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link Aggregation Interface"); static void lagg_capabilities(struct lagg_softc *); static int lagg_port_create(struct lagg_softc *, struct ifnet *); @@ -131,10 +138,17 @@ static void lagg_port2req(struct lagg_port *, struct lagg_reqport *); static void lagg_init(void *); static void lagg_stop(struct lagg_softc *); static int lagg_ioctl(struct ifnet *, u_long, caddr_t); -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) static int lagg_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); +static int lagg_snd_tag_modify(struct m_snd_tag *, + union if_snd_tag_modify_params *); +static int lagg_snd_tag_query(struct m_snd_tag *, + union if_snd_tag_query_params *); +static void lagg_snd_tag_free(struct m_snd_tag *); +static void lagg_ratelimit_query(struct ifnet *, + struct if_ratelimit_query_results *); #endif static int lagg_setmulti(struct lagg_port *); static int lagg_clrmulti(struct lagg_port *); @@ -264,6 +278,13 @@ SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); +/* Default value for using numa */ +VNET_DEFINE_STATIC(int, def_use_numa) = 1; +#define V_def_use_numa VNET(def_use_numa) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_numa, CTLFLAG_RWTUN, + &VNET_NAME(def_use_numa), 0, + "Use numa to steer flows"); + /* Default value for flowid shift */ VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; #define V_def_flowid_shift VNET(def_flowid_shift) @@ -480,10 +501,10 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) struct ifnet *ifp; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ - sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); + sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { - free(sc, M_DEVBUF); + free(sc, M_LAGG); return (ENOSPC); } LAGG_SX_INIT(sc); @@ -491,6 +512,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) LAGG_XLOCK(sc); if (V_def_use_flowid) sc->sc_opts |= LAGG_OPT_USE_FLOWID; + if (V_def_use_numa) + sc->sc_opts |= LAGG_OPT_USE_NUMA; sc->flowid_shift = V_def_flowid_shift; /* Hash all layers by default */ @@ -514,12 +537,14 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params) ifp->if_ioctl = lagg_ioctl; ifp->if_get_counter = lagg_get_counter; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; - ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS | IFCAP_TXRTLMT; -#else - ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; + ifp->if_snd_tag_modify = lagg_snd_tag_modify; + ifp->if_snd_tag_query = lagg_snd_tag_query; + ifp->if_snd_tag_free = lagg_snd_tag_free; + ifp->if_ratelimit_query = lagg_ratelimit_query; #endif + ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; /* * Attach as an ordinary ethernet device, children will be attached @@ -572,7 +597,7 @@ lagg_clone_destroy(struct ifnet *ifp) LAGG_LIST_UNLOCK(); LAGG_SX_DESTROY(sc); - free(sc, M_DEVBUF); + free(sc, M_LAGG); } static void @@ -686,7 +711,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) ifr.ifr_mtu = oldmtu; } - lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_WAITOK|M_ZERO); + lp = malloc(sizeof(struct lagg_port), M_LAGG, M_WAITOK|M_ZERO); lp->lp_softc = sc; /* Check if port is a stacked lagg */ @@ -694,7 +719,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { LAGG_LIST_UNLOCK(); - free(lp, M_DEVBUF); + free(lp, M_LAGG); if (oldmtu != -1) (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); @@ -705,7 +730,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { LAGG_LIST_UNLOCK(); - free(lp, M_DEVBUF); + free(lp, M_LAGG); if (oldmtu != -1) (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); @@ -753,7 +778,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) * is predictable and `ifconfig laggN create ...` command * will lead to the same result each time. */ - LAGG_RLOCK(); CK_SLIST_FOREACH(tlp, &sc->sc_ports, lp_entries) { if (tlp->lp_ifp->if_index < ifp->if_index && ( CK_SLIST_NEXT(tlp, lp_entries) == NULL || @@ -761,7 +785,6 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) ifp->if_index)) break; } - LAGG_RUNLOCK(); if (tlp != NULL) CK_SLIST_INSERT_AFTER(tlp, lp, lp_entries); else @@ -816,7 +839,7 @@ lagg_port_destroy_cb(epoch_context_t ec) ifp = lp->lp_ifp; if_rele(ifp); - free(lp, M_DEVBUF); + free(lp, M_LAGG); } static int @@ -1250,6 +1273,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) switch (ro->ro_opts) { case LAGG_OPT_USE_FLOWID: case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_USE_NUMA: + case -LAGG_OPT_USE_NUMA: case LAGG_OPT_FLOWIDSHIFT: valid = 1; lacp = 0; @@ -1528,49 +1553,142 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) return (error); } -#ifdef RATELIMIT -static int -lagg_snd_tag_alloc(struct ifnet *ifp, - union if_snd_tag_alloc_params *params, - struct m_snd_tag **ppmt) +#if defined(KERN_TLS) || defined(RATELIMIT) +static inline struct lagg_snd_tag * +mst_to_lst(struct m_snd_tag *mst) { - struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; + + return (__containerof(mst, struct lagg_snd_tag, com)); +} + +/* + * Look up the port used by a specific flow. This only works for lagg + * protocols with deterministic port mappings (e.g. not roundrobin). + * In addition protocols which use a hash to map flows to ports must + * be configured to use the mbuf flowid rather than hashing packet + * contents. + */ +static struct lagg_port * +lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype) +{ + struct lagg_softc *sc; struct lagg_port *lp; struct lagg_lb *lb; uint32_t p; + sc = ifp->if_softc; + switch (sc->sc_proto) { case LAGG_PROTO_FAILOVER: - lp = lagg_link_active(sc, sc->sc_primary); - break; + return (lagg_link_active(sc, sc->sc_primary)); case LAGG_PROTO_LOADBALANCE: if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || - params->hdr.flowtype == M_HASHTYPE_NONE) - return (EOPNOTSUPP); - p = params->hdr.flowid >> sc->flowid_shift; + flowtype == M_HASHTYPE_NONE) + return (NULL); + p = flowid >> sc->flowid_shift; p %= sc->sc_count; lb = (struct lagg_lb *)sc->sc_psc; lp = lb->lb_ports[p]; - lp = lagg_link_active(sc, lp); - break; + return (lagg_link_active(sc, lp)); case LAGG_PROTO_LACP: if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 || - params->hdr.flowtype == M_HASHTYPE_NONE) - return (EOPNOTSUPP); - lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid); - break; + flowtype == M_HASHTYPE_NONE) + return (NULL); + return (lacp_select_tx_port_by_hash(sc, flowid)); default: - return (EOPNOTSUPP); + return (NULL); } - if (lp == NULL) +} + +static int +lagg_snd_tag_alloc(struct ifnet *ifp, + union if_snd_tag_alloc_params *params, + struct m_snd_tag **ppmt) +{ + struct lagg_snd_tag *lst; + struct lagg_softc *sc; + struct lagg_port *lp; + struct ifnet *lp_ifp; + int error; + + sc = ifp->if_softc; + + LAGG_RLOCK(); + lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype); + if (lp == NULL) { + LAGG_RUNLOCK(); return (EOPNOTSUPP); - ifp = lp->lp_ifp; - if (ifp == NULL || ifp->if_snd_tag_alloc == NULL || - (ifp->if_capenable & IFCAP_TXRTLMT) == 0) + } + if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) { + LAGG_RUNLOCK(); return (EOPNOTSUPP); + } + lp_ifp = lp->lp_ifp; + if_ref(lp_ifp); + LAGG_RUNLOCK(); + + lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT); + if (lst == NULL) { + if_rele(lp_ifp); + return (ENOMEM); + } + + error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag); + if_rele(lp_ifp); + if (error) { + free(lst, M_LAGG); + return (error); + } + + m_snd_tag_init(&lst->com, ifp); - /* forward allocation request */ - return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); + *ppmt = &lst->com; + return (0); +} + +static int +lagg_snd_tag_modify(struct m_snd_tag *mst, + union if_snd_tag_modify_params *params) +{ + struct lagg_snd_tag *lst; + + lst = mst_to_lst(mst); + return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params)); +} + +static int +lagg_snd_tag_query(struct m_snd_tag *mst, + union if_snd_tag_query_params *params) +{ + struct lagg_snd_tag *lst; + + lst = mst_to_lst(mst); + return (lst->tag->ifp->if_snd_tag_query(lst->tag, params)); +} + +static void +lagg_snd_tag_free(struct m_snd_tag *mst) +{ + struct lagg_snd_tag *lst; + + lst = mst_to_lst(mst); + m_snd_tag_rele(lst->tag); + free(lst, M_LAGG); +} + +static void +lagg_ratelimit_query(struct ifnet *ifp __unused, struct if_ratelimit_query_results *q) +{ + /* + * For lagg, we have an indirect + * interface. The caller needs to + * get a ratelimit tag on the actual + * interface the flow will go on. + */ + q->rate_table = NULL; + q->flags = RT_IS_INDIRECT; + q->max_flows = 0; + q->number_of_rates = 0; } #endif @@ -1588,7 +1706,7 @@ lagg_setmulti(struct lagg_port *lp) CK_STAILQ_FOREACH(ifma, &scifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; - mc = malloc(sizeof(struct lagg_mc), M_DEVBUF, M_NOWAIT); + mc = malloc(sizeof(struct lagg_mc), M_LAGG, M_NOWAIT); if (mc == NULL) { IF_ADDR_WUNLOCK(scifp); return (ENOMEM); @@ -1619,7 +1737,7 @@ lagg_clrmulti(struct lagg_port *lp) SLIST_REMOVE(&lp->lp_mc_head, mc, lagg_mc, mc_entries); if (mc->mc_ifma && lp->lp_detaching == 0) if_delmulti_ifma(mc->mc_ifma); - free(mc, M_DEVBUF); + free(mc, M_LAGG); } return (0); } @@ -1696,6 +1814,10 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m) struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; int error; +#if defined(KERN_TLS) || defined(RATELIMIT) + if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) + MPASS(m->m_pkthdr.snd_tag->ifp == ifp); +#endif LAGG_RLOCK(); /* We need a Tx algorithm and at least one port */ if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) { @@ -1848,12 +1970,20 @@ struct lagg_port * lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_port *lp_next, *rval = NULL; - struct epoch_tracker net_et; /* * Search a port which reports an active link state. */ +#ifdef INVARIANTS + /* + * This is called with either LAGG_RLOCK() held or + * LAGG_XLOCK(sc) held. + */ + if (!in_epoch(net_epoch_preempt)) + LAGG_XLOCK_ASSERT(sc); +#endif + if (lp == NULL) goto search; if (LAGG_PORTACTIVE(lp)) { @@ -1866,15 +1996,12 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) goto found; } - search: - epoch_enter_preempt(net_epoch_preempt, &net_et); +search: CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp_next)) { - epoch_exit_preempt(net_epoch_preempt, &net_et); return (lp_next); } } - epoch_exit_preempt(net_epoch_preempt, &net_et); found: return (rval); } @@ -1883,6 +2010,21 @@ int lagg_enqueue(struct ifnet *ifp, struct mbuf *m) { +#if defined(KERN_TLS) || defined(RATELIMIT) + if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { + struct lagg_snd_tag *lst; + struct m_snd_tag *mst; + + mst = m->m_pkthdr.snd_tag; + lst = mst_to_lst(mst); + if (lst->tag->ifp != ifp) { + m_freem(m); + return (EAGAIN); + } + m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag); + m_snd_tag_rele(mst); + } +#endif return (ifp->if_transmit)(ifp, m); } @@ -1956,7 +2098,7 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m) struct lagg_port *lp, *last = NULL; struct mbuf *m0; - LAGG_RLOCK(); + LAGG_RLOCK_ASSERT(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { if (!LAGG_PORTACTIVE(lp)) continue; @@ -1977,7 +2119,6 @@ lagg_bcast_start(struct lagg_softc *sc, struct mbuf *m) } last = lp; } - LAGG_RUNLOCK(); if (last == NULL) { m_freem(m); @@ -2063,7 +2204,7 @@ lagg_lb_attach(struct lagg_softc *sc) struct lagg_lb *lb; LAGG_XLOCK_ASSERT(sc); - lb = malloc(sizeof(struct lagg_lb), M_DEVBUF, M_WAITOK | M_ZERO); + lb = malloc(sizeof(struct lagg_lb), M_LAGG, M_WAITOK | M_ZERO); lb->lb_key = m_ether_tcpip_hash_init(); sc->sc_psc = lb; @@ -2078,7 +2219,7 @@ lagg_lb_detach(struct lagg_softc *sc) lb = (struct lagg_lb *)sc->sc_psc; if (lb != NULL) - free(lb, M_DEVBUF); + free(lb, M_LAGG); } static int @@ -2090,7 +2231,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) rv = 0; bzero(&lb->lb_ports, sizeof(lb->lb_ports)); - LAGG_RLOCK(); + LAGG_XLOCK_ASSERT(sc); CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (lp_next == lp) continue; @@ -2103,7 +2244,6 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) sc->sc_ifname, lp_next->lp_ifp->if_xname, i); lb->lb_ports[i++] = lp_next; } - LAGG_RUNLOCK(); return (rv); } |