diff options
Diffstat (limited to 'freebsd/sys/netinet/in_pcb.c')
-rw-r--r-- | freebsd/sys/netinet/in_pcb.c | 204 |
1 files changed, 134 insertions, 70 deletions
diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c index c00593e5..6147c566 100644 --- a/freebsd/sys/netinet/in_pcb.c +++ b/freebsd/sys/netinet/in_pcb.c @@ -92,6 +92,9 @@ __FBSDID("$FreeBSD$"); #if defined(INET) || defined(INET6) #include <netinet/in.h> #include <netinet/in_pcb.h> +#ifdef INET +#include <netinet/in_var.h> +#endif #include <netinet/ip_var.h> #include <netinet/tcp_var.h> #ifdef TCPHPTS @@ -99,16 +102,13 @@ __FBSDID("$FreeBSD$"); #endif #include <netinet/udp.h> #include <netinet/udp_var.h> -#endif -#ifdef INET -#include <netinet/in_var.h> -#endif #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/in6_pcb.h> #include <netinet6/in6_var.h> #include <netinet6/ip6_var.h> #endif /* INET6 */ +#endif #include <netipsec/ipsec_support.h> @@ -216,6 +216,22 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, &VNET_NAME(ipport_randomtime), 0, "Minimum time to keep sequental port " "allocation before switching to a random one"); + +#ifdef RATELIMIT +counter_u64_t rate_limit_active; +counter_u64_t rate_limit_alloc_fail; +counter_u64_t rate_limit_set_ok; + +static SYSCTL_NODE(_net_inet_ip, OID_AUTO, rl, CTLFLAG_RD, 0, + "IP Rate Limiting"); +SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, active, CTLFLAG_RD, + &rate_limit_active, "Active rate limited connections"); +SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, alloc_fail, CTLFLAG_RD, + &rate_limit_alloc_fail, "Rate limited connection failures"); +SYSCTL_COUNTER_U64(_net_inet_ip_rl, OID_AUTO, set_ok, CTLFLAG_RD, + &rate_limit_set_ok, "Rate limited setting succeeded"); +#endif /* RATELIMIT */ + #endif /* INET */ /* @@ -516,6 +532,9 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) if (inp == NULL) return (ENOBUFS); bzero(&inp->inp_start_zero, inp_zero_size); +#ifdef NUMA + inp->inp_numa_domain = M_NODOM; +#endif inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; inp->inp_cred = crhold(so->so_cred); @@ -1024,6 +1043,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, struct sockaddr *sa; struct sockaddr_in *sin; struct route sro; + struct epoch_tracker et; int error; KASSERT(laddr != NULL, ("%s: laddr NULL", __func__)); @@ -1059,7 +1079,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, * network and try to find a corresponding interface to take * the source address from. */ - NET_EPOCH_ENTER(); + NET_EPOCH_ENTER(et); if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) { struct in_ifaddr *ia; struct ifnet *ifp; @@ -1223,7 +1243,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, } done: - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); if (sro.ro_rt != NULL) RTFREE(sro.ro_rt); return (error); @@ -1576,6 +1596,7 @@ in_pcbfree_deferred(epoch_context_t ctx) inp = __containerof(ctx, struct inpcb, inp_epoch_ctx); INP_WLOCK(inp); + CURVNET_SET(inp->inp_vnet); #ifdef INET struct ip_moptions *imo = inp->inp_moptions; inp->inp_moptions = NULL; @@ -1608,6 +1629,7 @@ in_pcbfree_deferred(epoch_context_t ctx) #ifdef INET inp_freemoptions(imo); #endif + CURVNET_RESTORE(); } /* @@ -1785,8 +1807,9 @@ void in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { struct inpcb *inp; + struct in_multi *inm; + struct in_mfilter *imf; struct ip_moptions *imo; - int i, gap; INP_INFO_WLOCK(pcbinfo); CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { @@ -1807,17 +1830,18 @@ in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) * * XXX This can all be deferred to an epoch_call */ - for (i = 0, gap = 0; i < imo->imo_num_memberships; - i++) { - if (imo->imo_membership[i]->inm_ifp == ifp) { - IN_MULTI_LOCK_ASSERT(); - in_leavegroup_locked(imo->imo_membership[i], NULL); - gap++; - } else if (gap != 0) - imo->imo_membership[i - gap] = - imo->imo_membership[i]; +restart: + IP_MFILTER_FOREACH(imf, &imo->imo_head) { + if ((inm = imf->imf_inm) == NULL) + continue; + if (inm->inm_ifp != ifp) + continue; + ip_mfilter_remove(&imo->imo_head, imf); + IN_MULTI_LOCK_ASSERT(); + in_leavegroup_locked(inm, NULL); + ip_mfilter_free(imf); + goto restart; } - imo->imo_num_memberships -= gap; } INP_WUNLOCK(inp); } @@ -3174,6 +3198,7 @@ in_pcbmodify_txrtlmt(struct inpcb *inp, uint32_t max_pacing_rate) { union if_snd_tag_modify_params params = { .rate_limit.max_rate = max_pacing_rate, + .rate_limit.flags = M_NOWAIT, }; struct m_snd_tag *mst; struct ifnet *ifp; @@ -3260,7 +3285,8 @@ in_pcbquery_txrlevel(struct inpcb *inp, uint32_t *p_txqueue_level) */ int in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, - uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate) + uint32_t flowtype, uint32_t flowid, uint32_t max_pacing_rate, struct m_snd_tag **st) + { union if_snd_tag_alloc_params params = { .rate_limit.hdr.type = (max_pacing_rate == -1U) ? @@ -3268,12 +3294,13 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, .rate_limit.hdr.flowid = flowid, .rate_limit.hdr.flowtype = flowtype, .rate_limit.max_rate = max_pacing_rate, + .rate_limit.flags = M_NOWAIT, }; int error; INP_WLOCK_ASSERT(inp); - if (inp->inp_snd_tag != NULL) + if (*st != NULL) return (EINVAL); if (ifp->if_snd_tag_alloc == NULL) { @@ -3281,16 +3308,37 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp, } else { error = ifp->if_snd_tag_alloc(ifp, ¶ms, &inp->inp_snd_tag); - /* - * At success increment the refcount on - * the send tag's network interface: - */ - if (error == 0) - if_ref(inp->inp_snd_tag->ifp); +#ifdef INET + if (error == 0) { + counter_u64_add(rate_limit_set_ok, 1); + counter_u64_add(rate_limit_active, 1); + } else + counter_u64_add(rate_limit_alloc_fail, 1); +#endif } return (error); } +void +in_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst) +{ + if (ifp == NULL) + return; + + /* + * If the device was detached while we still had reference(s) + * on the ifp, we assume if_snd_tag_free() was replaced with + * stubs. + */ + ifp->if_snd_tag_free(mst); + + /* release reference count on network interface */ + if_rele(ifp); +#ifdef INET + counter_u64_add(rate_limit_active, -1); +#endif +} + /* * Free an existing TX rate limit tag based on the "inp->inp_snd_tag", * if any: @@ -3299,7 +3347,6 @@ void in_pcbdetach_txrtlmt(struct inpcb *inp) { struct m_snd_tag *mst; - struct ifnet *ifp; INP_WLOCK_ASSERT(inp); @@ -3309,19 +3356,57 @@ in_pcbdetach_txrtlmt(struct inpcb *inp) if (mst == NULL) return; - ifp = mst->ifp; - if (ifp == NULL) - return; + m_snd_tag_rele(mst); +} + +int +in_pcboutput_txrtlmt_locked(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb, uint32_t max_pacing_rate) +{ + int error; /* - * If the device was detached while we still had reference(s) - * on the ifp, we assume if_snd_tag_free() was replaced with - * stubs. + * If the existing send tag is for the wrong interface due to + * a route change, first drop the existing tag. Set the + * CHANGED flag so that we will keep trying to allocate a new + * tag if we fail to allocate one this time. */ - ifp->if_snd_tag_free(mst); + if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) { + in_pcbdetach_txrtlmt(inp); + inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; + } - /* release reference count on network interface */ - if_rele(ifp); + /* + * NOTE: When attaching to a network interface a reference is + * made to ensure the network interface doesn't go away until + * all ratelimit connections are gone. The network interface + * pointers compared below represent valid network interfaces, + * except when comparing towards NULL. + */ + if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) { + error = 0; + } else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) { + if (inp->inp_snd_tag != NULL) + in_pcbdetach_txrtlmt(inp); + error = 0; + } else if (inp->inp_snd_tag == NULL) { + /* + * In order to utilize packet pacing with RSS, we need + * to wait until there is a valid RSS hash before we + * can proceed: + */ + if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { + error = EAGAIN; + } else { + error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb), + mb->m_pkthdr.flowid, max_pacing_rate, &inp->inp_snd_tag); + } + } else { + error = in_pcbmodify_txrtlmt(inp, max_pacing_rate); + } + if (error == 0 || error == EOPNOTSUPP) + inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; + + return (error); } /* @@ -3366,36 +3451,8 @@ in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) */ max_pacing_rate = socket->so_max_pacing_rate; - /* - * NOTE: When attaching to a network interface a reference is - * made to ensure the network interface doesn't go away until - * all ratelimit connections are gone. The network interface - * pointers compared below represent valid network interfaces, - * except when comparing towards NULL. - */ - if (max_pacing_rate == 0 && inp->inp_snd_tag == NULL) { - error = 0; - } else if (!(ifp->if_capenable & IFCAP_TXRTLMT)) { - if (inp->inp_snd_tag != NULL) - in_pcbdetach_txrtlmt(inp); - error = 0; - } else if (inp->inp_snd_tag == NULL) { - /* - * In order to utilize packet pacing with RSS, we need - * to wait until there is a valid RSS hash before we - * can proceed: - */ - if (M_HASHTYPE_GET(mb) == M_HASHTYPE_NONE) { - error = EAGAIN; - } else { - error = in_pcbattach_txrtlmt(inp, ifp, M_HASHTYPE_GET(mb), - mb->m_pkthdr.flowid, max_pacing_rate); - } - } else { - error = in_pcbmodify_txrtlmt(inp, max_pacing_rate); - } - if (error == 0 || error == EOPNOTSUPP) - inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; + error = in_pcboutput_txrtlmt_locked(inp, ifp, mb, max_pacing_rate); + if (did_upgrade) INP_DOWNGRADE(inp); } @@ -3406,16 +3463,11 @@ in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb) void in_pcboutput_eagain(struct inpcb *inp) { - struct socket *socket; bool did_upgrade; if (inp == NULL) return; - socket = inp->inp_socket; - if (socket == NULL) - return; - if (inp->inp_snd_tag == NULL) return; @@ -3442,4 +3494,16 @@ in_pcboutput_eagain(struct inpcb *inp) if (did_upgrade) INP_DOWNGRADE(inp); } + +#ifdef INET +static void +rl_init(void *st) +{ + rate_limit_active = counter_u64_alloc(M_WAITOK); + rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK); + rate_limit_set_ok = counter_u64_alloc(M_WAITOK); +} + +SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL); +#endif #endif /* RATELIMIT */ |