diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-08-22 14:59:50 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-09-21 10:29:41 +0200 |
commit | 3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch) | |
tree | cd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys/netinet6 | |
parent | ck: Define CK_MD_PPC32_LWSYNC if available (diff) | |
download | rtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2 |
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319.
Update #3472.
Diffstat (limited to 'freebsd/sys/netinet6')
27 files changed, 1066 insertions, 569 deletions
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c index 70103fe3..0b0c7b91 100644 --- a/freebsd/sys/netinet6/frag6.c +++ b/freebsd/sys/netinet6/frag6.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/hash.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/domain.h> @@ -51,6 +52,8 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/syslog.h> +#include <machine/atomic.h> + #include <net/if.h> #include <net/if_var.h> #include <net/netisr.h> @@ -67,58 +70,110 @@ __FBSDID("$FreeBSD$"); #include <security/mac/mac_framework.h> -static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); -static void frag6_deq(struct ip6asfrag *); -static void frag6_insque(struct ip6q *, struct ip6q *); -static void frag6_remque(struct ip6q *); -static void frag6_freef(struct ip6q *); - -static struct mtx ip6qlock; /* - * These fields all protected by ip6qlock. + * Reassembly headers are stored in hash buckets. */ -static VNET_DEFINE(u_int, frag6_nfragpackets); -static VNET_DEFINE(u_int, frag6_nfrags); -static VNET_DEFINE(struct ip6q, ip6q); /* ip6 reassemble queue */ +#define IP6REASS_NHASH_LOG2 10 +#define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2) +#define IP6REASS_HMASK (IP6REASS_NHASH - 1) + +static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *, + uint32_t bucket __unused); +static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused); +static void frag6_insque_head(struct ip6q *, struct ip6q *, + uint32_t bucket); +static void frag6_remque(struct ip6q *, uint32_t bucket); +static void frag6_freef(struct ip6q *, uint32_t bucket); + +struct ip6qbucket { + struct ip6q ip6q; + struct mtx lock; + int count; +}; + +VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets); +volatile u_int frag6_nfrags = 0; +VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]); +VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed); #define V_frag6_nfragpackets VNET(frag6_nfragpackets) -#define V_frag6_nfrags VNET(frag6_nfrags) #define V_ip6q VNET(ip6q) +#define V_ip6q_hashseed VNET(ip6q_hashseed) -#define IP6Q_LOCK_INIT() mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF); -#define IP6Q_LOCK() mtx_lock(&ip6qlock) -#define IP6Q_TRYLOCK() mtx_trylock(&ip6qlock) -#define IP6Q_LOCK_ASSERT() mtx_assert(&ip6qlock, MA_OWNED) -#define IP6Q_UNLOCK() mtx_unlock(&ip6qlock) +#define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock) +#define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock) +#define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED) +#define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock) +#define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q) static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); /* + * By default, limit the number of IP6 fragments across all reassembly + * queues to 1/32 of the total number of mbuf clusters. + * + * Limit the total number of reassembly queues per VNET to the + * IP6 fragment limit, but ensure the limit will not allow any bucket + * to grow above 100 items. (The bucket limit is + * IP_MAXFRAGPACKETS / (IPREASS_NHASH / 2), so the 50 is the correct + * multiplier to reach a 100-item limit.) + * The 100-item limit was chosen as brief testing seems to show that + * this produces "reasonable" performance on some subset of systems + * under DoS attack. + */ +#define IP6_MAXFRAGS (nmbclusters / 32) +#define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50)) + +/* * Initialise reassembly queue and fragment identifier. */ +void +frag6_set_bucketsize() +{ + int i; + + if ((i = V_ip6_maxfragpackets) > 0) + V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1); +} + static void frag6_change(void *tag) { + VNET_ITERATOR_DECL(vnet_iter); - V_ip6_maxfragpackets = nmbclusters / 4; - V_ip6_maxfrags = nmbclusters / 4; + ip6_maxfrags = IP6_MAXFRAGS; + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; + frag6_set_bucketsize(); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); } void frag6_init(void) { - - V_ip6_maxfragpackets = nmbclusters / 4; - V_ip6_maxfrags = nmbclusters / 4; - V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q; - + struct ip6q *q6; + int i; + + V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; + frag6_set_bucketsize(); + for (i = 0; i < IP6REASS_NHASH; i++) { + q6 = IP6Q_HEAD(i); + q6->ip6q_next = q6->ip6q_prev = q6; + mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF); + V_ip6q[i].count = 0; + } + V_ip6q_hashseed = arc4random(); + V_ip6_maxfragsperpacket = 64; if (!IS_DEFAULT_VNET(curvnet)) return; + ip6_maxfrags = IP6_MAXFRAGS; EVENTHANDLER_REGISTER(nmbclusters_change, frag6_change, NULL, EVENTHANDLER_PRI_ANY); - - IP6Q_LOCK_INIT(); } /* @@ -159,12 +214,15 @@ frag6_input(struct mbuf **mp, int *offp, int proto) struct mbuf *m = *mp, *t; struct ip6_hdr *ip6; struct ip6_frag *ip6f; - struct ip6q *q6; + struct ip6q *head, *q6; struct ip6asfrag *af6, *ip6af, *af6dwn; struct in6_ifaddr *ia; int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ + uint32_t hashkey[(sizeof(struct in6_addr) * 2 + + sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)]; + uint32_t hash, *hashkeyp; struct ifnet *dstifp; u_int8_t ecn, ecn0; #ifdef RSS @@ -233,19 +291,38 @@ frag6_input(struct mbuf **mp, int *offp, int proto) return (ip6f->ip6f_nxt); } - IP6Q_LOCK(); + /* Get fragment length and discard 0-byte fragments. */ + frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; + if (frgpartlen == 0) { + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offsetof(struct ip6_hdr, ip6_plen)); + in6_ifstat_inc(dstifp, ifs6_reass_fail); + IP6STAT_INC(ip6s_fragdropped); + return IPPROTO_DONE; + } + + hashkeyp = hashkey; + memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr)); + hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); + memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr)); + hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); + *hashkeyp = ip6f->ip6f_ident; + hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed); + hash &= IP6REASS_HMASK; + head = IP6Q_HEAD(hash); + IP6Q_LOCK(hash); /* * Enforce upper bound on number of fragments. * If maxfrag is 0, never accept fragments. * If maxfrag is -1, accept all fragments without limitation. */ - if (V_ip6_maxfrags < 0) + if (ip6_maxfrags < 0) ; - else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags) + else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags) goto dropfrag; - for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next) + for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next) if (ip6f->ip6f_ident == q6->ip6q_ident && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst) @@ -255,7 +332,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) ) break; - if (q6 == &V_ip6q) { + if (q6 == head) { /* * the first fragment to arrive, create a reassembly queue. */ @@ -270,9 +347,11 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ if (V_ip6_maxfragpackets < 0) ; - else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets) + else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize || + atomic_load_int(&V_frag6_nfragpackets) >= + (u_int)V_ip6_maxfragpackets) goto dropfrag; - V_frag6_nfragpackets++; + atomic_add_int(&V_frag6_nfragpackets, 1); q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE, M_NOWAIT); if (q6 == NULL) @@ -285,7 +364,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) } mac_ip6q_create(m, q6); #endif - frag6_insque(q6, &V_ip6q); + frag6_insque_head(q6, head, hash); /* ip6q_nxt will be filled afterwards, from 1st fragment */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; @@ -319,21 +398,20 @@ frag6_input(struct mbuf **mp, int *offp, int proto) * in size. * If it would exceed, discard the fragment and return an ICMP error. */ - frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. */ if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); - IP6Q_UNLOCK(); + IP6Q_UNLOCK(hash); return (IPPROTO_DONE); } } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); - IP6Q_UNLOCK(); + IP6Q_UNLOCK(hash); return (IPPROTO_DONE); } /* @@ -352,7 +430,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) int erroff = af6->ip6af_offset; /* dequeue the fragment. */ - frag6_deq(af6); + frag6_deq(af6, hash); free(af6, M_FTABLE); /* adjust pointer. */ @@ -450,7 +528,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) } af6 = af6->ip6af_down; m_freem(IP6_REASS_MBUF(af6->ip6af_up)); - frag6_deq(af6->ip6af_up); + frag6_deq(af6->ip6af_up, hash); } #else /* @@ -499,29 +577,38 @@ insert: /* * Stick new segment in its place; * check for complete reassembly. + * If not complete, check fragment limit. * Move to front of packet queue, as we are * the most recently active fragmented packet. */ - frag6_enq(ip6af, af6->ip6af_up); - V_frag6_nfrags++; + frag6_enq(ip6af, af6->ip6af_up, hash); + atomic_add_int(&frag6_nfrags, 1); q6->ip6q_nfrag++; #if 0 /* xxx */ - if (q6 != V_ip6q.ip6q_next) { - frag6_remque(q6); - frag6_insque(q6, &V_ip6q); + if (q6 != head->ip6q_next) { + frag6_remque(q6, hash); + frag6_insque_head(q6, head, hash); } #endif next = 0; for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) { if (af6->ip6af_off != next) { - IP6Q_UNLOCK(); + if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { + IP6STAT_INC(ip6s_fragdropped); + frag6_freef(q6, hash); + } + IP6Q_UNLOCK(hash); return IPPROTO_DONE; } next += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { - IP6Q_UNLOCK(); + if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { + IP6STAT_INC(ip6s_fragdropped); + frag6_freef(q6, hash); + } + IP6Q_UNLOCK(hash); return IPPROTO_DONE; } @@ -531,7 +618,7 @@ insert: ip6af = q6->ip6q_down; t = m = IP6_REASS_MBUF(ip6af); af6 = ip6af->ip6af_down; - frag6_deq(ip6af); + frag6_deq(ip6af, hash); while (af6 != (struct ip6asfrag *)q6) { m->m_pkthdr.csum_flags &= IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags; @@ -539,7 +626,7 @@ insert: IP6_REASS_MBUF(af6)->m_pkthdr.csum_data; af6dwn = af6->ip6af_down; - frag6_deq(af6); + frag6_deq(af6, hash); while (t->m_next) t = t->m_next; m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset); @@ -566,13 +653,13 @@ insert: #endif if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) { - frag6_remque(q6); - V_frag6_nfrags -= q6->ip6q_nfrag; + frag6_remque(q6, hash); + atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); #ifdef MAC mac_ip6q_destroy(q6); #endif free(q6, M_FTABLE); - V_frag6_nfragpackets--; + atomic_subtract_int(&V_frag6_nfragpackets, 1); goto dropfrag; } @@ -583,14 +670,14 @@ insert: m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t), (caddr_t)&nxt); - frag6_remque(q6); - V_frag6_nfrags -= q6->ip6q_nfrag; + frag6_remque(q6, hash); + atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); #ifdef MAC mac_ip6q_reassemble(q6, m); mac_ip6q_destroy(q6); #endif free(q6, M_FTABLE); - V_frag6_nfragpackets--; + atomic_subtract_int(&V_frag6_nfragpackets, 1); if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ int plen = 0; @@ -612,7 +699,7 @@ insert: m_tag_prepend(m, mtag); #endif - IP6Q_UNLOCK(); + IP6Q_UNLOCK(hash); IP6STAT_INC(ip6s_reassembled); in6_ifstat_inc(dstifp, ifs6_reass_ok); @@ -634,7 +721,7 @@ insert: return nxt; dropfrag: - IP6Q_UNLOCK(); + IP6Q_UNLOCK(hash); in6_ifstat_inc(dstifp, ifs6_reass_fail); IP6STAT_INC(ip6s_fragdropped); m_freem(m); @@ -645,19 +732,19 @@ insert: * Free a fragment reassembly header and all * associated datagrams. */ -void -frag6_freef(struct ip6q *q6) +static void +frag6_freef(struct ip6q *q6, uint32_t bucket) { struct ip6asfrag *af6, *down6; - IP6Q_LOCK_ASSERT(); + IP6Q_LOCK_ASSERT(bucket); for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = down6) { struct mbuf *m = IP6_REASS_MBUF(af6); down6 = af6->ip6af_down; - frag6_deq(af6); + frag6_deq(af6, bucket); /* * Return ICMP time exceeded error for the 1st fragment. @@ -679,24 +766,25 @@ frag6_freef(struct ip6q *q6) m_freem(m); free(af6, M_FTABLE); } - frag6_remque(q6); - V_frag6_nfrags -= q6->ip6q_nfrag; + frag6_remque(q6, bucket); + atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); #ifdef MAC mac_ip6q_destroy(q6); #endif free(q6, M_FTABLE); - V_frag6_nfragpackets--; + atomic_subtract_int(&V_frag6_nfragpackets, 1); } /* * Put an ip fragment on a reassembly chain. * Like insque, but pointers in middle of structure. */ -void -frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) +static void +frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6, + uint32_t bucket __unused) { - IP6Q_LOCK_ASSERT(); + IP6Q_LOCK_ASSERT(bucket); af6->ip6af_up = up6; af6->ip6af_down = up6->ip6af_down; @@ -707,36 +795,41 @@ frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) /* * To frag6_enq as remque is to insque. */ -void -frag6_deq(struct ip6asfrag *af6) +static void +frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused) { - IP6Q_LOCK_ASSERT(); + IP6Q_LOCK_ASSERT(bucket); af6->ip6af_up->ip6af_down = af6->ip6af_down; af6->ip6af_down->ip6af_up = af6->ip6af_up; } -void -frag6_insque(struct ip6q *new, struct ip6q *old) +static void +frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket) { - IP6Q_LOCK_ASSERT(); + IP6Q_LOCK_ASSERT(bucket); + KASSERT(IP6Q_HEAD(bucket) == old, + ("%s: attempt to insert at head of wrong bucket" + " (bucket=%u, old=%p)", __func__, bucket, old)); new->ip6q_prev = old; new->ip6q_next = old->ip6q_next; old->ip6q_next->ip6q_prev= new; old->ip6q_next = new; + V_ip6q[bucket].count++; } -void -frag6_remque(struct ip6q *p6) +static void +frag6_remque(struct ip6q *p6, uint32_t bucket) { - IP6Q_LOCK_ASSERT(); + IP6Q_LOCK_ASSERT(bucket); p6->ip6q_prev->ip6q_next = p6->ip6q_next; p6->ip6q_next->ip6q_prev = p6->ip6q_prev; + V_ip6q[bucket].count--; } /* @@ -748,37 +841,72 @@ void frag6_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); - struct ip6q *q6; + struct ip6q *head, *q6; + int i; VNET_LIST_RLOCK_NOSLEEP(); - IP6Q_LOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - q6 = V_ip6q.ip6q_next; - if (q6) - while (q6 != &V_ip6q) { + for (i = 0; i < IP6REASS_NHASH; i++) { + IP6Q_LOCK(i); + head = IP6Q_HEAD(i); + q6 = head->ip6q_next; + if (q6 == NULL) { + /* + * XXXJTL: This should never happen. This + * should turn into an assertion. + */ + IP6Q_UNLOCK(i); + continue; + } + while (q6 != head) { --q6->ip6q_ttl; q6 = q6->ip6q_next; if (q6->ip6q_prev->ip6q_ttl == 0) { IP6STAT_INC(ip6s_fragtimeout); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(q6->ip6q_prev); + frag6_freef(q6->ip6q_prev, i); } } + /* + * If we are over the maximum number of fragments + * (due to the limit being lowered), drain off + * enough to get down to the new limit. + * Note that we drain all reassembly queues if + * maxfragpackets is 0 (fragmentation is disabled), + * and don't enforce a limit when maxfragpackets + * is negative. + */ + while ((V_ip6_maxfragpackets == 0 || + (V_ip6_maxfragpackets > 0 && + V_ip6q[i].count > V_ip6_maxfragbucketsize)) && + head->ip6q_prev != head) { + IP6STAT_INC(ip6s_fragoverflow); + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(head->ip6q_prev, i); + } + IP6Q_UNLOCK(i); + } /* - * If we are over the maximum number of fragments - * (due to the limit being lowered), drain off - * enough to get down to the new limit. + * If we are still over the maximum number of fragmented + * packets, drain off enough to get down to the new limit. */ - while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets && - V_ip6q.ip6q_prev) { - IP6STAT_INC(ip6s_fragoverflow); - /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(V_ip6q.ip6q_prev); + i = 0; + while (V_ip6_maxfragpackets >= 0 && + atomic_load_int(&V_frag6_nfragpackets) > + (u_int)V_ip6_maxfragpackets) { + IP6Q_LOCK(i); + head = IP6Q_HEAD(i); + if (head->ip6q_prev != head) { + IP6STAT_INC(ip6s_fragoverflow); + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(head->ip6q_prev, i); + } + IP6Q_UNLOCK(i); + i = (i + 1) % IP6REASS_NHASH; } CURVNET_RESTORE(); } - IP6Q_UNLOCK(); VNET_LIST_RUNLOCK_NOSLEEP(); } @@ -789,22 +917,25 @@ void frag6_drain(void) { VNET_ITERATOR_DECL(vnet_iter); + struct ip6q *head; + int i; VNET_LIST_RLOCK_NOSLEEP(); - if (IP6Q_TRYLOCK() == 0) { - VNET_LIST_RUNLOCK_NOSLEEP(); - return; - } VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - while (V_ip6q.ip6q_next != &V_ip6q) { - IP6STAT_INC(ip6s_fragdropped); - /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(V_ip6q.ip6q_next); + for (i = 0; i < IP6REASS_NHASH; i++) { + if (IP6Q_TRYLOCK(i) == 0) + continue; + head = IP6Q_HEAD(i); + while (head->ip6q_next != head) { + IP6STAT_INC(ip6s_fragdropped); + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(head->ip6q_next, i); + } + IP6Q_UNLOCK(i); } CURVNET_RESTORE(); } - IP6Q_UNLOCK(); VNET_LIST_RUNLOCK_NOSLEEP(); } diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c index 4d06ca16..2b080169 100644 --- a/freebsd/sys/netinet6/icmp6.c +++ b/freebsd/sys/netinet6/icmp6.c @@ -126,8 +126,8 @@ VNET_PCPUSTAT_SYSUNINIT(icmp6stat); VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); -static VNET_DEFINE(int, icmp6errpps_count) = 0; -static VNET_DEFINE(struct timeval, icmp6errppslim_last); +VNET_DEFINE_STATIC(int, icmp6errpps_count) = 0; +VNET_DEFINE_STATIC(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) @@ -1910,6 +1910,7 @@ icmp6_rip6_input(struct mbuf **mp, int off) struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; + struct epoch_tracker et; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST @@ -1936,8 +1937,8 @@ icmp6_rip6_input(struct mbuf **mp, int off) return (IPPROTO_DONE); } - INP_INFO_RLOCK(&V_ripcbinfo); - LIST_FOREACH(in6p, &V_ripcb, inp_list) { + INP_INFO_RLOCK_ET(&V_ripcbinfo, et); + CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) @@ -2014,7 +2015,7 @@ icmp6_rip6_input(struct mbuf **mp, int off) } last = in6p; } - INP_INFO_RUNLOCK(&V_ripcbinfo); + INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c index 3ed80c9c..c415cf78 100644 --- a/freebsd/sys/netinet6/in6.c +++ b/freebsd/sys/netinet6/in6.c @@ -2139,9 +2139,6 @@ in6_lltable_free_entry(struct lltable *llt, struct llentry *lle) lltable_unlink_entry(llt, lle); } - if (callout_stop(&lle->lle_timer) > 0) - LLE_REMREF(lle); - llentry_free(lle); } diff --git a/freebsd/sys/netinet6/in6_fib.c b/freebsd/sys/netinet6/in6_fib.c index cf79797d..e5e8a161 100644 --- a/freebsd/sys/netinet6/in6_fib.c +++ b/freebsd/sys/netinet6/in6_fib.c @@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> -#include <sys/rwlock.h> +#include <sys/rmlock.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> @@ -173,6 +173,7 @@ int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; @@ -222,6 +223,7 @@ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c index 160a0929..66b4c63a 100644 --- a/freebsd/sys/netinet6/in6_gif.c +++ b/freebsd/sys/netinet6/in6_gif.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,20 +41,19 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet6.h> #include <sys/param.h> -#include <sys/lock.h> -#include <sys/rmlock.h> #include <sys/systm.h> +#include <sys/jail.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/mbuf.h> #include <sys/errno.h> #include <sys/kernel.h> -#include <sys/queue.h> #include <sys/syslog.h> #include <sys/sysctl.h> -#include <sys/protosw.h> #include <sys/malloc.h> +#include <sys/proc.h> +#include <net/ethernet.h> #include <net/if.h> #include <net/if_var.h> #include <net/route.h> @@ -63,52 +63,189 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_systm.h> #ifdef INET #include <netinet/ip.h> +#include <netinet/ip_ecn.h> #endif #include <netinet/ip_encap.h> -#ifdef INET6 #include <netinet/ip6.h> #include <netinet6/ip6_var.h> #include <netinet6/in6_var.h> -#endif -#include <netinet/ip_ecn.h> -#ifdef INET6 +#include <netinet6/scope6_var.h> #include <netinet6/ip6_ecn.h> #include <netinet6/in6_fib.h> -#endif #include <net/if_gif.h> #define GIF_HLIM 30 -static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM; +VNET_DEFINE_STATIC(int, ip6_gif_hlim) = GIF_HLIM; #define V_ip6_gif_hlim VNET(ip6_gif_hlim) SYSCTL_DECL(_net_inet6_ip6); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(ip6_gif_hlim), 0, ""); - -static int in6_gif_input(struct mbuf **, int *, int); - -extern struct domain inet6domain; -static struct protosw in6_gif_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = 0, /* IPPROTO_IPV[46] */ - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = in6_gif_input, - .pr_output = rip6_output, - .pr_ctloutput = rip6_ctloutput, - .pr_usrreqs = &rip6_usrreqs -}; +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0, + "Default hop limit for encapsulated packets"); + +/* + * We keep interfaces in a hash table using src+dst as key. + * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. + */ +VNET_DEFINE_STATIC(struct gif_list *, ipv6_hashtbl) = NULL; +VNET_DEFINE_STATIC(struct gif_list, ipv6_list) = CK_LIST_HEAD_INITIALIZER(); +#define V_ipv6_hashtbl VNET(ipv6_hashtbl) +#define V_ipv6_list VNET(ipv6_list) + +#define GIF_HASH(src, dst) (V_ipv6_hashtbl[\ + in6_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) +#define GIF_HASH_SC(sc) GIF_HASH(&(sc)->gif_ip6hdr->ip6_src,\ + &(sc)->gif_ip6hdr->ip6_dst) +static uint32_t +in6_gif_hashval(const struct in6_addr *src, const struct in6_addr *dst) +{ + uint32_t ret; + + ret = fnv_32_buf(src, sizeof(*src), FNV1_32_INIT); + return (fnv_32_buf(dst, sizeof(*dst), ret)); +} + +static int +in6_gif_checkdup(const struct gif_softc *sc, const struct in6_addr *src, + const struct in6_addr *dst) +{ + struct gif_softc *tmp; + + if (sc->gif_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, src) && + IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, dst)) + return (EEXIST); + + CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) { + if (tmp == sc) + continue; + if (IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_src, src) && + IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_dst, dst)) + return (EADDRNOTAVAIL); + } + return (0); +} + +static void +in6_gif_attach(struct gif_softc *sc) +{ + + if (sc->gif_options & GIF_IGNORE_SOURCE) + CK_LIST_INSERT_HEAD(&V_ipv6_list, sc, chain); + else + CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); +} + +int +in6_gif_setopts(struct gif_softc *sc, u_int options) +{ + + /* NOTE: we are protected with gif_ioctl_sx lock */ + MPASS(sc->gif_family == AF_INET6); + MPASS(sc->gif_options != options); + + if ((options & GIF_IGNORE_SOURCE) != + (sc->gif_options & GIF_IGNORE_SOURCE)) { + CK_LIST_REMOVE(sc, chain); + sc->gif_options = options; + in6_gif_attach(sc); + } + return (0); +} + +int +in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) +{ + struct in6_ifreq *ifr = (struct in6_ifreq *)data; + struct sockaddr_in6 *dst, *src; + struct ip6_hdr *ip6; + int error; + + /* NOTE: we are protected with gif_ioctl_sx lock */ + error = EINVAL; + switch (cmd) { + case SIOCSIFPHYADDR_IN6: + src = &((struct in6_aliasreq *)data)->ifra_addr; + dst = &((struct in6_aliasreq *)data)->ifra_dstaddr; + + /* sanity checks */ + if (src->sin6_family != dst->sin6_family || + src->sin6_family != AF_INET6 || + src->sin6_len != dst->sin6_len || + src->sin6_len != sizeof(*src)) + break; + if (IN6_IS_ADDR_UNSPECIFIED(&src->sin6_addr) || + IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) { + error = EADDRNOTAVAIL; + break; + } + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. + */ + if ((error = sa6_embedscope(src, 0)) != 0 || + (error = sa6_embedscope(dst, 0)) != 0) + break; + + if (V_ipv6_hashtbl == NULL) + V_ipv6_hashtbl = gif_hashinit(); + error = in6_gif_checkdup(sc, &src->sin6_addr, + &dst->sin6_addr); + if (error == EADDRNOTAVAIL) + break; + if (error == EEXIST) { + /* Addresses are the same. Just return. */ + error = 0; + break; + } + ip6 = malloc(sizeof(*ip6), M_GIF, M_WAITOK | M_ZERO); + ip6->ip6_src = src->sin6_addr; + ip6->ip6_dst = dst->sin6_addr; + ip6->ip6_vfc = IPV6_VERSION; + if (sc->gif_family != 0) { + /* Detach existing tunnel first */ + CK_LIST_REMOVE(sc, chain); + GIF_WAIT(); + free(sc->gif_hdr, M_GIF); + /* XXX: should we notify about link state change? */ + } + sc->gif_family = AF_INET6; + sc->gif_ip6hdr = ip6; + in6_gif_attach(sc); + break; + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (sc->gif_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + src = (struct sockaddr_in6 *)&ifr->ifr_addr; + memset(src, 0, sizeof(*src)); + src->sin6_family = AF_INET6; + src->sin6_len = sizeof(*src); + src->sin6_addr = (cmd == SIOCGIFPSRCADDR_IN6) ? + sc->gif_ip6hdr->ip6_src: sc->gif_ip6hdr->ip6_dst; + error = prison_if(curthread->td_ucred, (struct sockaddr *)src); + if (error == 0) + error = sa6_recoverscope(src); + if (error != 0) + memset(src, 0, sizeof(*src)); + break; + } + return (error); +} int in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) { - GIF_RLOCK_TRACKER; struct gif_softc *sc = ifp->if_softc; struct ip6_hdr *ip6; int len; /* prepend new IP header */ + MPASS(in_epoch(net_epoch_preempt)); len = sizeof(struct ip6_hdr); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) @@ -128,14 +265,8 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) #endif ip6 = mtod(m, struct ip6_hdr *); - GIF_RLOCK(sc); - if (sc->gif_family != AF_INET6) { - m_freem(m); - GIF_RUNLOCK(sc); - return (ENETDOWN); - } + MPASS(sc->gif_family == AF_INET6); bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr)); - GIF_RUNLOCK(sc); ip6->ip6_flow |= htonl((uint32_t)ecn << 20); ip6->ip6_nxt = proto; @@ -149,15 +280,14 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) } static int -in6_gif_input(struct mbuf **mp, int *offp, int proto) +in6_gif_input(struct mbuf *m, int off, int proto, void *arg) { - struct mbuf *m = *mp; + struct gif_softc *sc = arg; struct ifnet *gifp; - struct gif_softc *sc; struct ip6_hdr *ip6; uint8_t ecn; - sc = encap_getarg(m); + MPASS(in_epoch(net_epoch_preempt)); if (sc == NULL) { m_freem(m); IP6STAT_INC(ip6s_nogif); @@ -167,7 +297,7 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) if ((gifp->if_flags & IFF_UP) != 0) { ip6 = mtod(m, struct ip6_hdr *); ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff; - m_adj(m, *offp); + m_adj(m, off); gif_input(m, gifp, proto, ecn); } else { m_freem(m); @@ -176,59 +306,126 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } -/* - * we know that we are in IFF_UP, outer address available, and outer family - * matched the physical addr family. see gif_encapcheck(). - */ -int -in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +static int +in6_gif_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip6_hdr *ip6; struct gif_softc *sc; int ret; - /* sanity check done in caller */ - sc = (struct gif_softc *)arg; - GIF_RLOCK_ASSERT(sc); + if (V_ipv6_hashtbl == NULL) + return (0); + MPASS(in_epoch(net_epoch_preempt)); /* - * Check for address match. Note that the check is for an incoming - * packet. We should compare the *source* address in our configuration - * and the *destination* address of the packet, and vice versa. + * NOTE: it is safe to iterate without any locking here, because softc + * can be reclaimed only when we are not within net_epoch_preempt + * section, but ip_encap lookup+input are executed in epoch section. */ ip6 = mtod(m, const struct ip6_hdr *); - if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst)) + ret = 0; + CK_LIST_FOREACH(sc, &GIF_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) { + /* + * This is an inbound packet, its ip6_dst is source address + * in softc. + */ + if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, + &ip6->ip6_dst) && + IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, + &ip6->ip6_src)) { + ret = ENCAP_DRV_LOOKUP; + goto done; + } + } + /* + * No exact match. + * Check the list of interfaces with GIF_IGNORE_SOURCE flag. + */ + CK_LIST_FOREACH(sc, &V_ipv6_list, chain) { + if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, + &ip6->ip6_dst)) { + ret = 128 + 8; /* src + proto */ + goto done; + } + } + return (0); +done: + if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0) return (0); - ret = 128; - if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) { - if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0) - return (0); - } else - ret += 128; - /* ingress filters on outer source */ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { struct nhop6_basic nh6; - /* XXX empty scope id */ - if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0, - &nh6) != 0) + if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, + ntohs(in6_getscope(&ip6->ip6_src)), 0, 0, &nh6) != 0) return (0); if (nh6.nh_ifp != m->m_pkthdr.rcvif) return (0); } + *arg = sc; return (ret); } -int -in6_gif_attach(struct gif_softc *sc) +static struct { + const struct encap_config encap; + const struct encaptab *cookie; +} ipv6_encap_cfg[] = { +#ifdef INET + { + .encap = { + .proto = IPPROTO_IPV4, + .min_length = sizeof(struct ip6_hdr) + + sizeof(struct ip), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in6_gif_lookup, + .input = in6_gif_input + }, + }, +#endif + { + .encap = { + .proto = IPPROTO_IPV6, + .min_length = 2 * sizeof(struct ip6_hdr), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in6_gif_lookup, + .input = in6_gif_input + }, + }, + { + .encap = { + .proto = IPPROTO_ETHERIP, + .min_length = sizeof(struct ip6_hdr) + + sizeof(struct etherip_header) + + sizeof(struct ether_header), + .exact_match = ENCAP_DRV_LOOKUP, + .lookup = in6_gif_lookup, + .input = in6_gif_input + }, + } +}; + +void +in6_gif_init(void) { + int i; - KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL")); - sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck, - (void *)&in6_gif_protosw, sc); - if (sc->gif_ecookie == NULL) - return (EEXIST); - return (0); + if (!IS_DEFAULT_VNET(curvnet)) + return; + for (i = 0; i < nitems(ipv6_encap_cfg); i++) + ipv6_encap_cfg[i].cookie = ip6_encap_attach( + &ipv6_encap_cfg[i].encap, NULL, M_WAITOK); +} + +void +in6_gif_uninit(void) +{ + int i; + + if (IS_DEFAULT_VNET(curvnet)) { + for (i = 0; i < nitems(ipv6_encap_cfg); i++) + ip6_encap_detach(ipv6_encap_cfg[i].cookie); + } + if (V_ipv6_hashtbl != NULL) + gif_hashdestroy(V_ipv6_hashtbl); } diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c index 81182b4e..1cab31d1 100644 --- a/freebsd/sys/netinet6/in6_ifattach.c +++ b/freebsd/sys/netinet6/in6_ifattach.c @@ -759,7 +759,6 @@ _in6_ifdetach(struct ifnet *ifp, int purgeulp) /* * nuke any of IPv6 addresses we have - * XXX: all addresses should be already removed */ CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family != AF_INET6) @@ -874,6 +873,7 @@ in6_purgemaddrs(struct ifnet *ifp) ifma->ifma_protospec == NULL) continue; inm = (struct in6_multi *)ifma->ifma_protospec; + in6m_disconnect(inm); in6m_rele_locked(&purgeinms, inm); if (__predict_false(ifma6_restart)) { ifma6_restart = false; diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c index 32660c89..3824645d 100644 --- a/freebsd/sys/netinet6/in6_mcast.c +++ b/freebsd/sys/netinet6/in6_mcast.c @@ -540,6 +540,8 @@ in6m_release(struct in6_multi *inm) CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma); KASSERT(ifma->ifma_protospec == NULL, ("%s: ifma_protospec != NULL", __func__)); + if (ifp == NULL) + ifp = ifma->ifma_ifp; if (ifp != NULL) { CURVNET_SET(ifp->if_vnet); @@ -564,8 +566,13 @@ static void in6m_init(void) taskqgroup_config_gtask_init(NULL, &free_gtask, in6m_release_task, "in6m release task"); } +#ifdef EARLY_AP_STARTUP SYSINIT(in6m_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, in6m_init, NULL); +#else +SYSINIT(in6m_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_SECOND, + in6m_init, NULL); +#endif void @@ -589,11 +596,20 @@ in6m_disconnect(struct in6_multi *inm) struct ifmultiaddr *ifma, *ll_ifma; ifp = inm->in6m_ifp; + + if (ifp == NULL) + return; + inm->in6m_ifp = NULL; IF_ADDR_WLOCK_ASSERT(ifp); ifma = inm->in6m_ifma; + if (ifma == NULL) + return; if_ref(ifp); - CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); + if (ifma->ifma_flags & IFMA_F_ENQUEUED) { + CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); + ifma->ifma_flags &= ~IFMA_F_ENQUEUED; + } MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); if ((ll_ifma = ifma->ifma_llifma) != NULL) { MPASS(ifma != ll_ifma); @@ -602,7 +618,10 @@ in6m_disconnect(struct in6_multi *inm) MPASS(ll_ifma->ifma_ifp == ifp); if (--ll_ifma->ifma_refcount == 0) { ifma6_restart = true; - CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); + if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { + CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); + ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; + } MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); if_freemulti(ll_ifma); } @@ -629,7 +648,7 @@ in6m_release_deferred(struct in6_multi *inm) IN6_MULTI_LIST_LOCK_ASSERT(); KASSERT(inm->in6m_refcount > 0, ("refcount == %d inm: %p", inm->in6m_refcount, inm)); if (--inm->in6m_refcount == 0) { - in6m_disconnect(inm); + MPASS(inm->in6m_ifp == NULL); SLIST_INIT(&tmp); inm->in6m_ifma->ifma_protospec = NULL; MPASS(inm->in6m_ifma->ifma_llifma == NULL); @@ -1307,6 +1326,7 @@ out_in6m_release: break; } } + in6m_disconnect(inm); in6m_release_deferred(inm); IF_ADDR_RUNLOCK(ifp); } else { @@ -1386,13 +1406,17 @@ in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); CTR1(KTR_MLD, "%s: doing mld downcall", __func__); - error = mld_change_state(inm, 0); + error = 0; + if (ifp) + error = mld_change_state(inm, 0); if (error) CTR1(KTR_MLD, "%s: failed mld downcall", __func__); CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm); if (ifp) IF_ADDR_WLOCK(ifp); + if (inm->in6m_refcount == 1 && inm->in6m_ifp != NULL) + in6m_disconnect(inm); in6m_release_deferred(inm); if (ifp) IF_ADDR_WUNLOCK(ifp); @@ -1626,16 +1650,13 @@ in6p_findmoptions(struct inpcb *inp) */ static void -inp_gcmoptions(epoch_context_t ctx) +inp_gcmoptions(struct ip6_moptions *imo) { - struct ip6_moptions *imo; struct in6_mfilter *imf; struct in6_multi *inm; struct ifnet *ifp; size_t idx, nmships; - imo = __containerof(ctx, struct ip6_moptions, imo6_epoch_ctx); - nmships = imo->im6o_num_memberships; for (idx = 0; idx < nmships; ++idx) { imf = imo->im6o_mfilters ? &imo->im6o_mfilters[idx] : NULL; @@ -1665,7 +1686,7 @@ ip6_freemoptions(struct ip6_moptions *imo) { if (imo == NULL) return; - epoch_call(net_epoch_preempt, &imo->imo6_epoch_ctx, inp_gcmoptions); + inp_gcmoptions(imo); } /* @@ -2159,6 +2180,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) IN6_MULTI_UNLOCK(); goto out_im6o_free; } + in6m_acquire(inm); imo->im6o_membership[idx] = inm; } else { CTR1(KTR_MLD, "%s: merge inm state", __func__); @@ -2193,6 +2215,12 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) out_im6o_free: if (error && is_new) { + inm = imo->im6o_membership[idx]; + if (inm != NULL) { + IN6_MULTI_LIST_LOCK(); + in6m_release_deferred(inm); + IN6_MULTI_LIST_UNLOCK(); + } imo->im6o_membership[idx] = NULL; --imo->im6o_num_memberships; } diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c index 488cca86..a30cb98b 100644 --- a/freebsd/sys/netinet6/in6_pcb.c +++ b/freebsd/sys/netinet6/in6_pcb.c @@ -131,6 +131,12 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, int error, lookupflags = 0; int reuseport = (so->so_options & SO_REUSEPORT); + /* + * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here + * so that we don't have to add to the (already messy) code below. + */ + int reuseport_lb = (so->so_options & SO_REUSEPORT_LB); + INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); @@ -138,7 +144,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return (EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) lookupflags = INPLOOKUP_WILDCARD; if (nam == NULL) { if ((error = prison_local_ip6(cred, &inp->in6p_laddr, @@ -172,6 +178,13 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) reuseport = SO_REUSEADDR|SO_REUSEPORT; + /* + * XXX: How to deal with SO_REUSEPORT_LB here? + * Treat same as SO_REUSEPORT for now. + */ + if ((so->so_options & + (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0) + reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ifa; @@ -221,7 +234,8 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || - (t->inp_flags2 & INP_REUSEPORT) == 0) && + (t->inp_flags2 & INP_REUSEPORT) || + (t->inp_flags2 & INP_REUSEPORT_LB) == 0) && #ifndef __rtems__ (inp->inp_cred->cr_uid != t->inp_cred->cr_uid)) @@ -279,9 +293,11 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, */ tw = intotw(t); if (tw == NULL || - (reuseport & tw->tw_so_options) == 0) + ((reuseport & tw->tw_so_options) == 0 && + (reuseport_lb & tw->tw_so_options) == 0)) return (EADDRINUSE); - } else if (t && (reuseport & inp_so_options(t)) == 0) { + } else if (t && (reuseport & inp_so_options(t)) == 0 && + (reuseport_lb & inp_so_options(t)) == 0) { return (EADDRINUSE); } #ifdef INET @@ -291,22 +307,25 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, - lport, lookupflags, cred); + lport, lookupflags, cred); if (t && t->inp_flags & INP_TIMEWAIT) { tw = intotw(t); if (tw == NULL) return (EADDRINUSE); if ((reuseport & tw->tw_so_options) == 0 + && (reuseport_lb & tw->tw_so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != - INADDR_ANY || ((inp->inp_vflag & - INP_IPV6PROTO) == - (t->inp_vflag & INP_IPV6PROTO)))) + INADDR_ANY || ((inp->inp_vflag & + INP_IPV6PROTO) == + (t->inp_vflag & INP_IPV6PROTO)))) return (EADDRINUSE); } else if (t && (reuseport & inp_so_options(t)) == 0 && + (reuseport_lb & inp_so_options(t)) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || - (t->inp_vflag & INP_IPV6PROTO) != 0)) + (t->inp_vflag & INP_IPV6PROTO) != 0)) { return (EADDRINUSE); + } } #endif } @@ -644,7 +663,7 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, } errno = inet6ctlerrmap[cmd]; INP_INFO_WLOCK(pcbinfo); - LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { + CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { INP_WLOCK(inp); if ((inp->inp_vflag & INP_IPV6) == 0) { INP_WUNLOCK(inp); @@ -721,7 +740,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -751,7 +770,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, */ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, pcbinfo->ipi_porthashmask)]; - LIST_FOREACH(phd, porthash, phd_hash) { + CK_LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } @@ -760,7 +779,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, * Port is in use by one or more PCBs. Look for best * fit. */ - LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { + CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if (cred != NULL && !prison_equal_ip6(cred->cr_prison, @@ -802,7 +821,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) int i, gap; INP_INFO_WLOCK(pcbinfo); - LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { + CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(in6p); im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { @@ -841,16 +860,10 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) * (by a redirect), time to try a default gateway again. */ void -in6_losing(struct inpcb *in6p) +in6_losing(struct inpcb *inp) { - if (in6p->inp_route6.ro_rt) { - RTFREE(in6p->inp_route6.ro_rt); - in6p->inp_route6.ro_rt = (struct rtentry *)NULL; - } - if (in6p->inp_route.ro_lle) - LLE_FREE(in6p->inp_route.ro_lle); /* zeros ro_lle */ - return; + RO_INVALIDATE_CACHE(&inp->inp_route6); } /* @@ -858,18 +871,67 @@ in6_losing(struct inpcb *in6p) * and allocate a (hopefully) better one. */ struct inpcb * -in6_rtchange(struct inpcb *inp, int errno) +in6_rtchange(struct inpcb *inp, int errno __unused) { - if (inp->inp_route6.ro_rt) { - RTFREE(inp->inp_route6.ro_rt); - inp->inp_route6.ro_rt = (struct rtentry *)NULL; - } - if (inp->inp_route.ro_lle) - LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */ + RO_INVALIDATE_CACHE(&inp->inp_route6); return inp; } +static struct inpcb * +in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, + const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr, + uint16_t fport, int lookupflags) +{ + struct inpcb *local_wild = NULL; + const struct inpcblbgrouphead *hdr; + struct inpcblbgroup *grp; + struct inpcblbgroup *grp_local_wild; + uint32_t idx; + + INP_HASH_LOCK_ASSERT(pcbinfo); + + hdr = &pcbinfo->ipi_lbgrouphashbase[INP_PCBLBGROUP_PORTHASH( + lport, pcbinfo->ipi_lbgrouphashmask)]; + + /* + * Order of socket selection: + * 1. non-wild. + * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD). + * + * NOTE: + * - Load balanced group does not contain jailed sockets. + * - Load balanced does not contain IPv4 mapped INET6 wild sockets. + */ + CK_LIST_FOREACH(grp, hdr, il_list) { +#ifdef INET + if (!(grp->il_vflag & INP_IPV6)) + continue; +#endif + if (grp->il_lport == lport) { + idx = 0; + int pkt_hash = INP_PCBLBGROUP_PKTHASH( + INP6_PCBHASHKEY(faddr), lport, fport); + + idx = pkt_hash % grp->il_inpcnt; + + if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) { + return (grp->il_inp[idx]); + } else { + if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) && + (lookupflags & INPLOOKUP_WILDCARD)) { + local_wild = grp->il_inp[idx]; + grp_local_wild = grp; + } + } + } + } + if (local_wild != NULL) { + return (local_wild); + } + return (NULL); +} + #ifdef PCBGROUP /* * Lookup PCB in hash list, using pcbgroup tables. @@ -891,7 +953,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, INP_GROUP_LOCK(pcbgroup); head = &pcbgroup->ipg_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)]; - LIST_FOREACH(inp, head, inp_pcbgrouphash) { + CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -932,7 +994,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, */ head = &pcbgroup->ipg_hashbase[ INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)]; - LIST_FOREACH(inp, head, inp_pcbgrouphash) { + CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -994,7 +1056,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, head = &pcbinfo->ipi_wildbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_wildmask)]; - LIST_FOREACH(inp, head, inp_pcbgroup_wild) { + CK_LIST_FOREACH(inp, head, inp_pcbgroup_wild) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -1094,7 +1156,7 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, tmpinp = NULL; head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -1117,6 +1179,18 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, return (tmpinp); /* + * Then look in lb group (for wildcard match). + */ + if (pcbinfo->ipi_lbgrouphashbase != NULL && + (lookupflags & INPLOOKUP_WILDCARD)) { + inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr, + fport, lookupflags); + if (inp != NULL) { + return (inp); + } + } + + /* * Then look for a wildcard match, if requested. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { @@ -1134,7 +1208,7 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, head = &pcbinfo->ipi_hashbase[INP_PCBHASH( INP6_PCBHASHKEY(&in6addr_any), lport, 0, pcbinfo->ipi_hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + CK_LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -1192,40 +1266,35 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, struct ifnet *ifp) { struct inpcb *inp; - bool locked; INP_HASH_RLOCK(pcbinfo); inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); if (inp != NULL) { - if (lookupflags & INPLOOKUP_WLOCKPCB) - locked = INP_TRY_WLOCK(inp); - else if (lookupflags & INPLOOKUP_RLOCKPCB) - locked = INP_TRY_RLOCK(inp); - else - panic("%s: locking bug", __func__); - if (!locked) - in_pcbref(inp); - INP_HASH_RUNLOCK(pcbinfo); - if (!locked) { - if (lookupflags & INPLOOKUP_WLOCKPCB) { - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - return (NULL); - } else { - INP_RLOCK(inp); - if (in_pcbrele_rlocked(inp)) - return (NULL); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_WUNLOCK(inp); + inp = NULL; } - } + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_RUNLOCK(inp); + inp = NULL; + } + } else + panic("%s: locking bug", __func__); #ifdef INVARIANTS - if (lookupflags & INPLOOKUP_WLOCKPCB) - INP_WLOCK_ASSERT(inp); - else - INP_RLOCK_ASSERT(inp); + if (inp != NULL) { + if (lookupflags & INPLOOKUP_WLOCKPCB) + INP_WLOCK_ASSERT(inp); + else + INP_RLOCK_ASSERT(inp); + } #endif - } else - INP_HASH_RUNLOCK(pcbinfo); + } + INP_HASH_RUNLOCK(pcbinfo); return (inp); } diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c index 756ea48b..cf62e60c 100644 --- a/freebsd/sys/netinet6/in6_proto.c +++ b/freebsd/sys/netinet6/in6_proto.c @@ -173,7 +173,7 @@ struct protosw inet6sw[] = { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_TCP, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN, + .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|PR_LISTEN, .pr_input = tcp6_input, .pr_ctlinput = tcp6_ctlinput, .pr_ctloutput = tcp_ctloutput, @@ -387,7 +387,9 @@ VNET_DEFINE(int, ip6_no_radr) = 0; VNET_DEFINE(int, ip6_norbit_raif) = 0; VNET_DEFINE(int, ip6_rfc6204w3) = 0; VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */ -VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */ +int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */ +VNET_DEFINE(int, ip6_maxfragbucketsize);/* initialized in frag6.c:frag6_init() */ +VNET_DEFINE(int, ip6_maxfragsperpacket); /* initialized in frag6.c:frag6_init() */ VNET_DEFINE(int, ip6_log_interval) = 5; VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we * process? */ @@ -474,6 +476,20 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) return (0); } +static int +sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS) +{ + int error, val; + + val = V_ip6_maxfragpackets; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || !req->newptr) + return (error); + V_ip6_maxfragpackets = val; + frag6_set_bucketsize(); + return (0); +} + SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0, "Enable forwarding of IPv6 packets between interfaces"); @@ -486,8 +502,9 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat, ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)"); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0, + sysctl_ip6_maxfragpackets, "I", "Default maximum number of outstanding fragmented IPv6 packets. " "A value of 0 means no fragmented packets will be accepted, while a " "a value of -1 means no limit"); @@ -561,8 +578,16 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0, "Use the default scope zone when none is specified"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0, - "Maximum allowed number of outstanding IPv6 packet fragments"); + CTLFLAG_RW, &ip6_maxfrags, 0, + "Maximum allowed number of outstanding IPv6 packet fragments. " + "A value of 0 means no fragmented packets will be accepted, while a " + "a value of -1 means no limit"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0, + "Maximum number of reassembly queues per hash bucket"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0, + "Maximum allowed number of fragments per packet"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0, "Enable path MTU discovery for multicast packets"); diff --git a/freebsd/sys/netinet6/in6_rmx.c b/freebsd/sys/netinet6/in6_rmx.c index 402d9e87..38c89b9b 100644 --- a/freebsd/sys/netinet6/in6_rmx.c +++ b/freebsd/sys/netinet6/in6_rmx.c @@ -161,7 +161,7 @@ struct mtuex_arg { struct rib_head *rnh; time_t nextstop; }; -static VNET_DEFINE(struct callout, rtq_mtutimer); +VNET_DEFINE_STATIC(struct callout, rtq_mtutimer); #define V_rtq_mtutimer VNET(rtq_mtutimer) static int @@ -211,7 +211,7 @@ in6_mtutimo(void *rock) /* * Initialize our routing tree. */ -static VNET_DEFINE(int, _in6_rt_was_here); +VNET_DEFINE_STATIC(int, _in6_rt_was_here); #define V__in6_rt_was_here VNET(_in6_rt_was_here) int diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c index 92f7df4e..1cb71b88 100644 --- a/freebsd/sys/netinet6/in6_src.c +++ b/freebsd/sys/netinet6/in6_src.c @@ -129,7 +129,7 @@ static struct sx addrsel_sxlock; #define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) #define ADDR_LABEL_NOTAPP (-1) -static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); +VNET_DEFINE_STATIC(struct in6_addrpolicy, defaultaddrpolicy); #define V_defaultaddrpolicy VNET(defaultaddrpolicy) VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; @@ -975,7 +975,7 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) return(error); /* XXX: this is redundant when called from in6_pcbbind */ - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) + if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) lookupflags = INPLOOKUP_WILDCARD; inp->inp_flags |= INP_ANONPORT; @@ -1096,7 +1096,7 @@ struct addrsel_policyent { TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); -static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab); +VNET_DEFINE_STATIC(struct addrsel_policyhead, addrsel_policytab); #define V_addrsel_policytab VNET(addrsel_policytab) static void diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h index 6b4fe1ab..5ed0ae90 100644 --- a/freebsd/sys/netinet6/in6_var.h +++ b/freebsd/sys/netinet6/in6_var.h @@ -784,7 +784,7 @@ in6m_rele_locked(struct in6_multi_head *inmh, struct in6_multi *inm) IN6_MULTI_LIST_LOCK_ASSERT(); if (--inm->in6m_refcount == 0) { - in6m_disconnect(inm); + MPASS(inm->in6m_ifp == NULL); inm->in6m_ifma->ifma_protospec = NULL; MPASS(inm->in6m_ifma->ifma_llifma == NULL); SLIST_INSERT_HEAD(inmh, inm, in6m_nrele); diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c index 77e32da8..25ab624c 100644 --- a/freebsd/sys/netinet6/ip6_input.c +++ b/freebsd/sys/netinet6/ip6_input.c @@ -722,13 +722,15 @@ ip6_input(struct mbuf *m) #endif /* * Try to forward the packet, but if we fail continue. + * ip6_tryforward() does not generate redirects, so fall + * through to normal processing if redirects are required. * ip6_tryforward() does inbound and outbound packet firewall * processing. If firewall has decided that destination becomes * our local address, it sets M_FASTFWD_OURS flag. In this * case skip another inbound firewall processing and update * ip6 pointer. */ - if (V_ip6_forwarding != 0 + if (V_ip6_forwarding != 0 && V_ip6_sendredirects == 0 #if defined(IPSEC) || defined(IPSEC_SUPPORT) && (!IPSEC_ENABLED(ipv6) || IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0) diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c index a4a8cdf9..c1f66028 100644 --- a/freebsd/sys/netinet6/ip6_mroute.c +++ b/freebsd/sys/netinet6/ip6_mroute.c @@ -111,7 +111,6 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/if_var.h> #include <net/if_types.h> -#include <net/raw_cb.h> #include <net/vnet.h> #include <netinet/in.h> @@ -141,19 +140,19 @@ extern int in6_mcast_loop; extern struct domain inet6domain; static const struct encaptab *pim6_encap_cookie; -static const struct protosw in6_pim_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_PIM, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, - .pr_input = pim6_input, - .pr_output = rip6_output, - .pr_ctloutput = rip6_ctloutput, - .pr_usrreqs = &rip6_usrreqs -}; static int pim6_encapcheck(const struct mbuf *, int, int, void *); +static int pim6_input(struct mbuf *, int, int, void *); + +static const struct encap_config ipv6_encap_cfg = { + .proto = IPPROTO_PIM, + .min_length = sizeof(struct ip6_hdr) + PIM_MINLEN, + .exact_match = 8, + .check = pim6_encapcheck, + .input = pim6_input +}; -static VNET_DEFINE(int, ip6_mrouter_ver) = 0; + +VNET_DEFINE_STATIC(int, ip6_mrouter_ver) = 0; #define V_ip6_mrouter_ver VNET(ip6_mrouter_ver) SYSCTL_DECL(_net_inet6); @@ -238,7 +237,7 @@ static struct mtx mif6_mtx; #define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx) #ifdef MRT6DEBUG -static VNET_DEFINE(u_int, mrt6debug) = 0; /* debug level */ +VNET_DEFINE_STATIC(u_int, mrt6debug) = 0; /* debug level */ #define V_mrt6debug VNET(mrt6debug) #define DEBUG_MFC 0x02 #define DEBUG_FORWARD 0x04 @@ -291,7 +290,7 @@ SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW, "PIM Statistics (struct pim6stat, netinet6/pim6_var.h)"); #define PIM6STAT_INC(name) pim6stat.name += 1 -static VNET_DEFINE(int, pim6); +VNET_DEFINE_STATIC(int, pim6); #define V_pim6 VNET(pim6) /* @@ -1697,16 +1696,12 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m) * into the kernel. */ static int -pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +pim6_encapcheck(const struct mbuf *m __unused, int off __unused, + int proto __unused, void *arg __unused) { -#ifdef DIAGNOSTIC KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); -#endif - if (proto != IPPROTO_PIM) - return 0; /* not for us; reject the datagram. */ - - return 64; /* claim the datagram. */ + return (8); /* claim the datagram. */ } /* @@ -1716,20 +1711,18 @@ pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg) * The only message processed is the REGISTER pim message; the pim header * is stripped off, and the inner packet is passed to register_mforward. */ -int -pim6_input(struct mbuf **mp, int *offp, int proto) +static int +pim6_input(struct mbuf *m, int off, int proto, void *arg __unused) { struct pim *pim; /* pointer to a pim struct */ struct ip6_hdr *ip6; int pimlen; - struct mbuf *m = *mp; int minlen; - int off = *offp; PIM6STAT_INC(pim6s_rcv_total); ip6 = mtod(m, struct ip6_hdr *); - pimlen = m->m_pkthdr.len - *offp; + pimlen = m->m_pkthdr.len - off; /* * Validate lengths @@ -1906,8 +1899,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto) * encapsulated ip6 header. */ pim6_input_to_daemon: - rip6_input(&m, offp, proto); - return (IPPROTO_DONE); + return (rip6_input(&m, &off, proto)); } static int @@ -1920,9 +1912,8 @@ ip6_mroute_modevent(module_t mod, int type, void *unused) MFC6_LOCK_INIT(); MIF6_LOCK_INIT(); - pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM, - pim6_encapcheck, - (const struct protosw *)&in6_pim_protosw, NULL); + pim6_encap_cookie = ip6_encap_attach(&ipv6_encap_cfg, + NULL, M_WAITOK); if (pim6_encap_cookie == NULL) { printf("ip6_mroute: unable to attach pim6 encap\n"); MIF6_LOCK_DESTROY(); @@ -1943,7 +1934,7 @@ ip6_mroute_modevent(module_t mod, int type, void *unused) return EINVAL; if (pim6_encap_cookie) { - encap_detach(pim6_encap_cookie); + ip6_encap_detach(pim6_encap_cookie); pim6_encap_cookie = NULL; } X_ip6_mrouter_done(); diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c index 1841829a..d3e530a6 100644 --- a/freebsd/sys/netinet6/ip6_output.c +++ b/freebsd/sys/netinet6/ip6_output.c @@ -201,18 +201,10 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ - if (offset + sizeof(u_short) > m->m_len) { - printf("%s: delayed m_pullup, m->len: %d plen %u off %u " - "csum_flags=%b\n", __func__, m->m_len, plen, offset, - (int)m->m_pkthdr.csum_flags, CSUM_BITS); - /* - * XXX this should not happen, but if it does, the correct - * behavior may be to insert the checksum in the appropriate - * next mbuf in the chain. - */ - return; - } - *(u_short *)(m->m_data + offset) = csum; + if (offset + sizeof(csum) > m->m_len) + m_copyback(m, offset, sizeof(csum), (caddr_t)&csum); + else + *(u_short *)mtodo(m, offset) = csum; } int @@ -814,22 +806,16 @@ again: error = netisr_queue(NETISR_IPV6, m); goto done; } else { - RO_RTFREE(ro); + RO_INVALIDATE_CACHE(ro); needfiblookup = 1; /* Redo the routing table lookup. */ - if (ro->ro_lle) - LLE_FREE(ro->ro_lle); /* zeros ro_lle */ - ro->ro_lle = NULL; } } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); - RO_RTFREE(ro); + RO_INVALIDATE_CACHE(ro); needfiblookup = 1; - if (ro->ro_lle) - LLE_FREE(ro->ro_lle); /* zeros ro_lle */ - ro->ro_lle = NULL; } if (needfiblookup) goto again; @@ -1456,6 +1442,15 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(in6p); error = 0; break; + case SO_REUSEPORT_LB: + INP_WLOCK(in6p); + if ((so->so_options & SO_REUSEPORT_LB) != 0) + in6p->inp_flags2 |= INP_REUSEPORT_LB; + else + in6p->inp_flags2 &= ~INP_REUSEPORT_LB; + INP_WUNLOCK(in6p); + error = 0; + break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; @@ -1637,11 +1632,17 @@ do { \ error = EINVAL; break; } + INP_WLOCK(in6p); + if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(in6p); + return (ECONNRESET); + } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); + INP_WUNLOCK(in6p); break; } @@ -1751,11 +1752,17 @@ do { \ break; { struct ip6_pktopts **optp; + INP_WLOCK(in6p); + if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(in6p); + return (ECONNRESET); + } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); + INP_WUNLOCK(in6p); break; } @@ -1837,10 +1844,16 @@ do { \ break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; + INP_WLOCK(in6p); + if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(in6p); + return (ECONNRESET); + } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? td->td_ucred : NULL, uproto); + INP_WUNLOCK(in6p); break; } #undef OPTSET @@ -2287,7 +2300,9 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, - M_WAITOK); + M_NOWAIT); + if (*pktopt == NULL) + return (ENOBUFS); ip6_initpktopts(*pktopt); } opt = *pktopt; diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h index 74b5f89c..f235572d 100644 --- a/freebsd/sys/netinet6/ip6_var.h +++ b/freebsd/sys/netinet6/ip6_var.h @@ -301,8 +301,10 @@ VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */ VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */ VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly * queue */ -VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly +extern int ip6_maxfrags; /* Maximum fragments in reassembly * queue */ +VNET_DECLARE(int, ip6_maxfragbucketsize); /* Maximum reassembly queues per bucket */ +VNET_DECLARE(int, ip6_maxfragsperpacket); /* Maximum fragments per packet */ VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */ VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA @@ -317,7 +319,8 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ #define V_ip6_mrouter VNET(ip6_mrouter) #define V_ip6_sendredirects VNET(ip6_sendredirects) #define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) -#define V_ip6_maxfrags VNET(ip6_maxfrags) +#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize) +#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket) #define V_ip6_accept_rtadv VNET(ip6_accept_rtadv) #define V_ip6_no_radr VNET(ip6_no_radr) #define V_ip6_norbit_raif VNET(ip6_norbit_raif) @@ -404,6 +407,7 @@ int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int, int route6_input(struct mbuf **, int *, int); +void frag6_set_bucketsize(void); void frag6_init(void); int frag6_input(struct mbuf **, int *, int); void frag6_slowtimo(void); diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c index 0c82d5ff..b00f03ef 100644 --- a/freebsd/sys/netinet6/mld6.c +++ b/freebsd/sys/netinet6/mld6.c @@ -209,11 +209,11 @@ static MALLOC_DEFINE(M_MLD, "mld", "mld state"); /* * VIMAGE-wide globals. */ -static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0}; -static VNET_DEFINE(LIST_HEAD(, mld_ifsoftc), mli_head); -static VNET_DEFINE(int, interface_timers_running6); -static VNET_DEFINE(int, state_change_timers_running6); -static VNET_DEFINE(int, current_state_timers_running6); +VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0}; +VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head); +VNET_DEFINE_STATIC(int, interface_timers_running6); +VNET_DEFINE_STATIC(int, state_change_timers_running6); +VNET_DEFINE_STATIC(int, current_state_timers_running6); #define V_mld_gsrdelay VNET(mld_gsrdelay) #define V_mli_head VNET(mli_head) @@ -559,6 +559,7 @@ mld_ifdetach(struct ifnet *ifp) continue; inm = (struct in6_multi *)ifma->ifma_protospec; if (inm->in6m_state == MLD_LEAVING_MEMBER) { + in6m_disconnect(inm); in6m_rele_locked(&inmh, inm); ifma->ifma_protospec = NULL; } @@ -1485,6 +1486,7 @@ mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm) case MLD_REPORTING_MEMBER: if (report_timer_expired) { inm->in6m_state = MLD_IDLE_MEMBER; + in6m_disconnect(inm); in6m_rele_locked(inmh, inm); } break; @@ -1609,6 +1611,7 @@ mld_v2_process_group_timers(struct in6_multi_head *inmh, if (inm->in6m_state == MLD_LEAVING_MEMBER && inm->in6m_scrv == 0) { inm->in6m_state = MLD_NOT_MEMBER; + in6m_disconnect(inm); in6m_rele_locked(inmh, inm); } } @@ -1681,7 +1684,8 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) IF_ADDR_WLOCK(ifp); restart: CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) { - if (ifma->ifma_addr->sa_family != AF_INET6) + if (ifma->ifma_addr->sa_family != AF_INET6 || + ifma->ifma_protospec == NULL) continue; inm = (struct in6_multi *)ifma->ifma_protospec; switch (inm->in6m_state) { @@ -1698,6 +1702,7 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) * version, we need to release the final * reference held for issuing the INCLUDE {}. */ + in6m_disconnect(inm); in6m_rele_locked(&inmh, inm); ifma->ifma_protospec = NULL; /* FALLTHROUGH */ @@ -1795,8 +1800,11 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type) IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK_ASSERT(); - + ifp = in6m->in6m_ifp; + /* in process of being freed */ + if (ifp == NULL) + return (0); ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); /* ia may be NULL if link-local address is tentative. */ @@ -1894,16 +1902,15 @@ mld_change_state(struct in6_multi *inm, const int delay) */ KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__)); ifp = inm->in6m_ifma->ifma_ifp; - if (ifp != NULL) { - /* - * Sanity check that netinet6's notion of ifp is the - * same as net's. - */ - KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); - } + if (ifp == NULL) + return (0); + /* + * Sanity check that netinet6's notion of ifp is the + * same as net's. + */ + KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); MLD_LOCK(); - mli = MLD_IFINFO(ifp); KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); @@ -1997,9 +2004,9 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli, * group around for the final INCLUDE {} enqueue. */ if (mli->mli_version == MLD_VERSION_2 && - inm->in6m_state == MLD_LEAVING_MEMBER) - in6m_release_deferred(inm); - + inm->in6m_state == MLD_LEAVING_MEMBER) { + inm->in6m_refcount--; + } inm->in6m_state = MLD_REPORTING_MEMBER; switch (mli->mli_version) { diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c index 6a36803f..f065815c 100644 --- a/freebsd/sys/netinet6/nd6.c +++ b/freebsd/sys/netinet6/nd6.c @@ -100,11 +100,11 @@ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage * collection timer */ /* preventing too many loops in ND option parsing */ -static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ +VNET_DEFINE_STATIC(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ -static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved +VNET_DEFINE_STATIC(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) @@ -144,7 +144,7 @@ static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, static int nd6_need_cache(struct ifnet *); -static VNET_DEFINE(struct callout, nd6_slowtimo_ch); +VNET_DEFINE_STATIC(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) VNET_DEFINE(struct callout, nd6_timer_ch); diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c index d4ab38af..49810020 100644 --- a/freebsd/sys/netinet6/nd6_nbr.c +++ b/freebsd/sys/netinet6/nd6_nbr.c @@ -101,7 +101,7 @@ static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *, static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int); -static VNET_DEFINE(int, dad_enhanced) = 1; +VNET_DEFINE_STATIC(int, dad_enhanced) = 1; #define V_dad_enhanced VNET(dad_enhanced) SYSCTL_DECL(_net_inet6_ip6); @@ -109,7 +109,7 @@ SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dad_enhanced), 0, "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD."); -static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to +VNET_DEFINE_STATIC(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ #define V_dad_maxtry VNET(dad_maxtry) @@ -1122,8 +1122,8 @@ struct dadq { bool dad_ondadq; /* on dadq? Protected by DADQ_WLOCK. */ }; -static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); -static VNET_DEFINE(struct rwlock, dad_rwlock); +VNET_DEFINE_STATIC(TAILQ_HEAD(, dadq), dadq); +VNET_DEFINE_STATIC(struct rwlock, dad_rwlock); #define V_dadq VNET(dadq) #define V_dad_rwlock VNET(dad_rwlock) diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c index fab7c7c2..a60e7c66 100644 --- a/freebsd/sys/netinet6/nd6_rtr.c +++ b/freebsd/sys/netinet6/nd6_rtr.c @@ -96,7 +96,7 @@ static int rt6_deleteroute(const struct rtentry *, void *); VNET_DECLARE(int, nd6_recalc_reachtm_interval); #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) -static VNET_DEFINE(struct ifnet *, nd6_defifp); +VNET_DEFINE_STATIC(struct ifnet *, nd6_defifp); VNET_DEFINE(int, nd6_defifindex); #define V_nd6_defifp VNET(nd6_defifp) diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h index 7afe89b9..7288c67e 100644 --- a/freebsd/sys/netinet6/pim6_var.h +++ b/freebsd/sys/netinet6/pim6_var.h @@ -53,10 +53,6 @@ struct pim6stat { uint64_t pim6s_snd_registers; /* sent registers */ }; -#if (defined(KERNEL)) || (defined(_KERNEL)) -int pim6_input(struct mbuf **, int*, int); -#endif /* KERNEL */ - /* * Identifiers for PIM sysctl nodes */ diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c index c05399b3..9c3d7a61 100644 --- a/freebsd/sys/netinet6/raw_ip6.c +++ b/freebsd/sys/netinet6/raw_ip6.c @@ -167,6 +167,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto) struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; + struct epoch_tracker et; RIP6STAT_INC(rip6s_ipackets); @@ -174,8 +175,8 @@ rip6_input(struct mbuf **mp, int *offp, int proto) ifp = m->m_pkthdr.rcvif; - INP_INFO_RLOCK(&V_ripcbinfo); - LIST_FOREACH(in6p, &V_ripcb, inp_list) { + INP_INFO_RLOCK_ET(&V_ripcbinfo, et); + CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { /* XXX inp locking */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; @@ -293,7 +294,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto) } last = in6p; } - INP_INFO_RUNLOCK(&V_ripcbinfo); + INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c index 40218287..64b866dd 100644 --- a/freebsd/sys/netinet6/scope6.c +++ b/freebsd/sys/netinet6/scope6.c @@ -78,7 +78,7 @@ static struct mtx scope6_lock; #define SCOPE6_UNLOCK() mtx_unlock(&scope6_lock) #define SCOPE6_LOCK_ASSERT() mtx_assert(&scope6_lock, MA_OWNED) -static VNET_DEFINE(struct scope6_id, sid_default); +VNET_DEFINE_STATIC(struct scope6_id, sid_default); #define V_sid_default VNET(sid_default) #define SID(ifp) \ @@ -455,7 +455,7 @@ in6_clearscope(struct in6_addr *in6) * Return the scope identifier or zero. */ uint16_t -in6_getscope(struct in6_addr *in6) +in6_getscope(const struct in6_addr *in6) { if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h index a2a9137d..f4e59a19 100644 --- a/freebsd/sys/netinet6/scope6_var.h +++ b/freebsd/sys/netinet6/scope6_var.h @@ -63,7 +63,7 @@ int sa6_checkzone(struct sockaddr_in6 *); int sa6_checkzone_ifp(struct ifnet *, struct sockaddr_in6 *); int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *); int in6_clearscope(struct in6_addr *); -uint16_t in6_getscope(struct in6_addr *); +uint16_t in6_getscope(const struct in6_addr *); uint32_t in6_getscopezone(const struct ifnet *, int); void in6_splitscope(const struct in6_addr *, struct in6_addr *, uint32_t *); struct ifnet* in6_getlinkifnet(uint32_t); diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c index fd963fb3..6a3391ee 100644 --- a/freebsd/sys/netinet6/sctp6_usrreq.c +++ b/freebsd/sys/netinet6/sctp6_usrreq.c @@ -273,6 +273,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d) pktdst->sa_len != sizeof(struct sockaddr_in6)) { return; } + if ((unsigned)cmd >= PRC_NCMDS) { return; } @@ -296,6 +297,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d) if (ip6cp->ip6c_m == NULL) { return; } + /* * Check if we can safely examine the ports and the * verification tag of the SCTP common header. @@ -304,6 +306,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d) (int32_t)(ip6cp->ip6c_off + offsetof(struct sctphdr, checksum))) { return; } + /* Copy out the port numbers and the verification tag. */ memset(&sh, 0, sizeof(sh)); m_copydata(ip6cp->ip6c_m, @@ -529,6 +532,7 @@ sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNU SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace)); if (error) @@ -569,6 +573,7 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p) SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } + if (addr) { switch (addr->sa_family) { #ifdef INET @@ -918,7 +923,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) /* Set the connected flag so we can queue data */ soisconnecting(so); } - stcb->asoc.state = SCTP_STATE_COOKIE_WAIT; + SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT); (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); /* initialize authentication parameters for the assoc */ @@ -1105,6 +1110,7 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } + /* allow v6 addresses precedence */ error = sctp6_getaddr(so, nam); #ifdef INET @@ -1140,6 +1146,7 @@ sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam) SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } + /* allow v6 addresses precedence */ error = sctp6_peeraddr(so, nam); #ifdef INET diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h index e3c4359a..4ad0ca28 100644 --- a/freebsd/sys/netinet6/sctp6_var.h +++ b/freebsd/sys/netinet6/sctp6_var.h @@ -45,11 +45,11 @@ extern struct pr_usrreqs sctp6_usrreqs; int sctp6_input(struct mbuf **, int *, int); int sctp6_input_with_port(struct mbuf **, int *, uint16_t); -int +int sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); void sctp6_ctlinput(int, struct sockaddr *, void *); -void +void sctp6_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, uint8_t, uint8_t, uint32_t); #endif diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c index c2b32eb1..67ed0e35 100644 --- a/freebsd/sys/netinet6/udp6_usrreq.c +++ b/freebsd/sys/netinet6/udp6_usrreq.c @@ -216,6 +216,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) int off = *offp; int cscov_partial; int plen, ulen; + struct epoch_tracker et; struct sockaddr_in6 fromsa[2]; struct m_tag *fwd_tag; uint16_t uh_sum; @@ -302,7 +303,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) struct inpcbhead *pcblist; struct ip6_moptions *imo; - INP_INFO_RLOCK(pcbinfo); + INP_INFO_RLOCK_ET(pcbinfo, et); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address @@ -320,7 +321,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) */ pcblist = udp_get_pcblist(nxt); last = NULL; - LIST_FOREACH(inp, pcblist, inp_list) { + CK_LIST_FOREACH(inp, pcblist, inp_list) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_lport != uh->uh_dport) @@ -357,6 +358,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto) int blocked; INP_RLOCK(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_RUNLOCK(inp); + continue; + } bzero(&mcaddr, sizeof(struct sockaddr_in6)); mcaddr.sin6_len = sizeof(struct sockaddr_in6); @@ -384,10 +389,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto) if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { INP_RLOCK(last); - UDP_PROBE(receive, NULL, last, ip6, - last, uh); - if (udp6_append(last, n, off, fromsa)) - goto inp_lost; + if (__predict_true(last->inp_flags2 & INP_FREED) == 0) { + if (nxt == IPPROTO_UDPLITE) + UDPLITE_PROBE(receive, NULL, last, + ip6, last, uh); + else + UDP_PROBE(receive, NULL, last, + ip6, last, uh); + if (udp6_append(last, n, off, fromsa)) + goto inp_lost; + } INP_RUNLOCK(last); } } @@ -401,7 +412,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) * will never clear these options after setting them. */ if ((last->inp_socket->so_options & - (SO_REUSEPORT|SO_REUSEADDR)) == 0) + (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0) break; } @@ -416,10 +427,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto) goto badheadlocked; } INP_RLOCK(last); - INP_INFO_RUNLOCK(pcbinfo); - UDP_PROBE(receive, NULL, last, ip6, last, uh); - if (udp6_append(last, m, off, fromsa) == 0) + if (__predict_true(last->inp_flags2 & INP_FREED) == 0) { + if (nxt == IPPROTO_UDPLITE) + UDPLITE_PROBE(receive, NULL, last, ip6, last, uh); + else + UDP_PROBE(receive, NULL, last, ip6, last, uh); + if (udp6_append(last, m, off, fromsa) == 0) + INP_RUNLOCK(last); + } else INP_RUNLOCK(last); + INP_INFO_RUNLOCK_ET(pcbinfo, et); inp_lost: return (IPPROTO_DONE); } @@ -475,6 +492,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto) ip6_sprintf(ip6bufs, &ip6->ip6_src), ntohs(uh->uh_sport)); } + if (nxt == IPPROTO_UDPLITE) + UDPLITE_PROBE(receive, NULL, NULL, ip6, NULL, uh); + else + UDP_PROBE(receive, NULL, NULL, ip6, NULL, uh); UDPSTAT_INC(udps_noport); if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); @@ -495,13 +516,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto) return (IPPROTO_DONE); } } - UDP_PROBE(receive, NULL, inp, ip6, inp, uh); + if (nxt == IPPROTO_UDPLITE) + UDPLITE_PROBE(receive, NULL, inp, ip6, inp, uh); + else + UDP_PROBE(receive, NULL, inp, ip6, inp, uh); if (udp6_append(inp, m, off, fromsa) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badheadlocked: - INP_INFO_RUNLOCK(pcbinfo); + INP_INFO_RUNLOCK_ET(pcbinfo, et); badunlocked: if (m) m_freem(m); @@ -657,35 +681,38 @@ udp6_getcred(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); +#define UH_WLOCKED 2 +#define UH_RLOCKED 1 +#define UH_UNLOCKED 0 static int -udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, - struct mbuf *control, struct thread *td) +udp6_output(struct socket *so, int flags_arg, struct mbuf *m, + struct sockaddr *addr6, struct mbuf *control, struct thread *td) { - u_int32_t ulen = m->m_pkthdr.len; - u_int32_t plen = sizeof(struct udphdr) + ulen; + struct inpcbinfo *pcbinfo; + struct inpcb *inp; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; - struct sockaddr_in6 *sin6 = NULL; - int cscov_partial = 0; - int scope_ambiguous = 0; - u_short fport; - int error = 0; - uint8_t nxt; - uint16_t cscov = 0; struct ip6_pktopts *optp, opt; - int af = AF_INET6, hlen = sizeof(struct ip6_hdr); - int flags; - struct sockaddr_in6 tmp; + struct sockaddr_in6 *sin6, tmp; + struct epoch_tracker et; + int cscov_partial, error, flags, hlen, scope_ambiguous; + u_int32_t ulen, plen; + uint16_t cscov; + u_short fport; + uint8_t nxt, unlock_udbinfo; - INP_WLOCK_ASSERT(inp); - INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); + /* addr6 has been validated in udp6_send(). */ + sin6 = (struct sockaddr_in6 *)addr6; - if (addr6) { - /* addr6 has been validated in udp6_send(). */ - sin6 = (struct sockaddr_in6 *)addr6; + /* + * In contrast to to IPv4 we do not validate the max. packet length + * here due to IPv6 Jumbograms (RFC2675). + */ - /* protect *sin6 from overwrites */ + scope_ambiguous = 0; + if (sin6) { + /* Protect *addr6 from overwrites. */ tmp = *sin6; sin6 = &tmp; @@ -699,22 +726,86 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; - if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) + if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) { + if (control) + m_freem(control); + m_freem(m); return (error); + } } + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); + INP_RLOCK(inp); nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; + +#ifdef INET + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { + int hasv4addr; + + if (sin6 == NULL) + hasv4addr = (inp->inp_vflag & INP_IPV4); + else + hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) + ? 1 : 0; + if (hasv4addr) { + struct pr_usrreqs *pru; + + /* + * XXXRW: We release UDP-layer locks before calling + * udp_send() in order to avoid recursion. However, + * this does mean there is a short window where inp's + * fields are unstable. Could this lead to a + * potential race in which the factors causing us to + * select the UDPv4 output routine are invalidated? + */ + INP_RUNLOCK(inp); + if (sin6) + in6_sin6_2_sin_in_sock((struct sockaddr *)sin6); + pru = inetsw[ip_protox[nxt]].pr_usrreqs; + /* addr will just be freed in sendit(). */ + return ((*pru->pru_send)(so, flags_arg, m, + (struct sockaddr *)sin6, control, td)); + } + } +#endif + if (control) { if ((error = ip6_setpktopts(control, &opt, - inp->in6p_outputopts, td->td_ucred, nxt)) != 0) - goto release; + inp->in6p_outputopts, td->td_ucred, nxt)) != 0) { + INP_RUNLOCK(inp); + ip6_clearpktopts(&opt, -1); + if (control) + m_freem(control); + m_freem(m); + return (error); + } optp = &opt; } else optp = inp->in6p_outputopts; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); + if (sin6 != NULL && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) { + INP_RUNLOCK(inp); + /* + * XXX there is a short window here which could lead to a race; + * should we re-check that what got us here is still valid? + */ + INP_WLOCK(inp); + INP_HASH_WLOCK(pcbinfo); + unlock_udbinfo = UH_WLOCKED; + } else if (sin6 != NULL && + (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) || + inp->inp_lport == 0)) { + INP_HASH_RLOCK_ET(pcbinfo, et); + unlock_udbinfo = UH_RLOCKED; + } else + unlock_udbinfo = UH_UNLOCKED; + if (sin6) { - faddr = &sin6->sin6_addr; /* * Since we saw no essential reason for calling in_pcbconnect, @@ -733,85 +824,47 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, goto release; } - fport = sin6->sin6_port; /* allow 0 port */ + /* + * Given we handle the v4mapped case in the INET block above + * assert here that it must not happen anymore. + */ + KASSERT(!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr), + ("%s: sin6(%p)->sin6_addr is v4mapped which we " + "should have handled.", __func__, sin6)); - if (IN6_IS_ADDR_V4MAPPED(faddr)) { - if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { - /* - * I believe we should explicitly discard the - * packet when mapped addresses are disabled, - * rather than send the packet as an IPv6 one. - * If we chose the latter approach, the packet - * might be sent out on the wire based on the - * default route, the situation which we'd - * probably want to avoid. - * (20010421 jinmei@kame.net) - */ - error = EINVAL; - goto release; - } - if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && - !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { - /* - * when remote addr is an IPv4-mapped address, - * local addr should not be an IPv6 address, - * since you cannot determine how to map IPv6 - * source address to IPv4. - */ - error = EINVAL; - goto release; - } + /* This only requires read-locking. */ + error = in6_selectsrc_socket(sin6, optp, inp, + td->td_ucred, scope_ambiguous, &in6a, NULL); + if (error) + goto release; + laddr = &in6a; - af = AF_INET; - } + if (inp->inp_lport == 0) { - if (!IN6_IS_ADDR_V4MAPPED(faddr)) { - error = in6_selectsrc_socket(sin6, optp, inp, - td->td_ucred, scope_ambiguous, &in6a, NULL); - if (error) + INP_WLOCK_ASSERT(inp); + error = in6_pcbsetport(laddr, inp, td->td_ucred); + if (error != 0) { + /* Undo an address bind that may have occurred. */ + inp->in6p_laddr = in6addr_any; goto release; - laddr = &in6a; - } else - laddr = &inp->in6p_laddr; /* XXX */ - if (laddr == NULL) { - if (error == 0) - error = EADDRNOTAVAIL; - goto release; - } - if (inp->inp_lport == 0 && - (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { - /* Undo an address bind that may have occurred. */ - inp->in6p_laddr = in6addr_any; - goto release; + } } + faddr = &sin6->sin6_addr; + fport = sin6->sin6_port; /* allow 0 port */ + } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } - if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { - if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { - /* - * XXX: this case would happen when the - * application sets the V6ONLY flag after - * connecting the foreign address. - * Such applications should be fixed, - * so we bark here. - */ - log(LOG_INFO, "udp6_output: IPV6_V6ONLY " - "option was set for a connected socket\n"); - error = EINVAL; - goto release; - } else - af = AF_INET; - } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } - if (af == AF_INET) - hlen = sizeof(struct ip); + ulen = m->m_pkthdr.len; + plen = sizeof(struct udphdr) + ulen; + hlen = sizeof(struct ip6_hdr); /* * Calculate data length and get a mbuf @@ -826,6 +879,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, /* * Stuff checksum and output datagram. */ + cscov = cscov_partial = 0; udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; @@ -848,59 +902,59 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, udp6->uh_ulen = 0; udp6->uh_sum = 0; - switch (af) { - case AF_INET6: - ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; - ip6->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6->ip6_vfc |= IPV6_VERSION; - ip6->ip6_plen = htons((u_short)plen); - ip6->ip6_nxt = nxt; - ip6->ip6_hlim = in6_selecthlim(inp, NULL); - ip6->ip6_src = *laddr; - ip6->ip6_dst = *faddr; - - if (cscov_partial) { - if ((udp6->uh_sum = in6_cksum_partial(m, nxt, - sizeof(struct ip6_hdr), plen, cscov)) == 0) - udp6->uh_sum = 0xffff; - } else { - udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); - m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; - m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); - } + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_plen = htons((u_short)plen); + ip6->ip6_nxt = nxt; + ip6->ip6_hlim = in6_selecthlim(inp, NULL); + ip6->ip6_src = *laddr; + ip6->ip6_dst = *faddr; +#ifdef MAC + mac_inpcb_create_mbuf(inp, m); +#endif + + if (cscov_partial) { + if ((udp6->uh_sum = in6_cksum_partial(m, nxt, + sizeof(struct ip6_hdr), plen, cscov)) == 0) + udp6->uh_sum = 0xffff; + } else { + udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + } + + flags = 0; #ifdef RSS - { - uint32_t hash_val, hash_type; - uint8_t pr; + { + uint32_t hash_val, hash_type; + uint8_t pr; - pr = inp->inp_socket->so_proto->pr_protocol; - /* - * Calculate an appropriate RSS hash for UDP and - * UDP Lite. - * - * The called function will take care of figuring out - * whether a 2-tuple or 4-tuple hash is required based - * on the currently configured scheme. - * - * Later later on connected socket values should be - * cached in the inpcb and reused, rather than constantly - * re-calculating it. - * - * UDP Lite is a different protocol number and will - * likely end up being hashed as a 2-tuple until - * RSS / NICs grow UDP Lite protocol awareness. - */ - if (rss_proto_software_hash_v6(faddr, laddr, fport, - inp->inp_lport, pr, &hash_val, &hash_type) == 0) { - m->m_pkthdr.flowid = hash_val; - M_HASHTYPE_SET(m, hash_type); - } + pr = inp->inp_socket->so_proto->pr_protocol; + /* + * Calculate an appropriate RSS hash for UDP and + * UDP Lite. + * + * The called function will take care of figuring out + * whether a 2-tuple or 4-tuple hash is required based + * on the currently configured scheme. + * + * Later later on connected socket values should be + * cached in the inpcb and reused, rather than constantly + * re-calculating it. + * + * UDP Lite is a different protocol number and will + * likely end up being hashed as a 2-tuple until + * RSS / NICs grow UDP Lite protocol awareness. + */ + if (rss_proto_software_hash_v6(faddr, laddr, fport, + inp->inp_lport, pr, &hash_val, &hash_type) == 0) { + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); } -#endif - flags = 0; -#ifdef RSS + /* * Don't override with the inp cached flowid. * @@ -908,27 +962,46 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, * be incorrect. */ flags |= IP_NODEFAULTFLOWID; + } #endif + UDPSTAT_INC(udps_opackets); + if (unlock_udbinfo == UH_WLOCKED) + INP_HASH_WUNLOCK(pcbinfo); + else if (unlock_udbinfo == UH_RLOCKED) + INP_HASH_RUNLOCK_ET(pcbinfo, et); + if (nxt == IPPROTO_UDPLITE) + UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6); + else UDP_PROBE(send, NULL, inp, ip6, inp, udp6); - UDPSTAT_INC(udps_opackets); - error = ip6_output(m, optp, &inp->inp_route6, flags, - inp->in6p_moptions, NULL, inp); - break; - case AF_INET: - error = EAFNOSUPPORT; - goto release; + error = ip6_output(m, optp, &inp->inp_route6, flags, + inp->in6p_moptions, NULL, inp); + if (unlock_udbinfo == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); + + if (control) { + ip6_clearpktopts(&opt, -1); + m_freem(control); } - goto releaseopt; + return (error); release: - m_freem(m); - -releaseopt: + if (unlock_udbinfo == UH_WLOCKED) { + INP_HASH_WUNLOCK(pcbinfo); + INP_WUNLOCK(inp); + } else if (unlock_udbinfo == UH_RLOCKED) { + INP_HASH_RUNLOCK_ET(pcbinfo, et); + INP_RUNLOCK(inp); + } else + INP_RUNLOCK(inp); if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } + m_freem(m); + return (error); } @@ -1232,15 +1305,8 @@ static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { - struct inpcb *inp; - struct inpcbinfo *pcbinfo; - int error = 0; - - pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("udp6_send: inp == NULL")); + int error; - INP_WLOCK(inp); if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; @@ -1252,53 +1318,11 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, } } -#ifdef INET - if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { - int hasv4addr; - struct sockaddr_in6 *sin6 = NULL; - - if (addr == NULL) - hasv4addr = (inp->inp_vflag & INP_IPV4); - else { - sin6 = (struct sockaddr_in6 *)addr; - hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) - ? 1 : 0; - } - if (hasv4addr) { - struct pr_usrreqs *pru; - uint8_t nxt; - - nxt = (inp->inp_socket->so_proto->pr_protocol == - IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; - /* - * XXXRW: We release UDP-layer locks before calling - * udp_send() in order to avoid recursion. However, - * this does mean there is a short window where inp's - * fields are unstable. Could this lead to a - * potential race in which the factors causing us to - * select the UDPv4 output routine are invalidated? - */ - INP_WUNLOCK(inp); - if (sin6) - in6_sin6_2_sin_in_sock(addr); - pru = inetsw[ip_protox[nxt]].pr_usrreqs; - /* addr will just be freed in sendit(). */ - return ((*pru->pru_send)(so, flags, m, addr, control, - td)); - } - } -#endif -#ifdef MAC - mac_inpcb_create_mbuf(inp, m); -#endif - INP_HASH_WLOCK(pcbinfo); - error = udp6_output(inp, m, addr, control, td); - INP_HASH_WUNLOCK(pcbinfo); - INP_WUNLOCK(inp); - return (error); + return (udp6_output(so, flags, m, addr, control, td)); bad: - INP_WUNLOCK(inp); + if (control) + m_freem(control); m_freem(m); return (error); } |