diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2019-09-24 11:05:03 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2019-11-13 10:47:04 +0100 |
commit | a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf (patch) | |
tree | db091fb0f7d091804482156c9f3f55879ac93d5b /freebsd/sys/netinet6 | |
parent | test/syscalls01: Fix sporadic test failures (diff) | |
download | rtems-libbsd-a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf.tar.bz2 |
Update to FreeBSD head 2019-09-24
Git mirror commit 6b0307a0a5184339393f555d5d424190d8a8277a.
Diffstat (limited to 'freebsd/sys/netinet6')
26 files changed, 1640 insertions, 1433 deletions
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c index 0b0c7b91..6f16c712 100644 --- a/freebsd/sys/netinet6/frag6.c +++ b/freebsd/sys/netinet6/frag6.c @@ -40,20 +40,17 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/domain.h> +#include <sys/eventhandler.h> #include <sys/hash.h> +#include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mbuf.h> -#include <sys/domain.h> -#include <sys/eventhandler.h> #include <sys/protosw.h> #include <sys/socket.h> -#include <sys/errno.h> -#include <sys/time.h> -#include <sys/kernel.h> +#include <sys/sysctl.h> #include <sys/syslog.h> -#include <machine/atomic.h> - #include <net/if.h> #include <net/if_var.h> #include <net/netisr.h> @@ -65,14 +62,14 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #include <netinet6/ip6_var.h> #include <netinet/icmp6.h> -#include <netinet/in_systm.h> /* for ECN definitions */ -#include <netinet/ip.h> /* for ECN definitions */ +#include <netinet/in_systm.h> /* For ECN definitions. */ +#include <netinet/ip.h> /* For ECN definitions. */ +#ifdef MAC #include <security/mac/mac_framework.h> +#endif -/* - * Reassembly headers are stored in hash buckets. - */ +/* Reassembly headers are stored in hash buckets. */ #define IP6REASS_NHASH_LOG2 10 #define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2) #define IP6REASS_HMASK (IP6REASS_NHASH - 1) @@ -91,22 +88,47 @@ struct ip6qbucket { int count; }; -VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets); -volatile u_int frag6_nfrags = 0; -VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]); -VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed); +struct ip6asfrag { + struct ip6asfrag *ip6af_down; + struct ip6asfrag *ip6af_up; + struct mbuf *ip6af_m; + int ip6af_offset; /* offset in ip6af_m to next header */ + int ip6af_frglen; /* fragmentable part length */ + int ip6af_off; /* fragment offset */ + u_int16_t ip6af_mff; /* more fragment bit in frag off */ +}; +#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) + +static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header"); + +/* System wide (global) maximum and count of packets in reassembly queues. */ +static int ip6_maxfrags; +static volatile u_int frag6_nfrags = 0; + +/* Maximum and current packets in per-VNET reassembly queue. */ +VNET_DEFINE_STATIC(int, ip6_maxfragpackets); +VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets); +#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) #define V_frag6_nfragpackets VNET(frag6_nfragpackets) -#define V_ip6q VNET(ip6q) -#define V_ip6q_hashseed VNET(ip6q_hashseed) -#define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock) -#define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock) -#define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED) -#define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock) -#define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q) +/* Maximum per-VNET reassembly queues per bucket and fragments per packet. */ +VNET_DEFINE_STATIC(int, ip6_maxfragbucketsize); +VNET_DEFINE_STATIC(int, ip6_maxfragsperpacket); +#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize) +#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket) -static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); +/* Per-VNET reassembly queue buckets. */ +VNET_DEFINE_STATIC(struct ip6qbucket, ip6qb[IP6REASS_NHASH]); +VNET_DEFINE_STATIC(uint32_t, ip6qb_hashseed); +#define V_ip6qb VNET(ip6qb) +#define V_ip6qb_hashseed VNET(ip6qb_hashseed) + +#define IP6QB_LOCK(_b) mtx_lock(&V_ip6qb[(_b)].lock) +#define IP6QB_TRYLOCK(_b) mtx_trylock(&V_ip6qb[(_b)].lock) +#define IP6QB_LOCK_ASSERT(_b) mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED) +#define IP6QB_UNLOCK(_b) mtx_unlock(&V_ip6qb[(_b)].lock) +#define IP6QB_HEAD(_b) (&V_ip6qb[(_b)].ip6q) /* * By default, limit the number of IP6 fragments across all reassembly @@ -124,11 +146,14 @@ static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); #define IP6_MAXFRAGS (nmbclusters / 32) #define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50)) + /* - * Initialise reassembly queue and fragment identifier. + * Sysctls and helper function. */ -void -frag6_set_bucketsize() +SYSCTL_DECL(_net_inet6_ip6); + +static void +frag6_set_bucketsize(void) { int i; @@ -136,68 +161,140 @@ frag6_set_bucketsize() V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1); } -static void -frag6_change(void *tag) +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, + CTLFLAG_RW, &ip6_maxfrags, 0, + "Maximum allowed number of outstanding IPv6 packet fragments. " + "A value of 0 means no fragmented packets will be accepted, while a " + "a value of -1 means no limit"); + +static int +sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS) { - VNET_ITERATOR_DECL(vnet_iter); + int error, val; - ip6_maxfrags = IP6_MAXFRAGS; - VNET_LIST_RLOCK_NOSLEEP(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; - frag6_set_bucketsize(); - CURVNET_RESTORE(); - } - VNET_LIST_RUNLOCK_NOSLEEP(); + val = V_ip6_maxfragpackets; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || !req->newptr) + return (error); + V_ip6_maxfragpackets = val; + frag6_set_bucketsize(); + return (0); } +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0, + sysctl_ip6_maxfragpackets, "I", + "Default maximum number of outstanding fragmented IPv6 packets. " + "A value of 0 means no fragmented packets will be accepted, while a " + "a value of -1 means no limit"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0, + "Maximum allowed number of fragments per packet"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0, + "Maximum number of reassembly queues per hash bucket"); -void -frag6_init(void) + +/* + * Remove the IPv6 fragmentation header from the mbuf. + */ +int +ip6_deletefraghdr(struct mbuf *m, int offset, int wait) { - struct ip6q *q6; - int i; + struct ip6_hdr *ip6; + struct mbuf *t; - V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; - frag6_set_bucketsize(); - for (i = 0; i < IP6REASS_NHASH; i++) { - q6 = IP6Q_HEAD(i); - q6->ip6q_next = q6->ip6q_prev = q6; - mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF); - V_ip6q[i].count = 0; + /* Delete frag6 header. */ + if (m->m_len >= offset + sizeof(struct ip6_frag)) { + + /* This is the only possible case with !PULLDOWN_TEST. */ + ip6 = mtod(m, struct ip6_hdr *); + bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), + offset); + m->m_data += sizeof(struct ip6_frag); + m->m_len -= sizeof(struct ip6_frag); + } else { + + /* This comes with no copy if the boundary is on cluster. */ + if ((t = m_split(m, offset, wait)) == NULL) + return (ENOMEM); + m_adj(t, sizeof(struct ip6_frag)); + m_cat(m, t); } - V_ip6q_hashseed = arc4random(); - V_ip6_maxfragsperpacket = 64; - if (!IS_DEFAULT_VNET(curvnet)) - return; - ip6_maxfrags = IP6_MAXFRAGS; - EVENTHANDLER_REGISTER(nmbclusters_change, - frag6_change, NULL, EVENTHANDLER_PRI_ANY); + m->m_flags |= M_FRAGMENTED; + return (0); +} + +/* + * Free a fragment reassembly header and all associated datagrams. + */ +static void +frag6_freef(struct ip6q *q6, uint32_t bucket) +{ + struct ip6_hdr *ip6; + struct ip6asfrag *af6, *down6; + struct mbuf *m; + + IP6QB_LOCK_ASSERT(bucket); + + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; + af6 = down6) { + + m = IP6_REASS_MBUF(af6); + down6 = af6->ip6af_down; + frag6_deq(af6, bucket); + + /* + * Return ICMP time exceeded error for the 1st fragment. + * Just free other fragments. + */ + if (af6->ip6af_off == 0) { + + /* Adjust pointer. */ + ip6 = mtod(m, struct ip6_hdr *); + + /* Restore source and destination addresses. */ + ip6->ip6_src = q6->ip6q_src; + ip6->ip6_dst = q6->ip6q_dst; + + icmp6_error(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_REASSEMBLY, 0); + } else + m_freem(m); + + free(af6, M_FRAG6); + } + frag6_remque(q6, bucket); + atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); +#ifdef MAC + mac_ip6q_destroy(q6); +#endif + free(q6, M_FRAG6); + atomic_subtract_int(&V_frag6_nfragpackets, 1); } /* - * In RFC2460, fragment and reassembly rule do not agree with each other, - * in terms of next header field handling in fragment header. + * Like in RFC2460, in RFC8200, fragment and reassembly rules do not agree with + * each other, in terms of next header field handling in fragment header. * While the sender will use the same value for all of the fragmented packets, - * receiver is suggested not to check the consistency. + * receiver is suggested not to check for consistency. * - * fragment rule (p20): - * (2) A Fragment header containing: - * The Next Header value that identifies the first header of - * the Fragmentable Part of the original packet. + * Fragment rules (p18,p19): + * (2) A Fragment header containing: + * The Next Header value that identifies the first header + * after the Per-Fragment headers of the original packet. * -> next header field is same for all fragments * - * reassembly rule (p21): - * The Next Header field of the last header of the Unfragmentable - * Part is obtained from the Next Header field of the first + * Reassembly rule (p20): + * The Next Header field of the last header of the Per-Fragment + * headers is obtained from the Next Header field of the first * fragment's Fragment header. * -> should grab it from the first fragment only * * The following note also contradicts with fragment rule - no one is going to * send different fragment with different next header field. * - * additional note (p22): + * Additional note (p22) [not an error]: * The Next Header values in the Fragment headers of different * fragments of the same original packet may differ. Only the value * from the Offset zero fragment packet is used for reassembly. @@ -206,33 +303,32 @@ frag6_init(void) * There is no explicit reason given in the RFC. Historical reason maybe? */ /* - * Fragment input + * Fragment input. */ int frag6_input(struct mbuf **mp, int *offp, int proto) { - struct mbuf *m = *mp, *t; + struct ifnet *dstifp; + struct in6_ifaddr *ia6; struct ip6_hdr *ip6; struct ip6_frag *ip6f; struct ip6q *head, *q6; - struct ip6asfrag *af6, *ip6af, *af6dwn; - struct in6_ifaddr *ia; - int offset = *offp, nxt, i, next; - int first_frag = 0; - int fragoff, frgpartlen; /* must be larger than u_int16_t */ + struct ip6asfrag *af6, *af6dwn, *ip6af; + struct mbuf *m, *t; uint32_t hashkey[(sizeof(struct in6_addr) * 2 + sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)]; - uint32_t hash, *hashkeyp; - struct ifnet *dstifp; - u_int8_t ecn, ecn0; + uint32_t bucket, *hashkeyp; + int fragoff, frgpartlen; /* Must be larger than uint16_t. */ + int nxt, offset, plen; + uint8_t ecn, ecn0; + bool only_frag; #ifdef RSS - struct m_tag *mtag; struct ip6_direct_ctx *ip6dc; + struct m_tag *mtag; #endif -#if 0 - char ip6buf[INET6_ADDRSTRLEN]; -#endif + m = *mp; + offset = *offp; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST @@ -245,22 +341,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto) #endif dstifp = NULL; - /* find the destination interface of the packet. */ - ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); - if (ia != NULL) { - dstifp = ia->ia_ifp; - ifa_free(&ia->ia_ifa); + /* Find the destination interface of the packet. */ + ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia6 != NULL) { + dstifp = ia6->ia_ifp; + ifa_free(&ia6->ia_ifa); } - /* jumbo payload can't contain a fragment header */ + + /* Jumbo payload cannot contain a fragment header. */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); - return IPPROTO_DONE; + return (IPPROTO_DONE); } /* - * check whether fragment packet's fragment length is - * multiple of 8 octets. + * Check whether fragment packet's fragment length is a + * multiple of 8 octets (unless it is the last one). * sizeof(struct ip6_frag) == 8 * sizeof(struct ip6_hdr) = 40 */ @@ -269,22 +366,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); - return IPPROTO_DONE; + return (IPPROTO_DONE); } IP6STAT_INC(ip6s_fragments); in6_ifstat_inc(dstifp, ifs6_reass_reqd); - /* offset now points to data portion */ + /* Offset now points to data portion. */ offset += sizeof(struct ip6_frag); /* - * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0) - * upfront, unrelated to any reassembly. Just skip the fragment header. + * Handle "atomic" fragments (offset and m bit set to 0) upfront, + * unrelated to any reassembly. Still need to remove the frag hdr. + * See RFC 6946 and section 4.5 of RFC 8200. */ if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) { - /* XXX-BZ we want dedicated counters for this. */ - IP6STAT_INC(ip6s_reassembled); + IP6STAT_INC(ip6s_atomicfrags); + /* XXX-BZ handle correctly. */ in6_ifstat_inc(dstifp, ifs6_reass_ok); *offp = offset; m->m_flags |= M_FRAGMENTED; @@ -298,22 +396,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto) offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); IP6STAT_INC(ip6s_fragdropped); - return IPPROTO_DONE; + return (IPPROTO_DONE); } + /* Generate a hash value for fragment bucket selection. */ hashkeyp = hashkey; memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr)); hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr)); hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); *hashkeyp = ip6f->ip6f_ident; - hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed); - hash &= IP6REASS_HMASK; - head = IP6Q_HEAD(hash); - IP6Q_LOCK(hash); + bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed); + bucket &= IP6REASS_HMASK; + head = IP6QB_HEAD(bucket); + IP6QB_LOCK(bucket); /* - * Enforce upper bound on number of fragments. + * Enforce upper bound on number of fragments for the entire system. * If maxfrag is 0, never accept fragments. * If maxfrag is -1, accept all fragments without limitation. */ @@ -332,11 +431,11 @@ frag6_input(struct mbuf **mp, int *offp, int proto) ) break; + only_frag = false; if (q6 == head) { - /* - * the first fragment to arrive, create a reassembly queue. - */ - first_frag = 1; + + /* A first fragment to arrive creates a reassembly queue. */ + only_frag = true; /* * Enforce upper bound on number of fragmented packets @@ -347,26 +446,27 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ if (V_ip6_maxfragpackets < 0) ; - else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize || + else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize || atomic_load_int(&V_frag6_nfragpackets) >= (u_int)V_ip6_maxfragpackets) goto dropfrag; atomic_add_int(&V_frag6_nfragpackets, 1); - q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE, - M_NOWAIT); + + /* Allocate IPv6 fragement packet queue entry. */ + q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6, + M_NOWAIT | M_ZERO); if (q6 == NULL) goto dropfrag; - bzero(q6, sizeof(*q6)); #ifdef MAC if (mac_ip6q_init(q6, M_NOWAIT) != 0) { - free(q6, M_FTABLE); + free(q6, M_FRAG6); goto dropfrag; } mac_ip6q_create(m, q6); #endif - frag6_insque_head(q6, head, hash); + frag6_insque_head(q6, head, bucket); - /* ip6q_nxt will be filled afterwards, from 1st fragment */ + /* ip6q_nxt will be filled afterwards, from 1st fragment. */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; #ifdef notyet q6->ip6q_nxtp = (u_char *)nxtp; @@ -383,7 +483,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) } /* - * If it's the 1st fragment, record the length of the + * If it is the 1st fragment, record the length of the * unfragmentable part and the next header of the fragment header. */ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); @@ -404,18 +504,18 @@ frag6_input(struct mbuf **mp, int *offp, int proto) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); - IP6Q_UNLOCK(hash); + IP6QB_UNLOCK(bucket); return (IPPROTO_DONE); } } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); - IP6Q_UNLOCK(hash); + IP6QB_UNLOCK(bucket); return (IPPROTO_DONE); } /* - * If it's the first fragment, do the above check for each + * If it is the first fragment, do the above check for each * fragment already stored in the reassembly queue. */ if (fragoff == 0) { @@ -425,15 +525,18 @@ frag6_input(struct mbuf **mp, int *offp, int proto) if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > IPV6_MAXPACKET) { - struct mbuf *merr = IP6_REASS_MBUF(af6); struct ip6_hdr *ip6err; - int erroff = af6->ip6af_offset; + struct mbuf *merr; + int erroff; + + merr = IP6_REASS_MBUF(af6); + erroff = af6->ip6af_offset; - /* dequeue the fragment. */ - frag6_deq(af6, hash); - free(af6, M_FTABLE); + /* Dequeue the fragment. */ + frag6_deq(af6, bucket); + free(af6, M_FRAG6); - /* adjust pointer. */ + /* Adjust pointer. */ ip6err = mtod(merr, struct ip6_hdr *); /* @@ -451,174 +554,113 @@ frag6_input(struct mbuf **mp, int *offp, int proto) } } - ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE, - M_NOWAIT); + /* Allocate an IPv6 fragement queue entry for this fragmented part. */ + ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6, + M_NOWAIT | M_ZERO); if (ip6af == NULL) goto dropfrag; - bzero(ip6af, sizeof(*ip6af)); ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; ip6af->ip6af_off = fragoff; ip6af->ip6af_frglen = frgpartlen; ip6af->ip6af_offset = offset; IP6_REASS_MBUF(ip6af) = m; - if (first_frag) { + if (only_frag) { af6 = (struct ip6asfrag *)q6; goto insert; } + /* Do duplicate, condition, and boundry checks. */ /* * Handle ECN by comparing this segment with the first one; * if CE is set, do not lose CE. - * drop if CE and not-ECT are mixed for the same packet. + * Drop if CE and not-ECT are mixed for the same packet. */ ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; ecn0 = q6->ip6q_ecn; if (ecn == IPTOS_ECN_CE) { if (ecn0 == IPTOS_ECN_NOTECT) { - free(ip6af, M_FTABLE); + free(ip6af, M_FRAG6); goto dropfrag; } if (ecn0 != IPTOS_ECN_CE) q6->ip6q_ecn = IPTOS_ECN_CE; } if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { - free(ip6af, M_FTABLE); + free(ip6af, M_FRAG6); goto dropfrag; } - /* - * Find a segment which begins after this one does. - */ + /* Find a fragmented part which begins after this one does. */ for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) if (af6->ip6af_off > ip6af->ip6af_off) break; -#if 0 - /* - * If there is a preceding segment, it may provide some of - * our data already. If so, drop the data from the incoming - * segment. If it provides all of our data, drop us. - */ - if (af6->ip6af_up != (struct ip6asfrag *)q6) { - i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen - - ip6af->ip6af_off; - if (i > 0) { - if (i >= ip6af->ip6af_frglen) - goto dropfrag; - m_adj(IP6_REASS_MBUF(ip6af), i); - ip6af->ip6af_off += i; - ip6af->ip6af_frglen -= i; - } - } - - /* - * While we overlap succeeding segments trim them or, - * if they are completely covered, dequeue them. - */ - while (af6 != (struct ip6asfrag *)q6 && - ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) { - i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; - if (i < af6->ip6af_frglen) { - af6->ip6af_frglen -= i; - af6->ip6af_off += i; - m_adj(IP6_REASS_MBUF(af6), i); - break; - } - af6 = af6->ip6af_down; - m_freem(IP6_REASS_MBUF(af6->ip6af_up)); - frag6_deq(af6->ip6af_up, hash); - } -#else /* * If the incoming framgent overlaps some existing fragments in - * the reassembly queue, drop it, since it is dangerous to override - * existing fragments from a security point of view. - * We don't know which fragment is the bad guy - here we trust - * fragment that came in earlier, with no real reason. - * - * Note: due to changes after disabling this part, mbuf passed to - * m_adj() below now does not meet the requirement. + * the reassembly queue, drop both the new fragment and the + * entire reassembly queue. However, if the new fragment + * is an exact duplicate of an existing fragment, only silently + * drop the existing fragment and leave the fragmentation queue + * unchanged, as allowed by the RFC. (RFC 8200, 4.5) */ if (af6->ip6af_up != (struct ip6asfrag *)q6) { - i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen - - ip6af->ip6af_off; - if (i > 0) { -#if 0 /* suppress the noisy log */ - log(LOG_ERR, "%d bytes of a fragment from %s " - "overlaps the previous fragment\n", - i, ip6_sprintf(ip6buf, &q6->ip6q_src)); -#endif - free(ip6af, M_FTABLE); + if (af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen - + ip6af->ip6af_off > 0) { + free(ip6af, M_FRAG6); goto dropfrag; } } if (af6 != (struct ip6asfrag *)q6) { - i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; - if (i > 0) { -#if 0 /* suppress the noisy log */ - log(LOG_ERR, "%d bytes of a fragment from %s " - "overlaps the succeeding fragment", - i, ip6_sprintf(ip6buf, &q6->ip6q_src)); -#endif - free(ip6af, M_FTABLE); + if (ip6af->ip6af_off + ip6af->ip6af_frglen - + af6->ip6af_off > 0) { + free(ip6af, M_FRAG6); goto dropfrag; } } -#endif insert: #ifdef MAC - if (!first_frag) + if (!only_frag) mac_ip6q_update(m, q6); #endif /* - * Stick new segment in its place; - * check for complete reassembly. - * If not complete, check fragment limit. - * Move to front of packet queue, as we are - * the most recently active fragmented packet. + * Stick new segment in its place; check for complete reassembly. + * If not complete, check fragment limit. Move to front of packet + * queue, as we are the most recently active fragmented packet. */ - frag6_enq(ip6af, af6->ip6af_up, hash); + frag6_enq(ip6af, af6->ip6af_up, bucket); atomic_add_int(&frag6_nfrags, 1); q6->ip6q_nfrag++; -#if 0 /* xxx */ - if (q6 != head->ip6q_next) { - frag6_remque(q6, hash); - frag6_insque_head(q6, head, hash); - } -#endif - next = 0; + plen = 0; for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) { - if (af6->ip6af_off != next) { + if (af6->ip6af_off != plen) { if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { - IP6STAT_INC(ip6s_fragdropped); - frag6_freef(q6, hash); + IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag); + frag6_freef(q6, bucket); } - IP6Q_UNLOCK(hash); - return IPPROTO_DONE; + IP6QB_UNLOCK(bucket); + return (IPPROTO_DONE); } - next += af6->ip6af_frglen; + plen += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { - IP6STAT_INC(ip6s_fragdropped); - frag6_freef(q6, hash); + IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag); + frag6_freef(q6, bucket); } - IP6Q_UNLOCK(hash); - return IPPROTO_DONE; + IP6QB_UNLOCK(bucket); + return (IPPROTO_DONE); } - /* - * Reassembly is complete; concatenate fragments. - */ + /* Reassembly is complete; concatenate fragments. */ ip6af = q6->ip6q_down; t = m = IP6_REASS_MBUF(ip6af); af6 = ip6af->ip6af_down; - frag6_deq(ip6af, hash); + frag6_deq(ip6af, bucket); while (af6 != (struct ip6asfrag *)q6) { m->m_pkthdr.csum_flags &= IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags; @@ -626,13 +668,13 @@ insert: IP6_REASS_MBUF(af6)->m_pkthdr.csum_data; af6dwn = af6->ip6af_down; - frag6_deq(af6, hash); + frag6_deq(af6, bucket); while (t->m_next) t = t->m_next; m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset); m_demote_pkthdr(IP6_REASS_MBUF(af6)); m_cat(t, IP6_REASS_MBUF(af6)); - free(af6, M_FTABLE); + free(af6, M_FRAG6); af6 = af6dwn; } @@ -640,47 +682,43 @@ insert: m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); - /* adjust offset to point where the original next header starts */ + /* Adjust offset to point where the original next header starts. */ offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); - free(ip6af, M_FTABLE); + free(ip6af, M_FRAG6); ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); + ip6->ip6_plen = htons((u_short)plen + offset - sizeof(struct ip6_hdr)); if (q6->ip6q_ecn == IPTOS_ECN_CE) ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); nxt = q6->ip6q_nxt; -#ifdef notyet - *q6->ip6q_nxtp = (u_char)(nxt & 0xff); -#endif if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) { - frag6_remque(q6, hash); + frag6_remque(q6, bucket); atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); #ifdef MAC mac_ip6q_destroy(q6); #endif - free(q6, M_FTABLE); + free(q6, M_FRAG6); atomic_subtract_int(&V_frag6_nfragpackets, 1); goto dropfrag; } - /* - * Store NXT to the original. - */ + /* Set nxt(-hdr field value) to the original value. */ m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t), (caddr_t)&nxt); - frag6_remque(q6, hash); + frag6_remque(q6, bucket); atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); #ifdef MAC mac_ip6q_reassemble(q6, m); mac_ip6q_destroy(q6); #endif - free(q6, M_FTABLE); + free(q6, M_FRAG6); atomic_subtract_int(&V_frag6_nfragpackets, 1); if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ - int plen = 0; + + plen = 0; for (t = m; t; t = t->m_next) plen += t->m_len; m->m_pkthdr.len = plen; @@ -699,173 +737,64 @@ insert: m_tag_prepend(m, mtag); #endif - IP6Q_UNLOCK(hash); + IP6QB_UNLOCK(bucket); IP6STAT_INC(ip6s_reassembled); in6_ifstat_inc(dstifp, ifs6_reass_ok); #ifdef RSS - /* - * Queue/dispatch for reprocessing. - */ + /* Queue/dispatch for reprocessing. */ netisr_dispatch(NETISR_IPV6_DIRECT, m); - return IPPROTO_DONE; + return (IPPROTO_DONE); #endif - /* - * Tell launch routine the next header - */ - + /* Tell launch routine the next header. */ *mp = m; *offp = offset; - return nxt; + return (nxt); - dropfrag: - IP6Q_UNLOCK(hash); +dropfrag: + IP6QB_UNLOCK(bucket); in6_ifstat_inc(dstifp, ifs6_reass_fail); IP6STAT_INC(ip6s_fragdropped); m_freem(m); - return IPPROTO_DONE; -} - -/* - * Free a fragment reassembly header and all - * associated datagrams. - */ -static void -frag6_freef(struct ip6q *q6, uint32_t bucket) -{ - struct ip6asfrag *af6, *down6; - - IP6Q_LOCK_ASSERT(bucket); - - for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; - af6 = down6) { - struct mbuf *m = IP6_REASS_MBUF(af6); - - down6 = af6->ip6af_down; - frag6_deq(af6, bucket); - - /* - * Return ICMP time exceeded error for the 1st fragment. - * Just free other fragments. - */ - if (af6->ip6af_off == 0) { - struct ip6_hdr *ip6; - - /* adjust pointer */ - ip6 = mtod(m, struct ip6_hdr *); - - /* restore source and destination addresses */ - ip6->ip6_src = q6->ip6q_src; - ip6->ip6_dst = q6->ip6q_dst; - - icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_REASSEMBLY, 0); - } else - m_freem(m); - free(af6, M_FTABLE); - } - frag6_remque(q6, bucket); - atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); -#ifdef MAC - mac_ip6q_destroy(q6); -#endif - free(q6, M_FTABLE); - atomic_subtract_int(&V_frag6_nfragpackets, 1); -} - -/* - * Put an ip fragment on a reassembly chain. - * Like insque, but pointers in middle of structure. - */ -static void -frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6, - uint32_t bucket __unused) -{ - - IP6Q_LOCK_ASSERT(bucket); - - af6->ip6af_up = up6; - af6->ip6af_down = up6->ip6af_down; - up6->ip6af_down->ip6af_up = af6; - up6->ip6af_down = af6; -} - -/* - * To frag6_enq as remque is to insque. - */ -static void -frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused) -{ - - IP6Q_LOCK_ASSERT(bucket); - - af6->ip6af_up->ip6af_down = af6->ip6af_down; - af6->ip6af_down->ip6af_up = af6->ip6af_up; -} - -static void -frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket) -{ - - IP6Q_LOCK_ASSERT(bucket); - KASSERT(IP6Q_HEAD(bucket) == old, - ("%s: attempt to insert at head of wrong bucket" - " (bucket=%u, old=%p)", __func__, bucket, old)); - - new->ip6q_prev = old; - new->ip6q_next = old->ip6q_next; - old->ip6q_next->ip6q_prev= new; - old->ip6q_next = new; - V_ip6q[bucket].count++; -} - -static void -frag6_remque(struct ip6q *p6, uint32_t bucket) -{ - - IP6Q_LOCK_ASSERT(bucket); - - p6->ip6q_prev->ip6q_next = p6->ip6q_next; - p6->ip6q_next->ip6q_prev = p6->ip6q_prev; - V_ip6q[bucket].count--; + return (IPPROTO_DONE); } /* * IPv6 reassembling timer processing; - * if a timer expires on a reassembly - * queue, discard it. + * if a timer expires on a reassembly queue, discard it. */ void frag6_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); struct ip6q *head, *q6; - int i; + uint32_t bucket; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - for (i = 0; i < IP6REASS_NHASH; i++) { - IP6Q_LOCK(i); - head = IP6Q_HEAD(i); + for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) { + IP6QB_LOCK(bucket); + head = IP6QB_HEAD(bucket); q6 = head->ip6q_next; if (q6 == NULL) { /* * XXXJTL: This should never happen. This * should turn into an assertion. */ - IP6Q_UNLOCK(i); + IP6QB_UNLOCK(bucket); continue; } while (q6 != head) { --q6->ip6q_ttl; q6 = q6->ip6q_next; if (q6->ip6q_prev->ip6q_ttl == 0) { - IP6STAT_INC(ip6s_fragtimeout); + IP6STAT_ADD(ip6s_fragtimeout, + q6->ip6q_prev->ip6q_nfrag); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(q6->ip6q_prev, i); + frag6_freef(q6->ip6q_prev, bucket); } } /* @@ -874,36 +803,38 @@ frag6_slowtimo(void) * enough to get down to the new limit. * Note that we drain all reassembly queues if * maxfragpackets is 0 (fragmentation is disabled), - * and don't enforce a limit when maxfragpackets + * and do not enforce a limit when maxfragpackets * is negative. */ while ((V_ip6_maxfragpackets == 0 || (V_ip6_maxfragpackets > 0 && - V_ip6q[i].count > V_ip6_maxfragbucketsize)) && + V_ip6qb[bucket].count > V_ip6_maxfragbucketsize)) && head->ip6q_prev != head) { - IP6STAT_INC(ip6s_fragoverflow); + IP6STAT_ADD(ip6s_fragoverflow, + q6->ip6q_prev->ip6q_nfrag); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(head->ip6q_prev, i); + frag6_freef(head->ip6q_prev, bucket); } - IP6Q_UNLOCK(i); + IP6QB_UNLOCK(bucket); } /* * If we are still over the maximum number of fragmented * packets, drain off enough to get down to the new limit. */ - i = 0; + bucket = 0; while (V_ip6_maxfragpackets >= 0 && atomic_load_int(&V_frag6_nfragpackets) > (u_int)V_ip6_maxfragpackets) { - IP6Q_LOCK(i); - head = IP6Q_HEAD(i); + IP6QB_LOCK(bucket); + head = IP6QB_HEAD(bucket); if (head->ip6q_prev != head) { - IP6STAT_INC(ip6s_fragoverflow); + IP6STAT_ADD(ip6s_fragoverflow, + q6->ip6q_prev->ip6q_nfrag); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(head->ip6q_prev, i); + frag6_freef(head->ip6q_prev, bucket); } - IP6Q_UNLOCK(i); - i = (i + 1) % IP6REASS_NHASH; + IP6QB_UNLOCK(bucket); + bucket = (bucket + 1) % IP6REASS_NHASH; } CURVNET_RESTORE(); } @@ -911,6 +842,52 @@ frag6_slowtimo(void) } /* + * Eventhandler to adjust limits in case nmbclusters change. + */ +static void +frag6_change(void *tag) +{ + VNET_ITERATOR_DECL(vnet_iter); + + ip6_maxfrags = IP6_MAXFRAGS; + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; + frag6_set_bucketsize(); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); +} + +/* + * Initialise reassembly queue and fragment identifier. + */ +void +frag6_init(void) +{ + struct ip6q *q6; + uint32_t bucket; + + V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; + frag6_set_bucketsize(); + for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) { + q6 = IP6QB_HEAD(bucket); + q6->ip6q_next = q6->ip6q_prev = q6; + mtx_init(&V_ip6qb[bucket].lock, "ip6qlock", NULL, MTX_DEF); + V_ip6qb[bucket].count = 0; + } + V_ip6qb_hashseed = arc4random(); + V_ip6_maxfragsperpacket = 64; + if (!IS_DEFAULT_VNET(curvnet)) + return; + + ip6_maxfrags = IP6_MAXFRAGS; + EVENTHANDLER_REGISTER(nmbclusters_change, + frag6_change, NULL, EVENTHANDLER_PRI_ANY); +} + +/* * Drain off all datagram fragments. */ void @@ -918,48 +895,80 @@ frag6_drain(void) { VNET_ITERATOR_DECL(vnet_iter); struct ip6q *head; - int i; + uint32_t bucket; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - for (i = 0; i < IP6REASS_NHASH; i++) { - if (IP6Q_TRYLOCK(i) == 0) + for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) { + if (IP6QB_TRYLOCK(bucket) == 0) continue; - head = IP6Q_HEAD(i); + head = IP6QB_HEAD(bucket); while (head->ip6q_next != head) { IP6STAT_INC(ip6s_fragdropped); /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(head->ip6q_next, i); + frag6_freef(head->ip6q_next, bucket); } - IP6Q_UNLOCK(i); + IP6QB_UNLOCK(bucket); } CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } -int -ip6_deletefraghdr(struct mbuf *m, int offset, int wait) +/* + * Put an ip fragment on a reassembly chain. + * Like insque, but pointers in middle of structure. + */ +static void +frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6, + uint32_t bucket __unused) { - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct mbuf *t; - /* Delete frag6 header. */ - if (m->m_len >= offset + sizeof(struct ip6_frag)) { - /* This is the only possible case with !PULLDOWN_TEST. */ - bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), - offset); - m->m_data += sizeof(struct ip6_frag); - m->m_len -= sizeof(struct ip6_frag); - } else { - /* This comes with no copy if the boundary is on cluster. */ - if ((t = m_split(m, offset, wait)) == NULL) - return (ENOMEM); - m_adj(t, sizeof(struct ip6_frag)); - m_cat(m, t); - } + IP6QB_LOCK_ASSERT(bucket); - m->m_flags |= M_FRAGMENTED; - return (0); + af6->ip6af_up = up6; + af6->ip6af_down = up6->ip6af_down; + up6->ip6af_down->ip6af_up = af6; + up6->ip6af_down = af6; +} + +/* + * To frag6_enq as remque is to insque. + */ +static void +frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused) +{ + + IP6QB_LOCK_ASSERT(bucket); + + af6->ip6af_up->ip6af_down = af6->ip6af_down; + af6->ip6af_down->ip6af_up = af6->ip6af_up; +} + +static void +frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket) +{ + + IP6QB_LOCK_ASSERT(bucket); + KASSERT(IP6QB_HEAD(bucket) == old, + ("%s: attempt to insert at head of wrong bucket" + " (bucket=%u, old=%p)", __func__, bucket, old)); + + new->ip6q_prev = old; + new->ip6q_next = old->ip6q_next; + old->ip6q_next->ip6q_prev= new; + old->ip6q_next = new; + V_ip6qb[bucket].count++; +} + +static void +frag6_remque(struct ip6q *p6, uint32_t bucket) +{ + + IP6QB_LOCK_ASSERT(bucket); + + p6->ip6q_prev->ip6q_next = p6->ip6q_next; + p6->ip6q_next->ip6q_prev = p6->ip6q_prev; + V_ip6qb[bucket].count--; } diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c index 6dd25e98..4a35eb8d 100644 --- a/freebsd/sys/netinet6/icmp6.c +++ b/freebsd/sys/netinet6/icmp6.c @@ -142,7 +142,7 @@ static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); -static struct mbuf *ni6_input(struct mbuf *, int); +static struct mbuf *ni6_input(struct mbuf *, int, struct prison *); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, @@ -629,6 +629,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; + struct prison *pr; if (!V_icmp6_nodeinfo) break; @@ -640,6 +641,18 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) else goto badlen; +#ifndef __rtems__ + pr = NULL; + sx_slock(&allprison_lock); + TAILQ_FOREACH(pr, &allprison, pr_list) + if (pr->pr_vnet == ifp->if_vnet) + break; + sx_sunlock(&allprison_lock); + if (pr == NULL) + pr = curthread->td_ucred->cr_prison; +#else /* __rtems__ */ + pr = &prison0; +#endif /* __rtems__ */ if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), @@ -647,11 +660,10 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) #endif n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n) - n = ni6_input(n, off); + n = ni6_input(n, off, pr); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { - struct prison *pr; u_char *p; int maxhlen, hlen; @@ -685,17 +697,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) n = NULL; break; } - maxhlen = M_TRAILINGSPACE(n) - - (sizeof(*nip6) + sizeof(*nicmp6) + 4); -#ifndef __rtems__ - pr = curthread->td_ucred->cr_prison; -#else /* __rtems__ */ - pr = &prison0; -#endif /* __rtems__ */ - mtx_lock(&pr->pr_mtx); - hlen = strlen(pr->pr_hostname); - if (maxhlen > hlen) - maxhlen = hlen; /* * Copy IPv6 and ICMPv6 only. */ @@ -705,6 +706,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); + + maxhlen = M_TRAILINGSPACE(n) - + (sizeof(*nip6) + sizeof(*nicmp6) + 4); + mtx_lock(&pr->pr_mtx); + hlen = strlen(pr->pr_hostname); + if (maxhlen > hlen) + maxhlen = hlen; /* meaningless TTL */ bcopy(pr->pr_hostname, p + 4, maxhlen); mtx_unlock(&pr->pr_mtx); @@ -1173,11 +1181,10 @@ icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) * with hostname changes by sethostname(3) */ static struct mbuf * -ni6_input(struct mbuf *m, int off) +ni6_input(struct mbuf *m, int off, struct prison *pr) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; - struct prison *pr; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); @@ -1329,11 +1336,6 @@ ni6_input(struct mbuf *m, int off) * wildcard match, if gethostname(3) side has * truncated hostname. */ -#ifndef __rtems__ - pr = curthread->td_ucred->cr_prison; -#else /* __rtems__ */ - pr = &prison0; -#endif /* __rtems__ */ mtx_lock(&pr->pr_mtx); n = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), 0); @@ -1458,11 +1460,6 @@ ni6_input(struct mbuf *m, int off) /* * XXX do we really have FQDN in hostname? */ -#ifndef __rtems__ - pr = curthread->td_ucred->cr_prison; -#else /* __rtems__ */ - pr = &prison0; -#endif /* __rtems__ */ mtx_lock(&pr->pr_mtx); n->m_next = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), oldfqdn); @@ -1669,6 +1666,7 @@ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { + struct epoch_tracker et; struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; @@ -1690,10 +1688,9 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, } } - IFNET_RLOCK_NOSLEEP(); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; - IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1744,16 +1741,15 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, } addrsofif++; /* count the address */ } - IF_ADDR_RUNLOCK(ifp); if (iffound) { *ifpp = ifp; - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); return (addrsofif); } addrs += addrsofif; } - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); return (addrs); } @@ -1762,6 +1758,7 @@ static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { + struct epoch_tracker et; struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; @@ -1774,12 +1771,11 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* needless to copy */ - IFNET_RLOCK_NOSLEEP(); + NET_EPOCH_ENTER(et); ifp = ifp0 ? ifp0 : CK_STAILQ_FIRST(&V_ifnet); again: for (; ifp; ifp = CK_STAILQ_NEXT(ifp, if_link)) { - IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1834,13 +1830,12 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { - IF_ADDR_RUNLOCK(ifp); /* * We give up much more copy. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); return (copied); } @@ -1881,7 +1876,6 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } - IF_ADDR_RUNLOCK(ifp); if (ifp0) /* we need search only on the specified IF */ break; } @@ -1893,7 +1887,7 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, goto again; } - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); return (copied); } @@ -1906,7 +1900,7 @@ icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct inpcb *in6p; + struct inpcb *inp; struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; @@ -1938,25 +1932,25 @@ icmp6_rip6_input(struct mbuf **mp, int off) } INP_INFO_RLOCK_ET(&V_ripcbinfo, et); - CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { - if ((in6p->inp_vflag & INP_IPV6) == 0) + CK_LIST_FOREACH(inp, &V_ripcb, inp_list) { + if ((inp->inp_vflag & INP_IPV6) == 0) continue; - if (in6p->inp_ip_p != IPPROTO_ICMPV6) + if (inp->inp_ip_p != IPPROTO_ICMPV6) continue; - if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && - !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst)) continue; - if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && - !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src)) continue; - INP_RLOCK(in6p); - if (__predict_false(in6p->inp_flags2 & INP_FREED)) { - INP_RUNLOCK(in6p); + INP_RLOCK(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_RUNLOCK(inp); continue; } if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, - in6p->in6p_icmp6filt)) { - INP_RUNLOCK(in6p); + inp->in6p_icmp6filt)) { + INP_RUNLOCK(inp); continue; } if (last != NULL) { @@ -2017,7 +2011,7 @@ icmp6_rip6_input(struct mbuf **mp, int off) } INP_RUNLOCK(last); } - last = in6p; + last = inp; } INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); if (last != NULL) { @@ -2575,13 +2569,14 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { /* target lladdr option */ + struct epoch_tracker et; int len; struct nd_opt_hdr *nd_opt; char *lladdr; - IF_AFDATA_RLOCK(ifp); + NET_EPOCH_ENTER(et); ln = nd6_lookup(router_ll6, 0, ifp); - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if (ln == NULL) goto nolladdropt; diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c index ef59203e..f3306bc3 100644 --- a/freebsd/sys/netinet6/in6.c +++ b/freebsd/sys/netinet6/in6.c @@ -1392,13 +1392,15 @@ in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia, * if this is its first address, */ if (hostIsNew != 0) { - IF_ADDR_RLOCK(ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifacount++; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); } if (ifacount <= 1 && ifp->if_ioctl) { @@ -1476,9 +1478,10 @@ done: struct in6_ifaddr * in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) { + struct epoch_tracker et; struct ifaddr *ifa; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1490,7 +1493,7 @@ in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) break; } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return ((struct in6_ifaddr *)ifa); } @@ -1527,9 +1530,10 @@ in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid) struct in6_ifaddr * in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr) { + struct epoch_tracker et; struct ifaddr *ifa; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1538,7 +1542,7 @@ in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr) break; } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return ((struct in6_ifaddr *)ifa); } @@ -1549,12 +1553,13 @@ in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr) struct in6_ifaddr * in6ifa_llaonifp(struct ifnet *ifp) { + struct epoch_tracker et; struct sockaddr_in6 *sin6; struct ifaddr *ifa; if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) return (NULL); - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1564,7 +1569,7 @@ in6ifa_llaonifp(struct ifnet *ifp) IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr)) break; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return ((struct in6_ifaddr *)ifa); } @@ -1701,6 +1706,7 @@ int in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr) { struct in6_addr in6; + struct epoch_tracker et; struct ifaddr *ifa; struct in6_ifaddr *ia6; @@ -1709,17 +1715,17 @@ in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr) return (0); in6_setscope(&in6, ifp, NULL); - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia6 = (struct in6_ifaddr *)ifa; if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (1); } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (0); } @@ -1823,6 +1829,7 @@ in6_prefixlen2mask(struct in6_addr *maskp, int len) struct in6_ifaddr * in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) { + struct epoch_tracker et; int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; struct in6_ifaddr *besta = NULL; @@ -1836,7 +1843,7 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) * If two or more, return one which matches the dst longest. * If none, return one of global addresses assigned other ifs. */ - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1870,7 +1877,7 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) } if (besta) { ifa_ref(&besta->ia_ifa); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (besta); } @@ -1891,23 +1898,23 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) if (ifa != NULL) ifa_ref(ifa); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (struct in6_ifaddr *)ifa; } /* use the last-resort values, that are, deprecated addresses */ if (dep[0]) { ifa_ref((struct ifaddr *)dep[0]); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return dep[0]; } if (dep[1]) { ifa_ref((struct ifaddr *)dep[1]); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return dep[1]; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return NULL; } @@ -1917,10 +1924,11 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) void in6_if_up(struct ifnet *ifp) { + struct epoch_tracker et; struct ifaddr *ifa; struct in6_ifaddr *ia; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1936,7 +1944,7 @@ in6_if_up(struct ifnet *ifp) arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz)); } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); /* * special cases, like 6to4, are handled in in6_ifattach @@ -1947,26 +1955,14 @@ in6_if_up(struct ifnet *ifp) int in6if_do_dad(struct ifnet *ifp) { + if ((ifp->if_flags & IFF_LOOPBACK) != 0) return (0); - - if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) || - (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD)) + if ((ifp->if_flags & IFF_MULTICAST) == 0) + return (0); + if ((ND_IFINFO(ifp)->flags & + (ND6_IFF_IFDISABLED | ND6_IFF_NO_DAD)) != 0) return (0); - - /* - * Our DAD routine requires the interface up and running. - * However, some interfaces can be up before the RUNNING - * status. Additionally, users may try to assign addresses - * before the interface becomes up (or running). - * This function returns EAGAIN in that case. - * The caller should mark "tentative" on the address instead of - * performing DAD immediately. - */ - if (!((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING))) - return (EAGAIN); - return (1); } @@ -1977,10 +1973,11 @@ in6if_do_dad(struct ifnet *ifp) void in6_setmaxmtu(void) { + struct epoch_tracker et; unsigned long maxmtu = 0; struct ifnet *ifp; - IFNET_RLOCK_NOSLEEP(); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* this function can be called during ifnet initialization */ if (!ifp->if_afdata[AF_INET6]) @@ -1989,7 +1986,7 @@ in6_setmaxmtu(void) IN6_LINKMTU(ifp) > maxmtu) maxmtu = IN6_LINKMTU(ifp); } - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); if (maxmtu) /* update only when maxmtu is positive */ V_in6_maxmtu = maxmtu; } @@ -2167,18 +2164,19 @@ in6_lltable_rtcheck(struct ifnet *ifp, fibnum = V_rt_add_addr_allfibs ? RT_DEFAULT_FIB : ifp->if_fib; error = fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6); if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) { + struct epoch_tracker et; struct ifaddr *ifa; /* * Create an ND6 cache for an IPv6 neighbor * that is not covered by our own prefix. */ - NET_EPOCH_ENTER(); + NET_EPOCH_ENTER(et); ifa = ifaof_ifpforaddr(l3addr, ifp); if (ifa != NULL) { - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); return 0; } - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", ip6_sprintf(ip6buf, &sin6->sin6_addr)); return EINVAL; @@ -2319,16 +2317,13 @@ in6_lltable_lookup(struct lltable *llt, u_int flags, IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); + KASSERT((flags & (LLE_UNLOCKED | LLE_EXCLUSIVE)) != + (LLE_UNLOCKED | LLE_EXCLUSIVE), + ("wrong lle request flags: %#x", flags)); lle = in6_lltable_find_dst(llt, &sin6->sin6_addr); - if (lle == NULL) return (NULL); - - KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != - (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", - flags)); - if (flags & LLE_UNLOCKED) return (lle); @@ -2336,6 +2331,18 @@ in6_lltable_lookup(struct lltable *llt, u_int flags, LLE_WLOCK(lle); else LLE_RLOCK(lle); + + /* + * If the afdata lock is not held, the LLE may have been unlinked while + * we were blocked on the LLE lock. Check for this case. + */ + if (__predict_false((lle->la_flags & LLE_LINKED) == 0)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(lle); + else + LLE_RUNLOCK(lle); + return (NULL); + } return (lle); } diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c index 6af4b557..560b4255 100644 --- a/freebsd/sys/netinet6/in6_ifattach.c +++ b/freebsd/sys/netinet6/in6_ifattach.c @@ -246,6 +246,7 @@ generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret) int in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6) { + struct epoch_tracker et; struct ifaddr *ifa; struct sockaddr_dl *sdl; u_int8_t *addr; @@ -254,7 +255,7 @@ in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6) static u_int8_t allone[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; @@ -266,7 +267,7 @@ in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6) goto found; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; @@ -289,7 +290,7 @@ found: /* look at IEEE802/EUI64 only */ if (addrlen != 8 && addrlen != 6) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; } @@ -299,11 +300,11 @@ found: * card insertion. */ if (bcmp(addr, allzero, addrlen) == 0) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; } if (bcmp(addr, allone, addrlen) == 0) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; } @@ -330,17 +331,25 @@ found: * identifier source (can be renumbered). * we don't do this. */ - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; + case IFT_INFINIBAND: + if (addrlen != 20) { + NET_EPOCH_EXIT(et); + return -1; + } + bcopy(addr + 12, &in6->s6_addr[8], 8); + break; + default: - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; } /* sanity check: g bit must not indicate "group" */ if (EUI64_GROUP(in6)) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; } @@ -353,11 +362,11 @@ found: */ if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 && bcmp(&in6->s6_addr[9], allzero, 7) == 0) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return -1; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return 0; } @@ -372,6 +381,7 @@ static int get_ifid(struct ifnet *ifp0, struct ifnet *altifp, struct in6_addr *in6) { + struct epoch_tracker et; struct ifnet *ifp; /* first, try to get it from the interface itself */ @@ -389,7 +399,7 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp, } /* next, try to get it from some other hardware interface */ - IFNET_RLOCK_NOSLEEP(); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp == ifp0) continue; @@ -404,11 +414,11 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp, nd6log((LOG_DEBUG, "%s: borrow interface identifier from %s\n", if_name(ifp0), if_name(ifp))); - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); goto success; } } - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); /* last resort: get from random number source */ if (get_rand_ifid(ifp, in6) == 0) { @@ -700,6 +710,7 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp) * it is rather harmful to have one. */ ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL; + ND_IFINFO(ifp)->flags |= ND6_IFF_NO_DAD; break; default: break; @@ -773,9 +784,11 @@ _in6_ifdetach(struct ifnet *ifp, int purgeulp) in6_purgeaddr(ifa); } if (purgeulp) { + IN6_MULTI_LOCK(); in6_pcbpurgeif0(&V_udbinfo, ifp); in6_pcbpurgeif0(&V_ulitecbinfo, ifp); in6_pcbpurgeif0(&V_ripcbinfo, ifp); + IN6_MULTI_UNLOCK(); } /* leave from all multicast groups joined */ in6_purgemaddrs(ifp); @@ -862,36 +875,22 @@ in6_tmpaddrtimer(void *arg) static void in6_purgemaddrs(struct ifnet *ifp) { - struct in6_multi_head purgeinms; - struct in6_multi *inm; - struct ifmultiaddr *ifma, *next; + struct in6_multi_head inmh; - SLIST_INIT(&purgeinms); + SLIST_INIT(&inmh); IN6_MULTI_LOCK(); IN6_MULTI_LIST_LOCK(); - IF_ADDR_WLOCK(ifp); - /* - * Extract list of in6_multi associated with the detaching ifp - * which the PF_INET6 layer is about to release. - */ - restart: - CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) - continue; - inm = (struct in6_multi *)ifma->ifma_protospec; - in6m_disconnect(inm); - in6m_rele_locked(&purgeinms, inm); - if (__predict_false(ifma6_restart)) { - ifma6_restart = false; - goto restart; - } - } - IF_ADDR_WUNLOCK(ifp); - mld_ifdetach(ifp); + mld_ifdetach(ifp, &inmh); IN6_MULTI_LIST_UNLOCK(); IN6_MULTI_UNLOCK(); - in6m_release_list_deferred(&purgeinms); + in6m_release_list_deferred(&inmh); + + /* + * Make sure all multicast deletions invoking if_ioctl() are + * completed before returning. Else we risk accessing a freed + * ifnet structure pointer. + */ + in6m_release_wait(); } void diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c index 3824645d..44d20612 100644 --- a/freebsd/sys/netinet6/in6_mcast.c +++ b/freebsd/sys/netinet6/in6_mcast.c @@ -104,7 +104,8 @@ RB_GENERATE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp); /* * Locking: - * - Lock order is: Giant, INP_WLOCK, IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. + * - Lock order is: Giant, IN6_MULTI_LOCK, INP_WLOCK, + * IN6_MULTI_LIST_LOCK, MLD_LOCK, IF_ADDR_LOCK. * - The IF_ADDR_LOCK is implicitly taken by in6m_lookup() earlier, however * it can be taken by code in net/if.c also. * - ip6_moptions and in6_mfilter are covered by the INP_WLOCK. @@ -136,12 +137,11 @@ static int im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *); static void im6f_purge(struct in6_mfilter *); static void im6f_rollback(struct in6_mfilter *); static void im6f_reap(struct in6_mfilter *); -static int im6o_grow(struct ip6_moptions *); -static size_t im6o_match_group(const struct ip6_moptions *, +static struct in6_mfilter * + im6o_match_group(const struct ip6_moptions *, const struct ifnet *, const struct sockaddr *); static struct in6_msource * - im6o_match_source(const struct ip6_moptions *, const size_t, - const struct sockaddr *); + im6o_match_source(struct in6_mfilter *, const struct sockaddr *); static void im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims, const int rollback); static int in6_getmulti(struct ifnet *, const struct in6_addr *, @@ -192,7 +192,6 @@ static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters, "Per-interface stack-wide source filters"); -int ifma6_restart = 0; #ifdef KTR /* * Inline function which wraps assertions for a valid ifp. @@ -231,55 +230,25 @@ im6f_init(struct in6_mfilter *imf, const int st0, const int st1) imf->im6f_st[1] = st1; } -/* - * Resize the ip6_moptions vector to the next power-of-two minus 1. - * May be called with locks held; do not sleep. - */ -static int -im6o_grow(struct ip6_moptions *imo) +struct in6_mfilter * +ip6_mfilter_alloc(const int mflags, const int st0, const int st1) { - struct in6_multi **nmships; - struct in6_multi **omships; - struct in6_mfilter *nmfilters; - struct in6_mfilter *omfilters; - size_t idx; - size_t newmax; - size_t oldmax; - - nmships = NULL; - nmfilters = NULL; - omships = imo->im6o_membership; - omfilters = imo->im6o_mfilters; - oldmax = imo->im6o_max_memberships; - newmax = ((oldmax + 1) * 2) - 1; - - if (newmax <= IPV6_MAX_MEMBERSHIPS) { - nmships = (struct in6_multi **)realloc(omships, - sizeof(struct in6_multi *) * newmax, M_IP6MOPTS, M_NOWAIT); - nmfilters = (struct in6_mfilter *)realloc(omfilters, - sizeof(struct in6_mfilter) * newmax, M_IN6MFILTER, - M_NOWAIT); - if (nmships != NULL && nmfilters != NULL) { - /* Initialize newly allocated source filter heads. */ - for (idx = oldmax; idx < newmax; idx++) { - im6f_init(&nmfilters[idx], MCAST_UNDEFINED, - MCAST_EXCLUDE); - } - imo->im6o_max_memberships = newmax; - imo->im6o_membership = nmships; - imo->im6o_mfilters = nmfilters; - } - } + struct in6_mfilter *imf; - if (nmships == NULL || nmfilters == NULL) { - if (nmships != NULL) - free(nmships, M_IP6MOPTS); - if (nmfilters != NULL) - free(nmfilters, M_IN6MFILTER); - return (ETOOMANYREFS); - } + imf = malloc(sizeof(*imf), M_IN6MFILTER, mflags); - return (0); + if (imf != NULL) + im6f_init(imf, st0, st1); + + return (imf); +} + +void +ip6_mfilter_free(struct in6_mfilter *imf) +{ + + im6f_purge(imf); + free(imf, M_IN6MFILTER); } /* @@ -287,36 +256,27 @@ im6o_grow(struct ip6_moptions *imo) * which matches the specified group, and optionally an interface. * Return its index into the array, or -1 if not found. */ -static size_t +static struct in6_mfilter * im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group) { const struct sockaddr_in6 *gsin6; - struct in6_multi **pinm; - int idx; - int nmships; + struct in6_mfilter *imf; + struct in6_multi *inm; - gsin6 = (const struct sockaddr_in6 *)group; + gsin6 = (const struct sockaddr_in6 *)group; - /* The im6o_membership array may be lazy allocated. */ - if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0) - return (-1); - - nmships = imo->im6o_num_memberships; - pinm = &imo->im6o_membership[0]; - for (idx = 0; idx < nmships; idx++, pinm++) { - if (*pinm == NULL) + IP6_MFILTER_FOREACH(imf, &imo->im6o_head) { + inm = imf->im6f_in6m; + if (inm == NULL) continue; - if ((ifp == NULL || ((*pinm)->in6m_ifp == ifp)) && - IN6_ARE_ADDR_EQUAL(&(*pinm)->in6m_addr, + if ((ifp == NULL || (inm->in6m_ifp == ifp)) && + IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &gsin6->sin6_addr)) { break; } } - if (idx >= nmships) - idx = -1; - - return (idx); + return (imf); } /* @@ -331,22 +291,13 @@ im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, * it exists, which may not be the desired behaviour. */ static struct in6_msource * -im6o_match_source(const struct ip6_moptions *imo, const size_t gidx, - const struct sockaddr *src) +im6o_match_source(struct in6_mfilter *imf, const struct sockaddr *src) { struct ip6_msource find; - struct in6_mfilter *imf; struct ip6_msource *ims; const sockunion_t *psa; KASSERT(src->sa_family == AF_INET6, ("%s: !AF_INET6", __func__)); - KASSERT(gidx != -1 && gidx < imo->im6o_num_memberships, - ("%s: invalid index %d\n", __func__, (int)gidx)); - - /* The im6o_mfilters array may be lazy allocated. */ - if (imo->im6o_mfilters == NULL) - return (NULL); - imf = &imo->im6o_mfilters[gidx]; psa = (const sockunion_t *)src; find.im6s_addr = psa->sin6.sin6_addr; @@ -366,14 +317,14 @@ int im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp, const struct sockaddr *group, const struct sockaddr *src) { - size_t gidx; + struct in6_mfilter *imf; struct in6_msource *ims; int mode; KASSERT(ifp != NULL, ("%s: null ifp", __func__)); - gidx = im6o_match_group(imo, ifp, group); - if (gidx == -1) + imf = im6o_match_group(imo, ifp, group); + if (imf == NULL) return (MCAST_NOTGMEMBER); /* @@ -385,8 +336,8 @@ im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp, * NOTE: We are comparing group state here at MLD t1 (now) * with socket-layer t0 (since last downcall). */ - mode = imo->im6o_mfilters[gidx].im6f_st[1]; - ims = im6o_match_source(imo, gidx, src); + mode = imf->im6f_st[1]; + ims = im6o_match_source(imf, src); if ((ims == NULL && mode == MCAST_INCLUDE) || (ims != NULL && ims->im6sl_st[0] != mode)) @@ -407,6 +358,7 @@ static int in6_getmulti(struct ifnet *ifp, const struct in6_addr *group, struct in6_multi **pinm) { + struct epoch_tracker et; struct sockaddr_in6 gsin6; struct ifmultiaddr *ifma; struct in6_multi *inm; @@ -422,7 +374,10 @@ in6_getmulti(struct ifnet *ifp, const struct in6_addr *group, IN6_MULTI_LOCK_ASSERT(); IN6_MULTI_LIST_LOCK(); IF_ADDR_WLOCK(ifp); + NET_EPOCH_ENTER(et); inm = in6m_lookup_locked(ifp, group); + NET_EPOCH_EXIT(et); + if (inm != NULL) { /* * If we already joined this group, just bump the @@ -587,7 +542,15 @@ in6m_release_list_deferred(struct in6_multi_head *inmh) } void -in6m_disconnect(struct in6_multi *inm) +in6m_release_wait(void) +{ + + /* Wait for all jobs to complete. */ + gtaskqueue_drain_all(free_gtask.gt_taskqueue); +} + +void +in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm) { struct ifnet *ifp; struct ifaddr *ifa; @@ -595,10 +558,12 @@ in6m_disconnect(struct in6_multi *inm) struct in6_multi_mship *imm, *imm_tmp; struct ifmultiaddr *ifma, *ll_ifma; - ifp = inm->in6m_ifp; + IN6_MULTI_LIST_LOCK_ASSERT(); + ifp = inm->in6m_ifp; if (ifp == NULL) - return; + return; /* already called */ + inm->in6m_ifp = NULL; IF_ADDR_WLOCK_ASSERT(ifp); ifma = inm->in6m_ifma; @@ -617,7 +582,6 @@ in6m_disconnect(struct in6_multi *inm) MPASS(ll_ifma->ifma_llifma == NULL); MPASS(ll_ifma->ifma_ifp == ifp); if (--ll_ifma->ifma_refcount == 0) { - ifma6_restart = true; if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; @@ -635,28 +599,12 @@ in6m_disconnect(struct in6_multi *inm) if (inm == imm->i6mm_maddr) { LIST_REMOVE(imm, i6mm_chain); free(imm, M_IP6MADDR); + in6m_rele_locked(inmh, inm); } } } } -void -in6m_release_deferred(struct in6_multi *inm) -{ - struct in6_multi_head tmp; - - IN6_MULTI_LIST_LOCK_ASSERT(); - KASSERT(inm->in6m_refcount > 0, ("refcount == %d inm: %p", inm->in6m_refcount, inm)); - if (--inm->in6m_refcount == 0) { - MPASS(inm->in6m_ifp == NULL); - SLIST_INIT(&tmp); - inm->in6m_ifma->ifma_protospec = NULL; - MPASS(inm->in6m_ifma->ifma_llifma == NULL); - SLIST_INSERT_HEAD(&tmp, inm, in6m_nrele); - in6m_release_list_deferred(&tmp); - } -} - static void in6m_release_task(void *arg __unused) { @@ -1256,6 +1204,7 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm, const int delay) { + struct in6_multi_head inmh; struct in6_mfilter timf; struct in6_multi *inm; struct ifmultiaddr *ifma; @@ -1264,7 +1213,6 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, char ip6tbuf[INET6_ADDRSTRLEN]; #endif -#ifdef INVARIANTS /* * Sanity: Check scope zone ID was set for ifp, if and * only if group is scoped to an interface. @@ -1276,7 +1224,6 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, KASSERT(mcaddr->s6_addr16[1] != 0, ("%s: scope zone ID not set", __func__)); } -#endif IN6_MULTI_LOCK_ASSERT(); IN6_MULTI_LIST_UNLOCK_ASSERT(); @@ -1317,22 +1264,26 @@ in6_joingroup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, } out_in6m_release: + SLIST_INIT(&inmh); if (error) { + struct epoch_tracker et; + CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm); - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_protospec == inm) { ifma->ifma_protospec = NULL; break; } } - in6m_disconnect(inm); - in6m_release_deferred(inm); - IF_ADDR_RUNLOCK(ifp); + in6m_disconnect_locked(&inmh, inm); + in6m_rele_locked(&inmh, inm); + NET_EPOCH_EXIT(et); } else { *pinm = inm; } IN6_MULTI_LIST_UNLOCK(); + in6m_release_list_deferred(&inmh); return (error); } @@ -1366,6 +1317,7 @@ in6_leavegroup(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) int in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) { + struct in6_multi_head inmh; struct in6_mfilter timf; struct ifnet *ifp; int error; @@ -1415,13 +1367,15 @@ in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm); if (ifp) IF_ADDR_WLOCK(ifp); - if (inm->in6m_refcount == 1 && inm->in6m_ifp != NULL) - in6m_disconnect(inm); - in6m_release_deferred(inm); + + SLIST_INIT(&inmh); + if (inm->in6m_refcount == 1) + in6m_disconnect_locked(&inmh, inm); + in6m_rele_locked(&inmh, inm); if (ifp) IF_ADDR_WUNLOCK(ifp); IN6_MULTI_LIST_UNLOCK(); - + in6m_release_list_deferred(&inmh); return (error); } @@ -1447,7 +1401,6 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) struct ip6_moptions *imo; struct in6_msource *ims; struct in6_multi *inm; - size_t idx; uint16_t fmode; int error, doblock; #ifdef KTR @@ -1504,16 +1457,12 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) * Check if we are actually a member of this group. */ imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->im6o_mfilters == NULL) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - - KASSERT(imo->im6o_mfilters != NULL, - ("%s: im6o_mfilters not allocated", __func__)); - imf = &imo->im6o_mfilters[idx]; - inm = imo->im6o_membership[idx]; + inm = imf->im6f_in6m; /* * Attempting to use the delta-based API on an @@ -1531,7 +1480,7 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) * Asked to unblock, but nothing to unblock. * If adding a new block entry, allocate it. */ - ims = im6o_match_source(imo, idx, &ssa->sa); + ims = im6o_match_source(imf, &ssa->sa); if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { CTR3(KTR_MLD, "%s: source %s %spresent", __func__, ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), @@ -1601,9 +1550,6 @@ static struct ip6_moptions * in6p_findmoptions(struct inpcb *inp) { struct ip6_moptions *imo; - struct in6_multi **immp; - struct in6_mfilter *imfp; - size_t idx; INP_WLOCK(inp); if (inp->in6p_moptions != NULL) @@ -1612,27 +1558,14 @@ in6p_findmoptions(struct inpcb *inp) INP_WUNLOCK(inp); imo = malloc(sizeof(*imo), M_IP6MOPTS, M_WAITOK); - immp = malloc(sizeof(*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS, - M_WAITOK | M_ZERO); - imfp = malloc(sizeof(struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS, - M_IN6MFILTER, M_WAITOK); imo->im6o_multicast_ifp = NULL; imo->im6o_multicast_hlim = V_ip6_defmcasthlim; imo->im6o_multicast_loop = in6_mcast_loop; - imo->im6o_num_memberships = 0; - imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; - imo->im6o_membership = immp; - - /* Initialize per-group source filters. */ - for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++) - im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); - imo->im6o_mfilters = imfp; + STAILQ_INIT(&imo->im6o_head); INP_WLOCK(inp); if (inp->in6p_moptions != NULL) { - free(imfp, M_IN6MFILTER); - free(immp, M_IP6MOPTS); free(imo, M_IP6MOPTS); return (inp->in6p_moptions); } @@ -1652,33 +1585,26 @@ in6p_findmoptions(struct inpcb *inp) static void inp_gcmoptions(struct ip6_moptions *imo) { - struct in6_mfilter *imf; + struct in6_mfilter *imf; struct in6_multi *inm; struct ifnet *ifp; - size_t idx, nmships; - - nmships = imo->im6o_num_memberships; - for (idx = 0; idx < nmships; ++idx) { - imf = imo->im6o_mfilters ? &imo->im6o_mfilters[idx] : NULL; - if (imf) - im6f_leave(imf); - inm = imo->im6o_membership[idx]; - ifp = inm->in6m_ifp; - if (ifp != NULL) { - CURVNET_SET(ifp->if_vnet); - (void)in6_leavegroup(inm, imf); - CURVNET_RESTORE(); - } else { - (void)in6_leavegroup(inm, imf); - } - if (imf) - im6f_purge(imf); - } - if (imo->im6o_mfilters) - free(imo->im6o_mfilters, M_IN6MFILTER); - free(imo->im6o_membership, M_IP6MOPTS); - free(imo, M_IP6MOPTS); + while ((imf = ip6_mfilter_first(&imo->im6o_head)) != NULL) { + ip6_mfilter_remove(&imo->im6o_head, imf); + + im6f_leave(imf); + if ((inm = imf->im6f_in6m) != NULL) { + if ((ifp = inm->in6m_ifp) != NULL) { + CURVNET_SET(ifp->if_vnet); + (void)in6_leavegroup(inm, imf); + CURVNET_RESTORE(); + } else { + (void)in6_leavegroup(inm, imf); + } + } + ip6_mfilter_free(imf); + } + free(imo, M_IP6MOPTS); } void @@ -1707,7 +1633,7 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) struct sockaddr_storage *ptss; struct sockaddr_storage *tss; int error; - size_t idx, nsrcs, ncsrcs; + size_t nsrcs, ncsrcs; INP_WLOCK_ASSERT(inp); @@ -1741,12 +1667,11 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) /* * Lookup group on the socket. */ - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->im6o_mfilters == NULL) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { INP_WUNLOCK(inp); return (EADDRNOTAVAIL); } - imf = &imo->im6o_mfilters[idx]; /* * Ignore memberships which are in limbo. @@ -1905,7 +1830,7 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt) * Returns NULL if no ifp could be found. */ static struct ifnet * -in6p_lookup_mcast_ifp(const struct inpcb *in6p, +in6p_lookup_mcast_ifp(const struct inpcb *inp, const struct sockaddr_in6 *gsin6) { struct nhop6_basic nh6; @@ -1913,13 +1838,13 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p, uint32_t scopeid; uint32_t fibnum; - KASSERT(in6p->inp_vflag & INP_IPV6, + KASSERT(inp->inp_vflag & INP_IPV6, ("%s: not INP_IPV6 inpcb", __func__)); KASSERT(gsin6->sin6_family == AF_INET6, ("%s: not AF_INET6 group", __func__)); in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid); - fibnum = in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB; + fibnum = inp ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB; if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0) return (NULL); @@ -1935,6 +1860,7 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p, static int in6p_join_group(struct inpcb *inp, struct sockopt *sopt) { + struct in6_multi_head inmh; struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; @@ -1942,14 +1868,12 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) struct ip6_moptions *imo; struct in6_multi *inm; struct in6_msource *lims; - size_t idx; int error, is_new; + SLIST_INIT(&inmh); ifp = NULL; - imf = NULL; lims = NULL; error = 0; - is_new = 0; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; @@ -2050,13 +1974,25 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) */ (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + IN6_MULTI_LOCK(); + + /* + * Find the membership in the membership list. + */ imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { is_new = 1; + inm = NULL; + + if (ip6_mfilter_count(&imo->im6o_head) >= IPV6_MAX_MEMBERSHIPS) { + error = ENOMEM; + goto out_in6p_locked; + } } else { - inm = imo->im6o_membership[idx]; - imf = &imo->im6o_mfilters[idx]; + is_new = 0; + inm = imf->im6f_in6m; + if (ssa->ss.ss_family != AF_UNSPEC) { /* * MCAST_JOIN_SOURCE_GROUP on an exclusive membership @@ -2083,7 +2019,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) * full-state SSM API with the delta-based API, * which is discouraged in the relevant RFCs. */ - lims = im6o_match_source(imo, idx, &ssa->sa); + lims = im6o_match_source(imf, &ssa->sa); if (lims != NULL /*&& lims->im6sl_st[1] == MCAST_INCLUDE*/) { error = EADDRNOTAVAIL; @@ -2111,27 +2047,6 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) */ INP_WLOCK_ASSERT(inp); - if (is_new) { - if (imo->im6o_num_memberships == imo->im6o_max_memberships) { - error = im6o_grow(imo); - if (error) - goto out_in6p_locked; - } - /* - * Allocate the new slot upfront so we can deal with - * grafting the new source filter in same code path - * as for join-source on existing membership. - */ - idx = imo->im6o_num_memberships; - imo->im6o_membership[idx] = NULL; - imo->im6o_num_memberships++; - KASSERT(imo->im6o_mfilters != NULL, - ("%s: im6f_mfilters vector was not allocated", __func__)); - imf = &imo->im6o_mfilters[idx]; - KASSERT(RB_EMPTY(&imf->im6f_sources), - ("%s: im6f_sources not empty", __func__)); - } - /* * Graft new source into filter list for this inpcb's * membership of the group. The in6_multi may not have @@ -2147,7 +2062,11 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) /* Membership starts in IN mode */ if (is_new) { CTR1(KTR_MLD, "%s: new join w/source", __func__); - im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); + imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE); + if (imf == NULL) { + error = ENOMEM; + goto out_in6p_locked; + } } else { CTR2(KTR_MLD, "%s: %s source", __func__, "allow"); } @@ -2156,77 +2075,88 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) CTR1(KTR_MLD, "%s: merge imf state failed", __func__); error = ENOMEM; - goto out_im6o_free; + goto out_in6p_locked; } } else { /* No address specified; Membership starts in EX mode */ if (is_new) { CTR1(KTR_MLD, "%s: new join w/o source", __func__); - im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); + imf = ip6_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE); + if (imf == NULL) { + error = ENOMEM; + goto out_in6p_locked; + } } } /* * Begin state merge transaction at MLD layer. */ - in_pcbref(inp); - INP_WUNLOCK(inp); - IN6_MULTI_LOCK(); - if (is_new) { + in_pcbref(inp); + INP_WUNLOCK(inp); + error = in6_joingroup_locked(ifp, &gsa->sin6.sin6_addr, imf, - &inm, 0); + &imf->im6f_in6m, 0); + + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) { + error = ENXIO; + goto out_in6p_unlocked; + } if (error) { - IN6_MULTI_UNLOCK(); - goto out_im6o_free; + goto out_in6p_locked; } - in6m_acquire(inm); - imo->im6o_membership[idx] = inm; + /* + * NOTE: Refcount from in6_joingroup_locked() + * is protecting membership. + */ } else { CTR1(KTR_MLD, "%s: merge inm state", __func__); IN6_MULTI_LIST_LOCK(); error = in6m_merge(inm, imf); - if (error) + if (error) { CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); - else { - CTR1(KTR_MLD, "%s: doing mld downcall", __func__); - error = mld_change_state(inm, 0); - if (error) - CTR1(KTR_MLD, "%s: failed mld downcall", - __func__); + IN6_MULTI_LIST_UNLOCK(); + im6f_rollback(imf); + im6f_reap(imf); + goto out_in6p_locked; } + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); IN6_MULTI_LIST_UNLOCK(); - } - IN6_MULTI_UNLOCK(); - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - return (ENXIO); - if (error) { - im6f_rollback(imf); - if (is_new) - im6f_purge(imf); - else + if (error) { + CTR1(KTR_MLD, "%s: failed mld downcall", + __func__); + im6f_rollback(imf); im6f_reap(imf); - } else { - im6f_commit(imf); - } - -out_im6o_free: - if (error && is_new) { - inm = imo->im6o_membership[idx]; - if (inm != NULL) { - IN6_MULTI_LIST_LOCK(); - in6m_release_deferred(inm); - IN6_MULTI_LIST_UNLOCK(); + goto out_in6p_locked; } - imo->im6o_membership[idx] = NULL; - --imo->im6o_num_memberships; } + if (is_new) + ip6_mfilter_insert(&imo->im6o_head, imf); + + im6f_commit(imf); + imf = NULL; + out_in6p_locked: INP_WUNLOCK(inp); +out_in6p_unlocked: + IN6_MULTI_UNLOCK(); + + if (is_new && imf) { + if (imf->im6f_in6m != NULL) { + struct in6_multi_head inmh; + + SLIST_INIT(&inmh); + SLIST_INSERT_HEAD(&inmh, imf->im6f_in6m, in6m_defer); + in6m_release_list_deferred(&inmh); + } + ip6_mfilter_free(imf); + } return (error); } @@ -2245,8 +2175,8 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) struct in6_msource *ims; struct in6_multi *inm; uint32_t ifindex; - size_t idx; - int error, is_final; + int error; + bool is_final; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif @@ -2254,7 +2184,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) ifp = NULL; ifindex = 0; error = 0; - is_final = 1; + is_final = true; memset(&gsr, 0, sizeof(struct group_source_req)); gsa = (sockunion_t *)&gsr.gsr_group; @@ -2372,20 +2302,21 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp); KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__)); + IN6_MULTI_LOCK(); + /* - * Find the membership in the membership array. + * Find the membership in the membership list. */ imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - inm = imo->im6o_membership[idx]; - imf = &imo->im6o_mfilters[idx]; + inm = imf->im6f_in6m; if (ssa->ss.ss_family != AF_UNSPEC) - is_final = 0; + is_final = false; /* * Begin state merge transaction at socket layer. @@ -2397,13 +2328,14 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. */ if (is_final) { + ip6_mfilter_remove(&imo->im6o_head, imf); im6f_leave(imf); } else { if (imf->im6f_st[0] == MCAST_EXCLUDE) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - ims = im6o_match_source(imo, idx, &ssa->sa); + ims = im6o_match_source(imf, &ssa->sa); if (ims == NULL) { CTR3(KTR_MLD, "%s: source %p %spresent", __func__, ip6_sprintf(ip6tbuf, &ssa->sin6.sin6_addr), @@ -2423,56 +2355,47 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) /* * Begin state merge transaction at MLD layer. */ - in_pcbref(inp); - INP_WUNLOCK(inp); - IN6_MULTI_LOCK(); - - if (is_final) { - /* - * Give up the multicast address record to which - * the membership points. - */ - (void)in6_leavegroup_locked(inm, imf); - } else { + if (!is_final) { CTR1(KTR_MLD, "%s: merge inm state", __func__); IN6_MULTI_LIST_LOCK(); error = in6m_merge(inm, imf); - if (error) + if (error) { CTR1(KTR_MLD, "%s: failed to merge inm state", __func__); - else { - CTR1(KTR_MLD, "%s: doing mld downcall", __func__); - error = mld_change_state(inm, 0); - if (error) - CTR1(KTR_MLD, "%s: failed mld downcall", - __func__); + IN6_MULTI_LIST_UNLOCK(); + im6f_rollback(imf); + im6f_reap(imf); + goto out_in6p_locked; } + + CTR1(KTR_MLD, "%s: doing mld downcall", __func__); + error = mld_change_state(inm, 0); IN6_MULTI_LIST_UNLOCK(); + if (error) { + CTR1(KTR_MLD, "%s: failed mld downcall", + __func__); + im6f_rollback(imf); + im6f_reap(imf); + goto out_in6p_locked; + } } - IN6_MULTI_UNLOCK(); - INP_WLOCK(inp); - if (in_pcbrele_wlocked(inp)) - return (ENXIO); - - if (error) - im6f_rollback(imf); - else - im6f_commit(imf); - + im6f_commit(imf); im6f_reap(imf); - if (is_final) { - /* Remove the gap in the membership array. */ - for (++idx; idx < imo->im6o_num_memberships; ++idx) { - imo->im6o_membership[idx-1] = imo->im6o_membership[idx]; - imo->im6o_mfilters[idx-1] = imo->im6o_mfilters[idx]; - } - imo->im6o_num_memberships--; - } - out_in6p_locked: INP_WUNLOCK(inp); + + if (is_final && imf) { + /* + * Give up the multicast address record to which + * the membership points. + */ + (void)in6_leavegroup_locked(inm, imf); + ip6_mfilter_free(imf); + } + + IN6_MULTI_UNLOCK(); return (error); } @@ -2530,7 +2453,6 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) struct in6_mfilter *imf; struct ip6_moptions *imo; struct in6_multi *inm; - size_t idx; int error; error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), @@ -2567,13 +2489,12 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) * Check if this socket is a member of this group. */ imo = in6p_findmoptions(inp); - idx = im6o_match_group(imo, ifp, &gsa->sa); - if (idx == -1 || imo->im6o_mfilters == NULL) { + imf = im6o_match_group(imo, ifp, &gsa->sa); + if (imf == NULL) { error = EADDRNOTAVAIL; goto out_in6p_locked; } - inm = imo->im6o_membership[idx]; - imf = &imo->im6o_mfilters[idx]; + inm = imf->im6f_in6m; /* * Begin state merge transaction at socket layer. @@ -2814,6 +2735,7 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) { struct in6_addr mcaddr; struct in6_addr src; + struct epoch_tracker et; struct ifnet *ifp; struct ifmultiaddr *ifma; struct in6_multi *inm; @@ -2868,12 +2790,11 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) IN6_MULTI_LOCK(); IN6_MULTI_LIST_LOCK(); - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) + inm = in6m_ifmultiaddr_get_inm(ifma); + if (inm == NULL) continue; - inm = (struct in6_multi *)ifma->ifma_protospec; if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr)) continue; fmode = inm->in6m_st[1].iss_fmode; @@ -2897,7 +2818,7 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) break; } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); IN6_MULTI_LIST_UNLOCK(); IN6_MULTI_UNLOCK(); diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c index a6beba43..b66aa8a4 100644 --- a/freebsd/sys/netinet6/in6_pcb.c +++ b/freebsd/sys/netinet6/in6_pcb.c @@ -186,14 +186,15 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0) reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + struct epoch_tracker et; struct ifaddr *ifa; sin6->sin6_port = 0; /* yech... */ - NET_EPOCH_ENTER(); + NET_EPOCH_ENTER(et); if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL && (inp->inp_flags & INP_BINDANY) == 0) { - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } @@ -206,10 +207,10 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); } if (lport) { struct inpcb *t; @@ -814,19 +815,20 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, void in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) { - struct inpcb *in6p; + struct inpcb *inp; + struct in6_multi *inm; + struct in6_mfilter *imf; struct ip6_moptions *im6o; - int i, gap; INP_INFO_WLOCK(pcbinfo); - CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { - INP_WLOCK(in6p); - if (__predict_false(in6p->inp_flags2 & INP_FREED)) { - INP_WUNLOCK(in6p); + CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { + INP_WLOCK(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_WUNLOCK(inp); continue; } - im6o = in6p->in6p_moptions; - if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { + im6o = inp->in6p_moptions; + if ((inp->inp_vflag & INP_IPV6) && im6o != NULL) { /* * Unselect the outgoing ifp for multicast if it * is being detached. @@ -837,20 +839,20 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) * Drop multicast group membership if we joined * through the interface being detached. */ - gap = 0; - for (i = 0; i < im6o->im6o_num_memberships; i++) { - if (im6o->im6o_membership[i]->in6m_ifp == - ifp) { - in6_leavegroup(im6o->im6o_membership[i], NULL); - gap++; - } else if (gap != 0) { - im6o->im6o_membership[i - gap] = - im6o->im6o_membership[i]; - } +restart: + IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) { + if ((inm = imf->im6f_in6m) == NULL) + continue; + if (inm->in6m_ifp != ifp) + continue; + ip6_mfilter_remove(&im6o->im6o_head, imf); + IN6_MULTI_LOCK_ASSERT(); + in6_leavegroup_locked(inm, NULL); + ip6_mfilter_free(imf); + goto restart; } - im6o->im6o_num_memberships -= gap; } - INP_WUNLOCK(in6p); + INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(pcbinfo); } diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h index 2c6bcdc6..56ea6eeb 100644 --- a/freebsd/sys/netinet6/in6_pcb.h +++ b/freebsd/sys/netinet6/in6_pcb.h @@ -113,7 +113,7 @@ int in6_getpeeraddr(struct socket *so, struct sockaddr **nam); int in6_getsockaddr(struct socket *so, struct sockaddr **nam); int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam); int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam); -int in6_selecthlim(struct in6pcb *, struct ifnet *); +int in6_selecthlim(struct inpcb *, struct ifnet *); int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct ucred *); void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int); #endif /* _KERNEL */ diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c index cf62e60c..652d70b6 100644 --- a/freebsd/sys/netinet6/in6_proto.c +++ b/freebsd/sys/netinet6/in6_proto.c @@ -386,10 +386,6 @@ VNET_DEFINE(int, ip6_accept_rtadv) = 0; VNET_DEFINE(int, ip6_no_radr) = 0; VNET_DEFINE(int, ip6_norbit_raif) = 0; VNET_DEFINE(int, ip6_rfc6204w3) = 0; -VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */ -int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */ -VNET_DEFINE(int, ip6_maxfragbucketsize);/* initialized in frag6.c:frag6_init() */ -VNET_DEFINE(int, ip6_maxfragsperpacket); /* initialized in frag6.c:frag6_init() */ VNET_DEFINE(int, ip6_log_interval) = 5; VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we * process? */ @@ -476,20 +472,6 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) return (0); } -static int -sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS) -{ - int error, val; - - val = V_ip6_maxfragpackets; - error = sysctl_handle_int(oidp, &val, 0, req); - if (error != 0 || !req->newptr) - return (error); - V_ip6_maxfragpackets = val; - frag6_set_bucketsize(); - return (0); -} - SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0, "Enable forwarding of IPv6 packets between interfaces"); @@ -502,12 +484,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat, ip6stat, "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)"); -SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0, - sysctl_ip6_maxfragpackets, "I", - "Default maximum number of outstanding fragmented IPv6 packets. " - "A value of 0 means no fragmented packets will be accepted, while a " - "a value of -1 means no limit"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, "Default value of per-interface flag for accepting ICMPv6 RA messages"); @@ -577,17 +553,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr, SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0, "Use the default scope zone when none is specified"); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, - CTLFLAG_RW, &ip6_maxfrags, 0, - "Maximum allowed number of outstanding IPv6 packet fragments. " - "A value of 0 means no fragmented packets will be accepted, while a " - "a value of -1 means no limit"); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0, - "Maximum number of reassembly queues per hash bucket"); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0, - "Maximum allowed number of fragments per packet"); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0, "Enable path MTU discovery for multicast packets"); @@ -643,3 +608,10 @@ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861), 0, "Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861"); +#ifdef EXPERIMENTAL +SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, + nd6_ignore_ipv6_only_ra, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(nd6_ignore_ipv6_only_ra), 0, + "Ignore the 'IPv6-Only flag' in RA messages in compliance with " + "draft-ietf-6man-ipv6only-flag"); +#endif diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c index 1cb71b88..0bd8bba4 100644 --- a/freebsd/sys/netinet6/in6_src.c +++ b/freebsd/sys/netinet6/in6_src.c @@ -726,6 +726,10 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (ron->ro_rt == NULL || (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0) error = EHOSTUNREACH; + else { + rt = ron->ro_rt; + ifp = rt->rt_ifp; + } goto done; } @@ -929,21 +933,21 @@ in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * 3. The system default hoplimit. */ int -in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) +in6_selecthlim(struct inpcb *inp, struct ifnet *ifp) { - if (in6p && in6p->in6p_hops >= 0) - return (in6p->in6p_hops); + if (inp && inp->in6p_hops >= 0) + return (inp->in6p_hops); else if (ifp) return (ND_IFINFO(ifp)->chlim); - else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { + else if (inp && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { struct nhop6_basic nh6; struct in6_addr dst; uint32_t fibnum, scopeid; int hlim; - fibnum = in6p->inp_inc.inc_fibnum; - in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid); + fibnum = inp->inp_inc.inc_fibnum; + in6_splitscope(&inp->in6p_faddr, &dst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){ hlim = ND_IFINFO(nh6.nh_ifp)->chlim; return (hlim); diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h index 5ed0ae90..3e535310 100644 --- a/freebsd/sys/netinet6/in6_var.h +++ b/freebsd/sys/netinet6/in6_var.h @@ -602,9 +602,61 @@ struct in6_mfilter { struct ip6_msource_tree im6f_sources; /* source list for (S,G) */ u_long im6f_nsrc; /* # of source entries */ uint8_t im6f_st[2]; /* state before/at commit */ + struct in6_multi *im6f_in6m; /* associated multicast address */ + STAILQ_ENTRY(in6_mfilter) im6f_entry; /* list entry */ }; /* + * Helper types and functions for IPv4 multicast filters. + */ +STAILQ_HEAD(ip6_mfilter_head, in6_mfilter); + +struct in6_mfilter *ip6_mfilter_alloc(int mflags, int st0, int st1); +void ip6_mfilter_free(struct in6_mfilter *); + +static inline void +ip6_mfilter_init(struct ip6_mfilter_head *head) +{ + + STAILQ_INIT(head); +} + +static inline struct in6_mfilter * +ip6_mfilter_first(const struct ip6_mfilter_head *head) +{ + + return (STAILQ_FIRST(head)); +} + +static inline void +ip6_mfilter_insert(struct ip6_mfilter_head *head, struct in6_mfilter *imf) +{ + + STAILQ_INSERT_TAIL(head, imf, im6f_entry); +} + +static inline void +ip6_mfilter_remove(struct ip6_mfilter_head *head, struct in6_mfilter *imf) +{ + + STAILQ_REMOVE(head, imf, in6_mfilter, im6f_entry); +} + +#define IP6_MFILTER_FOREACH(imf, head) \ + STAILQ_FOREACH(imf, head, im6f_entry) + +static inline size_t +ip6_mfilter_count(struct ip6_mfilter_head *head) +{ + struct in6_mfilter *imf; + size_t num = 0; + + STAILQ_FOREACH(imf, head, im6f_entry) + num++; + return (num); +} + +/* * Legacy KAME IPv6 multicast membership descriptor. */ struct in6_multi_mship { @@ -645,6 +697,7 @@ struct in6_multi { /* New fields for MLDv2 follow. */ struct mld_ifsoftc *in6m_mli; /* MLD info */ SLIST_ENTRY(in6_multi) in6m_nrele; /* to-be-released by MLD */ + SLIST_ENTRY(in6_multi) in6m_defer; /* deferred MLDv1 */ struct ip6_msource_tree in6m_srcs; /* tree of sources */ u_long in6m_nsrc; /* # of tree entries */ @@ -670,8 +723,8 @@ struct in6_multi { } in6m_st[2]; /* state at t0, t1 */ }; -void in6m_disconnect(struct in6_multi *inm); -extern int ifma6_restart; +void in6m_disconnect_locked(struct in6_multi_head *inmh, struct in6_multi *inm); + /* * Helper function to derive the filter mode on a source entry * from its internal counters. Predicates are: @@ -713,13 +766,25 @@ extern struct sx in6_multi_sx; #define IN6_MULTI_LOCK_ASSERT() sx_assert(&in6_multi_sx, SA_XLOCKED) #define IN6_MULTI_UNLOCK_ASSERT() sx_assert(&in6_multi_sx, SA_XUNLOCKED) +/* + * Get the in6_multi pointer from a ifmultiaddr. + * Returns NULL if ifmultiaddr is no longer valid. + */ +static __inline struct in6_multi * +in6m_ifmultiaddr_get_inm(struct ifmultiaddr *ifma) +{ + + NET_EPOCH_ASSERT(); + + return ((ifma->ifma_addr->sa_family != AF_INET6 || + (ifma->ifma_flags & IFMA_F_ENQUEUED) == 0) ? NULL : + ifma->ifma_protospec); +} /* * Look up an in6_multi record for an IPv6 multicast address * on the interface ifp. * If no record found, return NULL. - * - * SMPng: The IN6_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. */ static __inline struct in6_multi * in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr) @@ -727,18 +792,14 @@ in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr) struct ifmultiaddr *ifma; struct in6_multi *inm; - inm = NULL; - CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { - if (ifma->ifma_addr->sa_family == AF_INET6) { - inm = (struct in6_multi *)ifma->ifma_protospec; - if (inm == NULL) - continue; - if (IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, mcaddr)) - break; - inm = NULL; - } + CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + inm = in6m_ifmultiaddr_get_inm(ifma); + if (inm == NULL) + continue; + if (IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, mcaddr)) + return (inm); } - return (inm); + return (NULL); } /* @@ -749,12 +810,13 @@ in6m_lookup_locked(struct ifnet *ifp, const struct in6_addr *mcaddr) static __inline struct in6_multi * in6m_lookup(struct ifnet *ifp, const struct in6_addr *mcaddr) { + struct epoch_tracker et; struct in6_multi *inm; IN6_MULTI_LIST_LOCK(); - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); inm = in6m_lookup_locked(ifp, mcaddr); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); IN6_MULTI_LIST_UNLOCK(); return (inm); @@ -808,8 +870,8 @@ void in6m_clear_recorded(struct in6_multi *); void in6m_commit(struct in6_multi *); void in6m_print(const struct in6_multi *); int in6m_record_source(struct in6_multi *, const struct in6_addr *); -void in6m_release_deferred(struct in6_multi *); void in6m_release_list_deferred(struct in6_multi_head *); +void in6m_release_wait(void); void ip6_freemoptions(struct ip6_moptions *); int ip6_getmoptions(struct inpcb *, struct sockopt *); int ip6_setmoptions(struct inpcb *, struct sockopt *); diff --git a/freebsd/sys/netinet6/ip6_fastfwd.c b/freebsd/sys/netinet6/ip6_fastfwd.c index f63c51bf..0c04200c 100644 --- a/freebsd/sys/netinet6/ip6_fastfwd.c +++ b/freebsd/sys/netinet6/ip6_fastfwd.c @@ -158,10 +158,10 @@ ip6_tryforward(struct mbuf *m) /* * Incoming packet firewall processing. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, rcvif, PFIL_IN, 0, - NULL) != 0 || m == NULL) + if (pfil_run_hooks(V_inet6_pfil_head, &m, rcvif, PFIL_IN, NULL) != + PFIL_PASS) goto dropin; /* * If packet filter sets the M_FASTFWD_OURS flag, this means @@ -197,7 +197,7 @@ passin: in6_ifstat_inc(rcvif, ifs6_in_noroute); goto dropin; } - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) { + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) { if (m->m_pkthdr.len > nh.nh_mtu) { in6_ifstat_inc(nh.nh_ifp, ifs6_in_toobig); icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, nh.nh_mtu); @@ -210,8 +210,8 @@ passin: /* * Outgoing packet firewall processing. */ - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, nh.nh_ifp, PFIL_OUT, - PFIL_FWD, NULL) != 0 || m == NULL) + if (pfil_run_hooks(V_inet6_pfil_head, &m, nh.nh_ifp, PFIL_OUT | + PFIL_FWD, NULL) != PFIL_PASS) goto dropout; /* diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c index 80535efe..ca73977f 100644 --- a/freebsd/sys/netinet6/ip6_forward.c +++ b/freebsd/sys/netinet6/ip6_forward.c @@ -322,15 +322,14 @@ again2: in6_clearscope(&ip6->ip6_dst); /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) goto pass; odst = ip6->ip6_dst; /* Run through list of hooks for forwarded packets. */ - error = pfil_run_hooks(&V_inet6_pfil_hook, &m, rt->rt_ifp, PFIL_OUT, - PFIL_FWD, NULL); - if (error != 0 || m == NULL) - goto freecopy; /* consumed by filter */ + if (pfil_run_hooks(V_inet6_pfil_head, &m, rt->rt_ifp, PFIL_OUT | + PFIL_FWD, NULL) != PFIL_PASS) + goto freecopy; ip6 = mtod(m, struct ip6_hdr *); /* See if destination IP address was changed by packet filter. */ diff --git a/freebsd/sys/netinet6/ip6_id.c b/freebsd/sys/netinet6/ip6_id.c index 0905ab3f..847dc403 100644 --- a/freebsd/sys/netinet6/ip6_id.c +++ b/freebsd/sys/netinet6/ip6_id.c @@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$"); #include <sys/types.h> #include <sys/param.h> #include <sys/kernel.h> +#include <sys/random.h> #include <sys/socket.h> #include <sys/libkern.h> @@ -260,5 +261,15 @@ u_int32_t ip6_randomflowlabel(void) { + /* + * It's ok to emit zero flow labels early, before random is available + * (seeded). RFC 6437: + * + * "A Flow Label of zero is used to indicate packets that have not been + * labeled." + */ + if (__predict_false(!is_random_seeded())) + return (0); + return randomid(&randomtab_20) & 0xfffff; } diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c index 25ab624c..c5c040f0 100644 --- a/freebsd/sys/netinet6/ip6_input.c +++ b/freebsd/sys/netinet6/ip6_input.c @@ -193,7 +193,7 @@ SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, #endif -VNET_DEFINE(struct pfil_head, inet6_pfil_hook); +VNET_DEFINE(pfil_head_t, inet6_pfil_head); VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); VNET_PCPUSTAT_SYSINIT(ip6stat); @@ -216,6 +216,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); void ip6_init(void) { + struct pfil_head_args args; struct protosw *pr; int i; @@ -229,11 +230,11 @@ ip6_init(void) &V_in6_ifaddrhmask); /* Initialize packet filter hooks. */ - V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; - V_inet6_pfil_hook.ph_af = AF_INET6; - if ((i = pfil_head_register(&V_inet6_pfil_hook)) != 0) - printf("%s: WARNING: unable to register pfil hook, " - "error %d\n", __func__, i); + args.pa_version = PFIL_VERSION; + args.pa_flags = PFIL_IN | PFIL_OUT; + args.pa_type = PFIL_TYPE_IP6; + args.pa_headname = PFIL_INET6_NAME; + V_inet6_pfil_head = pfil_head_register(&args); if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], @@ -361,9 +362,7 @@ ip6_destroy(void *unused __unused) #endif netisr_unregister_vnet(&ip6_nh); - if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0) - printf("%s: WARNING: unable to unregister pfil hook, " - "error %d\n", __func__, error); + pfil_head_unregister(V_inet6_pfil_head); error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); if (error != 0) { printf("%s: WARNING: unable to deregister input helper hook " @@ -406,20 +405,22 @@ VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); #endif static int -ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off, +ip6_input_hbh(struct mbuf **mp, uint32_t *plen, uint32_t *rtalert, int *off, int *nxt, int *ours) { + struct mbuf *m; struct ip6_hdr *ip6; struct ip6_hbh *hbh; - if (ip6_hopopts_input(plen, rtalert, &m, off)) { + if (ip6_hopopts_input(plen, rtalert, mp, off)) { #if 0 /*touches NULL pointer*/ - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + in6_ifstat_inc((*mp)->m_pkthdr.rcvif, ifs6_in_discard); #endif goto out; /* m have already been freed */ } /* adjust pointer */ + m = *mp; ip6 = mtod(m, struct ip6_hdr *); /* @@ -760,14 +761,12 @@ ip6_input(struct mbuf *m) */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_IN(V_inet6_pfil_head)) goto passin; odst = ip6->ip6_dst; - if (pfil_run_hooks(&V_inet6_pfil_hook, &m, - m->m_pkthdr.rcvif, PFIL_IN, 0, NULL)) - return; - if (m == NULL) /* consumed by filter */ + if (pfil_run_hooks(V_inet6_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, + NULL) != PFIL_PASS) return; ip6 = mtod(m, struct ip6_hdr *); srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst); @@ -859,7 +858,7 @@ passin: */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { - if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0) + if (ip6_input_hbh(&m, &plen, &rtalert, &off, &nxt, &ours) != 0) return; } else nxt = ip6->ip6_nxt; @@ -1409,12 +1408,12 @@ ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, } void -ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) +ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); int v4only = 0; - mp = ip6_savecontrol_v4(in6p, m, mp, &v4only); + mp = ip6_savecontrol_v4(inp, m, mp, &v4only); if (v4only) return; @@ -1425,7 +1424,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) * returned to normal user. * See also RFC 2292 section 6 (or RFC 3542 section 8). */ - if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) { + if ((inp->inp_flags & IN6P_HOPOPTS) != 0) { /* * Check if a hop-by-hop options header is contatined in the * received packet, and if so, store the options as ancillary @@ -1467,7 +1466,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) * Note: this constraint is removed in RFC3542 */ *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, - IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS), + IS2292(inp, IPV6_2292HOPOPTS, IPV6_HOPOPTS), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; @@ -1477,7 +1476,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) } } - if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { + if ((inp->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); /* @@ -1538,22 +1537,22 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) switch (nxt) { case IPPROTO_DSTOPTS: - if (!(in6p->inp_flags & IN6P_DSTOPTS)) + if (!(inp->inp_flags & IN6P_DSTOPTS)) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, - IS2292(in6p, + IS2292(inp, IPV6_2292DSTOPTS, IPV6_DSTOPTS), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_ROUTING: - if (!(in6p->inp_flags & IN6P_RTHDR)) + if (!(inp->inp_flags & IN6P_RTHDR)) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, - IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR), + IS2292(inp, IPV6_2292RTHDR, IPV6_RTHDR), IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; @@ -1589,7 +1588,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) ; } - if (in6p->inp_flags2 & INP_RECVFLOWID) { + if (inp->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; @@ -1610,7 +1609,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) } #ifdef RSS - if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) { + if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c index 0851bef8..483f17f0 100644 --- a/freebsd/sys/netinet6/ip6_output.c +++ b/freebsd/sys/netinet6/ip6_output.c @@ -69,14 +69,16 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ratelimit.h> #include <rtems/bsd/local/opt_ipsec.h> -#include <rtems/bsd/local/opt_sctp.h> +#include <rtems/bsd/local/opt_kern_tls.h> +#include <rtems/bsd/local/opt_ratelimit.h> #include <rtems/bsd/local/opt_route.h> #include <rtems/bsd/local/opt_rss.h> +#include <rtems/bsd/local/opt_sctp.h> #include <sys/param.h> #include <sys/kernel.h> +#include <sys/ktls.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/errno.h> @@ -230,7 +232,20 @@ ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } - m->m_flags = m0->m_flags & M_COPYFLAGS; + + /* + * Make sure the complete packet header gets copied + * from the originating mbuf to the newly created + * mbuf. This also ensures that existing firewall + * classification(s), VLAN tags and so on get copied + * to the resulting fragmented packet(s): + */ + if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) { + m_free(m); + IP6STAT_INC(ip6s_odropped); + return (ENOBUFS); + } + *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; @@ -255,8 +270,6 @@ ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, } m_cat(m, m_frgpart); m->m_pkthdr.len = fraglen + hlen + sizeof(*ip6f); - m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; - m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; @@ -267,6 +280,83 @@ ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, return (0); } +static int +ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp, + struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro) +{ +#ifdef KERN_TLS + struct ktls_session *tls = NULL; +#endif + struct m_snd_tag *mst; + int error; + + MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); + mst = NULL; + +#ifdef KERN_TLS + /* + * If this is an unencrypted TLS record, save a reference to + * the record. This local reference is used to call + * ktls_output_eagain after the mbuf has been freed (thus + * dropping the mbuf's reference) in if_output. + */ + if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { + tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls); + mst = tls->snd_tag; + + /* + * If a TLS session doesn't have a valid tag, it must + * have had an earlier ifp mismatch, so drop this + * packet. + */ + if (mst == NULL) { + error = EAGAIN; + goto done; + } + } +#endif +#ifdef RATELIMIT + if (inp != NULL && mst == NULL) { + if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 || + (inp->inp_snd_tag != NULL && + inp->inp_snd_tag->ifp != ifp)) + in_pcboutput_txrtlmt(inp, ifp, m); + + if (inp->inp_snd_tag != NULL) + mst = inp->inp_snd_tag; + } +#endif + if (mst != NULL) { + KASSERT(m->m_pkthdr.rcvif == NULL, + ("trying to add a send tag to a forwarded packet")); + if (mst->ifp != ifp) { + error = EAGAIN; + goto done; + } + + /* stamp send tag on mbuf */ + m->m_pkthdr.snd_tag = m_snd_tag_ref(mst); + m->m_pkthdr.csum_flags |= CSUM_SND_TAG; + } + + error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); + +done: + /* Check for route change invalidating send tags. */ +#ifdef KERN_TLS + if (tls != NULL) { + if (error == EAGAIN) + error = ktls_output_eagain(inp, tls); + ktls_free(tls); + } +#endif +#ifdef RATELIMIT + if (error == EAGAIN) + in_pcboutput_eagain(inp); +#endif + return (error); +} + /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). @@ -324,6 +414,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } +#ifdef NUMA + m->m_pkthdr.numa_domain = inp->inp_numa_domain; +#endif } #if defined(IPSEC) || defined(IPSEC_SUPPORT) @@ -573,52 +666,72 @@ again: counter_u64_add(rt->rt_pksent, 1); } - - /* - * The outgoing interface must be in the zone of source and - * destination addresses. - */ - origifp = ifp; - + /* Setup data structures for scope ID checks. */ src0 = ip6->ip6_src; - if (in6_setscope(&src0, origifp, &zone)) - goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; - if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) - goto badscope; dst0 = ip6->ip6_dst; - if (in6_setscope(&dst0, origifp, &zone)) - goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; - if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { - goto badscope; - } - - /* We should use ia_ifp to support the case of - * sending packets to an address of our own. - */ - if (ia != NULL && ia->ia_ifp) - ifp = ia->ia_ifp; - /* scope check is done. */ - goto routefound; + /* Check for valid scope ID. */ + if (in6_setscope(&src0, ifp, &zone) == 0 && + sa6_recoverscope(&src_sa) == 0 && zone == src_sa.sin6_scope_id && + in6_setscope(&dst0, ifp, &zone) == 0 && + sa6_recoverscope(&dst_sa) == 0 && zone == dst_sa.sin6_scope_id) { + /* + * The outgoing interface is in the zone of the source + * and destination addresses. + * + * Because the loopback interface cannot receive + * packets with a different scope ID than its own, + * there is a trick is to pretend the outgoing packet + * was received by the real network interface, by + * setting "origifp" different from "ifp". This is + * only allowed when "ifp" is a loopback network + * interface. Refer to code in nd6_output_ifp() for + * more details. + */ + origifp = ifp; + + /* + * We should use ia_ifp to support the case of sending + * packets to an address of our own. + */ + if (ia != NULL && ia->ia_ifp) + ifp = ia->ia_ifp; + + } else if ((ifp->if_flags & IFF_LOOPBACK) == 0 || + sa6_recoverscope(&src_sa) != 0 || + sa6_recoverscope(&dst_sa) != 0 || + dst_sa.sin6_scope_id == 0 || + (src_sa.sin6_scope_id != 0 && + src_sa.sin6_scope_id != dst_sa.sin6_scope_id) || + (origifp = ifnet_byindex(dst_sa.sin6_scope_id)) == NULL) { + /* + * If the destination network interface is not a + * loopback interface, or the destination network + * address has no scope ID, or the source address has + * a scope ID set which is different from the + * destination address one, or there is no network + * interface representing this scope ID, the address + * pair is considered invalid. + */ + IP6STAT_INC(ip6s_badscope); + in6_ifstat_inc(ifp, ifs6_out_discard); + if (error == 0) + error = EHOSTUNREACH; /* XXX */ + goto bad; + } - badscope: - IP6STAT_INC(ip6s_badscope); - in6_ifstat_inc(origifp, ifs6_out_discard); - if (error == 0) - error = EHOSTUNREACH; /* XXX */ - goto bad; + /* All scope ID checks are successful. */ - routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* @@ -774,16 +887,21 @@ again: } /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet6_pfil_hook)) + if (!PFIL_HOOKED_OUT(V_inet6_pfil_head)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ - error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, 0, inp); - if (error != 0 || m == NULL) + switch (pfil_run_hooks(V_inet6_pfil_head, &m, ifp, PFIL_OUT, inp)) { + case PFIL_PASS: + ip6 = mtod(m, struct ip6_hdr *); + break; + case PFIL_DROPPED: + error = EPERM; + /* FALLTHROUGH */ + case PFIL_CONSUMED: goto done; - /* adjust pointer */ - ip6 = mtod(m, struct ip6_hdr *); + } needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ @@ -881,11 +999,30 @@ passout: */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; + m = mb_unmapped_to_ext(m); + if (m == NULL) { + error = ENOBUFS; + IP6STAT_INC(ip6s_odropped); + goto bad; + } in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); + } else if ((ifp->if_capenable & IFCAP_NOMAP) == 0) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + error = ENOBUFS; + IP6STAT_INC(ip6s_odropped); + goto bad; + } } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; + m = mb_unmapped_to_ext(m); + if (m == NULL) { + error = ENOBUFS; + IP6STAT_INC(ip6s_odropped); + goto bad; + } sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif @@ -931,23 +1068,7 @@ passout: m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } -#ifdef RATELIMIT - if (inp != NULL) { - if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) - in_pcboutput_txrtlmt(inp, ifp, m); - /* stamp send tag on mbuf */ - m->m_pkthdr.snd_tag = inp->inp_snd_tag; - } else { - m->m_pkthdr.snd_tag = NULL; - } -#endif - error = nd6_output_ifp(ifp, origifp, m, dst, - (struct route *)ro); -#ifdef RATELIMIT - /* check for route change */ - if (error == EAGAIN) - in_pcboutput_eagain(inp); -#endif + error = ip6_output_send(inp, ifp, origifp, m, dst, ro); goto done; } @@ -989,11 +1110,23 @@ passout: * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + in6_ifstat_inc(ifp, ifs6_out_fragfail); + error = ENOBUFS; + goto bad; + } in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + in6_ifstat_inc(ifp, ifs6_out_fragfail); + error = ENOBUFS; + goto bad; + } sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } @@ -1046,23 +1179,7 @@ sendorfree: counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } -#ifdef RATELIMIT - if (inp != NULL) { - if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) - in_pcboutput_txrtlmt(inp, ifp, m); - /* stamp send tag on mbuf */ - m->m_pkthdr.snd_tag = inp->inp_snd_tag; - } else { - m->m_pkthdr.snd_tag = NULL; - } -#endif - error = nd6_output_ifp(ifp, origifp, m, dst, - (struct route *)ro); -#ifdef RATELIMIT - /* check for route change */ - if (error == EAGAIN) - in_pcboutput_eagain(inp); -#endif + error = ip6_output_send(inp, ifp, origifp, m, dst, ro); } else m_freem(m); } @@ -1390,7 +1507,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; - struct inpcb *in6p = sotoinpcb(so); + struct inpcb *inp = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; @@ -1425,43 +1542,43 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: - INP_WLOCK(in6p); + INP_WLOCK(inp); if ((so->so_options & SO_REUSEADDR) != 0) - in6p->inp_flags2 |= INP_REUSEADDR; + inp->inp_flags2 |= INP_REUSEADDR; else - in6p->inp_flags2 &= ~INP_REUSEADDR; - INP_WUNLOCK(in6p); + inp->inp_flags2 &= ~INP_REUSEADDR; + INP_WUNLOCK(inp); error = 0; break; case SO_REUSEPORT: - INP_WLOCK(in6p); + INP_WLOCK(inp); if ((so->so_options & SO_REUSEPORT) != 0) - in6p->inp_flags2 |= INP_REUSEPORT; + inp->inp_flags2 |= INP_REUSEPORT; else - in6p->inp_flags2 &= ~INP_REUSEPORT; - INP_WUNLOCK(in6p); + inp->inp_flags2 &= ~INP_REUSEPORT; + INP_WUNLOCK(inp); error = 0; break; case SO_REUSEPORT_LB: - INP_WLOCK(in6p); + INP_WLOCK(inp); if ((so->so_options & SO_REUSEPORT_LB) != 0) - in6p->inp_flags2 |= INP_REUSEPORT_LB; + inp->inp_flags2 |= INP_REUSEPORT_LB; else - in6p->inp_flags2 &= ~INP_REUSEPORT_LB; - INP_WUNLOCK(in6p); + inp->inp_flags2 &= ~INP_REUSEPORT_LB; + INP_WUNLOCK(inp); error = 0; break; case SO_SETFIB: - INP_WLOCK(in6p); - in6p->inp_inc.inc_fibnum = so->so_fibnum; - INP_WUNLOCK(in6p); + INP_WLOCK(inp); + inp->inp_inc.inc_fibnum = so->so_fibnum; + INP_WUNLOCK(inp); error = 0; break; case SO_MAX_PACING_RATE: #ifdef RATELIMIT - INP_WLOCK(in6p); - in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED; - INP_WUNLOCK(in6p); + INP_WLOCK(inp); + inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED; + INP_WUNLOCK(inp); error = 0; #else error = EOPNOTSUPP; @@ -1495,7 +1612,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; - error = ip6_pcbopts(&in6p->in6p_outputopts, + error = ip6_pcbopts(&inp->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; @@ -1566,57 +1683,57 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) error = EINVAL; else { /* -1 = kernel default */ - in6p->in6p_hops = optval; - if ((in6p->inp_vflag & + inp->in6p_hops = optval; + if ((inp->inp_vflag & INP_IPV4) != 0) - in6p->inp_ip_ttl = optval; + inp->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ - INP_WLOCK(in6p); \ + INP_WLOCK(inp); \ if (optval) \ - in6p->inp_flags |= (bit); \ + inp->inp_flags |= (bit); \ else \ - in6p->inp_flags &= ~(bit); \ - INP_WUNLOCK(in6p); \ + inp->inp_flags &= ~(bit); \ + INP_WUNLOCK(inp); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ - INP_WLOCK(in6p); \ - in6p->inp_flags |= IN6P_RFC2292; \ + INP_WLOCK(inp); \ + inp->inp_flags |= IN6P_RFC2292; \ if (optval) \ - in6p->inp_flags |= (bit); \ + inp->inp_flags |= (bit); \ else \ - in6p->inp_flags &= ~(bit); \ - INP_WUNLOCK(in6p); \ + inp->inp_flags &= ~(bit); \ + INP_WUNLOCK(inp); \ } while (/*CONSTCOND*/ 0) -#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) +#define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0) #define OPTSET2_N(bit, val) do { \ if (val) \ - in6p->inp_flags2 |= bit; \ + inp->inp_flags2 |= bit; \ else \ - in6p->inp_flags2 &= ~bit; \ + inp->inp_flags2 &= ~bit; \ } while (0) #define OPTSET2(bit, val) do { \ - INP_WLOCK(in6p); \ + INP_WLOCK(inp); \ OPTSET2_N(bit, val); \ - INP_WUNLOCK(in6p); \ + INP_WUNLOCK(inp); \ } while (0) -#define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) +#define OPTBIT2(bit) (inp->inp_flags2 & (bit) ? 1 : 0) #define OPTSET2292_EXCLUSIVE(bit) \ do { \ - INP_WLOCK(in6p); \ + INP_WLOCK(inp); \ if (OPTBIT(IN6P_RFC2292)) { \ error = EINVAL; \ } else { \ if (optval) \ - in6p->inp_flags |= (bit); \ + inp->inp_flags |= (bit); \ else \ - in6p->inp_flags &= ~(bit); \ + inp->inp_flags &= ~(bit); \ } \ - INP_WUNLOCK(in6p); \ + INP_WUNLOCK(inp); \ } while (/*CONSTCOND*/ 0) case IPV6_RECVPKTINFO: @@ -1632,17 +1749,17 @@ do { \ error = EINVAL; break; } - INP_WLOCK(in6p); - if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { - INP_WUNLOCK(in6p); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(inp); return (ECONNRESET); } - optp = &in6p->in6p_outputopts; + optp = &inp->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); - INP_WUNLOCK(in6p); + INP_WUNLOCK(inp); break; } @@ -1693,16 +1810,16 @@ do { \ * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ - if (in6p->inp_lport || - !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { + if (inp->inp_lport || + !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) - in6p->inp_vflag &= ~INP_IPV4; + inp->inp_vflag &= ~INP_IPV4; else - in6p->inp_vflag |= INP_IPV4; + inp->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ @@ -1726,10 +1843,10 @@ do { \ case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { - INP_WLOCK(in6p); - in6p->inp_rss_listen_bucket = optval; + INP_WLOCK(inp); + inp->inp_rss_listen_bucket = optval; OPTSET2_N(INP_RSS_BUCKET_SET, 1); - INP_WUNLOCK(in6p); + INP_WUNLOCK(inp); } else { error = EINVAL; } @@ -1752,17 +1869,17 @@ do { \ break; { struct ip6_pktopts **optp; - INP_WLOCK(in6p); - if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { - INP_WUNLOCK(in6p); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(inp); return (ECONNRESET); } - optp = &in6p->in6p_outputopts; + optp = &inp->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); - INP_WUNLOCK(in6p); + INP_WUNLOCK(inp); break; } @@ -1844,16 +1961,16 @@ do { \ break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; - INP_WLOCK(in6p); - if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { - INP_WUNLOCK(in6p); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(inp); return (ECONNRESET); } - optp = &in6p->in6p_outputopts; + optp = &inp->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? td->td_ucred : NULL, uproto); - INP_WUNLOCK(in6p); + INP_WUNLOCK(inp); break; } #undef OPTSET @@ -1870,7 +1987,7 @@ do { \ case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: - error = ip6_setmoptions(in6p, sopt); + error = ip6_setmoptions(inp, sopt); break; case IPV6_PORTRANGE: @@ -1879,34 +1996,34 @@ do { \ if (error) break; - INP_WLOCK(in6p); + INP_WLOCK(inp); switch (optval) { case IPV6_PORTRANGE_DEFAULT: - in6p->inp_flags &= ~(INP_LOWPORT); - in6p->inp_flags &= ~(INP_HIGHPORT); + inp->inp_flags &= ~(INP_LOWPORT); + inp->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: - in6p->inp_flags &= ~(INP_LOWPORT); - in6p->inp_flags |= INP_HIGHPORT; + inp->inp_flags &= ~(INP_LOWPORT); + inp->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: - in6p->inp_flags &= ~(INP_HIGHPORT); - in6p->inp_flags |= INP_LOWPORT; + inp->inp_flags &= ~(INP_HIGHPORT); + inp->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } - INP_WUNLOCK(in6p); + INP_WUNLOCK(inp); break; #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: if (IPSEC_ENABLED(ipv6)) { - error = IPSEC_PCBCTL(ipv6, in6p, sopt); + error = IPSEC_PCBCTL(ipv6, inp, sopt); break; } /* FALLTHROUGH */ @@ -1974,7 +2091,7 @@ do { \ break; case IPV6_UNICAST_HOPS: - optval = in6p->in6p_hops; + optval = inp->in6p_hops; break; case IPV6_RECVPKTINFO: @@ -2000,7 +2117,7 @@ do { \ case IPV6_PORTRANGE: { int flags; - flags = in6p->inp_flags; + flags = inp->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) @@ -2026,11 +2143,11 @@ do { \ break; case IPV6_FLOWID: - optval = in6p->inp_flowid; + optval = inp->inp_flowid; break; case IPV6_FLOWTYPE: - optval = in6p->inp_flowtype; + optval = inp->inp_flowtype; break; case IPV6_RECVFLOWID: @@ -2039,8 +2156,8 @@ do { \ #ifdef RSS case IPV6_RSSBUCKETID: retval = - rss_hash2bucket(in6p->inp_flowid, - in6p->inp_flowtype, + rss_hash2bucket(inp->inp_flowid, + inp->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; @@ -2076,12 +2193,12 @@ do { \ * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. - * Copy faddr out of in6p to avoid holding lock + * Copy faddr out of inp to avoid holding lock * on inp during route lookup. */ - INP_RLOCK(in6p); - bcopy(&in6p->in6p_faddr, &addr, sizeof(addr)); - INP_RUNLOCK(in6p); + INP_RLOCK(inp); + bcopy(&inp->in6p_faddr, &addr, sizeof(addr)); + INP_RUNLOCK(inp); error = ip6_getpmtu_ctl(so->so_fibnum, &addr, &pmtu); if (error) @@ -2133,20 +2250,20 @@ do { \ case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: - error = ip6_getpcbopt(in6p, optname, sopt); + error = ip6_getpcbopt(inp, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: - error = ip6_getmoptions(in6p, sopt); + error = ip6_getmoptions(inp, sopt); break; #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: if (IPSEC_ENABLED(ipv6)) { - error = IPSEC_PCBCTL(ipv6, in6p, sopt); + error = IPSEC_PCBCTL(ipv6, inp, sopt); break; } /* FALLTHROUGH */ @@ -2166,7 +2283,7 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); - struct inpcb *in6p = sotoinpcb(so); + struct inpcb *inp = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; @@ -2198,22 +2315,25 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) sizeof(optval)); if (error) break; - if ((optval % 2) != 0) { - /* the API assumes even offset values */ + if (optval < -1 || (optval % 2) != 0) { + /* + * The API assumes non-negative even offset + * values or -1 as a special value. + */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else - in6p->in6p_cksum = optval; + inp->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else - optval = in6p->in6p_cksum; + optval = inp->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; @@ -2312,16 +2432,16 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, #define GET_PKTOPT_VAR(field, lenexpr) do { \ if (pktopt && pktopt->field) { \ - INP_RUNLOCK(in6p); \ + INP_RUNLOCK(inp); \ optdata = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK); \ malloc_optdata = true; \ - INP_RLOCK(in6p); \ - if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ - INP_RUNLOCK(in6p); \ + INP_RLOCK(inp); \ + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ + INP_RUNLOCK(inp); \ free(optdata, M_TEMP); \ return (ECONNRESET); \ } \ - pktopt = in6p->in6p_outputopts; \ + pktopt = inp->in6p_outputopts; \ if (pktopt && pktopt->field) { \ optdatalen = min(lenexpr, sopt->sopt_valsize); \ bcopy(&pktopt->field, optdata, optdatalen); \ @@ -2340,7 +2460,7 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, pktopt->field->sa_len) static int -ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt) +ip6_getpcbopt(struct inpcb *inp, int optname, struct sockopt *sopt) { void *optdata = NULL; bool malloc_optdata = false; @@ -2352,8 +2472,8 @@ ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt) int defpreftemp = IP6PO_TEMPADDR_SYSTEM; struct ip6_pktopts *pktopt; - INP_RLOCK(in6p); - pktopt = in6p->in6p_outputopts; + INP_RLOCK(inp); + pktopt = inp->in6p_outputopts; switch (optname) { case IPV6_PKTINFO: @@ -2413,10 +2533,10 @@ ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt) #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif - INP_RUNLOCK(in6p); + INP_RUNLOCK(inp); return (ENOPROTOOPT); } - INP_RUNLOCK(in6p); + INP_RUNLOCK(inp); error = sooptcopyout(sopt, optdata, optdatalen); if (malloc_optdata) @@ -3098,23 +3218,23 @@ ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) * Compute IPv6 extension header length. */ int -ip6_optlen(struct inpcb *in6p) +ip6_optlen(struct inpcb *inp) { int len; - if (!in6p->in6p_outputopts) + if (!inp->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) - len += elen(in6p->in6p_outputopts->ip6po_hbh); - if (in6p->in6p_outputopts->ip6po_rthdr) + len += elen(inp->in6p_outputopts->ip6po_hbh); + if (inp->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ - len += elen(in6p->in6p_outputopts->ip6po_dest1); - len += elen(in6p->in6p_outputopts->ip6po_rthdr); - len += elen(in6p->in6p_outputopts->ip6po_dest2); + len += elen(inp->in6p_outputopts->ip6po_dest1); + len += elen(inp->in6p_outputopts->ip6po_rthdr); + len += elen(inp->in6p_outputopts->ip6po_dest2); return len; #undef elen } diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h index f235572d..be748b31 100644 --- a/freebsd/sys/netinet6/ip6_var.h +++ b/freebsd/sys/netinet6/ip6_var.h @@ -68,6 +68,7 @@ #include <sys/epoch.h> +struct ip6asfrag; /* * IP6 reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. @@ -83,25 +84,10 @@ struct ip6q { struct ip6q *ip6q_next; struct ip6q *ip6q_prev; int ip6q_unfrglen; /* len of unfragmentable part */ -#ifdef notyet - u_char *ip6q_nxtp; -#endif int ip6q_nfrag; /* # of fragments */ struct label *ip6q_label; }; -struct ip6asfrag { - struct ip6asfrag *ip6af_down; - struct ip6asfrag *ip6af_up; - struct mbuf *ip6af_m; - int ip6af_offset; /* offset in ip6af_m to next header */ - int ip6af_frglen; /* fragmentable part length */ - int ip6af_off; /* fragment offset */ - u_int16_t ip6af_mff; /* more fragment bit in frag off */ -}; - -#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) - /* * IP6 reinjecting structure. */ @@ -110,6 +96,7 @@ struct ip6_direct_ctx { uint32_t ip6dc_off; /* offset to next header */ }; +#if defined(_NETINET6_IN6_VAR_H_) && defined(_KERNEL) /* * Structure attached to inpcb.in6p_moptions and * passed to ip6_output when IPv6 multicast options are in use. @@ -119,13 +106,11 @@ struct ip6_moptions { struct ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */ u_char im6o_multicast_hlim; /* hoplimit for outgoing multicasts */ u_char im6o_multicast_loop; /* 1 >= hear sends if a member */ - u_short im6o_num_memberships; /* no. memberships this socket */ - u_short im6o_max_memberships; /* max memberships this socket */ - struct in6_multi **im6o_membership; /* group memberships */ - struct in6_mfilter *im6o_mfilters; /* source filters */ - struct epoch_context imo6_epoch_ctx; + struct ip6_mfilter_head im6o_head; /* group membership list */ }; - +#else +struct ip6_moptions; +#endif /* * Control options for outgoing packets */ @@ -208,6 +193,7 @@ struct ip6stat { uint64_t ip6s_localout; /* total ip packets generated here */ uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */ uint64_t ip6s_reassembled; /* total packets reassembled ok */ + uint64_t ip6s_atomicfrags; /* atomic fragments */ uint64_t ip6s_fragmented; /* datagrams successfully fragmented */ uint64_t ip6s_ofragments; /* output fragments created */ uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. */ @@ -299,12 +285,6 @@ VNET_DECLARE(int, ip6_v6only); VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */ VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */ -VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly - * queue */ -extern int ip6_maxfrags; /* Maximum fragments in reassembly - * queue */ -VNET_DECLARE(int, ip6_maxfragbucketsize); /* Maximum reassembly queues per bucket */ -VNET_DECLARE(int, ip6_maxfragsperpacket); /* Maximum fragments per packet */ VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */ VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA @@ -318,9 +298,6 @@ VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ #define V_ip6_mrouter VNET(ip6_mrouter) #define V_ip6_sendredirects VNET(ip6_sendredirects) -#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) -#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize) -#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket) #define V_ip6_accept_rtadv VNET(ip6_accept_rtadv) #define V_ip6_no_radr VNET(ip6_no_radr) #define V_ip6_norbit_raif VNET(ip6_norbit_raif) @@ -346,13 +323,20 @@ VNET_DECLARE(int, ip6_use_defzone); /* Whether to use the default scope * zone when unspecified */ #define V_ip6_use_defzone VNET(ip6_use_defzone) -VNET_DECLARE (struct pfil_head, inet6_pfil_hook); /* packet filter hooks */ -#define V_inet6_pfil_hook VNET(inet6_pfil_hook) +VNET_DECLARE(struct pfil_head *, inet6_pfil_head); +#define V_inet6_pfil_head VNET(inet6_pfil_head) +#define PFIL_INET6_NAME "inet6" + #ifdef IPSTEALTH VNET_DECLARE(int, ip6stealth); #define V_ip6stealth VNET(ip6stealth) #endif +#ifdef EXPERIMENTAL +VNET_DECLARE(int, nd6_ignore_ipv6_only_ra); +#define V_nd6_ignore_ipv6_only_ra VNET(nd6_ignore_ipv6_only_ra) +#endif + extern struct pr_usrreqs rip6_usrreqs; struct sockopt; @@ -407,7 +391,6 @@ int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int, int route6_input(struct mbuf **, int *, int); -void frag6_set_bucketsize(void); void frag6_init(void); int frag6_input(struct mbuf **, int *, int); void frag6_slowtimo(void); diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c index b00f03ef..cc946f67 100644 --- a/freebsd/sys/netinet6/mld6.c +++ b/freebsd/sys/netinet6/mld6.c @@ -112,7 +112,7 @@ static void mli_delete_locked(const struct ifnet *); static void mld_dispatch_packet(struct mbuf *); static void mld_dispatch_queue(struct mbufq *, int); static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *); -static void mld_fasttimo_vnet(void); +static void mld_fasttimo_vnet(struct in6_multi_head *inmh); static int mld_handle_state_change(struct in6_multi *, struct mld_ifsoftc *); static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *, @@ -141,14 +141,15 @@ static int mld_v2_enqueue_group_record(struct mbufq *, struct in6_multi *, const int, const int, const int, const int); static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *, - struct mbuf *, const int, const int); + struct mbuf *, struct mldv2_query *, const int, const int); static int mld_v2_merge_state_changes(struct in6_multi *, struct mbufq *); static void mld_v2_process_group_timers(struct in6_multi_head *, struct mbufq *, struct mbufq *, struct in6_multi *, const int); static int mld_v2_process_group_query(struct in6_multi *, - struct mld_ifsoftc *mli, int, struct mbuf *, const int); + struct mld_ifsoftc *mli, int, struct mbuf *, + struct mldv2_query *, const int); static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS); static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS); @@ -245,6 +246,10 @@ static int mld_v1enable = 1; SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN, &mld_v1enable, 0, "Enable fallback to MLDv1"); +static int mld_v2enable = 1; +SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN, + &mld_v2enable, 0, "Enable MLDv2"); + static int mld_use_allow = 1; SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN, &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves"); @@ -535,45 +540,48 @@ out: * XXX This routine is also bitten by unlocked ifma_protospec access. */ void -mld_ifdetach(struct ifnet *ifp) +mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh) { + struct epoch_tracker et; struct mld_ifsoftc *mli; - struct ifmultiaddr *ifma, *next; + struct ifmultiaddr *ifma; struct in6_multi *inm; - struct in6_multi_head inmh; CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp)); - SLIST_INIT(&inmh); IN6_MULTI_LIST_LOCK_ASSERT(); MLD_LOCK(); mli = MLD_IFINFO(ifp); - if (mli->mli_version == MLD_VERSION_2) { - IF_ADDR_WLOCK(ifp); - restart: - CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) - continue; - inm = (struct in6_multi *)ifma->ifma_protospec; - if (inm->in6m_state == MLD_LEAVING_MEMBER) { - in6m_disconnect(inm); - in6m_rele_locked(&inmh, inm); - ifma->ifma_protospec = NULL; - } + IF_ADDR_WLOCK(ifp); + /* + * Extract list of in6_multi associated with the detaching ifp + * which the PF_INET6 layer is about to release. + */ + NET_EPOCH_ENTER(et); + CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + inm = in6m_ifmultiaddr_get_inm(ifma); + if (inm == NULL) + continue; + in6m_disconnect_locked(inmh, inm); + + if (mli->mli_version == MLD_VERSION_2) { in6m_clear_recorded(inm); - if (__predict_false(ifma6_restart)) { - ifma6_restart = false; - goto restart; + + /* + * We need to release the final reference held + * for issuing the INCLUDE {}. + */ + if (inm->in6m_state == MLD_LEAVING_MEMBER) { + inm->in6m_state = MLD_NOT_MEMBER; + in6m_rele_locked(inmh, inm); } } - IF_ADDR_WUNLOCK(ifp); } - + NET_EPOCH_EXIT(et); + IF_ADDR_WUNLOCK(ifp); MLD_UNLOCK(); - in6m_release_list_deferred(&inmh); } /* @@ -630,6 +638,7 @@ static int mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld) { + struct epoch_tracker et; struct ifmultiaddr *ifma; struct mld_ifsoftc *mli; struct in6_multi *inm; @@ -697,7 +706,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, if (timer == 0) timer = 1; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); if (is_general_query) { /* * For each reporting group joined on this @@ -706,10 +715,9 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)", ifp, if_name(ifp)); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) + inm = in6m_ifmultiaddr_get_inm(ifma); + if (inm == NULL) continue; - inm = (struct in6_multi *)ifma->ifma_protospec; mld_v1_update_group(inm, timer); } } else { @@ -729,7 +737,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, in6_clearscope(&mld->mld_addr); } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); @@ -799,16 +807,16 @@ mld_v1_update_group(struct in6_multi *inm, const int timer) * Process a received MLDv2 general, group-specific or * group-and-source-specific query. * - * Assumes that the query header has been pulled up to sizeof(mldv2_query). + * Assumes that mld points to a struct mldv2_query which is stored in + * contiguous memory. * * Return 0 if successful, otherwise an appropriate error code is returned. */ static int mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, - struct mbuf *m, const int off, const int icmp6len) + struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len) { struct mld_ifsoftc *mli; - struct mldv2_query *mld; struct in6_multi *inm; uint32_t maxdelay, nsrc, qqi; int is_general_query; @@ -818,7 +826,12 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, char ip6tbuf[INET6_ADDRSTRLEN]; #endif - is_general_query = 0; + if (!mld_v2enable) { + CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_src), + ifp, if_name(ifp)); + return (0); + } /* * RFC3810 Section 6.2: MLD queries must originate from @@ -831,9 +844,9 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, return (0); } - CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp)); + is_general_query = 0; - mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off); + CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp)); maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */ if (maxdelay >= 32768) { @@ -926,6 +939,8 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, V_interface_timers_running6 = 1; } } else { + struct epoch_tracker et; + /* * MLDv2 Group-specific or Group-and-source-specific Query. * @@ -934,10 +949,10 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * Queries for groups we are not a member of on this * link are simply ignored. */ - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); inm = in6m_lookup_locked(ifp, &mld->mld_addr); if (inm == NULL) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); goto out_locked; } if (nsrc > 0) { @@ -945,7 +960,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, &V_mld_gsrdelay)) { CTR1(KTR_MLD, "%s: GS query throttled.", __func__); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); goto out_locked; } } @@ -959,11 +974,11 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * group-specific or group-and-source query. */ if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) - mld_v2_process_group_query(inm, mli, timer, m, off); + mld_v2_process_group_query(inm, mli, timer, m, mld, off); /* XXX Clear embedded scope ID as userland won't expect it. */ in6_clearscope(&mld->mld_addr); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); } out_locked: @@ -980,9 +995,8 @@ out_locked: */ static int mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli, - int timer, struct mbuf *m0, const int off) + int timer, struct mbuf *m0, struct mldv2_query *mld, const int off) { - struct mldv2_query *mld; int retval; uint16_t nsrc; @@ -990,7 +1004,6 @@ mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli, MLD_LOCK_ASSERT(); retval = 0; - mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off); switch (inm->in6m_state) { case MLD_NOT_MEMBER: @@ -1010,6 +1023,15 @@ mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli, nsrc = ntohs(mld->mld_numsrc); + /* Length should be checked by calling function. */ + KASSERT((m0->m_flags & M_PKTHDR) == 0 || + m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) + + nsrc * sizeof(struct in6_addr), + ("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)", + m0->m_pkthdr.len, off + sizeof(struct mldv2_query) + + nsrc * sizeof(struct in6_addr), m0)); + + /* * Deal with group-specific queries upfront. * If any group query is already pending, purge any recorded @@ -1051,28 +1073,20 @@ mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli, * report for those sources. */ if (inm->in6m_nsrc > 0) { - struct mbuf *m; - uint8_t *sp; + struct in6_addr srcaddr; int i, nrecorded; int soff; - m = m0; soff = off + sizeof(struct mldv2_query); nrecorded = 0; for (i = 0; i < nsrc; i++) { - sp = mtod(m, uint8_t *) + soff; - retval = in6m_record_source(inm, - (const struct in6_addr *)sp); + m_copydata(m0, soff, sizeof(struct in6_addr), + (caddr_t)&srcaddr); + retval = in6m_record_source(inm, &srcaddr); if (retval < 0) break; nrecorded += retval; soff += sizeof(struct in6_addr); - if (soff >= m->m_len) { - soff = soff - m->m_len; - m = m->m_next; - if (m == NULL) - break; - } } if (nrecorded > 0) { CTR1(KTR_MLD, @@ -1098,6 +1112,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld) { struct in6_addr src, dst; + struct epoch_tracker et; struct in6_ifaddr *ia; struct in6_multi *inm; #ifdef KTR @@ -1173,7 +1188,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, IN6_MULTI_LIST_LOCK(); MLD_LOCK(); - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); /* * MLDv1 report suppression. @@ -1221,7 +1236,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, } out_locked: - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); MLD_UNLOCK(); IN6_MULTI_LIST_UNLOCK(); @@ -1281,8 +1296,8 @@ mld_input(struct mbuf *m, int off, int icmp6len) if (mld_v1_input_query(ifp, ip6, mld) != 0) return (0); } else if (icmp6len >= sizeof(struct mldv2_query)) { - if (mld_v2_input_query(ifp, ip6, m, off, - icmp6len) != 0) + if (mld_v2_input_query(ifp, ip6, m, + (struct mldv2_query *)mld, off, icmp6len) != 0) return (0); } break; @@ -1311,15 +1326,19 @@ mld_input(struct mbuf *m, int off, int icmp6len) void mld_fasttimo(void) { + struct in6_multi_head inmh; VNET_ITERATOR_DECL(vnet_iter); + SLIST_INIT(&inmh); + VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - mld_fasttimo_vnet(); + mld_fasttimo_vnet(&inmh); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); + in6m_release_list_deferred(&inmh); } /* @@ -1328,15 +1347,15 @@ mld_fasttimo(void) * VIMAGE: Assume caller has set up our curvnet. */ static void -mld_fasttimo_vnet(void) +mld_fasttimo_vnet(struct in6_multi_head *inmh) { + struct epoch_tracker et; struct mbufq scq; /* State-change packets */ struct mbufq qrq; /* Query response packets */ struct ifnet *ifp; struct mld_ifsoftc *mli; - struct ifmultiaddr *ifma, *next; - struct in6_multi *inm, *tinm; - struct in6_multi_head inmh; + struct ifmultiaddr *ifma; + struct in6_multi *inm; int uri_fasthz; uri_fasthz = 0; @@ -1351,7 +1370,6 @@ mld_fasttimo_vnet(void) !V_state_change_timers_running6) return; - SLIST_INIT(&inmh); IN6_MULTI_LIST_LOCK(); MLD_LOCK(); @@ -1397,25 +1415,20 @@ mld_fasttimo_vnet(void) } IF_ADDR_WLOCK(ifp); - restart: - CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) + NET_EPOCH_ENTER(et); + CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + inm = in6m_ifmultiaddr_get_inm(ifma); + if (inm == NULL) continue; - inm = (struct in6_multi *)ifma->ifma_protospec; switch (mli->mli_version) { case MLD_VERSION_1: - mld_v1_process_group_timer(&inmh, inm); + mld_v1_process_group_timer(inmh, inm); break; case MLD_VERSION_2: - mld_v2_process_group_timers(&inmh, &qrq, + mld_v2_process_group_timers(inmh, &qrq, &scq, inm, uri_fasthz); break; } - if (__predict_false(ifma6_restart)) { - ifma6_restart = false; - goto restart; - } } IF_ADDR_WUNLOCK(ifp); @@ -1429,9 +1442,8 @@ mld_fasttimo_vnet(void) * IF_ADDR_LOCK internally as well as * ip6_output() to transmit a packet. */ - SLIST_FOREACH_SAFE(inm, &inmh, in6m_nrele, tinm) { - SLIST_REMOVE_HEAD(&inmh, - in6m_nrele); + while ((inm = SLIST_FIRST(inmh)) != NULL) { + SLIST_REMOVE_HEAD(inmh, in6m_defer); (void)mld_v1_transmit_report(inm, MLD_LISTENER_REPORT); } @@ -1439,14 +1451,9 @@ mld_fasttimo_vnet(void) case MLD_VERSION_2: mld_dispatch_queue(&qrq, 0); mld_dispatch_queue(&scq, 0); - - /* - * Free the in_multi reference(s) for - * this lifecycle. - */ - in6m_release_list_deferred(&inmh); break; } + NET_EPOCH_EXIT(et); } out_locked: @@ -1486,8 +1493,7 @@ mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm) case MLD_REPORTING_MEMBER: if (report_timer_expired) { inm->in6m_state = MLD_IDLE_MEMBER; - in6m_disconnect(inm); - in6m_rele_locked(inmh, inm); + SLIST_INSERT_HEAD(inmh, inm, in6m_defer); } break; case MLD_G_QUERY_PENDING_MEMBER: @@ -1611,7 +1617,7 @@ mld_v2_process_group_timers(struct in6_multi_head *inmh, if (inm->in6m_state == MLD_LEAVING_MEMBER && inm->in6m_scrv == 0) { inm->in6m_state = MLD_NOT_MEMBER; - in6m_disconnect(inm); + in6m_disconnect_locked(inmh, inm); in6m_rele_locked(inmh, inm); } } @@ -1656,10 +1662,11 @@ mld_set_version(struct mld_ifsoftc *mli, const int version) static void mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) { - struct ifmultiaddr *ifma, *next; + struct epoch_tracker et; + struct in6_multi_head inmh; + struct ifmultiaddr *ifma; struct ifnet *ifp; struct in6_multi *inm; - struct in6_multi_head inmh; CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__, mli->mli_ifp, if_name(mli->mli_ifp)); @@ -1682,12 +1689,11 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) ifp = mli->mli_ifp; IF_ADDR_WLOCK(ifp); - restart: - CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) + NET_EPOCH_ENTER(et); + CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + inm = in6m_ifmultiaddr_get_inm(ifma); + if (inm == NULL) continue; - inm = (struct in6_multi *)ifma->ifma_protospec; switch (inm->in6m_state) { case MLD_NOT_MEMBER: case MLD_SILENT_MEMBER: @@ -1702,9 +1708,9 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) * version, we need to release the final * reference held for issuing the INCLUDE {}. */ - in6m_disconnect(inm); + if (inm->in6m_refcount == 1) + in6m_disconnect_locked(&inmh, inm); in6m_rele_locked(&inmh, inm); - ifma->ifma_protospec = NULL; /* FALLTHROUGH */ case MLD_G_QUERY_PENDING_MEMBER: case MLD_SG_QUERY_PENDING_MEMBER: @@ -1720,11 +1726,8 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) mbufq_drain(&inm->in6m_scq); break; } - if (__predict_false(ifma6_restart)) { - ifma6_restart = false; - goto restart; - } } + NET_EPOCH_EXIT(et); IF_ADDR_WUNLOCK(ifp); in6m_release_list_deferred(&inmh); } @@ -1897,6 +1900,14 @@ mld_change_state(struct in6_multi *inm, const int delay) error = 0; /* + * Check if the in6_multi has already been disconnected. + */ + if (inm->in6m_ifp == NULL) { + CTR1(KTR_MLD, "%s: inm is disconnected", __func__); + return (0); + } + + /* * Try to detect if the upper layer just asked us to change state * for an interface which has now gone away. */ @@ -2006,6 +2017,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli, if (mli->mli_version == MLD_VERSION_2 && inm->in6m_state == MLD_LEAVING_MEMBER) { inm->in6m_refcount--; + MPASS(inm->in6m_refcount > 0); } inm->in6m_state = MLD_REPORTING_MEMBER; @@ -2985,6 +2997,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq) static void mld_v2_dispatch_general_query(struct mld_ifsoftc *mli) { + struct epoch_tracker et; struct ifmultiaddr *ifma; struct ifnet *ifp; struct in6_multi *inm; @@ -3007,13 +3020,11 @@ mld_v2_dispatch_general_query(struct mld_ifsoftc *mli) ifp = mli->mli_ifp; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) + inm = in6m_ifmultiaddr_get_inm(ifma); + if (inm == NULL) continue; - - inm = (struct in6_multi *)ifma->ifma_protospec; KASSERT(ifp == inm->in6m_ifp, ("%s: inconsistent ifp", __func__)); @@ -3038,7 +3049,7 @@ mld_v2_dispatch_general_query(struct mld_ifsoftc *mli) break; } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); send: mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST); diff --git a/freebsd/sys/netinet6/mld6_var.h b/freebsd/sys/netinet6/mld6_var.h index 166c2055..8dc2ffa4 100644 --- a/freebsd/sys/netinet6/mld6_var.h +++ b/freebsd/sys/netinet6/mld6_var.h @@ -160,12 +160,13 @@ struct mld_ifsoftc { #define MLD_IFINFO(ifp) \ (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->mld_ifinfo) +struct in6_multi_head; int mld_change_state(struct in6_multi *, const int); struct mld_ifsoftc * mld_domifattach(struct ifnet *); void mld_domifdetach(struct ifnet *); void mld_fasttimo(void); -void mld_ifdetach(struct ifnet *); +void mld_ifdetach(struct ifnet *, struct in6_multi_head *); int mld_input(struct mbuf *, int, int); void mld_slowtimo(void); diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c index f065815c..201b4d40 100644 --- a/freebsd/sys/netinet6/nd6.c +++ b/freebsd/sys/netinet6/nd6.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/eventhandler.h> #include <sys/callout.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -115,7 +116,7 @@ VNET_DEFINE(int, nd6_debug) = 1; VNET_DEFINE(int, nd6_debug) = 0; #endif -static eventhandler_tag lle_event_eh, iflladdr_event_eh; +static eventhandler_tag lle_event_eh, iflladdr_event_eh, ifnet_link_event_eh; VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); @@ -235,6 +236,8 @@ nd6_init(void) NULL, EVENTHANDLER_PRI_ANY); iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event, nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY); + ifnet_link_event_eh = EVENTHANDLER_REGISTER(ifnet_link_event, + nd6_ifnet_link_event, NULL, EVENTHANDLER_PRI_ANY); } } @@ -246,6 +249,7 @@ nd6_destroy() callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); if (IS_DEFAULT_VNET(curvnet)) { + EVENTHANDLER_DEREGISTER(ifnet_link_event, ifnet_link_event_eh); EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); } @@ -300,9 +304,10 @@ nd6_ifattach(struct ifnet *ifp) void nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd) { + struct epoch_tracker et; struct ifaddr *ifa, *next; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -310,7 +315,7 @@ nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd) /* stop DAD processing */ nd6_dad_stop(ifa); } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); free(nd, M_IP6NDP); } @@ -898,6 +903,7 @@ nd6_timer(void *arg) struct nd_prhead prl; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; + struct ifnet *ifp; struct in6_ifaddr *ia6, *nia6; uint64_t genid; @@ -994,14 +1000,15 @@ nd6_timer(void *arg) * Check status of the interface. If it is down, * mark the address as tentative for future DAD. */ - if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 || - (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING) - == 0 || - (ND_IFINFO(ia6->ia_ifp)->flags & - ND6_IFF_IFDISABLED) != 0) { + ifp = ia6->ia_ifp; + if ((ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0 && + ((ifp->if_flags & IFF_UP) == 0 || + (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)){ ia6->ia6_flags &= ~IN6_IFF_DUPLICATED; ia6->ia6_flags |= IN6_IFF_TENTATIVE; } + /* * A new RA might have made a deprecated address * preferred. @@ -1064,12 +1071,13 @@ restart: static int regen_tmpaddr(struct in6_ifaddr *ia6) { + struct epoch_tracker et; struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; ifp = ia6->ia_ifa.ifa_ifp; - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *it6; @@ -1110,7 +1118,7 @@ regen_tmpaddr(struct in6_ifaddr *ia6) } if (public_ifa6 != NULL) ifa_ref(&public_ifa6->ia_ifa); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if (public_ifa6 != NULL) { int e; @@ -1345,17 +1353,19 @@ restart: * a p2p interface, the address should be a neighbor. */ if (ifp->if_flags & IFF_POINTOPOINT) { - IF_ADDR_RLOCK(ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sin6_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return 1; } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); } /* @@ -1379,6 +1389,7 @@ restart: int nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { + struct epoch_tracker et; struct llentry *lle; int rc = 0; @@ -1390,12 +1401,12 @@ nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ - IF_AFDATA_RLOCK(ifp); + NET_EPOCH_ENTER(et); if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { LLE_RUNLOCK(lle); rc = 1; } - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (rc); } @@ -1624,6 +1635,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; + struct epoch_tracker et; int error = 0; if (ifp->if_afdata[AF_INET6] == NULL) @@ -1688,7 +1700,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1697,7 +1709,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if (ifa != NULL) { /* LLA is duplicated. */ @@ -1718,7 +1730,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; if (V_ip6_dad_count > 0 && (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != @@ -1727,7 +1739,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) ia = (struct in6_ifaddr *)ifa; ia->ia6_flags |= IN6_IFF_TENTATIVE; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); } } @@ -1746,7 +1758,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * address is assigned, and IFF_UP, try to * assign one. */ - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != @@ -1756,7 +1768,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if (ifa != NULL) /* No LLA is configured. */ in6_ifattach(ifp, NULL); @@ -1833,9 +1845,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); - IF_AFDATA_RLOCK(ifp); + NET_EPOCH_ENTER(et); ln = nd6_lookup(&nb_addr, 0, ifp); - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if (ln == NULL) { error = EINVAL; @@ -1960,6 +1972,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int flags; uint16_t router = 0; struct sockaddr_in6 sin6; + struct epoch_tracker et; struct mbuf *chain = NULL; u_char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; @@ -1984,9 +1997,9 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * description on it in NS section (RFC 2461 7.2.3). */ flags = lladdr ? LLE_EXCLUSIVE : 0; - IF_AFDATA_RLOCK(ifp); + NET_EPOCH_ENTER(et); ln = nd6_lookup(from, flags, ifp); - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); is_newentry = 0; if (ln == NULL) { flags |= LLE_EXCLUSIVE; @@ -2128,13 +2141,14 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, static void nd6_slowtimo(void *arg) { + struct epoch_tracker et; CURVNET_SET((struct vnet *) arg); struct nd_ifinfo *nd6if; struct ifnet *ifp; callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); - IFNET_RLOCK_NOSLEEP(); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_afdata[AF_INET6] == NULL) continue; @@ -2151,7 +2165,7 @@ nd6_slowtimo(void *arg) nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } @@ -2244,6 +2258,7 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags, struct llentry **plle) { + struct epoch_tracker et; struct llentry *ln = NULL; const struct sockaddr_in6 *dst6; @@ -2272,7 +2287,7 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, } } - IF_AFDATA_RLOCK(ifp); + NET_EPOCH_ENTER(et); ln = nd6_lookup(&dst6->sin6_addr, plle ? LLE_EXCLUSIVE : LLE_UNLOCKED, ifp); if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { @@ -2292,11 +2307,11 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, *plle = ln; LLE_WUNLOCK(ln); } - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (0); } else if (plle && ln) LLE_WUNLOCK(ln); - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle)); } @@ -2330,9 +2345,11 @@ nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, * or an anycast address(i.e. not a multicast). */ if (lle == NULL) { - IF_AFDATA_RLOCK(ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h index 7544d23c..ffc88cb5 100644 --- a/freebsd/sys/netinet6/nd6.h +++ b/freebsd/sys/netinet6/nd6.h @@ -91,7 +91,10 @@ struct nd_ifinfo { #define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */ #define ND6_IFF_NO_DAD 0x100 #ifdef EXPERIMENTAL +/* XXX: not related to ND. */ #define ND6_IFF_IPV6_ONLY 0x200 /* draft-ietf-6man-ipv6only-flag */ +#define ND6_IFF_IPV6_ONLY_MANUAL 0x400 +#define ND6_IFF_IPV6_ONLY_MASK (ND6_IFF_IPV6_ONLY|ND6_IFF_IPV6_ONLY_MANUAL) #endif #ifdef _KERNEL @@ -473,6 +476,7 @@ void nd6_dad_stop(struct ifaddr *); /* nd6_rtr.c */ void nd6_rs_input(struct mbuf *, int, int); void nd6_ra_input(struct mbuf *, int, int); +void nd6_ifnet_link_event(void *, struct ifnet *, int); void defrouter_reset(void); void defrouter_select_fib(int fibnum); void defrouter_select(void); diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c index 49810020..136fbecc 100644 --- a/freebsd/sys/netinet6/nd6_nbr.c +++ b/freebsd/sys/netinet6/nd6_nbr.c @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/eventhandler.h> #include <sys/malloc.h> #include <sys/libkern.h> #include <sys/lock.h> @@ -615,6 +616,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6, void nd6_na_input(struct mbuf *m, int off, int icmp6len) { + struct epoch_tracker et; struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_advert *nd_na; @@ -742,9 +744,9 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * If no neighbor cache entry is found, NA SHOULD silently be * discarded. */ - IF_AFDATA_RLOCK(ifp); + NET_EPOCH_ENTER(et); ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if (ln == NULL) { goto freeit; } diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c index 59868383..0ba1e416 100644 --- a/freebsd/sys/netinet6/nd6_rtr.c +++ b/freebsd/sys/netinet6/nd6_rtr.c @@ -108,6 +108,10 @@ VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME; VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE; +#ifdef EXPERIMENTAL +VNET_DEFINE(int, nd6_ignore_ipv6_only_ra) = 1; +#endif + /* RTPREF_MEDIUM has to be 0! */ #define RTPREF_HIGH 1 #define RTPREF_MEDIUM 0 @@ -210,7 +214,7 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len) /* * An initial update routine for draft-ietf-6man-ipv6only-flag. * We need to iterate over all default routers for the given - * interface to see whether they are all advertising the "6" + * interface to see whether they are all advertising the "S" * (IPv6-Only) flag. If they do set, otherwise unset, the * interface flag we later use to filter on. */ @@ -218,7 +222,15 @@ static void defrtr_ipv6_only_ifp(struct ifnet *ifp) { struct nd_defrouter *dr; - bool ipv6_only; + bool ipv6_only, ipv6_only_old; +#ifdef INET + struct epoch_tracker et; + struct ifaddr *ifa; + bool has_ipv4_addr; +#endif + + if (V_nd6_ignore_ipv6_only_ra != 0) + return; ipv6_only = true; ND6_RLOCK(); @@ -229,13 +241,78 @@ defrtr_ipv6_only_ifp(struct ifnet *ifp) ND6_RUNLOCK(); IF_AFDATA_WLOCK(ifp); + ipv6_only_old = ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY; + IF_AFDATA_WUNLOCK(ifp); + + /* If nothing changed, we have an early exit. */ + if (ipv6_only == ipv6_only_old) + return; + +#ifdef INET + /* + * Should we want to set the IPV6-ONLY flag, check if the + * interface has a non-0/0 and non-link-local IPv4 address + * configured on it. If it has we will assume working + * IPv4 operations and will clear the interface flag. + */ + has_ipv4_addr = false; + if (ipv6_only) { + NET_EPOCH_ENTER(et); + CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + if (in_canforward( + satosin(ifa->ifa_addr)->sin_addr)) { + has_ipv4_addr = true; + break; + } + } + NET_EPOCH_EXIT(et); + } + if (ipv6_only && has_ipv4_addr) { + log(LOG_NOTICE, "%s rcvd RA w/ IPv6-Only flag set but has IPv4 " + "configured, ignoring IPv6-Only flag.\n", ifp->if_xname); + ipv6_only = false; + } +#endif + + IF_AFDATA_WLOCK(ifp); if (ipv6_only) ND_IFINFO(ifp)->flags |= ND6_IFF_IPV6_ONLY; else ND_IFINFO(ifp)->flags &= ~ND6_IFF_IPV6_ONLY; IF_AFDATA_WUNLOCK(ifp); + +#ifdef notyet + /* Send notification of flag change. */ +#endif +} + +static void +defrtr_ipv6_only_ipf_down(struct ifnet *ifp) +{ + + IF_AFDATA_WLOCK(ifp); + ND_IFINFO(ifp)->flags &= ~ND6_IFF_IPV6_ONLY; + IF_AFDATA_WUNLOCK(ifp); } +#endif /* EXPERIMENTAL */ + +void +nd6_ifnet_link_event(void *arg __unused, struct ifnet *ifp, int linkstate) +{ + + /* + * XXX-BZ we might want to trigger re-evaluation of our default router + * availability. E.g., on link down the default router might be + * unreachable but a different interface might still have connectivity. + */ + +#ifdef EXPERIMENTAL + if (linkstate == LINK_STATE_DOWN) + defrtr_ipv6_only_ipf_down(ifp); #endif +} /* * Receive Router Advertisement Message. @@ -513,11 +590,13 @@ nd6_rtmsg(int cmd, struct rtentry *rt) info.rti_info[RTAX_NETMASK] = rt_mask(rt); ifp = rt->rt_ifp; if (ifp != NULL) { - IF_ADDR_RLOCK(ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); info.rti_info[RTAX_IFP] = ifa->ifa_addr; ifa_ref(ifa); - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; } else ifa = NULL; @@ -791,6 +870,7 @@ defrouter_del(struct nd_defrouter *dr) void defrouter_select_fib(int fibnum) { + struct epoch_tracker et; struct nd_defrouter *dr, *selected_dr, *installed_dr; struct llentry *ln = NULL; @@ -817,14 +897,14 @@ defrouter_select_fib(int fibnum) */ selected_dr = installed_dr = NULL; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { - IF_AFDATA_RLOCK(dr->ifp); + NET_EPOCH_ENTER(et); if (selected_dr == NULL && dr->ifp->if_fib == fibnum && (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; defrouter_ref(selected_dr); } - IF_AFDATA_RUNLOCK(dr->ifp); + NET_EPOCH_EXIT(et); if (ln != NULL) { LLE_RUNLOCK(ln); ln = NULL; @@ -868,7 +948,7 @@ defrouter_select_fib(int fibnum) } } } else if (installed_dr != NULL) { - IF_AFDATA_RLOCK(installed_dr->ifp); + NET_EPOCH_ENTER(et); if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln) && @@ -877,7 +957,7 @@ defrouter_select_fib(int fibnum) defrouter_rele(selected_dr); selected_dr = installed_dr; } - IF_AFDATA_RUNLOCK(installed_dr->ifp); + NET_EPOCH_EXIT(et); if (ln != NULL) LLE_RUNLOCK(ln); } @@ -1273,6 +1353,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, int auth; struct in6_addrlifetime lt6_tmp; char ip6buf[INET6_ADDRSTRLEN]; + struct epoch_tracker et; auth = 0; if (m) { @@ -1386,7 +1467,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, * consider autoconfigured addresses while RFC2462 simply said * "address". */ - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *ifa6; u_int32_t remaininglifetime; @@ -1509,7 +1590,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, ifa6->ia6_lifetime = lt6_tmp; ifa6->ia6_updatetime = time_uptime; } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); if (ia6_match == NULL && new->ndpr_vltime) { int ifidlen; @@ -1598,6 +1679,7 @@ end: static struct nd_pfxrouter * find_pfxlist_reachable_router(struct nd_prefix *pr) { + struct epoch_tracker et; struct nd_pfxrouter *pfxrtr; struct llentry *ln; int canreach; @@ -1605,9 +1687,9 @@ find_pfxlist_reachable_router(struct nd_prefix *pr) ND6_LOCK_ASSERT(); LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) { - IF_AFDATA_RLOCK(pfxrtr->router->ifp); + NET_EPOCH_ENTER(et); ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp); - IF_AFDATA_RUNLOCK(pfxrtr->router->ifp); + NET_EPOCH_EXIT(et); if (ln == NULL) continue; canreach = ND6_IS_LLINFO_PROBREACH(ln); @@ -1814,8 +1896,7 @@ restart: static int nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) { - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; - struct rib_head *rnh; + struct sockaddr_dl sdl; struct rtentry *rt; struct sockaddr_in6 mask6; u_long rtflags; @@ -1830,6 +1911,12 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) mask6.sin6_addr = pr->ndpr_mask; rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP; + bzero(&sdl, sizeof(struct sockaddr_dl)); + sdl.sdl_len = sizeof(struct sockaddr_dl); + sdl.sdl_family = AF_LINK; + sdl.sdl_type = ifa->ifa_ifp->if_type; + sdl.sdl_index = ifa->ifa_ifp->if_index; + if(V_rt_add_addr_allfibs) { fibnum = 0; maxfib = rt_numfibs; @@ -1842,26 +1929,13 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) rt = NULL; error = in6_rtrequest(RTM_ADD, - (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr, + (struct sockaddr *)&pr->ndpr_prefix, (struct sockaddr *)&sdl, (struct sockaddr *)&mask6, rtflags, &rt, fibnum); if (error == 0) { KASSERT(rt != NULL, ("%s: in6_rtrequest return no " "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__, error, pr, ifa)); - - rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); - /* XXX what if rhn == NULL? */ - RIB_WLOCK(rnh); RT_LOCK(rt); - if (rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl) == 0) { - struct sockaddr_dl *dl; - - dl = (struct sockaddr_dl *)rt->rt_gateway; - dl->sdl_type = rt->rt_ifp->if_type; - dl->sdl_index = rt->rt_ifp->if_index; - } - RIB_WUNLOCK(rnh); nd6_rtmsg(RTM_ADD, rt); RT_UNLOCK(rt); pr->ndpr_stateflags |= NDPRF_ONLINK; @@ -1946,15 +2020,17 @@ nd6_prefix_onlink(struct nd_prefix *pr) ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST); if (ifa == NULL) { + struct epoch_tracker et; + /* XXX: freebsd does not have ifa_ifwithaf */ - IF_ADDR_RLOCK(ifp); + NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET6) { ifa_ref(ifa); break; } } - IF_ADDR_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); /* should we care about ia6_flags? */ } if (ifa == NULL) { diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c index 73d0832a..b4aa9664 100644 --- a/freebsd/sys/netinet6/raw_ip6.c +++ b/freebsd/sys/netinet6/raw_ip6.c @@ -163,7 +163,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto) struct ifnet *ifp; struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - struct inpcb *in6p; + struct inpcb *inp; struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; @@ -176,18 +176,18 @@ rip6_input(struct mbuf **mp, int *offp, int proto) ifp = m->m_pkthdr.rcvif; INP_INFO_RLOCK_ET(&V_ripcbinfo, et); - CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) { + CK_LIST_FOREACH(inp, &V_ripcb, inp_list) { /* XXX inp locking */ - if ((in6p->inp_vflag & INP_IPV6) == 0) + if ((inp->inp_vflag & INP_IPV6) == 0) continue; - if (in6p->inp_ip_p && - in6p->inp_ip_p != proto) + if (inp->inp_ip_p && + inp->inp_ip_p != proto) continue; - if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && - !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst)) continue; - if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && - !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src)) continue; if (last != NULL) { struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); @@ -225,25 +225,32 @@ rip6_input(struct mbuf **mp, int *offp, int proto) INP_RUNLOCK(last); last = NULL; } - INP_RLOCK(in6p); - if (__predict_false(in6p->inp_flags2 & INP_FREED)) + INP_RLOCK(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_2; - if (jailed_without_vnet(in6p->inp_cred)) { + if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && - prison_check_ip6(in6p->inp_cred, + prison_check_ip6(inp->inp_cred, &ip6->ip6_dst) != 0) goto skip_2; } - if (in6p->in6p_cksum != -1) { + if (inp->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); - if (in6_cksum(m, proto, *offp, + if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 || + in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { RIP6STAT_INC(rip6s_badsum); + /* + * Drop the received message, don't send an + * ICMP6 message. Set proto to IPPROTO_NONE + * to achieve that. + */ + proto = IPPROTO_NONE; goto skip_2; } } @@ -253,7 +260,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto) * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ - if (in6p->in6p_moptions && + if (inp->in6p_moptions && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If the incoming datagram is for MLD, allow it @@ -283,7 +290,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto) mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; - blocked = im6o_mc_filter(in6p->in6p_moptions, + blocked = im6o_mc_filter(inp->in6p_moptions, ifp, (struct sockaddr *)&mcaddr, (struct sockaddr *)&fromsa); @@ -293,10 +300,10 @@ rip6_input(struct mbuf **mp, int *offp, int proto) goto skip_2; } } - last = in6p; + last = inp; continue; skip_2: - INP_RUNLOCK(in6p); + INP_RUNLOCK(inp); } INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); #if defined(IPSEC) || defined(IPSEC_SUPPORT) @@ -389,7 +396,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...) struct m_tag *mtag; struct sockaddr_in6 *dstsock; struct ip6_hdr *ip6; - struct inpcb *in6p; + struct inpcb *inp; u_int plen = m->m_pkthdr.len; int error = 0; struct ip6_pktopts opt, *optp; @@ -406,18 +413,18 @@ rip6_output(struct mbuf *m, struct socket *so, ...) control = va_arg(ap, struct mbuf *); va_end(ap); - in6p = sotoinpcb(so); - INP_WLOCK(in6p); + inp = sotoinpcb(so); + INP_WLOCK(inp); if (control != NULL) { if ((error = ip6_setpktopts(control, &opt, - in6p->in6p_outputopts, so->so_cred, + inp->in6p_outputopts, so->so_cred, so->so_proto->pr_protocol)) != 0) { goto bad; } optp = &opt; } else - optp = in6p->in6p_outputopts; + optp = inp->in6p_outputopts; /* * Check and convert scope zone ID into internal form. @@ -460,12 +467,12 @@ rip6_output(struct mbuf *m, struct socket *so, ...) /* * Source address selection. */ - error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred, + error = in6_selectsrc_socket(dstsock, optp, inp, so->so_cred, scope_ambiguous, &in6a, &hlim); if (error) goto bad; - error = prison_check_ip6(in6p->inp_cred, &in6a); + error = prison_check_ip6(inp->inp_cred, &in6a); if (error != 0) goto bad; ip6->ip6_src = in6a; @@ -476,18 +483,18 @@ rip6_output(struct mbuf *m, struct socket *so, ...) * Fill in the rest of the IPv6 header fields. */ ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | - (in6p->inp_flow & IPV6_FLOWINFO_MASK); + (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* * ip6_plen will be filled in ip6_output, so not fill it here. */ - ip6->ip6_nxt = in6p->inp_ip_p; + ip6->ip6_nxt = inp->inp_ip_p; ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || - in6p->in6p_cksum != -1) { + inp->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; @@ -496,8 +503,8 @@ rip6_output(struct mbuf *m, struct socket *so, ...) if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else - off = in6p->in6p_cksum; - if (plen < off + 1) { + off = inp->in6p_cksum; + if (plen < off + 2) { error = EINVAL; goto bad; } @@ -532,7 +539,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...) } } - error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p); + error = ip6_output(m, optp, NULL, 0, inp->in6p_moptions, &oifp, inp); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); @@ -551,7 +558,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...) ip6_clearpktopts(&opt, -1); m_freem(control); } - INP_WUNLOCK(in6p); + INP_WUNLOCK(inp); return (error); } @@ -729,6 +736,7 @@ rip6_disconnect(struct socket *so) static int rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { + struct epoch_tracker et; struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct ifaddr *ifa = NULL; @@ -746,20 +754,20 @@ rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0) return (error); - NET_EPOCH_ENTER(); + NET_EPOCH_ENTER(et); if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL) { - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } - NET_EPOCH_EXIT(); + NET_EPOCH_EXIT(et); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); inp->in6p_laddr = addr->sin6_addr; diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c index 64b866dd..d556f3a4 100644 --- a/freebsd/sys/netinet6/scope6.c +++ b/freebsd/sys/netinet6/scope6.c @@ -211,19 +211,20 @@ scope6_set(struct ifnet *ifp, struct scope6_id *idlist) static int scope6_get(struct ifnet *ifp, struct scope6_id *idlist) { + struct epoch_tracker et; struct scope6_id *sid; /* We only need to lock the interface's afdata for SID() to work. */ - IF_AFDATA_RLOCK(ifp); + NET_EPOCH_ENTER(et); sid = SID(ifp); if (sid == NULL) { /* paranoid? */ - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (EINVAL); } *idlist = *sid; - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); return (0); } @@ -420,10 +421,12 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id) zoneid = ifp->if_index; in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */ } else if (scope != IPV6_ADDR_SCOPE_GLOBAL) { - IF_AFDATA_RLOCK(ifp); + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); sid = SID(ifp); zoneid = sid->s6id_list[scope]; - IF_AFDATA_RUNLOCK(ifp); + NET_EPOCH_EXIT(et); } } diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c index 6a3391ee..dd320c32 100644 --- a/freebsd/sys/netinet6/sctp6_usrreq.c +++ b/freebsd/sys/netinet6/sctp6_usrreq.c @@ -522,7 +522,6 @@ sctp_must_try_again: static int sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUSED) { - struct in6pcb *inp6; int error; struct sctp_inpcb *inp; uint32_t vrf_id = SCTP_DEFAULT_VRFID; @@ -544,18 +543,17 @@ sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNU inp = (struct sctp_inpcb *)so->so_pcb; SCTP_INP_WLOCK(inp); inp->sctp_flags |= SCTP_PCB_FLAGS_BOUND_V6; /* I'm v6! */ - inp6 = (struct in6pcb *)inp; - inp6->inp_vflag |= INP_IPV6; - inp6->in6p_hops = -1; /* use kernel default */ - inp6->in6p_cksum = -1; /* just to be sure */ + inp->ip_inp.inp.inp_vflag |= INP_IPV6; + inp->ip_inp.inp.in6p_hops = -1; /* use kernel default */ + inp->ip_inp.inp.in6p_cksum = -1; /* just to be sure */ #ifdef INET /* * XXX: ugly!! IPv4 TTL initialization is necessary for an IPv6 * socket as well, because the socket may be bound to an IPv6 * wildcard address, which may match an IPv4-mapped IPv6 address. */ - inp6->inp_ip_ttl = MODULE_GLOBAL(ip_defttl); + inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl); #endif SCTP_INP_WUNLOCK(inp); return (0); @@ -565,7 +563,6 @@ static int sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p) { struct sctp_inpcb *inp; - struct in6pcb *inp6; int error; inp = (struct sctp_inpcb *)so->so_pcb; @@ -597,16 +594,15 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p) return (EINVAL); } } - inp6 = (struct in6pcb *)inp; - inp6->inp_vflag &= ~INP_IPV4; - inp6->inp_vflag |= INP_IPV6; - if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp6) == 0)) { + inp->ip_inp.inp.inp_vflag &= ~INP_IPV4; + inp->ip_inp.inp.inp_vflag |= INP_IPV6; + if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp) == 0)) { switch (addr->sa_family) { #ifdef INET case AF_INET: /* binding v4 addr to v6 socket, so reset flags */ - inp6->inp_vflag |= INP_IPV4; - inp6->inp_vflag &= ~INP_IPV6; + inp->ip_inp.inp.inp_vflag |= INP_IPV4; + inp->ip_inp.inp.inp_vflag &= ~INP_IPV6; break; #endif #ifdef INET6 @@ -617,15 +613,15 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p) sin6_p = (struct sockaddr_in6 *)addr; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) { - inp6->inp_vflag |= INP_IPV4; + inp->ip_inp.inp.inp_vflag |= INP_IPV4; } #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); - inp6->inp_vflag |= INP_IPV4; - inp6->inp_vflag &= ~INP_IPV6; + inp->ip_inp.inp.inp_vflag |= INP_IPV4; + inp->ip_inp.inp.inp_vflag &= ~INP_IPV6; error = sctp_inpcb_bind(so, (struct sockaddr *)&sin, NULL, p); return (error); } @@ -687,7 +683,6 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *p) { struct sctp_inpcb *inp; - struct in6pcb *inp6; #ifdef INET struct sockaddr_in6 *sin6; @@ -704,7 +699,6 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } - inp6 = (struct in6pcb *)inp; /* * For the TCP model we may get a NULL addr, if we are a connected * socket thats ok. @@ -724,7 +718,7 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, } #ifdef INET sin6 = (struct sockaddr_in6 *)addr; - if (SCTP_IPV6_V6ONLY(inp6)) { + if (SCTP_IPV6_V6ONLY(inp)) { /* * if IPV6_V6ONLY flag, we discard datagrams destined to a * v4 addr or v4-mapped addr @@ -793,14 +787,10 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) struct sctp_inpcb *inp; struct sctp_tcb *stcb; #ifdef INET - struct in6pcb *inp6; struct sockaddr_in6 *sin6; union sctp_sockstore store; #endif -#ifdef INET - inp6 = (struct in6pcb *)so->so_pcb; -#endif inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET); @@ -858,7 +848,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) } #ifdef INET sin6 = (struct sockaddr_in6 *)addr; - if (SCTP_IPV6_V6ONLY(inp6)) { + if (SCTP_IPV6_V6ONLY(inp)) { /* * if IPV6_V6ONLY flag, ignore connections destined to a v4 * addr or v4-mapped addr @@ -912,7 +902,8 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) /* We are GOOD to go */ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, - inp->sctp_ep.port, p); + inp->sctp_ep.port, p, + SCTP_INITIALIZE_AUTH_PARAMS); SCTP_ASOC_CREATE_UNLOCK(inp); if (stcb == NULL) { /* Gak! no memory */ @@ -925,10 +916,6 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) } SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT); (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered); - - /* initialize authentication parameters for the assoc */ - sctp_initialize_auth_params(inp, stcb); - sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED); SCTP_TCB_UNLOCK(stcb); return (error); @@ -1103,10 +1090,10 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr) static int sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) { - struct in6pcb *inp6 = sotoin6pcb(so); + struct inpcb *inp = sotoinpcb(so); int error; - if (inp6 == NULL) { + if (inp == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } @@ -1139,10 +1126,10 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) static int sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam) { - struct in6pcb *inp6 = sotoin6pcb(so); + struct inpcb *inp = sotoinpcb(so); int error; - if (inp6 == NULL) { + if (inp == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL); return (EINVAL); } diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c index e0fcd06d..270b4880 100644 --- a/freebsd/sys/netinet6/udp6_usrreq.c +++ b/freebsd/sys/netinet6/udp6_usrreq.c @@ -744,9 +744,24 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, * - when we are not bound to an address and source port (it is * in6_pcbsetport() which will require the write lock). */ +retry: if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0)) { INP_WLOCK(inp); + /* + * In case we lost a race and another thread bound addr/port + * on the inp we cannot keep the wlock (which still would be + * fine) as further down, based on these values we make + * decisions for the pcbinfo lock. If the locks are not in + * synch the assertions on unlock will fire, hence we go for + * one retry loop. + */ + if (sin6 != NULL && + (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) || + inp->inp_lport != 0)) { + INP_WUNLOCK(inp); + goto retry; + } unlock_inp = UH_WLOCKED; } else { INP_RLOCK(inp); |