summary | refs | log | tree | commit | diff | stats
path: root/freebsd/sys/netinet6
diff options
context:
space:
mode:
author: Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-08-22 14:59:50 +0200
committer: Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-09-21 10:29:41 +0200
commit: 3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch)
tree: cd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys/netinet6
parent: ck: Define CK_MD_PPC32_LWSYNC if available (diff)
download: rtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319. Update #3472.
Diffstat (limited to 'freebsd/sys/netinet6')
-rw-r--r-- freebsd/sys/netinet6/frag6.c 331
-rw-r--r-- freebsd/sys/netinet6/icmp6.c 11
-rw-r--r-- freebsd/sys/netinet6/in6.c 3
-rw-r--r-- freebsd/sys/netinet6/in6_fib.c 4
-rw-r--r-- freebsd/sys/netinet6/in6_gif.c 337
-rw-r--r-- freebsd/sys/netinet6/in6_ifattach.c 2
-rw-r--r-- freebsd/sys/netinet6/in6_mcast.c 46
-rw-r--r-- freebsd/sys/netinet6/in6_pcb.c 189
-rw-r--r-- freebsd/sys/netinet6/in6_proto.c 37
-rw-r--r-- freebsd/sys/netinet6/in6_rmx.c 4
-rw-r--r-- freebsd/sys/netinet6/in6_src.c 6
-rw-r--r-- freebsd/sys/netinet6/in6_var.h 2
-rw-r--r-- freebsd/sys/netinet6/ip6_input.c 4
-rw-r--r-- freebsd/sys/netinet6/ip6_mroute.c 55
-rw-r--r-- freebsd/sys/netinet6/ip6_output.c 57
-rw-r--r-- freebsd/sys/netinet6/ip6_var.h 8
-rw-r--r-- freebsd/sys/netinet6/mld6.c 43
-rw-r--r-- freebsd/sys/netinet6/nd6.c 6
-rw-r--r-- freebsd/sys/netinet6/nd6_nbr.c 8
-rw-r--r-- freebsd/sys/netinet6/nd6_rtr.c 2
-rw-r--r-- freebsd/sys/netinet6/pim6_var.h 4
-rw-r--r-- freebsd/sys/netinet6/raw_ip6.c 7
-rw-r--r-- freebsd/sys/netinet6/scope6.c 4
-rw-r--r-- freebsd/sys/netinet6/scope6_var.h 2
-rw-r--r-- freebsd/sys/netinet6/sctp6_usrreq.c 9
-rw-r--r-- freebsd/sys/netinet6/sctp6_var.h 4
-rw-r--r-- freebsd/sys/netinet6/udp6_usrreq.c 450
27 files changed, 1066 insertions, 569 deletions
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c
index 70103fe3..0b0c7b91 100644
--- a/freebsd/sys/netinet6/frag6.c
+++ b/freebsd/sys/netinet6/frag6.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/hash.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
@@ -51,6 +52,8 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/syslog.h>
+#include <machine/atomic.h>
+
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
@@ -67,58 +70,110 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
-static void frag6_deq(struct ip6asfrag *);
-static void frag6_insque(struct ip6q *, struct ip6q *);
-static void frag6_remque(struct ip6q *);
-static void frag6_freef(struct ip6q *);
-
-static struct mtx ip6qlock;
/*
- * These fields all protected by ip6qlock.
+ * Reassembly headers are stored in hash buckets.
*/
-static VNET_DEFINE(u_int, frag6_nfragpackets);
-static VNET_DEFINE(u_int, frag6_nfrags);
-static VNET_DEFINE(struct ip6q, ip6q); /* ip6 reassemble queue */
+#define IP6REASS_NHASH_LOG2 10
+#define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2)
+#define IP6REASS_HMASK (IP6REASS_NHASH - 1)
+
+static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
+ uint32_t bucket __unused);
+static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
+static void frag6_insque_head(struct ip6q *, struct ip6q *,
+ uint32_t bucket);
+static void frag6_remque(struct ip6q *, uint32_t bucket);
+static void frag6_freef(struct ip6q *, uint32_t bucket);
+
+struct ip6qbucket {
+ struct ip6q ip6q;
+ struct mtx lock;
+ int count;
+};
+
+VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
+volatile u_int frag6_nfrags = 0;
+VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
+VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed);
#define V_frag6_nfragpackets VNET(frag6_nfragpackets)
-#define V_frag6_nfrags VNET(frag6_nfrags)
#define V_ip6q VNET(ip6q)
+#define V_ip6q_hashseed VNET(ip6q_hashseed)
-#define IP6Q_LOCK_INIT() mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
-#define IP6Q_LOCK() mtx_lock(&ip6qlock)
-#define IP6Q_TRYLOCK() mtx_trylock(&ip6qlock)
-#define IP6Q_LOCK_ASSERT() mtx_assert(&ip6qlock, MA_OWNED)
-#define IP6Q_UNLOCK() mtx_unlock(&ip6qlock)
+#define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock)
+#define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock)
+#define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
+#define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock)
+#define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q)
static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
/*
+ * By default, limit the number of IP6 fragments across all reassembly
+ * queues to 1/32 of the total number of mbuf clusters.
+ *
+ * Limit the total number of reassembly queues per VNET to the
+ * IP6 fragment limit, but ensure the limit will not allow any bucket
+ * to grow above 100 items. (The bucket limit is
+ * IP6_MAXFRAGPACKETS / (IP6REASS_NHASH / 2), so 50 is the correct
+ * multiplier to reach a 100-item limit.)
+ * The 100-item limit was chosen as brief testing seems to show that
+ * this produces "reasonable" performance on some subset of systems
+ * under DoS attack.
+ */
+#define IP6_MAXFRAGS (nmbclusters / 32)
+#define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
+
+/*
* Initialise reassembly queue and fragment identifier.
*/
+void
+frag6_set_bucketsize()
+{
+ int i;
+
+ if ((i = V_ip6_maxfragpackets) > 0)
+ V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
+}
+
static void
frag6_change(void *tag)
{
+ VNET_ITERATOR_DECL(vnet_iter);
- V_ip6_maxfragpackets = nmbclusters / 4;
- V_ip6_maxfrags = nmbclusters / 4;
+ ip6_maxfrags = IP6_MAXFRAGS;
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
}
void
frag6_init(void)
{
-
- V_ip6_maxfragpackets = nmbclusters / 4;
- V_ip6_maxfrags = nmbclusters / 4;
- V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q;
-
+ struct ip6q *q6;
+ int i;
+
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ for (i = 0; i < IP6REASS_NHASH; i++) {
+ q6 = IP6Q_HEAD(i);
+ q6->ip6q_next = q6->ip6q_prev = q6;
+ mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF);
+ V_ip6q[i].count = 0;
+ }
+ V_ip6q_hashseed = arc4random();
+ V_ip6_maxfragsperpacket = 64;
if (!IS_DEFAULT_VNET(curvnet))
return;
+ ip6_maxfrags = IP6_MAXFRAGS;
EVENTHANDLER_REGISTER(nmbclusters_change,
frag6_change, NULL, EVENTHANDLER_PRI_ANY);
-
- IP6Q_LOCK_INIT();
}
/*
@@ -159,12 +214,15 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
struct mbuf *m = *mp, *t;
struct ip6_hdr *ip6;
struct ip6_frag *ip6f;
- struct ip6q *q6;
+ struct ip6q *head, *q6;
struct ip6asfrag *af6, *ip6af, *af6dwn;
struct in6_ifaddr *ia;
int offset = *offp, nxt, i, next;
int first_frag = 0;
int fragoff, frgpartlen; /* must be larger than u_int16_t */
+ uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
+ sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
+ uint32_t hash, *hashkeyp;
struct ifnet *dstifp;
u_int8_t ecn, ecn0;
#ifdef RSS
@@ -233,19 +291,38 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
return (ip6f->ip6f_nxt);
}
- IP6Q_LOCK();
+ /* Get fragment length and discard 0-byte fragments. */
+ frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
+ if (frgpartlen == 0) {
+ icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
+ offsetof(struct ip6_hdr, ip6_plen));
+ in6_ifstat_inc(dstifp, ifs6_reass_fail);
+ IP6STAT_INC(ip6s_fragdropped);
+ return IPPROTO_DONE;
+ }
+
+ hashkeyp = hashkey;
+ memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
+ memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
+ *hashkeyp = ip6f->ip6f_ident;
+ hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed);
+ hash &= IP6REASS_HMASK;
+ head = IP6Q_HEAD(hash);
+ IP6Q_LOCK(hash);
/*
* Enforce upper bound on number of fragments.
* If maxfrag is 0, never accept fragments.
* If maxfrag is -1, accept all fragments without limitation.
*/
- if (V_ip6_maxfrags < 0)
+ if (ip6_maxfrags < 0)
;
- else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags)
+ else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags)
goto dropfrag;
- for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next)
+ for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
if (ip6f->ip6f_ident == q6->ip6q_ident &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
@@ -255,7 +332,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
)
break;
- if (q6 == &V_ip6q) {
+ if (q6 == head) {
/*
* the first fragment to arrive, create a reassembly queue.
*/
@@ -270,9 +347,11 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
*/
if (V_ip6_maxfragpackets < 0)
;
- else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets)
+ else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize ||
+ atomic_load_int(&V_frag6_nfragpackets) >=
+ (u_int)V_ip6_maxfragpackets)
goto dropfrag;
- V_frag6_nfragpackets++;
+ atomic_add_int(&V_frag6_nfragpackets, 1);
q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
M_NOWAIT);
if (q6 == NULL)
@@ -285,7 +364,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
}
mac_ip6q_create(m, q6);
#endif
- frag6_insque(q6, &V_ip6q);
+ frag6_insque_head(q6, head, hash);
/* ip6q_nxt will be filled afterwards, from 1st fragment */
q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
@@ -319,21 +398,20 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
* in size.
* If it would exceed, discard the fragment and return an ICMP error.
*/
- frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
if (q6->ip6q_unfrglen >= 0) {
/* The 1st fragment has already arrived. */
if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
return (IPPROTO_DONE);
}
} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
return (IPPROTO_DONE);
}
/*
@@ -352,7 +430,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
int erroff = af6->ip6af_offset;
/* dequeue the fragment. */
- frag6_deq(af6);
+ frag6_deq(af6, hash);
free(af6, M_FTABLE);
/* adjust pointer. */
@@ -450,7 +528,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
}
af6 = af6->ip6af_down;
m_freem(IP6_REASS_MBUF(af6->ip6af_up));
- frag6_deq(af6->ip6af_up);
+ frag6_deq(af6->ip6af_up, hash);
}
#else
/*
@@ -499,29 +577,38 @@ insert:
/*
* Stick new segment in its place;
* check for complete reassembly.
+ * If not complete, check fragment limit.
* Move to front of packet queue, as we are
* the most recently active fragmented packet.
*/
- frag6_enq(ip6af, af6->ip6af_up);
- V_frag6_nfrags++;
+ frag6_enq(ip6af, af6->ip6af_up, hash);
+ atomic_add_int(&frag6_nfrags, 1);
q6->ip6q_nfrag++;
#if 0 /* xxx */
- if (q6 != V_ip6q.ip6q_next) {
- frag6_remque(q6);
- frag6_insque(q6, &V_ip6q);
+ if (q6 != head->ip6q_next) {
+ frag6_remque(q6, hash);
+ frag6_insque_head(q6, head, hash);
}
#endif
next = 0;
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = af6->ip6af_down) {
if (af6->ip6af_off != next) {
- IP6Q_UNLOCK();
+ if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
+ IP6STAT_INC(ip6s_fragdropped);
+ frag6_freef(q6, hash);
+ }
+ IP6Q_UNLOCK(hash);
return IPPROTO_DONE;
}
next += af6->ip6af_frglen;
}
if (af6->ip6af_up->ip6af_mff) {
- IP6Q_UNLOCK();
+ if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
+ IP6STAT_INC(ip6s_fragdropped);
+ frag6_freef(q6, hash);
+ }
+ IP6Q_UNLOCK(hash);
return IPPROTO_DONE;
}
@@ -531,7 +618,7 @@ insert:
ip6af = q6->ip6q_down;
t = m = IP6_REASS_MBUF(ip6af);
af6 = ip6af->ip6af_down;
- frag6_deq(ip6af);
+ frag6_deq(ip6af, hash);
while (af6 != (struct ip6asfrag *)q6) {
m->m_pkthdr.csum_flags &=
IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
@@ -539,7 +626,7 @@ insert:
IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
af6dwn = af6->ip6af_down;
- frag6_deq(af6);
+ frag6_deq(af6, hash);
while (t->m_next)
t = t->m_next;
m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
@@ -566,13 +653,13 @@ insert:
#endif
if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ frag6_remque(q6, hash);
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_destroy(q6);
#endif
free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
goto dropfrag;
}
@@ -583,14 +670,14 @@ insert:
m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
(caddr_t)&nxt);
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ frag6_remque(q6, hash);
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_reassemble(q6, m);
mac_ip6q_destroy(q6);
#endif
free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
int plen = 0;
@@ -612,7 +699,7 @@ insert:
m_tag_prepend(m, mtag);
#endif
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
@@ -634,7 +721,7 @@ insert:
return nxt;
dropfrag:
- IP6Q_UNLOCK();
+ IP6Q_UNLOCK(hash);
in6_ifstat_inc(dstifp, ifs6_reass_fail);
IP6STAT_INC(ip6s_fragdropped);
m_freem(m);
@@ -645,19 +732,19 @@ insert:
* Free a fragment reassembly header and all
* associated datagrams.
*/
-void
-frag6_freef(struct ip6q *q6)
+static void
+frag6_freef(struct ip6q *q6, uint32_t bucket)
{
struct ip6asfrag *af6, *down6;
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
af6 = down6) {
struct mbuf *m = IP6_REASS_MBUF(af6);
down6 = af6->ip6af_down;
- frag6_deq(af6);
+ frag6_deq(af6, bucket);
/*
* Return ICMP time exceeded error for the 1st fragment.
@@ -679,24 +766,25 @@ frag6_freef(struct ip6q *q6)
m_freem(m);
free(af6, M_FTABLE);
}
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ frag6_remque(q6, bucket);
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_destroy(q6);
#endif
free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
}
/*
* Put an ip fragment on a reassembly chain.
* Like insque, but pointers in middle of structure.
*/
-void
-frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
+static void
+frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
+ uint32_t bucket __unused)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
af6->ip6af_up = up6;
af6->ip6af_down = up6->ip6af_down;
@@ -707,36 +795,41 @@ frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
/*
* To frag6_enq as remque is to insque.
*/
-void
-frag6_deq(struct ip6asfrag *af6)
+static void
+frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
af6->ip6af_up->ip6af_down = af6->ip6af_down;
af6->ip6af_down->ip6af_up = af6->ip6af_up;
}
-void
-frag6_insque(struct ip6q *new, struct ip6q *old)
+static void
+frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
+ KASSERT(IP6Q_HEAD(bucket) == old,
+ ("%s: attempt to insert at head of wrong bucket"
+ " (bucket=%u, old=%p)", __func__, bucket, old));
new->ip6q_prev = old;
new->ip6q_next = old->ip6q_next;
old->ip6q_next->ip6q_prev= new;
old->ip6q_next = new;
+ V_ip6q[bucket].count++;
}
-void
-frag6_remque(struct ip6q *p6)
+static void
+frag6_remque(struct ip6q *p6, uint32_t bucket)
{
- IP6Q_LOCK_ASSERT();
+ IP6Q_LOCK_ASSERT(bucket);
p6->ip6q_prev->ip6q_next = p6->ip6q_next;
p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
+ V_ip6q[bucket].count--;
}
/*
@@ -748,37 +841,72 @@ void
frag6_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- struct ip6q *q6;
+ struct ip6q *head, *q6;
+ int i;
VNET_LIST_RLOCK_NOSLEEP();
- IP6Q_LOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- q6 = V_ip6q.ip6q_next;
- if (q6)
- while (q6 != &V_ip6q) {
+ for (i = 0; i < IP6REASS_NHASH; i++) {
+ IP6Q_LOCK(i);
+ head = IP6Q_HEAD(i);
+ q6 = head->ip6q_next;
+ if (q6 == NULL) {
+ /*
+ * XXXJTL: This should never happen. This
+ * should turn into an assertion.
+ */
+ IP6Q_UNLOCK(i);
+ continue;
+ }
+ while (q6 != head) {
--q6->ip6q_ttl;
q6 = q6->ip6q_next;
if (q6->ip6q_prev->ip6q_ttl == 0) {
IP6STAT_INC(ip6s_fragtimeout);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(q6->ip6q_prev);
+ frag6_freef(q6->ip6q_prev, i);
}
}
+ /*
+ * If this bucket holds more reassembly queues than the
+ * per-bucket limit (due to the limit being lowered), drain
+ * off enough to get down to the new limit.
+ * Note that we drain all reassembly queues if
+ * maxfragpackets is 0 (reassembly is disabled),
+ * and don't enforce a limit when maxfragpackets
+ * is negative.
+ */
+ while ((V_ip6_maxfragpackets == 0 ||
+ (V_ip6_maxfragpackets > 0 &&
+ V_ip6q[i].count > V_ip6_maxfragbucketsize)) &&
+ head->ip6q_prev != head) {
+ IP6STAT_INC(ip6s_fragoverflow);
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+ frag6_freef(head->ip6q_prev, i);
+ }
+ IP6Q_UNLOCK(i);
+ }
/*
- * If we are over the maximum number of fragments
- * (due to the limit being lowered), drain off
- * enough to get down to the new limit.
+ * If we are still over the maximum number of fragmented
+ * packets, drain off enough to get down to the new limit.
*/
- while (V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets &&
- V_ip6q.ip6q_prev) {
- IP6STAT_INC(ip6s_fragoverflow);
- /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(V_ip6q.ip6q_prev);
+ i = 0;
+ while (V_ip6_maxfragpackets >= 0 &&
+ atomic_load_int(&V_frag6_nfragpackets) >
+ (u_int)V_ip6_maxfragpackets) {
+ IP6Q_LOCK(i);
+ head = IP6Q_HEAD(i);
+ if (head->ip6q_prev != head) {
+ IP6STAT_INC(ip6s_fragoverflow);
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+ frag6_freef(head->ip6q_prev, i);
+ }
+ IP6Q_UNLOCK(i);
+ i = (i + 1) % IP6REASS_NHASH;
}
CURVNET_RESTORE();
}
- IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
@@ -789,22 +917,25 @@ void
frag6_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
+ struct ip6q *head;
+ int i;
VNET_LIST_RLOCK_NOSLEEP();
- if (IP6Q_TRYLOCK() == 0) {
- VNET_LIST_RUNLOCK_NOSLEEP();
- return;
- }
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- while (V_ip6q.ip6q_next != &V_ip6q) {
- IP6STAT_INC(ip6s_fragdropped);
- /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(V_ip6q.ip6q_next);
+ for (i = 0; i < IP6REASS_NHASH; i++) {
+ if (IP6Q_TRYLOCK(i) == 0)
+ continue;
+ head = IP6Q_HEAD(i);
+ while (head->ip6q_next != head) {
+ IP6STAT_INC(ip6s_fragdropped);
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+ frag6_freef(head->ip6q_next, i);
+ }
+ IP6Q_UNLOCK(i);
}
CURVNET_RESTORE();
}
- IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c
index 4d06ca16..2b080169 100644
--- a/freebsd/sys/netinet6/icmp6.c
+++ b/freebsd/sys/netinet6/icmp6.c
@@ -126,8 +126,8 @@ VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
VNET_DECLARE(struct inpcbhead, ripcb);
VNET_DECLARE(int, icmp6errppslim);
-static VNET_DEFINE(int, icmp6errpps_count) = 0;
-static VNET_DEFINE(struct timeval, icmp6errppslim_last);
+VNET_DEFINE_STATIC(int, icmp6errpps_count) = 0;
+VNET_DEFINE_STATIC(struct timeval, icmp6errppslim_last);
VNET_DECLARE(int, icmp6_nodeinfo);
#define V_ripcbinfo VNET(ripcbinfo)
@@ -1910,6 +1910,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
struct inpcb *last = NULL;
struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6;
+ struct epoch_tracker et;
struct mbuf *opts = NULL;
#ifndef PULLDOWN_TEST
@@ -1936,8 +1937,8 @@ icmp6_rip6_input(struct mbuf **mp, int off)
return (IPPROTO_DONE);
}
- INP_INFO_RLOCK(&V_ripcbinfo);
- LIST_FOREACH(in6p, &V_ripcb, inp_list) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
if ((in6p->inp_vflag & INP_IPV6) == 0)
continue;
if (in6p->inp_ip_p != IPPROTO_ICMPV6)
@@ -2014,7 +2015,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
}
last = in6p;
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (last->inp_flags & INP_CONTROLOPTS)
ip6_savecontrol(last, m, &opts);
diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c
index 3ed80c9c..c415cf78 100644
--- a/freebsd/sys/netinet6/in6.c
+++ b/freebsd/sys/netinet6/in6.c
@@ -2139,9 +2139,6 @@ in6_lltable_free_entry(struct lltable *llt, struct llentry *lle)
lltable_unlink_entry(llt, lle);
}
- if (callout_stop(&lle->lle_timer) > 0)
- LLE_REMREF(lle);
-
llentry_free(lle);
}
diff --git a/freebsd/sys/netinet6/in6_fib.c b/freebsd/sys/netinet6/in6_fib.c
index cf79797d..e5e8a161 100644
--- a/freebsd/sys/netinet6/in6_fib.c
+++ b/freebsd/sys/netinet6/in6_fib.c
@@ -40,7 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -173,6 +173,7 @@ int
fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid,
uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
@@ -222,6 +223,7 @@ int
fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6)
{
+ RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c
index 160a0929..66b4c63a 100644
--- a/freebsd/sys/netinet6/in6_gif.c
+++ b/freebsd/sys/netinet6/in6_gif.c
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,20 +41,19 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/rmlock.h>
#include <sys/systm.h>
+#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/kernel.h>
-#include <sys/queue.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
-#include <sys/protosw.h>
#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
@@ -63,52 +63,189 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#ifdef INET
#include <netinet/ip.h>
+#include <netinet/ip_ecn.h>
#endif
#include <netinet/ip_encap.h>
-#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
-#endif
-#include <netinet/ip_ecn.h>
-#ifdef INET6
+#include <netinet6/scope6_var.h>
#include <netinet6/ip6_ecn.h>
#include <netinet6/in6_fib.h>
-#endif
#include <net/if_gif.h>
#define GIF_HLIM 30
-static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
+VNET_DEFINE_STATIC(int, ip6_gif_hlim) = GIF_HLIM;
#define V_ip6_gif_hlim VNET(ip6_gif_hlim)
SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(ip6_gif_hlim), 0, "");
-
-static int in6_gif_input(struct mbuf **, int *, int);
-
-extern struct domain inet6domain;
-static struct protosw in6_gif_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inet6domain,
- .pr_protocol = 0, /* IPPROTO_IPV[46] */
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = in6_gif_input,
- .pr_output = rip6_output,
- .pr_ctloutput = rip6_ctloutput,
- .pr_usrreqs = &rip6_usrreqs
-};
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0,
+ "Default hop limit for encapsulated packets");
+
+/*
+ * We keep interfaces in a hash table using src+dst as key.
+ * Interfaces with the GIF_IGNORE_SOURCE flag are linked into a plain list.
+ */
+VNET_DEFINE_STATIC(struct gif_list *, ipv6_hashtbl) = NULL;
+VNET_DEFINE_STATIC(struct gif_list, ipv6_list) = CK_LIST_HEAD_INITIALIZER();
+#define V_ipv6_hashtbl VNET(ipv6_hashtbl)
+#define V_ipv6_list VNET(ipv6_list)
+
+#define GIF_HASH(src, dst) (V_ipv6_hashtbl[\
+ in6_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
+#define GIF_HASH_SC(sc) GIF_HASH(&(sc)->gif_ip6hdr->ip6_src,\
+ &(sc)->gif_ip6hdr->ip6_dst)
+static uint32_t
+in6_gif_hashval(const struct in6_addr *src, const struct in6_addr *dst)
+{
+ uint32_t ret;
+
+ ret = fnv_32_buf(src, sizeof(*src), FNV1_32_INIT);
+ return (fnv_32_buf(dst, sizeof(*dst), ret));
+}
+
+static int
+in6_gif_checkdup(const struct gif_softc *sc, const struct in6_addr *src,
+ const struct in6_addr *dst)
+{
+ struct gif_softc *tmp;
+
+ if (sc->gif_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, src) &&
+ IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, dst))
+ return (EEXIST);
+
+ CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) {
+ if (tmp == sc)
+ continue;
+ if (IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_src, src) &&
+ IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_dst, dst))
+ return (EADDRNOTAVAIL);
+ }
+ return (0);
+}
+
+static void
+in6_gif_attach(struct gif_softc *sc)
+{
+
+ if (sc->gif_options & GIF_IGNORE_SOURCE)
+ CK_LIST_INSERT_HEAD(&V_ipv6_list, sc, chain);
+ else
+ CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain);
+}
+
+int
+in6_gif_setopts(struct gif_softc *sc, u_int options)
+{
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ MPASS(sc->gif_family == AF_INET6);
+ MPASS(sc->gif_options != options);
+
+ if ((options & GIF_IGNORE_SOURCE) !=
+ (sc->gif_options & GIF_IGNORE_SOURCE)) {
+ CK_LIST_REMOVE(sc, chain);
+ sc->gif_options = options;
+ in6_gif_attach(sc);
+ }
+ return (0);
+}
+
+int
+in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
+{
+ struct in6_ifreq *ifr = (struct in6_ifreq *)data;
+ struct sockaddr_in6 *dst, *src;
+ struct ip6_hdr *ip6;
+ int error;
+
+ /* NOTE: we are protected with gif_ioctl_sx lock */
+ error = EINVAL;
+ switch (cmd) {
+ case SIOCSIFPHYADDR_IN6:
+ src = &((struct in6_aliasreq *)data)->ifra_addr;
+ dst = &((struct in6_aliasreq *)data)->ifra_dstaddr;
+
+ /* sanity checks */
+ if (src->sin6_family != dst->sin6_family ||
+ src->sin6_family != AF_INET6 ||
+ src->sin6_len != dst->sin6_len ||
+ src->sin6_len != sizeof(*src))
+ break;
+ if (IN6_IS_ADDR_UNSPECIFIED(&src->sin6_addr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ /*
+ * Check validity of the scope zone ID of the
+ * addresses, and convert it into the kernel
+ * internal form if necessary.
+ */
+ if ((error = sa6_embedscope(src, 0)) != 0 ||
+ (error = sa6_embedscope(dst, 0)) != 0)
+ break;
+
+ if (V_ipv6_hashtbl == NULL)
+ V_ipv6_hashtbl = gif_hashinit();
+ error = in6_gif_checkdup(sc, &src->sin6_addr,
+ &dst->sin6_addr);
+ if (error == EADDRNOTAVAIL)
+ break;
+ if (error == EEXIST) {
+ /* Addresses are the same. Just return. */
+ error = 0;
+ break;
+ }
+ ip6 = malloc(sizeof(*ip6), M_GIF, M_WAITOK | M_ZERO);
+ ip6->ip6_src = src->sin6_addr;
+ ip6->ip6_dst = dst->sin6_addr;
+ ip6->ip6_vfc = IPV6_VERSION;
+ if (sc->gif_family != 0) {
+ /* Detach existing tunnel first */
+ CK_LIST_REMOVE(sc, chain);
+ GIF_WAIT();
+ free(sc->gif_hdr, M_GIF);
+ /* XXX: should we notify about link state change? */
+ }
+ sc->gif_family = AF_INET6;
+ sc->gif_ip6hdr = ip6;
+ in6_gif_attach(sc);
+ break;
+ case SIOCGIFPSRCADDR_IN6:
+ case SIOCGIFPDSTADDR_IN6:
+ if (sc->gif_family != AF_INET6) {
+ error = EADDRNOTAVAIL;
+ break;
+ }
+ src = (struct sockaddr_in6 *)&ifr->ifr_addr;
+ memset(src, 0, sizeof(*src));
+ src->sin6_family = AF_INET6;
+ src->sin6_len = sizeof(*src);
+ src->sin6_addr = (cmd == SIOCGIFPSRCADDR_IN6) ?
+ sc->gif_ip6hdr->ip6_src: sc->gif_ip6hdr->ip6_dst;
+ error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
+ if (error == 0)
+ error = sa6_recoverscope(src);
+ if (error != 0)
+ memset(src, 0, sizeof(*src));
+ break;
+ }
+ return (error);
+}
int
in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
- GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
struct ip6_hdr *ip6;
int len;
/* prepend new IP header */
+ MPASS(in_epoch(net_epoch_preempt));
len = sizeof(struct ip6_hdr);
#ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP)
@@ -128,14 +265,8 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
#endif
ip6 = mtod(m, struct ip6_hdr *);
- GIF_RLOCK(sc);
- if (sc->gif_family != AF_INET6) {
- m_freem(m);
- GIF_RUNLOCK(sc);
- return (ENETDOWN);
- }
+ MPASS(sc->gif_family == AF_INET6);
bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
- GIF_RUNLOCK(sc);
ip6->ip6_flow |= htonl((uint32_t)ecn << 20);
ip6->ip6_nxt = proto;
@@ -149,15 +280,14 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
}
static int
-in6_gif_input(struct mbuf **mp, int *offp, int proto)
+in6_gif_input(struct mbuf *m, int off, int proto, void *arg)
{
- struct mbuf *m = *mp;
+ struct gif_softc *sc = arg;
struct ifnet *gifp;
- struct gif_softc *sc;
struct ip6_hdr *ip6;
uint8_t ecn;
- sc = encap_getarg(m);
+ MPASS(in_epoch(net_epoch_preempt));
if (sc == NULL) {
m_freem(m);
IP6STAT_INC(ip6s_nogif);
@@ -167,7 +297,7 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
if ((gifp->if_flags & IFF_UP) != 0) {
ip6 = mtod(m, struct ip6_hdr *);
ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- m_adj(m, *offp);
+ m_adj(m, off);
gif_input(m, gifp, proto, ecn);
} else {
m_freem(m);
@@ -176,59 +306,126 @@ in6_gif_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
-/*
- * we know that we are in IFF_UP, outer address available, and outer family
- * matched the physical addr family. see gif_encapcheck().
- */
-int
-in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+static int
+in6_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
const struct ip6_hdr *ip6;
struct gif_softc *sc;
int ret;
- /* sanity check done in caller */
- sc = (struct gif_softc *)arg;
- GIF_RLOCK_ASSERT(sc);
+ if (V_ipv6_hashtbl == NULL)
+ return (0);
+ MPASS(in_epoch(net_epoch_preempt));
/*
- * Check for address match. Note that the check is for an incoming
- * packet. We should compare the *source* address in our configuration
- * and the *destination* address of the packet, and vice versa.
+ * NOTE: it is safe to iterate without any locking here, because a softc
+ * can be reclaimed only when we are not within a net_epoch_preempt
+ * section, and ip_encap lookup+input are executed inside that section.
*/
ip6 = mtod(m, const struct ip6_hdr *);
- if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
+ ret = 0;
+ CK_LIST_FOREACH(sc, &GIF_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) {
+ /*
+ * This is an inbound packet, so its ip6_dst is the source
+ * address in the softc.
+ */
+ if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src,
+ &ip6->ip6_dst) &&
+ IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst,
+ &ip6->ip6_src)) {
+ ret = ENCAP_DRV_LOOKUP;
+ goto done;
+ }
+ }
+ /*
+ * No exact match.
+ * Check the list of interfaces with GIF_IGNORE_SOURCE flag.
+ */
+ CK_LIST_FOREACH(sc, &V_ipv6_list, chain) {
+ if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src,
+ &ip6->ip6_dst)) {
+ ret = 128 + 8; /* src + proto */
+ goto done;
+ }
+ }
+ return (0);
+done:
+ if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
- ret = 128;
- if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
- if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
- return (0);
- } else
- ret += 128;
-
/* ingress filters on outer source */
if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
struct nhop6_basic nh6;
- /* XXX empty scope id */
- if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0,
- &nh6) != 0)
+ if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src,
+ ntohs(in6_getscope(&ip6->ip6_src)), 0, 0, &nh6) != 0)
return (0);
if (nh6.nh_ifp != m->m_pkthdr.rcvif)
return (0);
}
+ *arg = sc;
return (ret);
}
-int
-in6_gif_attach(struct gif_softc *sc)
+static struct {
+ const struct encap_config encap;
+ const struct encaptab *cookie;
+} ipv6_encap_cfg[] = {
+#ifdef INET
+ {
+ .encap = {
+ .proto = IPPROTO_IPV4,
+ .min_length = sizeof(struct ip6_hdr) +
+ sizeof(struct ip),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in6_gif_lookup,
+ .input = in6_gif_input
+ },
+ },
+#endif
+ {
+ .encap = {
+ .proto = IPPROTO_IPV6,
+ .min_length = 2 * sizeof(struct ip6_hdr),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in6_gif_lookup,
+ .input = in6_gif_input
+ },
+ },
+ {
+ .encap = {
+ .proto = IPPROTO_ETHERIP,
+ .min_length = sizeof(struct ip6_hdr) +
+ sizeof(struct etherip_header) +
+ sizeof(struct ether_header),
+ .exact_match = ENCAP_DRV_LOOKUP,
+ .lookup = in6_gif_lookup,
+ .input = in6_gif_input
+ },
+ }
+};
+
+void
+in6_gif_init(void)
{
+ int i;
- KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
- sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck,
- (void *)&in6_gif_protosw, sc);
- if (sc->gif_ecookie == NULL)
- return (EEXIST);
- return (0);
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+ for (i = 0; i < nitems(ipv6_encap_cfg); i++)
+ ipv6_encap_cfg[i].cookie = ip6_encap_attach(
+ &ipv6_encap_cfg[i].encap, NULL, M_WAITOK);
+}
+
+void
+in6_gif_uninit(void)
+{
+ int i;
+
+ if (IS_DEFAULT_VNET(curvnet)) {
+ for (i = 0; i < nitems(ipv6_encap_cfg); i++)
+ ip6_encap_detach(ipv6_encap_cfg[i].cookie);
+ }
+ if (V_ipv6_hashtbl != NULL)
+ gif_hashdestroy(V_ipv6_hashtbl);
}
diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c
index 81182b4e..1cab31d1 100644
--- a/freebsd/sys/netinet6/in6_ifattach.c
+++ b/freebsd/sys/netinet6/in6_ifattach.c
@@ -759,7 +759,6 @@ _in6_ifdetach(struct ifnet *ifp, int purgeulp)
/*
* nuke any of IPv6 addresses we have
- * XXX: all addresses should be already removed
*/
CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
if (ifa->ifa_addr->sa_family != AF_INET6)
@@ -874,6 +873,7 @@ in6_purgemaddrs(struct ifnet *ifp)
ifma->ifma_protospec == NULL)
continue;
inm = (struct in6_multi *)ifma->ifma_protospec;
+ in6m_disconnect(inm);
in6m_rele_locked(&purgeinms, inm);
if (__predict_false(ifma6_restart)) {
ifma6_restart = false;
diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c
index 32660c89..3824645d 100644
--- a/freebsd/sys/netinet6/in6_mcast.c
+++ b/freebsd/sys/netinet6/in6_mcast.c
@@ -540,6 +540,8 @@ in6m_release(struct in6_multi *inm)
CTR2(KTR_MLD, "%s: purging ifma %p", __func__, ifma);
KASSERT(ifma->ifma_protospec == NULL,
("%s: ifma_protospec != NULL", __func__));
+ if (ifp == NULL)
+ ifp = ifma->ifma_ifp;
if (ifp != NULL) {
CURVNET_SET(ifp->if_vnet);
@@ -564,8 +566,13 @@ static void in6m_init(void)
taskqgroup_config_gtask_init(NULL, &free_gtask, in6m_release_task, "in6m release task");
}
+#ifdef EARLY_AP_STARTUP
SYSINIT(in6m_init, SI_SUB_SMP + 1, SI_ORDER_FIRST,
in6m_init, NULL);
+#else
+SYSINIT(in6m_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_SECOND,
+ in6m_init, NULL);
+#endif
void
@@ -589,11 +596,20 @@ in6m_disconnect(struct in6_multi *inm)
struct ifmultiaddr *ifma, *ll_ifma;
ifp = inm->in6m_ifp;
+
+ if (ifp == NULL)
+ return;
+ inm->in6m_ifp = NULL;
IF_ADDR_WLOCK_ASSERT(ifp);
ifma = inm->in6m_ifma;
+ if (ifma == NULL)
+ return;
if_ref(ifp);
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ if (ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
+ ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
if ((ll_ifma = ifma->ifma_llifma) != NULL) {
MPASS(ifma != ll_ifma);
@@ -602,7 +618,10 @@ in6m_disconnect(struct in6_multi *inm)
MPASS(ll_ifma->ifma_ifp == ifp);
if (--ll_ifma->ifma_refcount == 0) {
ifma6_restart = true;
- CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) {
+ CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
+ ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED;
+ }
MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
if_freemulti(ll_ifma);
}
@@ -629,7 +648,7 @@ in6m_release_deferred(struct in6_multi *inm)
IN6_MULTI_LIST_LOCK_ASSERT();
KASSERT(inm->in6m_refcount > 0, ("refcount == %d inm: %p", inm->in6m_refcount, inm));
if (--inm->in6m_refcount == 0) {
- in6m_disconnect(inm);
+ MPASS(inm->in6m_ifp == NULL);
SLIST_INIT(&tmp);
inm->in6m_ifma->ifma_protospec = NULL;
MPASS(inm->in6m_ifma->ifma_llifma == NULL);
@@ -1307,6 +1326,7 @@ out_in6m_release:
break;
}
}
+ in6m_disconnect(inm);
in6m_release_deferred(inm);
IF_ADDR_RUNLOCK(ifp);
} else {
@@ -1386,13 +1406,17 @@ in6_leavegroup_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
CTR1(KTR_MLD, "%s: doing mld downcall", __func__);
- error = mld_change_state(inm, 0);
+ error = 0;
+ if (ifp)
+ error = mld_change_state(inm, 0);
if (error)
CTR1(KTR_MLD, "%s: failed mld downcall", __func__);
CTR2(KTR_MLD, "%s: dropping ref on %p", __func__, inm);
if (ifp)
IF_ADDR_WLOCK(ifp);
+ if (inm->in6m_refcount == 1 && inm->in6m_ifp != NULL)
+ in6m_disconnect(inm);
in6m_release_deferred(inm);
if (ifp)
IF_ADDR_WUNLOCK(ifp);
@@ -1626,16 +1650,13 @@ in6p_findmoptions(struct inpcb *inp)
*/
static void
-inp_gcmoptions(epoch_context_t ctx)
+inp_gcmoptions(struct ip6_moptions *imo)
{
- struct ip6_moptions *imo;
struct in6_mfilter *imf;
struct in6_multi *inm;
struct ifnet *ifp;
size_t idx, nmships;
- imo = __containerof(ctx, struct ip6_moptions, imo6_epoch_ctx);
-
nmships = imo->im6o_num_memberships;
for (idx = 0; idx < nmships; ++idx) {
imf = imo->im6o_mfilters ? &imo->im6o_mfilters[idx] : NULL;
@@ -1665,7 +1686,7 @@ ip6_freemoptions(struct ip6_moptions *imo)
{
if (imo == NULL)
return;
- epoch_call(net_epoch_preempt, &imo->imo6_epoch_ctx, inp_gcmoptions);
+ inp_gcmoptions(imo);
}
/*
@@ -2159,6 +2180,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
IN6_MULTI_UNLOCK();
goto out_im6o_free;
}
+ in6m_acquire(inm);
imo->im6o_membership[idx] = inm;
} else {
CTR1(KTR_MLD, "%s: merge inm state", __func__);
@@ -2193,6 +2215,12 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
out_im6o_free:
if (error && is_new) {
+ inm = imo->im6o_membership[idx];
+ if (inm != NULL) {
+ IN6_MULTI_LIST_LOCK();
+ in6m_release_deferred(inm);
+ IN6_MULTI_LIST_UNLOCK();
+ }
imo->im6o_membership[idx] = NULL;
--imo->im6o_num_memberships;
}
diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c
index 488cca86..a30cb98b 100644
--- a/freebsd/sys/netinet6/in6_pcb.c
+++ b/freebsd/sys/netinet6/in6_pcb.c
@@ -131,6 +131,12 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
int error, lookupflags = 0;
int reuseport = (so->so_options & SO_REUSEPORT);
+ /*
+ * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
+ * so that we don't have to add to the (already messy) code below.
+ */
+ int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
+
INP_WLOCK_ASSERT(inp);
INP_HASH_WLOCK_ASSERT(pcbinfo);
@@ -138,7 +144,7 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
return (EADDRNOTAVAIL);
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
return (EINVAL);
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
if (nam == NULL) {
if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
@@ -172,6 +178,13 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
*/
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
reuseport = SO_REUSEADDR|SO_REUSEPORT;
+ /*
+ * XXX: How to deal with SO_REUSEPORT_LB here?
+ * Treat same as SO_REUSEPORT for now.
+ */
+ if ((so->so_options &
+ (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
+ reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
struct ifaddr *ifa;
@@ -221,7 +234,8 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
(!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
!IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
- (t->inp_flags2 & INP_REUSEPORT) == 0) &&
+ (t->inp_flags2 & INP_REUSEPORT) ||
+ (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
#ifndef __rtems__
(inp->inp_cred->cr_uid !=
t->inp_cred->cr_uid))
@@ -279,9 +293,11 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
*/
tw = intotw(t);
if (tw == NULL ||
- (reuseport & tw->tw_so_options) == 0)
+ ((reuseport & tw->tw_so_options) == 0 &&
+ (reuseport_lb & tw->tw_so_options) == 0))
return (EADDRINUSE);
- } else if (t && (reuseport & inp_so_options(t)) == 0) {
+ } else if (t && (reuseport & inp_so_options(t)) == 0 &&
+ (reuseport_lb & inp_so_options(t)) == 0) {
return (EADDRINUSE);
}
#ifdef INET
@@ -291,22 +307,25 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
in6_sin6_2_sin(&sin, sin6);
t = in_pcblookup_local(pcbinfo, sin.sin_addr,
- lport, lookupflags, cred);
+ lport, lookupflags, cred);
if (t && t->inp_flags & INP_TIMEWAIT) {
tw = intotw(t);
if (tw == NULL)
return (EADDRINUSE);
if ((reuseport & tw->tw_so_options) == 0
+ && (reuseport_lb & tw->tw_so_options) == 0
&& (ntohl(t->inp_laddr.s_addr) !=
- INADDR_ANY || ((inp->inp_vflag &
- INP_IPV6PROTO) ==
- (t->inp_vflag & INP_IPV6PROTO))))
+ INADDR_ANY || ((inp->inp_vflag &
+ INP_IPV6PROTO) ==
+ (t->inp_vflag & INP_IPV6PROTO))))
return (EADDRINUSE);
} else if (t &&
(reuseport & inp_so_options(t)) == 0 &&
+ (reuseport_lb & inp_so_options(t)) == 0 &&
(ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
- (t->inp_vflag & INP_IPV6PROTO) != 0))
+ (t->inp_vflag & INP_IPV6PROTO) != 0)) {
return (EADDRINUSE);
+ }
}
#endif
}
@@ -644,7 +663,7 @@ in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
}
errno = inet6ctlerrmap[cmd];
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
+ CK_LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
INP_WLOCK(inp);
if ((inp->inp_vflag & INP_IPV6) == 0) {
INP_WUNLOCK(inp);
@@ -721,7 +740,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(&in6addr_any), lport, 0,
pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -751,7 +770,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
*/
porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
pcbinfo->ipi_porthashmask)];
- LIST_FOREACH(phd, porthash, phd_hash) {
+ CK_LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
}
@@ -760,7 +779,7 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
* Port is in use by one or more PCBs. Look for best
* fit.
*/
- LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+ CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
wildcard = 0;
if (cred != NULL &&
!prison_equal_ip6(cred->cr_prison,
@@ -802,7 +821,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
int i, gap;
INP_INFO_WLOCK(pcbinfo);
- LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
+ CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(in6p);
im6o = in6p->in6p_moptions;
if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
@@ -841,16 +860,10 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
* (by a redirect), time to try a default gateway again.
*/
void
-in6_losing(struct inpcb *in6p)
+in6_losing(struct inpcb *inp)
{
- if (in6p->inp_route6.ro_rt) {
- RTFREE(in6p->inp_route6.ro_rt);
- in6p->inp_route6.ro_rt = (struct rtentry *)NULL;
- }
- if (in6p->inp_route.ro_lle)
- LLE_FREE(in6p->inp_route.ro_lle); /* zeros ro_lle */
- return;
+ RO_INVALIDATE_CACHE(&inp->inp_route6);
}
/*
@@ -858,18 +871,67 @@ in6_losing(struct inpcb *in6p)
* and allocate a (hopefully) better one.
*/
struct inpcb *
-in6_rtchange(struct inpcb *inp, int errno)
+in6_rtchange(struct inpcb *inp, int errno __unused)
{
- if (inp->inp_route6.ro_rt) {
- RTFREE(inp->inp_route6.ro_rt);
- inp->inp_route6.ro_rt = (struct rtentry *)NULL;
- }
- if (inp->inp_route.ro_lle)
- LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */
+ RO_INVALIDATE_CACHE(&inp->inp_route6);
return inp;
}
+static struct inpcb *
+in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
+ const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
+ uint16_t fport, int lookupflags)
+{
+ struct inpcb *local_wild = NULL;
+ const struct inpcblbgrouphead *hdr;
+ struct inpcblbgroup *grp;
+ struct inpcblbgroup *grp_local_wild;
+ uint32_t idx;
+
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+
+ hdr = &pcbinfo->ipi_lbgrouphashbase[INP_PCBLBGROUP_PORTHASH(
+ lport, pcbinfo->ipi_lbgrouphashmask)];
+
+ /*
+ * Order of socket selection:
+ * 1. non-wild.
+ * 2. wild (if lookupflags contains INPLOOKUP_WILDCARD).
+ *
+ * NOTE:
+ * - Load balanced group does not contain jailed sockets.
+ * - Load balanced does not contain IPv4 mapped INET6 wild sockets.
+ */
+ CK_LIST_FOREACH(grp, hdr, il_list) {
+#ifdef INET
+ if (!(grp->il_vflag & INP_IPV6))
+ continue;
+#endif
+ if (grp->il_lport == lport) {
+ idx = 0;
+ int pkt_hash = INP_PCBLBGROUP_PKTHASH(
+ INP6_PCBHASHKEY(faddr), lport, fport);
+
+ idx = pkt_hash % grp->il_inpcnt;
+
+ if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) {
+ return (grp->il_inp[idx]);
+ } else {
+ if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
+ (lookupflags & INPLOOKUP_WILDCARD)) {
+ local_wild = grp->il_inp[idx];
+ grp_local_wild = grp;
+ }
+ }
+ }
+ }
+ if (local_wild != NULL) {
+ return (local_wild);
+ }
+ return (NULL);
+}
+
#ifdef PCBGROUP
/*
* Lookup PCB in hash list, using pcbgroup tables.
@@ -891,7 +953,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
INP_GROUP_LOCK(pcbgroup);
head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -932,7 +994,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
*/
head = &pcbgroup->ipg_hashbase[
INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)];
- LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -994,7 +1056,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
head = &pcbinfo->ipi_wildbase[INP_PCBHASH(
INP6_PCBHASHKEY(&in6addr_any), lport, 0,
pcbinfo->ipi_wildmask)];
- LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ CK_LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -1094,7 +1156,7 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
tmpinp = NULL;
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -1117,6 +1179,18 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
return (tmpinp);
/*
+ * Then look in lb group (for wildcard match).
+ */
+ if (pcbinfo->ipi_lbgrouphashbase != NULL &&
+ (lookupflags & INPLOOKUP_WILDCARD)) {
+ inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr,
+ fport, lookupflags);
+ if (inp != NULL) {
+ return (inp);
+ }
+ }
+
+ /*
* Then look for a wildcard match, if requested.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
@@ -1134,7 +1208,7 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
INP6_PCBHASHKEY(&in6addr_any), lport, 0,
pcbinfo->ipi_hashmask)];
- LIST_FOREACH(inp, head, inp_hash) {
+ CK_LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -1192,40 +1266,35 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
struct ifnet *ifp)
{
struct inpcb *inp;
- bool locked;
INP_HASH_RLOCK(pcbinfo);
inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- locked = INP_TRY_WLOCK(inp);
- else if (lookupflags & INPLOOKUP_RLOCKPCB)
- locked = INP_TRY_RLOCK(inp);
- else
- panic("%s: locking bug", __func__);
- if (!locked)
- in_pcbref(inp);
- INP_HASH_RUNLOCK(pcbinfo);
- if (!locked) {
- if (lookupflags & INPLOOKUP_WLOCKPCB) {
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp))
- return (NULL);
- } else {
- INP_RLOCK(inp);
- if (in_pcbrele_rlocked(inp))
- return (NULL);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_WUNLOCK(inp);
+ inp = NULL;
}
- }
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
+ inp = NULL;
+ }
+ } else
+ panic("%s: locking bug", __func__);
#ifdef INVARIANTS
- if (lookupflags & INPLOOKUP_WLOCKPCB)
- INP_WLOCK_ASSERT(inp);
- else
- INP_RLOCK_ASSERT(inp);
+ if (inp != NULL) {
+ if (lookupflags & INPLOOKUP_WLOCKPCB)
+ INP_WLOCK_ASSERT(inp);
+ else
+ INP_RLOCK_ASSERT(inp);
+ }
#endif
- } else
- INP_HASH_RUNLOCK(pcbinfo);
+ }
+ INP_HASH_RUNLOCK(pcbinfo);
return (inp);
}
diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c
index 756ea48b..cf62e60c 100644
--- a/freebsd/sys/netinet6/in6_proto.c
+++ b/freebsd/sys/netinet6/in6_proto.c
@@ -173,7 +173,7 @@ struct protosw inet6sw[] = {
.pr_type = SOCK_STREAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_TCP,
- .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN,
+ .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|PR_LISTEN,
.pr_input = tcp6_input,
.pr_ctlinput = tcp6_ctlinput,
.pr_ctloutput = tcp_ctloutput,
@@ -387,7 +387,9 @@ VNET_DEFINE(int, ip6_no_radr) = 0;
VNET_DEFINE(int, ip6_norbit_raif) = 0;
VNET_DEFINE(int, ip6_rfc6204w3) = 0;
VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
-VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */
+int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */
+VNET_DEFINE(int, ip6_maxfragbucketsize);/* initialized in frag6.c:frag6_init() */
+VNET_DEFINE(int, ip6_maxfragsperpacket); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_log_interval) = 5;
VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we
* process? */
@@ -474,6 +476,20 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
return (0);
}
+static int
+sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
+{
+ int error, val;
+
+ val = V_ip6_maxfragpackets;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
+ return (error);
+ V_ip6_maxfragpackets = val;
+ frag6_set_bucketsize();
+ return (0);
+}
+
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
"Enable forwarding of IPv6 packets between interfaces");
@@ -486,8 +502,9 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim,
SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
ip6stat,
"IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0,
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+ sysctl_ip6_maxfragpackets, "I",
"Default maximum number of outstanding fragmented IPv6 packets. "
"A value of 0 means no fragmented packets will be accepted, while a "
"a value of -1 means no limit");
@@ -561,8 +578,16 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
"Use the default scope zone when none is specified");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0,
- "Maximum allowed number of outstanding IPv6 packet fragments");
+ CTLFLAG_RW, &ip6_maxfrags, 0,
+ "Maximum allowed number of outstanding IPv6 packet fragments. "
+ "A value of 0 means no fragmented packets will be accepted, while a "
+ "a value of -1 means no limit");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
+ "Maximum number of reassembly queues per hash bucket");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
+ "Maximum allowed number of fragments per packet");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
"Enable path MTU discovery for multicast packets");
diff --git a/freebsd/sys/netinet6/in6_rmx.c b/freebsd/sys/netinet6/in6_rmx.c
index 402d9e87..38c89b9b 100644
--- a/freebsd/sys/netinet6/in6_rmx.c
+++ b/freebsd/sys/netinet6/in6_rmx.c
@@ -161,7 +161,7 @@ struct mtuex_arg {
struct rib_head *rnh;
time_t nextstop;
};
-static VNET_DEFINE(struct callout, rtq_mtutimer);
+VNET_DEFINE_STATIC(struct callout, rtq_mtutimer);
#define V_rtq_mtutimer VNET(rtq_mtutimer)
static int
@@ -211,7 +211,7 @@ in6_mtutimo(void *rock)
/*
* Initialize our routing tree.
*/
-static VNET_DEFINE(int, _in6_rt_was_here);
+VNET_DEFINE_STATIC(int, _in6_rt_was_here);
#define V__in6_rt_was_here VNET(_in6_rt_was_here)
int
diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c
index 92f7df4e..1cb71b88 100644
--- a/freebsd/sys/netinet6/in6_src.c
+++ b/freebsd/sys/netinet6/in6_src.c
@@ -129,7 +129,7 @@ static struct sx addrsel_sxlock;
#define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock)
#define ADDR_LABEL_NOTAPP (-1)
-static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy);
+VNET_DEFINE_STATIC(struct in6_addrpolicy, defaultaddrpolicy);
#define V_defaultaddrpolicy VNET(defaultaddrpolicy)
VNET_DEFINE(int, ip6_prefer_tempaddr) = 0;
@@ -975,7 +975,7 @@ in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
return(error);
/* XXX: this is redundant when called from in6_pcbbind */
- if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
lookupflags = INPLOOKUP_WILDCARD;
inp->inp_flags |= INP_ANONPORT;
@@ -1096,7 +1096,7 @@ struct addrsel_policyent {
TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
-static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab);
+VNET_DEFINE_STATIC(struct addrsel_policyhead, addrsel_policytab);
#define V_addrsel_policytab VNET(addrsel_policytab)
static void
diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h
index 6b4fe1ab..5ed0ae90 100644
--- a/freebsd/sys/netinet6/in6_var.h
+++ b/freebsd/sys/netinet6/in6_var.h
@@ -784,7 +784,7 @@ in6m_rele_locked(struct in6_multi_head *inmh, struct in6_multi *inm)
IN6_MULTI_LIST_LOCK_ASSERT();
if (--inm->in6m_refcount == 0) {
- in6m_disconnect(inm);
+ MPASS(inm->in6m_ifp == NULL);
inm->in6m_ifma->ifma_protospec = NULL;
MPASS(inm->in6m_ifma->ifma_llifma == NULL);
SLIST_INSERT_HEAD(inmh, inm, in6m_nrele);
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c
index 77e32da8..25ab624c 100644
--- a/freebsd/sys/netinet6/ip6_input.c
+++ b/freebsd/sys/netinet6/ip6_input.c
@@ -722,13 +722,15 @@ ip6_input(struct mbuf *m)
#endif
/*
* Try to forward the packet, but if we fail continue.
+ * ip6_tryforward() does not generate redirects, so fall
+ * through to normal processing if redirects are required.
* ip6_tryforward() does inbound and outbound packet firewall
* processing. If firewall has decided that destination becomes
* our local address, it sets M_FASTFWD_OURS flag. In this
* case skip another inbound firewall processing and update
* ip6 pointer.
*/
- if (V_ip6_forwarding != 0
+ if (V_ip6_forwarding != 0 && V_ip6_sendredirects == 0
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
&& (!IPSEC_ENABLED(ipv6) ||
IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0)
diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c
index a4a8cdf9..c1f66028 100644
--- a/freebsd/sys/netinet6/ip6_mroute.c
+++ b/freebsd/sys/netinet6/ip6_mroute.c
@@ -111,7 +111,6 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
-#include <net/raw_cb.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -141,19 +140,19 @@ extern int in6_mcast_loop;
extern struct domain inet6domain;
static const struct encaptab *pim6_encap_cookie;
-static const struct protosw in6_pim_protosw = {
- .pr_type = SOCK_RAW,
- .pr_domain = &inet6domain,
- .pr_protocol = IPPROTO_PIM,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim6_input,
- .pr_output = rip6_output,
- .pr_ctloutput = rip6_ctloutput,
- .pr_usrreqs = &rip6_usrreqs
-};
static int pim6_encapcheck(const struct mbuf *, int, int, void *);
+static int pim6_input(struct mbuf *, int, int, void *);
+
+static const struct encap_config ipv6_encap_cfg = {
+ .proto = IPPROTO_PIM,
+ .min_length = sizeof(struct ip6_hdr) + PIM_MINLEN,
+ .exact_match = 8,
+ .check = pim6_encapcheck,
+ .input = pim6_input
+};
-static VNET_DEFINE(int, ip6_mrouter_ver) = 0;
+
+VNET_DEFINE_STATIC(int, ip6_mrouter_ver) = 0;
#define V_ip6_mrouter_ver VNET(ip6_mrouter_ver)
SYSCTL_DECL(_net_inet6);
@@ -238,7 +237,7 @@ static struct mtx mif6_mtx;
#define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx)
#ifdef MRT6DEBUG
-static VNET_DEFINE(u_int, mrt6debug) = 0; /* debug level */
+VNET_DEFINE_STATIC(u_int, mrt6debug) = 0; /* debug level */
#define V_mrt6debug VNET(mrt6debug)
#define DEBUG_MFC 0x02
#define DEBUG_FORWARD 0x04
@@ -291,7 +290,7 @@ SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RW,
"PIM Statistics (struct pim6stat, netinet6/pim6_var.h)");
#define PIM6STAT_INC(name) pim6stat.name += 1
-static VNET_DEFINE(int, pim6);
+VNET_DEFINE_STATIC(int, pim6);
#define V_pim6 VNET(pim6)
/*
@@ -1697,16 +1696,12 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
* into the kernel.
*/
static int
-pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+pim6_encapcheck(const struct mbuf *m __unused, int off __unused,
+ int proto __unused, void *arg __unused)
{
-#ifdef DIAGNOSTIC
KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
-#endif
- if (proto != IPPROTO_PIM)
- return 0; /* not for us; reject the datagram. */
-
- return 64; /* claim the datagram. */
+ return (8); /* claim the datagram. */
}
/*
@@ -1716,20 +1711,18 @@ pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
* The only message processed is the REGISTER pim message; the pim header
* is stripped off, and the inner packet is passed to register_mforward.
*/
-int
-pim6_input(struct mbuf **mp, int *offp, int proto)
+static int
+pim6_input(struct mbuf *m, int off, int proto, void *arg __unused)
{
struct pim *pim; /* pointer to a pim struct */
struct ip6_hdr *ip6;
int pimlen;
- struct mbuf *m = *mp;
int minlen;
- int off = *offp;
PIM6STAT_INC(pim6s_rcv_total);
ip6 = mtod(m, struct ip6_hdr *);
- pimlen = m->m_pkthdr.len - *offp;
+ pimlen = m->m_pkthdr.len - off;
/*
* Validate lengths
@@ -1906,8 +1899,7 @@ pim6_input(struct mbuf **mp, int *offp, int proto)
* encapsulated ip6 header.
*/
pim6_input_to_daemon:
- rip6_input(&m, offp, proto);
- return (IPPROTO_DONE);
+ return (rip6_input(&m, &off, proto));
}
static int
@@ -1920,9 +1912,8 @@ ip6_mroute_modevent(module_t mod, int type, void *unused)
MFC6_LOCK_INIT();
MIF6_LOCK_INIT();
- pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
- pim6_encapcheck,
- (const struct protosw *)&in6_pim_protosw, NULL);
+ pim6_encap_cookie = ip6_encap_attach(&ipv6_encap_cfg,
+ NULL, M_WAITOK);
if (pim6_encap_cookie == NULL) {
printf("ip6_mroute: unable to attach pim6 encap\n");
MIF6_LOCK_DESTROY();
@@ -1943,7 +1934,7 @@ ip6_mroute_modevent(module_t mod, int type, void *unused)
return EINVAL;
if (pim6_encap_cookie) {
- encap_detach(pim6_encap_cookie);
+ ip6_encap_detach(pim6_encap_cookie);
pim6_encap_cookie = NULL;
}
X_ip6_mrouter_done();
diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c
index 1841829a..d3e530a6 100644
--- a/freebsd/sys/netinet6/ip6_output.c
+++ b/freebsd/sys/netinet6/ip6_output.c
@@ -201,18 +201,10 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
csum = 0xffff;
offset += m->m_pkthdr.csum_data; /* checksum offset */
- if (offset + sizeof(u_short) > m->m_len) {
- printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
- "csum_flags=%b\n", __func__, m->m_len, plen, offset,
- (int)m->m_pkthdr.csum_flags, CSUM_BITS);
- /*
- * XXX this should not happen, but if it does, the correct
- * behavior may be to insert the checksum in the appropriate
- * next mbuf in the chain.
- */
- return;
- }
- *(u_short *)(m->m_data + offset) = csum;
+ if (offset + sizeof(csum) > m->m_len)
+ m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
+ else
+ *(u_short *)mtodo(m, offset) = csum;
}
int
@@ -814,22 +806,16 @@ again:
error = netisr_queue(NETISR_IPV6, m);
goto done;
} else {
- RO_RTFREE(ro);
+ RO_INVALIDATE_CACHE(ro);
needfiblookup = 1; /* Redo the routing table lookup. */
- if (ro->ro_lle)
- LLE_FREE(ro->ro_lle); /* zeros ro_lle */
- ro->ro_lle = NULL;
}
}
/* See if fib was changed by packet filter. */
if (fibnum != M_GETFIB(m)) {
m->m_flags |= M_SKIP_FIREWALL;
fibnum = M_GETFIB(m);
- RO_RTFREE(ro);
+ RO_INVALIDATE_CACHE(ro);
needfiblookup = 1;
- if (ro->ro_lle)
- LLE_FREE(ro->ro_lle); /* zeros ro_lle */
- ro->ro_lle = NULL;
}
if (needfiblookup)
goto again;
@@ -1456,6 +1442,15 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(in6p);
error = 0;
break;
+ case SO_REUSEPORT_LB:
+ INP_WLOCK(in6p);
+ if ((so->so_options & SO_REUSEPORT_LB) != 0)
+ in6p->inp_flags2 |= INP_REUSEPORT_LB;
+ else
+ in6p->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(in6p);
+ error = 0;
+ break;
case SO_SETFIB:
INP_WLOCK(in6p);
in6p->inp_inc.inc_fibnum = so->so_fibnum;
@@ -1637,11 +1632,17 @@ do { \
error = EINVAL;
break;
}
+ INP_WLOCK(in6p);
+ if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(in6p);
+ return (ECONNRESET);
+ }
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(IPV6_HOPLIMIT,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
+ INP_WUNLOCK(in6p);
break;
}
@@ -1751,11 +1752,17 @@ do { \
break;
{
struct ip6_pktopts **optp;
+ INP_WLOCK(in6p);
+ if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(in6p);
+ return (ECONNRESET);
+ }
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(optname,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
+ INP_WUNLOCK(in6p);
break;
}
@@ -1837,10 +1844,16 @@ do { \
break;
optlen = sopt->sopt_valsize;
optbuf = optbuf_storage;
+ INP_WLOCK(in6p);
+ if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(in6p);
+ return (ECONNRESET);
+ }
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(optname, optbuf, optlen,
optp, (td != NULL) ? td->td_ucred : NULL,
uproto);
+ INP_WUNLOCK(in6p);
break;
}
#undef OPTSET
@@ -2287,7 +2300,9 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
if (*pktopt == NULL) {
*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
- M_WAITOK);
+ M_NOWAIT);
+ if (*pktopt == NULL)
+ return (ENOBUFS);
ip6_initpktopts(*pktopt);
}
opt = *pktopt;
diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h
index 74b5f89c..f235572d 100644
--- a/freebsd/sys/netinet6/ip6_var.h
+++ b/freebsd/sys/netinet6/ip6_var.h
@@ -301,8 +301,10 @@ VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */
VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */
VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly
* queue */
-VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly
+extern int ip6_maxfrags; /* Maximum fragments in reassembly
* queue */
+VNET_DECLARE(int, ip6_maxfragbucketsize); /* Maximum reassembly queues per bucket */
+VNET_DECLARE(int, ip6_maxfragsperpacket); /* Maximum fragments per packet */
VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */
VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */
VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
@@ -317,7 +319,8 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_mrouter VNET(ip6_mrouter)
#define V_ip6_sendredirects VNET(ip6_sendredirects)
#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
-#define V_ip6_maxfrags VNET(ip6_maxfrags)
+#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize)
+#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket)
#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv)
#define V_ip6_no_radr VNET(ip6_no_radr)
#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
@@ -404,6 +407,7 @@ int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
int route6_input(struct mbuf **, int *, int);
+void frag6_set_bucketsize(void);
void frag6_init(void);
int frag6_input(struct mbuf **, int *, int);
void frag6_slowtimo(void);
diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c
index 0c82d5ff..b00f03ef 100644
--- a/freebsd/sys/netinet6/mld6.c
+++ b/freebsd/sys/netinet6/mld6.c
@@ -209,11 +209,11 @@ static MALLOC_DEFINE(M_MLD, "mld", "mld state");
/*
* VIMAGE-wide globals.
*/
-static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0};
-static VNET_DEFINE(LIST_HEAD(, mld_ifsoftc), mli_head);
-static VNET_DEFINE(int, interface_timers_running6);
-static VNET_DEFINE(int, state_change_timers_running6);
-static VNET_DEFINE(int, current_state_timers_running6);
+VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0};
+VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head);
+VNET_DEFINE_STATIC(int, interface_timers_running6);
+VNET_DEFINE_STATIC(int, state_change_timers_running6);
+VNET_DEFINE_STATIC(int, current_state_timers_running6);
#define V_mld_gsrdelay VNET(mld_gsrdelay)
#define V_mli_head VNET(mli_head)
@@ -559,6 +559,7 @@ mld_ifdetach(struct ifnet *ifp)
continue;
inm = (struct in6_multi *)ifma->ifma_protospec;
if (inm->in6m_state == MLD_LEAVING_MEMBER) {
+ in6m_disconnect(inm);
in6m_rele_locked(&inmh, inm);
ifma->ifma_protospec = NULL;
}
@@ -1485,6 +1486,7 @@ mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm)
case MLD_REPORTING_MEMBER:
if (report_timer_expired) {
inm->in6m_state = MLD_IDLE_MEMBER;
+ in6m_disconnect(inm);
in6m_rele_locked(inmh, inm);
}
break;
@@ -1609,6 +1611,7 @@ mld_v2_process_group_timers(struct in6_multi_head *inmh,
if (inm->in6m_state == MLD_LEAVING_MEMBER &&
inm->in6m_scrv == 0) {
inm->in6m_state = MLD_NOT_MEMBER;
+ in6m_disconnect(inm);
in6m_rele_locked(inmh, inm);
}
}
@@ -1681,7 +1684,8 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
IF_ADDR_WLOCK(ifp);
restart:
CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
- if (ifma->ifma_addr->sa_family != AF_INET6)
+ if (ifma->ifma_addr->sa_family != AF_INET6 ||
+ ifma->ifma_protospec == NULL)
continue;
inm = (struct in6_multi *)ifma->ifma_protospec;
switch (inm->in6m_state) {
@@ -1698,6 +1702,7 @@ mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
* version, we need to release the final
* reference held for issuing the INCLUDE {}.
*/
+ in6m_disconnect(inm);
in6m_rele_locked(&inmh, inm);
ifma->ifma_protospec = NULL;
/* FALLTHROUGH */
@@ -1795,8 +1800,11 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
IN6_MULTI_LIST_LOCK_ASSERT();
MLD_LOCK_ASSERT();
-
+
ifp = in6m->in6m_ifp;
+ /* in process of being freed */
+ if (ifp == NULL)
+ return (0);
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
/* ia may be NULL if link-local address is tentative. */
@@ -1894,16 +1902,15 @@ mld_change_state(struct in6_multi *inm, const int delay)
*/
KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
ifp = inm->in6m_ifma->ifma_ifp;
- if (ifp != NULL) {
- /*
- * Sanity check that netinet6's notion of ifp is the
- * same as net's.
- */
- KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
- }
+ if (ifp == NULL)
+ return (0);
+ /*
+ * Sanity check that netinet6's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__));
MLD_LOCK();
-
mli = MLD_IFINFO(ifp);
KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
@@ -1997,9 +2004,9 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
* group around for the final INCLUDE {} enqueue.
*/
if (mli->mli_version == MLD_VERSION_2 &&
- inm->in6m_state == MLD_LEAVING_MEMBER)
- in6m_release_deferred(inm);
-
+ inm->in6m_state == MLD_LEAVING_MEMBER) {
+ inm->in6m_refcount--;
+ }
inm->in6m_state = MLD_REPORTING_MEMBER;
switch (mli->mli_version) {
diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c
index 6a36803f..f065815c 100644
--- a/freebsd/sys/netinet6/nd6.c
+++ b/freebsd/sys/netinet6/nd6.c
@@ -100,11 +100,11 @@ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage
* collection timer */
/* preventing too many loops in ND option parsing */
-static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
+VNET_DEFINE_STATIC(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper
* layer hints */
-static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
+VNET_DEFINE_STATIC(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
* ND entries */
#define V_nd6_maxndopt VNET(nd6_maxndopt)
#define V_nd6_maxqueuelen VNET(nd6_maxqueuelen)
@@ -144,7 +144,7 @@ static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
static int nd6_need_cache(struct ifnet *);
-static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
+VNET_DEFINE_STATIC(struct callout, nd6_slowtimo_ch);
#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch)
VNET_DEFINE(struct callout, nd6_timer_ch);
diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c
index d4ab38af..49810020 100644
--- a/freebsd/sys/netinet6/nd6_nbr.c
+++ b/freebsd/sys/netinet6/nd6_nbr.c
@@ -101,7 +101,7 @@ static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int);
-static VNET_DEFINE(int, dad_enhanced) = 1;
+VNET_DEFINE_STATIC(int, dad_enhanced) = 1;
#define V_dad_enhanced VNET(dad_enhanced)
SYSCTL_DECL(_net_inet6_ip6);
@@ -109,7 +109,7 @@ SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(dad_enhanced), 0,
"Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
-static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to
+VNET_DEFINE_STATIC(int, dad_maxtry) = 15; /* max # of *tries* to
transmit DAD packet */
#define V_dad_maxtry VNET(dad_maxtry)
@@ -1122,8 +1122,8 @@ struct dadq {
bool dad_ondadq; /* on dadq? Protected by DADQ_WLOCK. */
};
-static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
-static VNET_DEFINE(struct rwlock, dad_rwlock);
+VNET_DEFINE_STATIC(TAILQ_HEAD(, dadq), dadq);
+VNET_DEFINE_STATIC(struct rwlock, dad_rwlock);
#define V_dadq VNET(dadq)
#define V_dad_rwlock VNET(dad_rwlock)
diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c
index fab7c7c2..a60e7c66 100644
--- a/freebsd/sys/netinet6/nd6_rtr.c
+++ b/freebsd/sys/netinet6/nd6_rtr.c
@@ -96,7 +96,7 @@ static int rt6_deleteroute(const struct rtentry *, void *);
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
-static VNET_DEFINE(struct ifnet *, nd6_defifp);
+VNET_DEFINE_STATIC(struct ifnet *, nd6_defifp);
VNET_DEFINE(int, nd6_defifindex);
#define V_nd6_defifp VNET(nd6_defifp)
diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h
index 7afe89b9..7288c67e 100644
--- a/freebsd/sys/netinet6/pim6_var.h
+++ b/freebsd/sys/netinet6/pim6_var.h
@@ -53,10 +53,6 @@ struct pim6stat {
uint64_t pim6s_snd_registers; /* sent registers */
};
-#if (defined(KERNEL)) || (defined(_KERNEL))
-int pim6_input(struct mbuf **, int*, int);
-#endif /* KERNEL */
-
/*
* Identifiers for PIM sysctl nodes
*/
diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c
index c05399b3..9c3d7a61 100644
--- a/freebsd/sys/netinet6/raw_ip6.c
+++ b/freebsd/sys/netinet6/raw_ip6.c
@@ -167,6 +167,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
+ struct epoch_tracker et;
RIP6STAT_INC(rip6s_ipackets);
@@ -174,8 +175,8 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
ifp = m->m_pkthdr.rcvif;
- INP_INFO_RLOCK(&V_ripcbinfo);
- LIST_FOREACH(in6p, &V_ripcb, inp_list) {
+ INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
+ CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
/* XXX inp locking */
if ((in6p->inp_vflag & INP_IPV6) == 0)
continue;
@@ -293,7 +294,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
}
last = in6p;
}
- INP_INFO_RUNLOCK(&V_ripcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* Check AH/ESP integrity.
diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c
index 40218287..64b866dd 100644
--- a/freebsd/sys/netinet6/scope6.c
+++ b/freebsd/sys/netinet6/scope6.c
@@ -78,7 +78,7 @@ static struct mtx scope6_lock;
#define SCOPE6_UNLOCK() mtx_unlock(&scope6_lock)
#define SCOPE6_LOCK_ASSERT() mtx_assert(&scope6_lock, MA_OWNED)
-static VNET_DEFINE(struct scope6_id, sid_default);
+VNET_DEFINE_STATIC(struct scope6_id, sid_default);
#define V_sid_default VNET(sid_default)
#define SID(ifp) \
@@ -455,7 +455,7 @@ in6_clearscope(struct in6_addr *in6)
* Return the scope identifier or zero.
*/
uint16_t
-in6_getscope(struct in6_addr *in6)
+in6_getscope(const struct in6_addr *in6)
{
if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6))
diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h
index a2a9137d..f4e59a19 100644
--- a/freebsd/sys/netinet6/scope6_var.h
+++ b/freebsd/sys/netinet6/scope6_var.h
@@ -63,7 +63,7 @@ int sa6_checkzone(struct sockaddr_in6 *);
int sa6_checkzone_ifp(struct ifnet *, struct sockaddr_in6 *);
int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *);
int in6_clearscope(struct in6_addr *);
-uint16_t in6_getscope(struct in6_addr *);
+uint16_t in6_getscope(const struct in6_addr *);
uint32_t in6_getscopezone(const struct ifnet *, int);
void in6_splitscope(const struct in6_addr *, struct in6_addr *, uint32_t *);
struct ifnet* in6_getlinkifnet(uint32_t);
diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c
index fd963fb3..6a3391ee 100644
--- a/freebsd/sys/netinet6/sctp6_usrreq.c
+++ b/freebsd/sys/netinet6/sctp6_usrreq.c
@@ -273,6 +273,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
pktdst->sa_len != sizeof(struct sockaddr_in6)) {
return;
}
+
if ((unsigned)cmd >= PRC_NCMDS) {
return;
}
@@ -296,6 +297,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
if (ip6cp->ip6c_m == NULL) {
return;
}
+
/*
* Check if we can safely examine the ports and the
* verification tag of the SCTP common header.
@@ -304,6 +306,7 @@ sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
(int32_t)(ip6cp->ip6c_off + offsetof(struct sctphdr, checksum))) {
return;
}
+
/* Copy out the port numbers and the verification tag. */
memset(&sh, 0, sizeof(sh));
m_copydata(ip6cp->ip6c_m,
@@ -529,6 +532,7 @@ sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNU
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace));
if (error)
@@ -569,6 +573,7 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
if (addr) {
switch (addr->sa_family) {
#ifdef INET
@@ -918,7 +923,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
/* Set the connected flag so we can queue data */
soisconnecting(so);
}
- stcb->asoc.state = SCTP_STATE_COOKIE_WAIT;
+ SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
/* initialize authentication parameters for the assoc */
@@ -1105,6 +1110,7 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
/* allow v6 addresses precedence */
error = sctp6_getaddr(so, nam);
#ifdef INET
@@ -1140,6 +1146,7 @@ sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
+
/* allow v6 addresses precedence */
error = sctp6_peeraddr(so, nam);
#ifdef INET
diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h
index e3c4359a..4ad0ca28 100644
--- a/freebsd/sys/netinet6/sctp6_var.h
+++ b/freebsd/sys/netinet6/sctp6_var.h
@@ -45,11 +45,11 @@ extern struct pr_usrreqs sctp6_usrreqs;
int sctp6_input(struct mbuf **, int *, int);
int sctp6_input_with_port(struct mbuf **, int *, uint16_t);
-int
+int
sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct proc *);
void sctp6_ctlinput(int, struct sockaddr *, void *);
-void
+void
sctp6_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
uint8_t, uint8_t, uint32_t);
#endif
diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c
index c2b32eb1..67ed0e35 100644
--- a/freebsd/sys/netinet6/udp6_usrreq.c
+++ b/freebsd/sys/netinet6/udp6_usrreq.c
@@ -216,6 +216,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
int off = *offp;
int cscov_partial;
int plen, ulen;
+ struct epoch_tracker et;
struct sockaddr_in6 fromsa[2];
struct m_tag *fwd_tag;
uint16_t uh_sum;
@@ -302,7 +303,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
struct inpcbhead *pcblist;
struct ip6_moptions *imo;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_RLOCK_ET(pcbinfo, et);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -320,7 +321,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
*/
pcblist = udp_get_pcblist(nxt);
last = NULL;
- LIST_FOREACH(inp, pcblist, inp_list) {
+ CK_LIST_FOREACH(inp, pcblist, inp_list) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
if (inp->inp_lport != uh->uh_dport)
@@ -357,6 +358,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
int blocked;
INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
+ continue;
+ }
bzero(&mcaddr, sizeof(struct sockaddr_in6));
mcaddr.sin6_len = sizeof(struct sockaddr_in6);
@@ -384,10 +389,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
NULL) {
INP_RLOCK(last);
- UDP_PROBE(receive, NULL, last, ip6,
- last, uh);
- if (udp6_append(last, n, off, fromsa))
- goto inp_lost;
+ if (__predict_true(last->inp_flags2 & INP_FREED) == 0) {
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last,
+ ip6, last, uh);
+ else
+ UDP_PROBE(receive, NULL, last,
+ ip6, last, uh);
+ if (udp6_append(last, n, off, fromsa))
+ goto inp_lost;
+ }
INP_RUNLOCK(last);
}
}
@@ -401,7 +412,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* will never clear these options after setting them.
*/
if ((last->inp_socket->so_options &
- (SO_REUSEPORT|SO_REUSEADDR)) == 0)
+ (SO_REUSEPORT|SO_REUSEPORT_LB|SO_REUSEADDR)) == 0)
break;
}
@@ -416,10 +427,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
goto badheadlocked;
}
INP_RLOCK(last);
- INP_INFO_RUNLOCK(pcbinfo);
- UDP_PROBE(receive, NULL, last, ip6, last, uh);
- if (udp6_append(last, m, off, fromsa) == 0)
+ if (__predict_true(last->inp_flags2 & INP_FREED) == 0) {
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, last, ip6, last, uh);
+ else
+ UDP_PROBE(receive, NULL, last, ip6, last, uh);
+ if (udp6_append(last, m, off, fromsa) == 0)
+ INP_RUNLOCK(last);
+ } else
INP_RUNLOCK(last);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
inp_lost:
return (IPPROTO_DONE);
}
@@ -475,6 +492,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
ip6_sprintf(ip6bufs, &ip6->ip6_src),
ntohs(uh->uh_sport));
}
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, NULL, ip6, NULL, uh);
+ else
+ UDP_PROBE(receive, NULL, NULL, ip6, NULL, uh);
UDPSTAT_INC(udps_noport);
if (m->m_flags & M_MCAST) {
printf("UDP6: M_MCAST is set in a unicast packet.\n");
@@ -495,13 +516,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
return (IPPROTO_DONE);
}
}
- UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(receive, NULL, inp, ip6, inp, uh);
+ else
+ UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
if (udp6_append(inp, m, off, fromsa) == 0)
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
badheadlocked:
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_RUNLOCK_ET(pcbinfo, et);
badunlocked:
if (m)
m_freem(m);
@@ -657,35 +681,38 @@ udp6_getcred(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
+#define UH_WLOCKED 2
+#define UH_RLOCKED 1
+#define UH_UNLOCKED 0
static int
-udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
- struct mbuf *control, struct thread *td)
+udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
+ struct sockaddr *addr6, struct mbuf *control, struct thread *td)
{
- u_int32_t ulen = m->m_pkthdr.len;
- u_int32_t plen = sizeof(struct udphdr) + ulen;
+ struct inpcbinfo *pcbinfo;
+ struct inpcb *inp;
struct ip6_hdr *ip6;
struct udphdr *udp6;
struct in6_addr *laddr, *faddr, in6a;
- struct sockaddr_in6 *sin6 = NULL;
- int cscov_partial = 0;
- int scope_ambiguous = 0;
- u_short fport;
- int error = 0;
- uint8_t nxt;
- uint16_t cscov = 0;
struct ip6_pktopts *optp, opt;
- int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
- int flags;
- struct sockaddr_in6 tmp;
+ struct sockaddr_in6 *sin6, tmp;
+ struct epoch_tracker et;
+ int cscov_partial, error, flags, hlen, scope_ambiguous;
+ u_int32_t ulen, plen;
+ uint16_t cscov;
+ u_short fport;
+ uint8_t nxt, unlock_udbinfo;
- INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
+ /* addr6 has been validated in udp6_send(). */
+ sin6 = (struct sockaddr_in6 *)addr6;
- if (addr6) {
- /* addr6 has been validated in udp6_send(). */
- sin6 = (struct sockaddr_in6 *)addr6;
+ /*
+ * In contrast to IPv4 we do not validate the max. packet length
+ * here due to IPv6 Jumbograms (RFC2675).
+ */
- /* protect *sin6 from overwrites */
+ scope_ambiguous = 0;
+ if (sin6) {
+ /* Protect *addr6 from overwrites. */
tmp = *sin6;
sin6 = &tmp;
@@ -699,22 +726,86 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
*/
if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
scope_ambiguous = 1;
- if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
+ if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) {
+ if (control)
+ m_freem(control);
+ m_freem(m);
return (error);
+ }
}
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ INP_RLOCK(inp);
nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE;
+
+#ifdef INET
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
+ int hasv4addr;
+
+ if (sin6 == NULL)
+ hasv4addr = (inp->inp_vflag & INP_IPV4);
+ else
+ hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
+ ? 1 : 0;
+ if (hasv4addr) {
+ struct pr_usrreqs *pru;
+
+ /*
+ * XXXRW: We release UDP-layer locks before calling
+ * udp_send() in order to avoid recursion. However,
+ * this does mean there is a short window where inp's
+ * fields are unstable. Could this lead to a
+ * potential race in which the factors causing us to
+ * select the UDPv4 output routine are invalidated?
+ */
+ INP_RUNLOCK(inp);
+ if (sin6)
+ in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
+ /* addr will just be freed in sendit(). */
+ return ((*pru->pru_send)(so, flags_arg, m,
+ (struct sockaddr *)sin6, control, td));
+ }
+ }
+#endif
+
if (control) {
if ((error = ip6_setpktopts(control, &opt,
- inp->in6p_outputopts, td->td_ucred, nxt)) != 0)
- goto release;
+ inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
+ INP_RUNLOCK(inp);
+ ip6_clearpktopts(&opt, -1);
+ if (control)
+ m_freem(control);
+ m_freem(m);
+ return (error);
+ }
optp = &opt;
} else
optp = inp->in6p_outputopts;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
+ if (sin6 != NULL &&
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) {
+ INP_RUNLOCK(inp);
+ /*
+ * XXX there is a short window here which could lead to a race;
+ * should we re-check that what got us here is still valid?
+ */
+ INP_WLOCK(inp);
+ INP_HASH_WLOCK(pcbinfo);
+ unlock_udbinfo = UH_WLOCKED;
+ } else if (sin6 != NULL &&
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
+ inp->inp_lport == 0)) {
+ INP_HASH_RLOCK_ET(pcbinfo, et);
+ unlock_udbinfo = UH_RLOCKED;
+ } else
+ unlock_udbinfo = UH_UNLOCKED;
+
if (sin6) {
- faddr = &sin6->sin6_addr;
/*
* Since we saw no essential reason for calling in_pcbconnect,
@@ -733,85 +824,47 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
goto release;
}
- fport = sin6->sin6_port; /* allow 0 port */
+ /*
+ * Given we handle the v4mapped case in the INET block above
+ * assert here that it must not happen anymore.
+ */
+ KASSERT(!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr),
+ ("%s: sin6(%p)->sin6_addr is v4mapped which we "
+ "should have handled.", __func__, sin6));
- if (IN6_IS_ADDR_V4MAPPED(faddr)) {
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
- /*
- * I believe we should explicitly discard the
- * packet when mapped addresses are disabled,
- * rather than send the packet as an IPv6 one.
- * If we chose the latter approach, the packet
- * might be sent out on the wire based on the
- * default route, the situation which we'd
- * probably want to avoid.
- * (20010421 jinmei@kame.net)
- */
- error = EINVAL;
- goto release;
- }
- if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
- !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
- /*
- * when remote addr is an IPv4-mapped address,
- * local addr should not be an IPv6 address,
- * since you cannot determine how to map IPv6
- * source address to IPv4.
- */
- error = EINVAL;
- goto release;
- }
+ /* This only requires read-locking. */
+ error = in6_selectsrc_socket(sin6, optp, inp,
+ td->td_ucred, scope_ambiguous, &in6a, NULL);
+ if (error)
+ goto release;
+ laddr = &in6a;
- af = AF_INET;
- }
+ if (inp->inp_lport == 0) {
- if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
- error = in6_selectsrc_socket(sin6, optp, inp,
- td->td_ucred, scope_ambiguous, &in6a, NULL);
- if (error)
+ INP_WLOCK_ASSERT(inp);
+ error = in6_pcbsetport(laddr, inp, td->td_ucred);
+ if (error != 0) {
+ /* Undo an address bind that may have occurred. */
+ inp->in6p_laddr = in6addr_any;
goto release;
- laddr = &in6a;
- } else
- laddr = &inp->in6p_laddr; /* XXX */
- if (laddr == NULL) {
- if (error == 0)
- error = EADDRNOTAVAIL;
- goto release;
- }
- if (inp->inp_lport == 0 &&
- (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) {
- /* Undo an address bind that may have occurred. */
- inp->in6p_laddr = in6addr_any;
- goto release;
+ }
}
+ faddr = &sin6->sin6_addr;
+ fport = sin6->sin6_port; /* allow 0 port */
+
} else {
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = ENOTCONN;
goto release;
}
- if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
- /*
- * XXX: this case would happen when the
- * application sets the V6ONLY flag after
- * connecting the foreign address.
- * Such applications should be fixed,
- * so we bark here.
- */
- log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
- "option was set for a connected socket\n");
- error = EINVAL;
- goto release;
- } else
- af = AF_INET;
- }
laddr = &inp->in6p_laddr;
faddr = &inp->in6p_faddr;
fport = inp->inp_fport;
}
- if (af == AF_INET)
- hlen = sizeof(struct ip);
+ ulen = m->m_pkthdr.len;
+ plen = sizeof(struct udphdr) + ulen;
+ hlen = sizeof(struct ip6_hdr);
/*
* Calculate data length and get a mbuf
@@ -826,6 +879,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
/*
* Stuff checksum and output datagram.
*/
+ cscov = cscov_partial = 0;
udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
udp6->uh_dport = fport;
@@ -848,59 +902,59 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
udp6->uh_ulen = 0;
udp6->uh_sum = 0;
- switch (af) {
- case AF_INET6:
- ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
- ip6->ip6_plen = htons((u_short)plen);
- ip6->ip6_nxt = nxt;
- ip6->ip6_hlim = in6_selecthlim(inp, NULL);
- ip6->ip6_src = *laddr;
- ip6->ip6_dst = *faddr;
-
- if (cscov_partial) {
- if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
- sizeof(struct ip6_hdr), plen, cscov)) == 0)
- udp6->uh_sum = 0xffff;
- } else {
- udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
- m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
- m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
- }
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
+ ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_plen = htons((u_short)plen);
+ ip6->ip6_nxt = nxt;
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ ip6->ip6_src = *laddr;
+ ip6->ip6_dst = *faddr;
+#ifdef MAC
+ mac_inpcb_create_mbuf(inp, m);
+#endif
+
+ if (cscov_partial) {
+ if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
+ sizeof(struct ip6_hdr), plen, cscov)) == 0)
+ udp6->uh_sum = 0xffff;
+ } else {
+ udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ }
+
+ flags = 0;
#ifdef RSS
- {
- uint32_t hash_val, hash_type;
- uint8_t pr;
+ {
+ uint32_t hash_val, hash_type;
+ uint8_t pr;
- pr = inp->inp_socket->so_proto->pr_protocol;
- /*
- * Calculate an appropriate RSS hash for UDP and
- * UDP Lite.
- *
- * The called function will take care of figuring out
- * whether a 2-tuple or 4-tuple hash is required based
- * on the currently configured scheme.
- *
- * Later later on connected socket values should be
- * cached in the inpcb and reused, rather than constantly
- * re-calculating it.
- *
- * UDP Lite is a different protocol number and will
- * likely end up being hashed as a 2-tuple until
- * RSS / NICs grow UDP Lite protocol awareness.
- */
- if (rss_proto_software_hash_v6(faddr, laddr, fport,
- inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
- m->m_pkthdr.flowid = hash_val;
- M_HASHTYPE_SET(m, hash_type);
- }
+ pr = inp->inp_socket->so_proto->pr_protocol;
+ /*
+ * Calculate an appropriate RSS hash for UDP and
+ * UDP Lite.
+ *
+ * The called function will take care of figuring out
+ * whether a 2-tuple or 4-tuple hash is required based
+ * on the currently configured scheme.
+ *
+ * Later on connected socket values should be
+ * cached in the inpcb and reused, rather than constantly
+ * re-calculating it.
+ *
+ * UDP Lite is a different protocol number and will
+ * likely end up being hashed as a 2-tuple until
+ * RSS / NICs grow UDP Lite protocol awareness.
+ */
+ if (rss_proto_software_hash_v6(faddr, laddr, fport,
+ inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
}
-#endif
- flags = 0;
-#ifdef RSS
+
/*
* Don't override with the inp cached flowid.
*
@@ -908,27 +962,46 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
* be incorrect.
*/
flags |= IP_NODEFAULTFLOWID;
+ }
#endif
+ UDPSTAT_INC(udps_opackets);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_HASH_WUNLOCK(pcbinfo);
+ else if (unlock_udbinfo == UH_RLOCKED)
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
+ else
UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
- UDPSTAT_INC(udps_opackets);
- error = ip6_output(m, optp, &inp->inp_route6, flags,
- inp->in6p_moptions, NULL, inp);
- break;
- case AF_INET:
- error = EAFNOSUPPORT;
- goto release;
+ error = ip6_output(m, optp, &inp->inp_route6, flags,
+ inp->in6p_moptions, NULL, inp);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+
+ if (control) {
+ ip6_clearpktopts(&opt, -1);
+ m_freem(control);
}
- goto releaseopt;
+ return (error);
release:
- m_freem(m);
-
-releaseopt:
+ if (unlock_udbinfo == UH_WLOCKED) {
+ INP_HASH_WUNLOCK(pcbinfo);
+ INP_WUNLOCK(inp);
+ } else if (unlock_udbinfo == UH_RLOCKED) {
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ INP_RUNLOCK(inp);
+ } else
+ INP_RUNLOCK(inp);
if (control) {
ip6_clearpktopts(&opt, -1);
m_freem(control);
}
+ m_freem(m);
+
return (error);
}
@@ -1232,15 +1305,8 @@ static int
udp6_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
- struct inpcb *inp;
- struct inpcbinfo *pcbinfo;
- int error = 0;
-
- pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
+ int error;
- INP_WLOCK(inp);
if (addr) {
if (addr->sa_len != sizeof(struct sockaddr_in6)) {
error = EINVAL;
@@ -1252,53 +1318,11 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
}
}
-#ifdef INET
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
- int hasv4addr;
- struct sockaddr_in6 *sin6 = NULL;
-
- if (addr == NULL)
- hasv4addr = (inp->inp_vflag & INP_IPV4);
- else {
- sin6 = (struct sockaddr_in6 *)addr;
- hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
- ? 1 : 0;
- }
- if (hasv4addr) {
- struct pr_usrreqs *pru;
- uint8_t nxt;
-
- nxt = (inp->inp_socket->so_proto->pr_protocol ==
- IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE;
- /*
- * XXXRW: We release UDP-layer locks before calling
- * udp_send() in order to avoid recursion. However,
- * this does mean there is a short window where inp's
- * fields are unstable. Could this lead to a
- * potential race in which the factors causing us to
- * select the UDPv4 output routine are invalidated?
- */
- INP_WUNLOCK(inp);
- if (sin6)
- in6_sin6_2_sin_in_sock(addr);
- pru = inetsw[ip_protox[nxt]].pr_usrreqs;
- /* addr will just be freed in sendit(). */
- return ((*pru->pru_send)(so, flags, m, addr, control,
- td));
- }
- }
-#endif
-#ifdef MAC
- mac_inpcb_create_mbuf(inp, m);
-#endif
- INP_HASH_WLOCK(pcbinfo);
- error = udp6_output(inp, m, addr, control, td);
- INP_HASH_WUNLOCK(pcbinfo);
- INP_WUNLOCK(inp);
- return (error);
+ return (udp6_output(so, flags, m, addr, control, td));
bad:
- INP_WUNLOCK(inp);
+ if (control)
+ m_freem(control);
m_freem(m);
return (error);
}