summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/netinet6
diff options
context:
space:
mode:
authorSebastian Huber <sebastian.huber@embedded-brains.de>2016-10-07 15:10:20 +0200
committerSebastian Huber <sebastian.huber@embedded-brains.de>2017-01-10 09:53:31 +0100
commitc40e45b75eb76d79a05c7fa85c1fa9b5c728a12f (patch)
treead4f2519067709f00ab98b3c591186c26dc3a21f /freebsd/sys/netinet6
parentuserspace-header-gen.py: Simplify program ports (diff)
downloadrtems-libbsd-c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f.tar.bz2
Update to FreeBSD head 2016-08-23
Git mirror commit 9fe7c416e6abb28b1398fd3e5687099846800cfd.
Diffstat (limited to 'freebsd/sys/netinet6')
-rw-r--r--freebsd/sys/netinet6/dest6.c1
-rw-r--r--freebsd/sys/netinet6/frag6.c109
-rw-r--r--freebsd/sys/netinet6/icmp6.c336
-rw-r--r--freebsd/sys/netinet6/in6.c1836
-rw-r--r--freebsd/sys/netinet6/in6.h70
-rw-r--r--freebsd/sys/netinet6/in6_cksum.c28
-rw-r--r--freebsd/sys/netinet6/in6_fib.c278
-rw-r--r--freebsd/sys/netinet6/in6_fib.h61
-rw-r--r--freebsd/sys/netinet6/in6_gif.c387
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.c205
-rw-r--r--freebsd/sys/netinet6/in6_ifattach.h2
-rw-r--r--freebsd/sys/netinet6/in6_mcast.c74
-rw-r--r--freebsd/sys/netinet6/in6_pcb.c191
-rw-r--r--freebsd/sys/netinet6/in6_pcb.h5
-rw-r--r--freebsd/sys/netinet6/in6_proto.c343
-rw-r--r--freebsd/sys/netinet6/in6_rmx.c126
-rw-r--r--freebsd/sys/netinet6/in6_rss.h58
-rw-r--r--freebsd/sys/netinet6/in6_src.c320
-rw-r--r--freebsd/sys/netinet6/in6_var.h217
-rw-r--r--freebsd/sys/netinet6/ip6_forward.c216
-rw-r--r--freebsd/sys/netinet6/ip6_id.c11
-rw-r--r--freebsd/sys/netinet6/ip6_input.c712
-rw-r--r--freebsd/sys/netinet6/ip6_ipsec.h3
-rw-r--r--freebsd/sys/netinet6/ip6_mroute.c75
-rw-r--r--freebsd/sys/netinet6/ip6_mroute.h40
-rw-r--r--freebsd/sys/netinet6/ip6_output.c783
-rw-r--r--freebsd/sys/netinet6/ip6_var.h159
-rw-r--r--freebsd/sys/netinet6/ip6protosw.h35
-rw-r--r--freebsd/sys/netinet6/ip_fw_nat64.h154
-rw-r--r--freebsd/sys/netinet6/ip_fw_nptv6.h51
-rw-r--r--freebsd/sys/netinet6/mld6.c336
-rw-r--r--freebsd/sys/netinet6/mld6_var.h65
-rw-r--r--freebsd/sys/netinet6/nd6.c1911
-rw-r--r--freebsd/sys/netinet6/nd6.h96
-rw-r--r--freebsd/sys/netinet6/nd6_nbr.c787
-rw-r--r--freebsd/sys/netinet6/nd6_rtr.c493
-rw-r--r--freebsd/sys/netinet6/pim6_var.h21
-rw-r--r--freebsd/sys/netinet6/raw_ip6.c66
-rw-r--r--freebsd/sys/netinet6/raw_ip6.h24
-rw-r--r--freebsd/sys/netinet6/route6.c1
-rw-r--r--freebsd/sys/netinet6/scope6.c278
-rw-r--r--freebsd/sys/netinet6/scope6_var.h13
-rw-r--r--freebsd/sys/netinet6/sctp6_usrreq.c458
-rw-r--r--freebsd/sys/netinet6/sctp6_var.h7
-rw-r--r--freebsd/sys/netinet6/udp6_usrreq.c390
-rw-r--r--freebsd/sys/netinet6/udp6_var.h1
46 files changed, 6126 insertions, 5707 deletions
diff --git a/freebsd/sys/netinet6/dest6.c b/freebsd/sys/netinet6/dest6.c
index c8c6f547..94386ddd 100644
--- a/freebsd/sys/netinet6/dest6.c
+++ b/freebsd/sys/netinet6/dest6.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c
index 511c8601..4cbd3000 100644
--- a/freebsd/sys/netinet6/frag6.c
+++ b/freebsd/sys/netinet6/frag6.c
@@ -34,11 +34,14 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_rss.h>
+
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
+#include <sys/eventhandler.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <rtems/bsd/sys/errno.h>
@@ -47,6 +50,8 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
+#include <net/netisr.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -60,13 +65,6 @@ __FBSDID("$FreeBSD$");
#include <security/mac/mac_framework.h>
-/*
- * Define it to get a correct behavior on per-interface statistics.
- * You will need to perform an extra routing table lookup, per fragment,
- * to do it. This may, or may not be, a performance hit.
- */
-#define IN6_IFSTAT_STRICT
-
static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
static void frag6_deq(struct ip6asfrag *);
static void frag6_insque(struct ip6q *, struct ip6q *);
@@ -139,7 +137,7 @@ frag6_init(void)
* fragment's Fragment header.
* -> should grab it from the first fragment only
*
- * The following note also contradicts with fragment rule - noone is going to
+ * The following note also contradicts with fragment rule - no one is going to
* send different fragment with different next header field.
*
* additional note (p22):
@@ -161,14 +159,17 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
struct ip6_frag *ip6f;
struct ip6q *q6;
struct ip6asfrag *af6, *ip6af, *af6dwn;
-#ifdef IN6_IFSTAT_STRICT
struct in6_ifaddr *ia;
-#endif
int offset = *offp, nxt, i, next;
int first_frag = 0;
int fragoff, frgpartlen; /* must be larger than u_int16_t */
struct ifnet *dstifp;
u_int8_t ecn, ecn0;
+#ifdef RSS
+ struct m_tag *mtag;
+ struct ip6_direct_ctx *ip6dc;
+#endif
+
#if 0
char ip6buf[INET6_ADDRSTRLEN];
#endif
@@ -184,18 +185,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
#endif
dstifp = NULL;
-#ifdef IN6_IFSTAT_STRICT
/* find the destination interface of the packet. */
- if ((ia = ip6_getdstifaddr(m)) != NULL) {
+ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia != NULL) {
dstifp = ia->ia_ifp;
ifa_free(&ia->ia_ifa);
}
-#else
- /* we are violating the spec, this is not the destination interface */
- if ((m->m_flags & M_PKTHDR) != 0)
- dstifp = m->m_pkthdr.rcvif;
-#endif
-
/* jumbo payload can't contain a fragment header */
if (ip6->ip6_plen == 0) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
@@ -539,8 +534,8 @@ insert:
frag6_deq(af6);
while (t->m_next)
t = t->m_next;
- t->m_next = IP6_REASS_MBUF(af6);
- m_adj(t->m_next, af6->ip6af_offset);
+ m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
+ m_cat(t, IP6_REASS_MBUF(af6));
free(af6, M_FTABLE);
af6 = af6dwn;
}
@@ -557,27 +552,16 @@ insert:
*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
#endif
- /* Delete frag6 header */
- if (m->m_len >= offset + sizeof(struct ip6_frag)) {
- /* This is the only possible case with !PULLDOWN_TEST */
- ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
- offset);
- m->m_data += sizeof(struct ip6_frag);
- m->m_len -= sizeof(struct ip6_frag);
- } else {
- /* this comes with no copy if the boundary is on cluster */
- if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
- frag6_remque(q6);
- V_frag6_nfrags -= q6->ip6q_nfrag;
+ if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
+ frag6_remque(q6);
+ V_frag6_nfrags -= q6->ip6q_nfrag;
#ifdef MAC
- mac_ip6q_destroy(q6);
+ mac_ip6q_destroy(q6);
#endif
- free(q6, M_FTABLE);
- V_frag6_nfragpackets--;
- goto dropfrag;
- }
- m_adj(t, sizeof(struct ip6_frag));
- m_cat(m, t);
+ free(q6, M_FTABLE);
+ V_frag6_nfragpackets--;
+
+ goto dropfrag;
}
/*
@@ -604,9 +588,31 @@ insert:
m->m_pkthdr.len = plen;
}
+#ifdef RSS
+ mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
+ M_NOWAIT);
+ if (mtag == NULL)
+ goto dropfrag;
+
+ ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
+ ip6dc->ip6dc_nxt = nxt;
+ ip6dc->ip6dc_off = offset;
+
+ m_tag_prepend(m, mtag);
+#endif
+
+ IP6Q_UNLOCK();
IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
+#ifdef RSS
+ /*
+ * Queue/dispatch for reprocessing.
+ */
+ netisr_dispatch(NETISR_IPV6_DIRECT, m);
+ return IPPROTO_DONE;
+#endif
+
/*
* Tell launch routine the next header
*/
@@ -614,7 +620,6 @@ insert:
*mp = m;
*offp = offset;
- IP6Q_UNLOCK();
return nxt;
dropfrag:
@@ -791,3 +796,27 @@ frag6_drain(void)
IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
+
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
+{
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct mbuf *t;
+
+ /* Delete frag6 header. */
+ if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+ /* This is the only possible case with !PULLDOWN_TEST. */
+ bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
+ offset);
+ m->m_data += sizeof(struct ip6_frag);
+ m->m_len -= sizeof(struct ip6_frag);
+ } else {
+ /* This comes with no copy if the boundary is on cluster. */
+ if ((t = m_split(m, offset, wait)) == NULL)
+ return (ENOMEM);
+ m_adj(t, sizeof(struct ip6_frag));
+ m_cat(m, t);
+ }
+
+ return (0);
+}
diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c
index 569b5dfa..6e3a4873 100644
--- a/freebsd/sys/netinet6/icmp6.c
+++ b/freebsd/sys/netinet6/icmp6.c
@@ -65,9 +65,10 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */
+
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/sys/param.h>
#include <sys/domain.h>
@@ -87,6 +88,7 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
@@ -100,6 +102,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <netinet/tcp_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
@@ -109,14 +112,14 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#include <netinet6/send.h>
-#ifdef IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
-
extern struct domain inet6domain;
-VNET_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
+VNET_PCPUSTAT_SYSINIT(icmp6stat);
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
+#endif /* VIMAGE */
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
VNET_DECLARE(struct inpcbhead, ripcb);
@@ -157,7 +160,7 @@ void
kmod_icmp6stat_inc(int statnum)
{
- (*((u_quad_t *)&V_icmp6stat + statnum))++;
+ counter_u64_add(VNET(icmp6stat)[statnum], 1);
}
static void
@@ -362,7 +365,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
- M_PREPEND(m, preplen, M_DONTWAIT); /* FIB is also copied over. */
+ M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */
if (m == NULL) {
nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
return;
@@ -474,22 +477,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto freeit;
}
- if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
- /*
- * Deliver very specific ICMP6 type only.
- * This is important to deliver TOOBIG. Otherwise PMTUD
- * will not work.
- */
- switch (icmp6->icmp6_type) {
- case ICMP6_DST_UNREACH:
- case ICMP6_PACKET_TOO_BIG:
- case ICMP6_TIME_EXCEEDED:
- break;
- default:
- goto freeit;
- }
- }
-
ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]);
icmp6_ifstat_inc(ifp, ifs6_in_msg);
if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
@@ -500,15 +487,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
switch (code) {
case ICMP6_DST_UNREACH_NOROUTE:
+ case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */
code = PRC_UNREACH_NET;
break;
case ICMP6_DST_UNREACH_ADMIN:
icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
break;
- case ICMP6_DST_UNREACH_ADDR:
- code = PRC_HOSTDEAD;
- break;
case ICMP6_DST_UNREACH_BEYONDSCOPE:
/* I mean "source address was incorrect." */
code = PRC_PARAMPROB;
@@ -575,28 +560,21 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
/* Give up remote */
break;
}
- if ((n->m_flags & M_EXT) != 0
+ if (!M_WRITABLE(n)
|| n->m_len < off + sizeof(struct icmp6_hdr)) {
struct mbuf *n0 = n;
- const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
int n0len;
- MGETHDR(n, M_DONTWAIT, n0->m_type);
- n0len = n0->m_pkthdr.len; /* save for use below */
- if (n)
- M_MOVE_PKTHDR(n, n0); /* FIB copied. */
- if (n && maxlen >= MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
+ CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN);
+ n = m_gethdr(M_NOWAIT, n0->m_type);
if (n == NULL) {
/* Give up remote */
m_freem(n0);
break;
}
+
+ m_move_pkthdr(n, n0); /* FIB copied. */
+ n0len = n0->m_pkthdr.len; /* save for use below */
/*
* Copy IPv6 and ICMPv6 only.
*/
@@ -683,31 +661,27 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
} else {
struct prison *pr;
u_char *p;
- int maxlen, maxhlen, hlen;
+ int maxhlen, hlen;
/*
* XXX: this combination of flags is pointless,
* but should we keep this for compatibility?
*/
- if ((V_icmp6_nodeinfo & 5) != 5)
+ if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK |
+ ICMP6_NODEINFO_TMPADDROK)) !=
+ (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK))
break;
if (code != 0)
goto badcode;
- maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
- if (maxlen >= MCLBYTES) {
+
+ CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN);
+ n = m_gethdr(M_NOWAIT, m->m_type);
+ if (n == NULL) {
/* Give up remote */
break;
}
- MGETHDR(n, M_DONTWAIT, m->m_type);
- if (n && maxlen > MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
- if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) {
+ if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
/*
* Previous code did a blind M_COPY_PKTHDR
* and said "just for rcvif". If true, then
@@ -718,13 +692,8 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
m_free(n);
n = NULL;
}
- if (n == NULL) {
- /* Give up remote */
- break;
- }
- n->m_pkthdr.rcvif = NULL;
- n->m_len = 0;
- maxhlen = M_TRAILINGSPACE(n) - maxlen;
+ maxhlen = M_TRAILINGSPACE(n) -
+ (sizeof(*nip6) + sizeof(*nicmp6) + 4);
#ifndef __rtems__
pr = curthread->td_ucred->cr_prison;
#else /* __rtems__ */
@@ -771,7 +740,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_router_solicit))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* give up local */
/* Send incoming SeND packet to user space. */
@@ -809,7 +778,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_router_advert))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* Send incoming SeND-protected/ND packet to user space. */
if (send_sendso_input_hook != NULL) {
@@ -840,7 +809,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_solicit))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
if (send_sendso_input_hook != NULL) {
error = send_sendso_input_hook(m, ifp,
SND_IN, ip6len);
@@ -869,7 +838,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_advert))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
/* Send incoming SeND-protected/ND packet to user space. */
if (send_sendso_input_hook != NULL) {
@@ -900,7 +869,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badcode;
if (icmp6len < sizeof(struct nd_redirect))
goto badlen;
- if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
+ if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
if (send_sendso_input_hook != NULL) {
error = send_sendso_input_hook(m, ifp,
SND_IN, ip6len);
@@ -1181,8 +1150,6 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
ip6cp.ip6c_src = &icmp6src;
ip6cp.ip6c_nxt = nxt;
- m_addr_changed(m);
-
if (icmp6type == ICMP6_PACKET_TOO_BIG) {
notifymtu = ntohl(icmp6->icmp6_mtu);
ip6cp.ip6c_cmdarg = (void *)&notifymtu;
@@ -1329,7 +1296,8 @@ ni6_input(struct mbuf *m, int off)
goto bad;
/* else it's a link-local multicast, fine */
} else { /* unicast or anycast */
- if ((ia6 = ip6_getdstifaddr(m)) == NULL)
+ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia6 == NULL)
goto bad; /* XXX impossible */
if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
@@ -1505,26 +1473,23 @@ ni6_input(struct mbuf *m, int off)
break;
}
- /* allocate an mbuf to reply. */
- MGETHDR(n, M_DONTWAIT, m->m_type);
+ /* Allocate an mbuf to reply. */
+ if (replylen > MCLBYTES) {
+ /*
+ * XXX: should we try to allocate more? But MCLBYTES
+ * is probably much larger than IPV6_MMTU...
+ */
+ goto bad;
+ }
+ if (replylen > MHLEN)
+ n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR);
+ else
+ n = m_gethdr(M_NOWAIT, m->m_type);
if (n == NULL) {
m_freem(m);
return (NULL);
}
- M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */
- if (replylen > MHLEN) {
- if (replylen > MCLBYTES) {
- /*
- * XXX: should we try to allocate more? But MCLBYTES
- * is probably much larger than IPV6_MMTU...
- */
- goto bad;
- }
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- goto bad;
- }
- }
+ m_move_pkthdr(n, m); /* just for recvif and FIB */
n->m_pkthdr.len = n->m_len = replylen;
/* copy mbuf header and IPv6 + Node Information base headers */
@@ -1623,16 +1588,13 @@ ni6_nametodns(const char *name, int namelen, int old)
else
len = MCLBYTES;
- /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
- MGET(m, M_DONTWAIT, MT_DATA);
- if (m && len > MLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0)
- goto fail;
- }
- if (!m)
+ /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */
+ if (len > MLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
goto fail;
- m->m_next = NULL;
if (old) {
m->m_len = len;
@@ -1793,7 +1755,7 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
}
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
addrsofif = 0;
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
@@ -1880,7 +1842,7 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet);
again:
- for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
+ for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) {
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
@@ -1965,8 +1927,8 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
ltime = ND6_INFINITE_LIFETIME;
else {
if (ifa6->ia6_lifetime.ia6t_expire >
- time_second)
- ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
+ time_uptime)
+ ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime);
else
ltime = 0;
}
@@ -2078,7 +2040,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
*/
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
m->m_len <= MHLEN) {
- MGET(n, M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data,
@@ -2128,7 +2090,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
m->m_len <= MHLEN) {
struct mbuf *n;
- MGET(n, M_DONTWAIT, m->m_type);
+ n = m_get(M_NOWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data, m->m_len);
@@ -2166,13 +2128,13 @@ icmp6_rip6_input(struct mbuf **mp, int off)
void
icmp6_reflect(struct mbuf *m, size_t off)
{
+ struct in6_addr src6, *srcp;
struct ip6_hdr *ip6;
struct icmp6_hdr *icmp6;
struct in6_ifaddr *ia = NULL;
- int plen;
- int type, code;
struct ifnet *outif = NULL;
- struct in6_addr origdst, src, *srcp = NULL;
+ int plen;
+ int type, code, hlim;
/* too short to reflect */
if (off < sizeof(struct ip6_hdr)) {
@@ -2218,13 +2180,8 @@ icmp6_reflect(struct mbuf *m, size_t off)
icmp6 = (struct icmp6_hdr *)(ip6 + 1);
type = icmp6->icmp6_type; /* keep type for statistics */
code = icmp6->icmp6_code; /* ditto. */
-
- origdst = ip6->ip6_dst;
- /*
- * ip6_input() drops a packet if its src is multicast.
- * So, the src is never multicast.
- */
- ip6->ip6_dst = ip6->ip6_src;
+ hlim = 0;
+ srcp = NULL;
/*
* If the incoming packet was addressed directly to us (i.e. unicast),
@@ -2232,74 +2189,59 @@ icmp6_reflect(struct mbuf *m, size_t off)
* The IN6_IFF_NOTREADY case should be VERY rare, but is possible
* (for example) when we encounter an error while forwarding procedure
* destined to a duplicated address of ours.
- * Note that ip6_getdstifaddr() may fail if we are in an error handling
- * procedure of an outgoing packet of our own, in which case we need
- * to search in the ifaddr list.
*/
- if (!IN6_IS_ADDR_MULTICAST(&origdst)) {
- if ((ia = ip6_getdstifaddr(m))) {
- if (!(ia->ia6_flags &
- (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)))
- srcp = &ia->ia_addr.sin6_addr;
- } else {
- struct sockaddr_in6 d;
-
- bzero(&d, sizeof(d));
- d.sin6_family = AF_INET6;
- d.sin6_len = sizeof(d);
- d.sin6_addr = origdst;
- ia = (struct in6_ifaddr *)
- ifa_ifwithaddr((struct sockaddr *)&d);
- if (ia &&
- !(ia->ia6_flags &
- (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
- srcp = &ia->ia_addr.sin6_addr;
- }
+ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia != NULL && !(ia->ia6_flags &
+ (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
+ src6 = ia->ia_addr.sin6_addr;
+ srcp = &src6;
+
+ if (m->m_pkthdr.rcvif != NULL) {
+ /* XXX: This may not be the outgoing interface */
+ hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
+ } else
+ hlim = V_ip6_defhlim;
}
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
}
if (srcp == NULL) {
- int e;
- struct sockaddr_in6 sin6;
- struct route_in6 ro;
+ int error;
+ struct in6_addr dst6;
+ uint32_t scopeid;
/*
* This case matches to multicasts, our anycast, or unicasts
* that we do not own. Select a source address based on the
* source address of the erroneous packet.
*/
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(sin6);
- sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
-
- bzero(&ro, sizeof(ro));
- e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &src);
- if (ro.ro_rt)
- RTFREE(ro.ro_rt); /* XXX: we could use this */
- if (e) {
+ in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
+ error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+ scopeid, NULL, &src6, &hlim);
+
+ if (error) {
char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG,
"icmp6_reflect: source can't be determined: "
"dst=%s, error=%d\n",
- ip6_sprintf(ip6buf, &sin6.sin6_addr), e));
+ ip6_sprintf(ip6buf, &ip6->ip6_dst), error));
goto bad;
}
- srcp = &src;
+ srcp = &src6;
}
-
+ /*
+ * ip6_input() drops a packet if its src is multicast.
+ * So, the src is never multicast.
+ */
+ ip6->ip6_dst = ip6->ip6_src;
ip6->ip6_src = *srcp;
ip6->ip6_flow = 0;
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_ICMPV6;
- if (outif)
- ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
- else if (m->m_pkthdr.rcvif) {
- /* XXX: This may not be the outgoing interface */
- ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
- } else
- ip6->ip6_hlim = V_ip6_defhlim;
+ ip6->ip6_hlim = hlim;
icmp6->icmp6_cksum = 0;
icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
@@ -2311,19 +2253,13 @@ icmp6_reflect(struct mbuf *m, size_t off)
m->m_flags &= ~(M_BCAST|M_MCAST);
- m_addr_changed(m);
-
ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
if (outif)
icmp6_ifoutstat_inc(outif, type, code);
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
return;
bad:
- if (ia != NULL)
- ifa_free(&ia->ia_ifa);
m_freem(m);
return;
}
@@ -2365,7 +2301,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
int icmp6len = ntohs(ip6->ip6_plen);
char *lladdr = NULL;
int lladdrlen = 0;
- struct rtentry *rt = NULL;
int is_router;
int is_onlink;
struct in6_addr src6 = ip6->ip6_src;
@@ -2420,18 +2355,13 @@ icmp6_redirect_input(struct mbuf *m, int off)
}
{
/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
- struct sockaddr_in6 sin6;
- struct in6_addr *gw6;
-
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB);
- if (rt) {
- if (rt->rt_gateway == NULL ||
- rt->rt_gateway->sa_family != AF_INET6) {
- RTFREE_LOCKED(rt);
+ struct nhop6_basic nh6;
+ struct in6_addr kdst;
+ uint32_t scopeid;
+
+ in6_splitscope(&reddst6, &kdst, &scopeid);
+ if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){
+ if ((nh6.nh_flags & NHF_GATEWAY) == 0) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; no route "
"with inet6 gateway found for redirect dst: %s\n",
@@ -2439,14 +2369,12 @@ icmp6_redirect_input(struct mbuf *m, int off)
goto bad;
}
- gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
- if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
- RTFREE_LOCKED(rt);
+ if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; "
"not equal to gw-for-src=%s (must be same): "
"%s\n",
- ip6_sprintf(ip6buf, gw6),
+ ip6_sprintf(ip6buf, &nh6.nh_addr),
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
@@ -2457,8 +2385,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
- RTFREE_LOCKED(rt);
- rt = NULL;
}
if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
nd6log((LOG_ERR,
@@ -2480,7 +2406,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
- /* validation passed */
icmp6len -= sizeof(*nd_rd);
nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
@@ -2505,31 +2430,45 @@ icmp6_redirect_input(struct mbuf *m, int off)
goto bad;
}
+ /* Validation passed. */
+
/* RFC 2461 8.3 */
nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
- if (!is_onlink) { /* better router case. perform rtredirect. */
- /* perform rtredirect */
+ /*
+ * Install a gateway route in the better-router case or an interface
+ * route in the on-link-destination case.
+ */
+ {
struct sockaddr_in6 sdst;
struct sockaddr_in6 sgw;
struct sockaddr_in6 ssrc;
+ struct sockaddr *gw;
+ int rt_flags;
u_int fibnum;
bzero(&sdst, sizeof(sdst));
- bzero(&sgw, sizeof(sgw));
bzero(&ssrc, sizeof(ssrc));
- sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
- sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
- sizeof(struct sockaddr_in6);
- bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
+ sdst.sin6_family = ssrc.sin6_family = AF_INET6;
+ sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
+ rt_flags = RTF_HOST;
+ if (is_router) {
+ bzero(&sgw, sizeof(sgw));
+ sgw.sin6_family = AF_INET6;
+ sgw.sin6_len = sizeof(struct sockaddr_in6);
+ bcopy(&redtgt6, &sgw.sin6_addr,
+ sizeof(struct in6_addr));
+ gw = (struct sockaddr *)&sgw;
+ rt_flags |= RTF_GATEWAY;
+ } else
+ gw = ifp->if_addr->ifa_addr;
for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
- in6_rtredirect((struct sockaddr *)&sdst,
- (struct sockaddr *)&sgw, (struct sockaddr *)NULL,
- RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&ssrc,
- fibnum);
+ in6_rtredirect((struct sockaddr *)&sdst, gw,
+ (struct sockaddr *)NULL, rt_flags,
+ (struct sockaddr *)&ssrc, fibnum);
}
/* finally update cached route in each socket via pfctlinput */
{
@@ -2540,9 +2479,6 @@ icmp6_redirect_input(struct mbuf *m, int off)
sdst.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
-#ifdef IPSEC
- key_sa_routechange((struct sockaddr *)&sdst);
-#endif /* IPSEC */
}
freeit:
@@ -2609,14 +2545,10 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
#if IPV6_MMTU >= MCLBYTES
# error assumption failed about IPV6_MMTU and MCLBYTES
#endif
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
- if (m && IPV6_MMTU >= MHLEN)
- MCLGET(m, M_DONTWAIT);
- if (!m)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (m == NULL)
goto fail;
M_SETFIB(m, rt->rt_fibnum);
- m->m_pkthdr.rcvif = NULL;
- m->m_len = 0;
maxlen = M_TRAILINGSPACE(m);
maxlen = min(IPV6_MMTU, maxlen);
/* just for safety */
@@ -2711,7 +2643,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
nd_opt->nd_opt_len = len >> 3;
lladdr = (char *)(nd_opt + 1);
- bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen);
+ bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
p += len;
}
}
diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c
index 66888fa8..f5d82524 100644
--- a/freebsd/sys/netinet6/in6.c
+++ b/freebsd/sys/netinet6/in6.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/eventhandler.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@@ -81,6 +82,8 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/syslog.h>
#include <net/if.h>
@@ -97,6 +100,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
+#include <netinet/ip_carp.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -105,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6_mroute.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/scope6_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
@@ -135,49 +140,36 @@ const struct in6_addr in6mask128 = IN6MASK128;
const struct sockaddr_in6 sa6_any =
{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
-static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t,
- struct ifnet *, struct thread *);
-static int in6_ifinit(struct ifnet *, struct in6_ifaddr *,
- struct sockaddr_in6 *, int);
+static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *,
+ struct in6_aliasreq *, int);
static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
-int (*faithprefix_p)(struct in6_addr *);
+static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *,
+ struct in6_ifaddr *, int);
+static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *,
+ struct in6_aliasreq *, int flags);
+static int in6_update_ifa_internal(struct ifnet *, struct in6_aliasreq *,
+ struct in6_ifaddr *, int, int);
+static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *,
+ struct in6_ifaddr *, int);
#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa))
#define ia62ifa(ia6) (&((ia6)->ia_ifa))
+
void
-in6_ifaddloop(struct ifaddr *ifa)
+in6_newaddrmsg(struct in6_ifaddr *ia, int cmd)
{
struct sockaddr_dl gateway;
struct sockaddr_in6 mask, addr;
struct rtentry rt;
- struct in6_ifaddr *ia;
- struct ifnet *ifp;
- struct llentry *ln;
-
- ia = ifa2ia6(ifa);
- ifp = ifa->ifa_ifp;
- IF_AFDATA_LOCK(ifp);
- ifa->ifa_rtrequest = nd6_rtrequest;
- ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR |
- LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr);
- IF_AFDATA_UNLOCK(ifp);
- if (ln != NULL) {
- ln->la_expire = 0; /* for IPv6 this means permanent */
- ln->ln_state = ND6_LLINFO_REACHABLE;
- /*
- * initialize for rtmsg generation
- */
- bzero(&gateway, sizeof(gateway));
- gateway.sdl_len = sizeof(gateway);
- gateway.sdl_family = AF_LINK;
- gateway.sdl_nlen = 0;
- gateway.sdl_alen = 6;
- memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned,
- sizeof(ln->ll_addr));
- LLE_WUNLOCK(ln);
- }
+
+ /*
+ * initialize for rtmsg generation
+ */
+ bzero(&gateway, sizeof(gateway));
+ gateway.sdl_len = sizeof(gateway);
+ gateway.sdl_family = AF_LINK;
bzero(&rt, sizeof(rt));
rt.rt_gateway = (struct sockaddr *)&gateway;
@@ -185,42 +177,11 @@ in6_ifaddloop(struct ifaddr *ifa)
memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
rt_mask(&rt) = (struct sockaddr *)&mask;
rt_key(&rt) = (struct sockaddr *)&addr;
- rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC;
+ rt.rt_flags = RTF_HOST | RTF_STATIC;
+ if (cmd == RTM_ADD)
+ rt.rt_flags |= RTF_UP;
/* Announce arrival of local address to all FIBs. */
- rt_newaddrmsg(RTM_ADD, ifa, 0, &rt);
-}
-
-void
-in6_ifremloop(struct ifaddr *ifa)
-{
- struct sockaddr_dl gateway;
- struct sockaddr_in6 mask, addr;
- struct rtentry rt0;
- struct in6_ifaddr *ia;
- struct ifnet *ifp;
-
- ia = ifa2ia6(ifa);
- ifp = ifa->ifa_ifp;
- memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
- memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
- lltable_prefix_free(AF_INET6, (struct sockaddr *)&addr,
- (struct sockaddr *)&mask, LLE_STATIC);
-
- /*
- * initialize for rtmsg generation
- */
- bzero(&gateway, sizeof(gateway));
- gateway.sdl_len = sizeof(gateway);
- gateway.sdl_family = AF_LINK;
- gateway.sdl_nlen = 0;
- gateway.sdl_alen = ifp->if_addrlen;
- bzero(&rt0, sizeof(rt0));
- rt0.rt_gateway = (struct sockaddr *)&gateway;
- rt_mask(&rt0) = (struct sockaddr *)&mask;
- rt_key(&rt0) = (struct sockaddr *)&addr;
- rt0.rt_flags = RTF_HOST | RTF_STATIC;
- /* Announce removal of local address to all FIBs. */
- rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0);
+ rt_newaddrmsg(cmd, &ia->ia_ifa, 0, &rt);
}
int
@@ -275,7 +236,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
struct in6_ifaddr *ia = NULL;
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
+ int carp_attached = 0;
int error;
+ u_long ocmd = cmd;
+
+ /*
+ * Compat to make pre-10.x ifconfig(8) operable.
+ */
+ if (cmd == OSIOCAIFADDR_IN6)
+ cmd = SIOCAIFADDR_IN6;
switch (cmd) {
case SIOCGETSGCNT_IN6:
@@ -317,8 +286,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
/* FALLTHROUGH */
case OSIOCGIFINFO_IN6:
case SIOCGIFINFO_IN6:
- case SIOCGDRLST_IN6:
- case SIOCGPRLST_IN6:
case SIOCGNBRINFO_IN6:
case SIOCGDEFIFACE_IN6:
return (nd6_ioctl(cmd, data, ifp));
@@ -366,26 +333,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
return (scope6_ioctl(cmd, data, ifp));
}
- switch (cmd) {
- case SIOCALIFADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_ADDIFADDR);
- if (error)
- return (error);
- }
- return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
-
- case SIOCDLIFADDR:
- if (td != NULL) {
- error = priv_check(td, PRIV_NET_DELIFADDR);
- if (error)
- return (error);
- }
- /* FALLTHROUGH */
- case SIOCGLIFADDR:
- return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
- }
-
/*
* Find address for this interface, if it exists.
*
@@ -417,7 +364,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
case SIOCSPFXFLUSH_IN6:
case SIOCSRTRFLUSH_IN6:
case SIOCGIFALIFETIME_IN6:
- case SIOCSIFALIFETIME_IN6:
case SIOCGIFSTAT_IN6:
case SIOCGIFSTAT_ICMP6:
sa6 = &ifr->ifr_addr;
@@ -516,34 +462,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
goto out;
}
break;
-
- case SIOCSIFALIFETIME_IN6:
- {
- struct in6_addrlifetime *lt;
-
- if (td != NULL) {
- error = priv_check(td, PRIV_NETINET_ALIFETIME6);
- if (error)
- goto out;
- }
- if (ia == NULL) {
- error = EADDRNOTAVAIL;
- goto out;
- }
- /* sanity for overflow - beware unsigned */
- lt = &ifr->ifr_ifru.ifru_lifetime;
- if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
- lt->ia6t_vltime + time_second < time_second) {
- error = EINVAL;
- goto out;
- }
- if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
- lt->ia6t_pltime + time_second < time_second) {
- error = EINVAL;
- goto out;
- }
- break;
- }
}
switch (cmd) {
@@ -576,17 +494,17 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
break;
case SIOCGIFSTAT_IN6:
- bzero(&ifr->ifr_ifru.ifru_stat,
- sizeof(ifr->ifr_ifru.ifru_stat));
- ifr->ifr_ifru.ifru_stat =
- *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat;
+ COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+ ifp->if_afdata[AF_INET6])->in6_ifstat,
+ &ifr->ifr_ifru.ifru_stat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t));
break;
case SIOCGIFSTAT_ICMP6:
- bzero(&ifr->ifr_ifru.ifru_icmp6stat,
- sizeof(ifr->ifr_ifru.ifru_icmp6stat));
- ifr->ifr_ifru.ifru_icmp6stat =
- *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat;
+ COUNTER_ARRAY_COPY(((struct in6_ifextra *)
+ ifp->if_afdata[AF_INET6])->icmp6_ifstat,
+ &ifr->ifr_ifru.ifru_icmp6stat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
break;
case SIOCGIFALIFETIME_IN6:
@@ -629,24 +547,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
}
break;
- case SIOCSIFALIFETIME_IN6:
- ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime;
- /* for sanity */
- if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
- ia->ia6_lifetime.ia6t_expire =
- time_second + ia->ia6_lifetime.ia6t_vltime;
- } else
- ia->ia6_lifetime.ia6t_expire = 0;
- if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
- ia->ia6_lifetime.ia6t_preferred =
- time_second + ia->ia6_lifetime.ia6t_pltime;
- } else
- ia->ia6_lifetime.ia6t_preferred = 0;
- break;
-
case SIOCAIFADDR_IN6:
{
- int i;
struct nd_prefixctl pr0;
struct nd_prefix *pr;
@@ -667,6 +569,18 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
break;
}
+ if (cmd == ocmd && ifra->ifra_vhid > 0) {
+ if (carp_attach_p != NULL)
+ error = (*carp_attach_p)(&ia->ia_ifa,
+ ifra->ifra_vhid);
+ else
+ error = EPROTONOSUPPORT;
+ if (error)
+ goto out;
+ else
+ carp_attached = 1;
+ }
+
/*
* then, make the prefix on-link on the interface.
* XXX: we'd rather create the prefix before the address, but
@@ -683,14 +597,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
NULL);
if (pr0.ndpr_plen == 128) {
- break; /* we don't need to install a host route. */
+ /* we don't need to install a host route. */
+ goto aifaddr_out;
}
pr0.ndpr_prefix = ifra->ifra_addr;
/* apply the mask for safety. */
- for (i = 0; i < 4; i++) {
- pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
- ifra->ifra_prefixmask.sin6_addr.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr,
+ &ifra->ifra_prefixmask.sin6_addr);
+
/*
* XXX: since we don't have an API to set prefix (not address)
* lifetimes, we just use the same lifetimes as addresses.
@@ -710,12 +624,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
* nd6_prelist_add will install the corresponding
* interface route.
*/
- if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
- goto out;
- if (pr == NULL) {
- log(LOG_ERR, "nd6_prelist_add succeeded but "
- "no prefix\n");
- error = EINVAL;
+ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
+ if (carp_attached)
+ (*carp_detach_p)(&ia->ia_ifa);
goto out;
}
}
@@ -746,32 +657,28 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
* that is, this address might make other addresses detached.
*/
pfxlist_onlink_check();
- if (error == 0 && ia) {
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
- /*
- * Try to clear the flag when a new
- * IPv6 address is added onto an
- * IFDISABLED interface and it
- * succeeds.
- */
- struct in6_ndireq nd;
-
- memset(&nd, 0, sizeof(nd));
- nd.ndi.flags = ND_IFINFO(ifp)->flags;
- nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
- if (nd6_ioctl(SIOCSIFINFO_FLAGS,
- (caddr_t)&nd, ifp) < 0)
- log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
- "SIOCSIFINFO_FLAGS for -ifdisabled "
- "failed.");
- /*
- * Ignore failure of clearing the flag
- * intentionally. The failure means
- * address duplication was detected.
- */
- }
- EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+
+aifaddr_out:
+ /*
+ * Try to clear the flag when a new IPv6 address is added
+ * onto an IFDISABLED interface and it succeeds.
+ */
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ struct in6_ndireq nd;
+
+ memset(&nd, 0, sizeof(nd));
+ nd.ndi.flags = ND_IFINFO(ifp)->flags;
+ nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
+ if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0)
+ log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
+ "SIOCSIFINFO_FLAGS for -ifdisabled "
+ "failed.");
+ /*
+ * Ignore failure of clearing the flag intentionally.
+ * The failure means address duplication was detected.
+ */
}
+ EVENTHANDLER_INVOKE(ifaddr_event, ifp);
break;
}
@@ -823,27 +730,24 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol)
{
char ip6buf[INET6_ADDRSTRLEN];
- struct sockaddr_in6 mltaddr, mltmask;
- struct in6_addr llsol;
+ struct in6_addr mltaddr;
struct in6_multi_mship *imm;
- struct rtentry *rt;
int delay, error;
KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__));
/* Join solicited multicast addr for new host id. */
- bzero(&llsol, sizeof(struct in6_addr));
- llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
- llsol.s6_addr32[1] = 0;
- llsol.s6_addr32[2] = htonl(1);
- llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
- llsol.s6_addr8[12] = 0xff;
- if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) {
+ bzero(&mltaddr, sizeof(struct in6_addr));
+ mltaddr.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
+ mltaddr.s6_addr32[2] = htonl(1);
+ mltaddr.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
+ mltaddr.s6_addr8[12] = 0xff;
+ if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) {
/* XXX: should not happen */
log(LOG_ERR, "%s: in6_setscope failed\n", __func__);
goto cleanup;
}
- delay = 0;
+ delay = error = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
/*
* We need a random delay for DAD on the address being
@@ -853,62 +757,28 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
*/
delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
}
- imm = in6_joingroup(ifp, &llsol, &error, delay);
+ imm = in6_joingroup(ifp, &mltaddr, &error, delay);
if (imm == NULL) {
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
- "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &llsol),
+ nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
+ "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
*in6m_sol = imm->i6mm_maddr;
- bzero(&mltmask, sizeof(mltmask));
- mltmask.sin6_len = sizeof(struct sockaddr_in6);
- mltmask.sin6_family = AF_INET6;
- mltmask.sin6_addr = in6mask32;
-#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */
-
/*
* Join link-local all-nodes address.
*/
- bzero(&mltaddr, sizeof(mltaddr));
- mltaddr.sin6_len = sizeof(struct sockaddr_in6);
- mltaddr.sin6_family = AF_INET6;
- mltaddr.sin6_addr = in6addr_linklocal_allnodes;
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
+ mltaddr = in6addr_linklocal_allnodes;
+ if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
goto cleanup; /* XXX: should not fail */
- /*
- * XXX: do we really need this automatic routes? We should probably
- * reconsider this stuff. Most applications actually do not need the
- * routes, since they usually specify the outgoing interface.
- */
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL) {
- /* XXX: only works in !SCOPEDROUTING case. */
- if (memcmp(&mltaddr.sin6_addr,
- &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
- MLTMASK_LEN)) {
- RTFREE_LOCKED(rt);
- rt = NULL;
- }
- }
- if (rt == NULL) {
- error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- goto cleanup;
- } else
- RTFREE_LOCKED(rt);
-
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
+ imm = in6_joingroup(ifp, &mltaddr, &error, 0);
if (imm == NULL) {
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
- "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
+ "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
+ if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
@@ -924,24 +794,26 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
*/
delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
}
- if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
+ if (in6_nigroup(ifp, NULL, -1, &mltaddr) == 0) {
/* XXX jinmei */
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay);
+ imm = in6_joingroup(ifp, &mltaddr, &error, delay);
if (imm == NULL)
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ nd6log((LOG_WARNING,
+ "%s: in6_joingroup failed for %s on %s "
"(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ &mltaddr), if_name(ifp), error));
/* XXX not very fatal, go on... */
else
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
}
- if (V_icmp6_nodeinfo_oldmcprefix &&
- in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay);
+ if (V_icmp6_nodeinfo_oldmcprefix &&
+ in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr) == 0) {
+ imm = in6_joingroup(ifp, &mltaddr, &error, delay);
if (imm == NULL)
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ nd6log((LOG_WARNING,
+ "%s: in6_joingroup failed for %s on %s "
"(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ &mltaddr), if_name(ifp), error));
/* XXX not very fatal, go on... */
else
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
@@ -951,38 +823,18 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
* Join interface-local all-nodes address.
* (ff01::1%ifN, and ff01::%ifN/32)
*/
- mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
+ mltaddr = in6addr_nodelocal_allnodes;
+ if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
goto cleanup; /* XXX: should not fail */
- /* XXX: again, do we really need the route? */
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL) {
- if (memcmp(&mltaddr.sin6_addr,
- &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
- MLTMASK_LEN)) {
- RTFREE_LOCKED(rt);
- rt = NULL;
- }
- }
- if (rt == NULL) {
- error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- goto cleanup;
- } else
- RTFREE_LOCKED(rt);
- imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
+ imm = in6_joingroup(ifp, &mltaddr, &error, 0);
if (imm == NULL) {
- nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s "
+ nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
"(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
- &mltaddr.sin6_addr), if_name(ifp), error));
+ &mltaddr), if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
-#undef MLTMASK_LEN
cleanup:
return (error);
@@ -992,17 +844,65 @@ cleanup:
* Update parameters of an IPv6 interface address.
* If necessary, a new entry is created and linked into address chains.
* This function is separated from in6_control().
- * XXX: should this be performed under splnet()?
*/
int
in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
struct in6_ifaddr *ia, int flags)
{
- int error = 0, hostIsNew = 0, plen = -1;
+ int error, hostIsNew = 0;
+
+ if ((error = in6_validate_ifra(ifp, ifra, ia, flags)) != 0)
+ return (error);
+
+ if (ia == NULL) {
+ hostIsNew = 1;
+ if ((ia = in6_alloc_ifa(ifp, ifra, flags)) == NULL)
+ return (ENOBUFS);
+ }
+
+ error = in6_update_ifa_internal(ifp, ifra, ia, hostIsNew, flags);
+ if (error != 0) {
+ if (hostIsNew != 0) {
+ in6_unlink_ifa(ia, ifp);
+ ifa_free(&ia->ia_ifa);
+ }
+ return (error);
+ }
+
+ if (hostIsNew)
+ error = in6_broadcast_ifa(ifp, ifra, ia, flags);
+
+ return (error);
+}
+
+/*
+ * Fill in basic IPv6 address request info.
+ */
+void
+in6_prepare_ifra(struct in6_aliasreq *ifra, const struct in6_addr *addr,
+ const struct in6_addr *mask)
+{
+
+ memset(ifra, 0, sizeof(struct in6_aliasreq));
+
+ ifra->ifra_addr.sin6_family = AF_INET6;
+ ifra->ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
+ if (addr != NULL)
+ ifra->ifra_addr.sin6_addr = *addr;
+
+ ifra->ifra_prefixmask.sin6_family = AF_INET6;
+ ifra->ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
+ if (mask != NULL)
+ ifra->ifra_prefixmask.sin6_addr = *mask;
+}
+
+static int
+in6_validate_ifra(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int flags)
+{
+ int plen = -1;
struct sockaddr_in6 dst6;
struct in6_addrlifetime *lt;
- struct in6_multi *in6m_sol;
- int delay;
char ip6buf[INET6_ADDRSTRLEN];
/* Validate parameters */
@@ -1017,6 +917,14 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
return (EAFNOSUPPORT);
+
+ /*
+ * Validate address
+ */
+ if (ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6) ||
+ ifra->ifra_addr.sin6_family != AF_INET6)
+ return (EINVAL);
+
/*
* validate ifra_prefixmask. don't check sin6_family, netmask
* does not carry fields other than sin6_len.
@@ -1069,6 +977,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
if (sa6_embedscope(&dst6, 0))
return (EINVAL); /* XXX: should be impossible */
}
+ /* Modify original ifra_dstaddr to reflect changes */
+ ifra->ifra_dstaddr = dst6;
+
/*
* The destination address can be specified only for a p2p or a
* loopback interface. If specified, the corresponding prefix length
@@ -1104,94 +1015,102 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
return (0); /* there's nothing to do */
}
- /*
- * If this is a new address, allocate a new ifaddr and link it
- * into chains.
- */
- if (ia == NULL) {
- hostIsNew = 1;
- /*
- * When in6_update_ifa() is called in a process of a received
- * RA, it is called under an interrupt context. So, we should
- * call malloc with M_NOWAIT.
- */
- ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR,
- M_NOWAIT);
- if (ia == NULL)
- return (ENOBUFS);
- bzero((caddr_t)ia, sizeof(*ia));
- ifa_init(&ia->ia_ifa);
- LIST_INIT(&ia->ia6_memberships);
- /* Initialize the address and masks, and put time stamp */
- ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
- ia->ia_addr.sin6_family = AF_INET6;
- ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
- ia->ia6_createtime = time_second;
- if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
- /*
- * XXX: some functions expect that ifa_dstaddr is not
- * NULL for p2p interfaces.
- */
- ia->ia_ifa.ifa_dstaddr =
- (struct sockaddr *)&ia->ia_dstaddr;
- } else {
- ia->ia_ifa.ifa_dstaddr = NULL;
- }
- ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
- ia->ia_ifp = ifp;
- ifa_ref(&ia->ia_ifa); /* if_addrhead */
- IF_ADDR_WLOCK(ifp);
- TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
- IF_ADDR_WUNLOCK(ifp);
-
- ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */
- IN6_IFADDR_WLOCK();
- TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
- IN6_IFADDR_WUNLOCK();
- }
-
- /* update timestamp */
- ia->ia6_updatetime = time_second;
-
- /* set prefix mask */
- if (ifra->ifra_prefixmask.sin6_len) {
+ /* Check prefix mask */
+ if (ia != NULL && ifra->ifra_prefixmask.sin6_len != 0) {
/*
* We prohibit changing the prefix length of an existing
* address, because
* + such an operation should be rare in IPv6, and
* + the operation would confuse prefix management.
*/
- if (ia->ia_prefixmask.sin6_len &&
+ if (ia->ia_prefixmask.sin6_len != 0 &&
in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
- nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an"
- " existing (%s) address should not be changed\n",
+ nd6log((LOG_INFO, "in6_validate_ifa: the prefix length "
+ "of an existing %s address should not be changed\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
- error = EINVAL;
- goto unlink;
+
+ return (EINVAL);
}
- ia->ia_prefixmask = ifra->ifra_prefixmask;
}
+ return (0);
+}
+
+
+/*
+ * Allocate a new ifaddr and link it into chains.
+ */
+static struct in6_ifaddr *
+in6_alloc_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int flags)
+{
+ struct in6_ifaddr *ia;
+
/*
- * If a new destination address is specified, scrub the old one and
- * install the new destination. Note that the interface must be
- * p2p or loopback (see the check above.)
+ * When in6_alloc_ifa() is called in a process of a received
+ * RA, it is called under an interrupt context. So, we should
+ * call malloc with M_NOWAIT.
*/
- if (dst6.sin6_family == AF_INET6 &&
- !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
- int e;
+ ia = (struct in6_ifaddr *)ifa_alloc(sizeof(*ia), M_NOWAIT);
+ if (ia == NULL)
+ return (NULL);
+ LIST_INIT(&ia->ia6_memberships);
+ /* Initialize the address and masks, and put time stamp */
+ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
+ ia->ia_addr.sin6_family = AF_INET6;
+ ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
+ /* XXX: Can we assign ,sin6_addr and skip the rest? */
+ ia->ia_addr = ifra->ifra_addr;
+ ia->ia6_createtime = time_uptime;
+ if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
+ /*
+ * Some functions expect that ifa_dstaddr is not
+ * NULL for p2p interfaces.
+ */
+ ia->ia_ifa.ifa_dstaddr =
+ (struct sockaddr *)&ia->ia_dstaddr;
+ } else {
+ ia->ia_ifa.ifa_dstaddr = NULL;
+ }
- if ((ia->ia_flags & IFA_ROUTE) != 0 &&
- (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) {
- nd6log((LOG_ERR, "in6_update_ifa: failed to remove "
- "a route to the old destination: %s\n",
- ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
- /* proceed anyway... */
- } else
- ia->ia_flags &= ~IFA_ROUTE;
- ia->ia_dstaddr = dst6;
+ /* set prefix mask if any */
+ ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
+ if (ifra->ifra_prefixmask.sin6_len != 0) {
+ ia->ia_prefixmask.sin6_family = AF_INET6;
+ ia->ia_prefixmask.sin6_len = ifra->ifra_prefixmask.sin6_len;
+ ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr;
}
+ ia->ia_ifp = ifp;
+ ifa_ref(&ia->ia_ifa); /* if_addrhead */
+ IF_ADDR_WLOCK(ifp);
+ TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
+ IF_ADDR_WUNLOCK(ifp);
+
+ ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */
+ IN6_IFADDR_WLOCK();
+ TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
+ LIST_INSERT_HEAD(IN6ADDR_HASH(&ia->ia_addr.sin6_addr), ia, ia6_hash);
+ IN6_IFADDR_WUNLOCK();
+
+ return (ia);
+}
+
+/*
+ * Update/configure interface address parameters:
+ *
+ * 1) Update lifetime
+ * 2) Update interface metric ad flags
+ * 3) Notify other subsystems
+ */
+static int
+in6_update_ifa_internal(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int hostIsNew, int flags)
+{
+ int error;
+
+ /* update timestamp */
+ ia->ia6_updatetime = time_uptime;
+
/*
* Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred
* to see if the address is deprecated or invalidated, but initialize
@@ -1200,71 +1119,85 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
ia->ia6_lifetime = ifra->ifra_lifetime;
if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_expire =
- time_second + ia->ia6_lifetime.ia6t_vltime;
+ time_uptime + ia->ia6_lifetime.ia6t_vltime;
} else
ia->ia6_lifetime.ia6t_expire = 0;
if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_preferred =
- time_second + ia->ia6_lifetime.ia6t_pltime;
+ time_uptime + ia->ia6_lifetime.ia6t_pltime;
} else
ia->ia6_lifetime.ia6t_preferred = 0;
- /* reset the interface and routing table appropriately. */
- if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0)
- goto unlink;
-
- /*
- * configure address flags.
- */
- ia->ia6_flags = ifra->ifra_flags;
/*
* backward compatibility - if IN6_IFF_DEPRECATED is set from the
* userland, make it deprecated.
*/
if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
ia->ia6_lifetime.ia6t_pltime = 0;
- ia->ia6_lifetime.ia6t_preferred = time_second;
+ ia->ia6_lifetime.ia6t_preferred = time_uptime;
}
+
+ /*
+ * configure address flags.
+ */
+ ia->ia6_flags = ifra->ifra_flags;
+
/*
* Make the address tentative before joining multicast addresses,
* so that corresponding MLD responses would not have a tentative
* source address.
*/
ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */
- if (hostIsNew && in6if_do_dad(ifp))
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
-
- /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
/*
- * We are done if we have simply modified an existing address.
+ * DAD should be performed for an new address or addresses on
+ * an interface with ND6_IFF_IFDISABLED.
*/
- if (!hostIsNew)
- return (error);
+ if (in6if_do_dad(ifp) &&
+ (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)))
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
- /*
- * Beyond this point, we should call in6_purgeaddr upon an error,
- * not just go to unlink.
- */
+ /* notify other subsystems */
+ error = in6_notify_ifa(ifp, ia, ifra, hostIsNew);
+
+ return (error);
+}
+
+/*
+ * Do link-level ifa job:
+ * 1) Add lle entry for added address
+ * 2) Notifies routing socket users about new address
+ * 3) join appropriate multicast group
+ * 4) start DAD if enabled
+ */
+static int
+in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int flags)
+{
+ struct in6_multi *in6m_sol;
+ int error = 0;
+
+ /* Add local address to lltable, if necessary (ex. on p2p link). */
+ if ((error = nd6_add_ifa_lle(ia)) != 0) {
+ in6_purgeaddr(&ia->ia_ifa);
+ ifa_free(&ia->ia_ifa);
+ return (error);
+ }
/* Join necessary multicast groups. */
in6m_sol = NULL;
if ((ifp->if_flags & IFF_MULTICAST) != 0) {
error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol);
- if (error)
- goto cleanup;
+ if (error != 0) {
+ in6_purgeaddr(&ia->ia_ifa);
+ ifa_free(&ia->ia_ifa);
+ return (error);
+ }
}
- /*
- * Perform DAD, if needed.
- * XXX It may be of use, if we can administratively disable DAD.
- */
- if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
- (ia->ia6_flags & IN6_IFF_TENTATIVE))
- {
- int mindelay, maxdelay;
+ /* Perform DAD, if the address is TENTATIVE. */
+ if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
+ int delay, mindelay, maxdelay;
delay = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
@@ -1295,159 +1228,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
nd6_dad_start((struct ifaddr *)ia, delay);
}
- KASSERT(hostIsNew, ("in6_update_ifa: !hostIsNew"));
+ in6_newaddrmsg(ia, RTM_ADD);
ifa_free(&ia->ia_ifa);
return (error);
-
- unlink:
- /*
- * XXX: if a change of an existing address failed, keep the entry
- * anyway.
- */
- if (hostIsNew) {
- in6_unlink_ifa(ia, ifp);
- ifa_free(&ia->ia_ifa);
- }
- return (error);
-
- cleanup:
- KASSERT(hostIsNew, ("in6_update_ifa: cleanup: !hostIsNew"));
- ifa_free(&ia->ia_ifa);
- in6_purgeaddr(&ia->ia_ifa);
- return error;
-}
-
-/*
- * Leave multicast groups. Factored out from in6_purgeaddr().
- * This entire work should only be done once, for the default FIB.
- */
-static int
-in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0)
-{
- struct sockaddr_in6 mltaddr, mltmask;
- struct in6_multi_mship *imm;
- struct rtentry *rt;
- struct sockaddr_in6 sin6;
- int error;
-
- /*
- * Leave from multicast groups we have joined for the interface.
- */
- while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
- LIST_REMOVE(imm, i6mm_chain);
- in6_leavegroup(imm);
- }
-
- /*
- * Remove the link-local all-nodes address.
- */
- bzero(&mltmask, sizeof(mltmask));
- mltmask.sin6_len = sizeof(struct sockaddr_in6);
- mltmask.sin6_family = AF_INET6;
- mltmask.sin6_addr = in6mask32;
-
- bzero(&mltaddr, sizeof(mltaddr));
- mltaddr.sin6_len = sizeof(struct sockaddr_in6);
- mltaddr.sin6_family = AF_INET6;
- mltaddr.sin6_addr = in6addr_linklocal_allnodes;
-
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
- return (error);
-
- /*
- * As for the mltaddr above, proactively prepare the sin6 to avoid
- * rtentry un- and re-locking.
- */
- if (ifa0 != NULL) {
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_len = sizeof(sin6);
- sin6.sin6_family = AF_INET6;
- memcpy(&sin6.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr,
- sizeof(sin6.sin6_addr));
- error = in6_setscope(&sin6.sin6_addr, ifa0->ifa_ifp, NULL);
- if (error != 0)
- return (error);
- }
-
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL && rt->rt_gateway != NULL &&
- (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
- &ia->ia_addr.sin6_addr,
- sizeof(ia->ia_addr.sin6_addr)) == 0)) {
- /*
- * If no more IPv6 address exists on this interface then
- * remove the multicast address route.
- */
- if (ifa0 == NULL) {
- memcpy(&mltaddr.sin6_addr,
- &satosin6(rt_key(rt))->sin6_addr,
- sizeof(mltaddr.sin6_addr));
- RTFREE_LOCKED(rt);
- error = in6_rtrequest(RTM_DELETE,
- (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- log(LOG_INFO, "%s: link-local all-nodes "
- "multicast address deletion error\n",
- __func__);
- } else {
- /*
- * Replace the gateway of the route.
- */
- memcpy(rt->rt_gateway, &sin6, sizeof(sin6));
- RTFREE_LOCKED(rt);
- }
- } else {
- if (rt != NULL)
- RTFREE_LOCKED(rt);
- }
-
- /*
- * Remove the node-local all-nodes address.
- */
- mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
- if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0)
- return (error);
-
- rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB);
- if (rt != NULL && rt->rt_gateway != NULL &&
- (memcmp(&satosin6(rt->rt_gateway)->sin6_addr,
- &ia->ia_addr.sin6_addr,
- sizeof(ia->ia_addr.sin6_addr)) == 0)) {
- /*
- * If no more IPv6 address exists on this interface then
- * remove the multicast address route.
- */
- if (ifa0 == NULL) {
- memcpy(&mltaddr.sin6_addr,
- &satosin6(rt_key(rt))->sin6_addr,
- sizeof(mltaddr.sin6_addr));
-
- RTFREE_LOCKED(rt);
- error = in6_rtrequest(RTM_DELETE,
- (struct sockaddr *)&mltaddr,
- (struct sockaddr *)&ia->ia_addr,
- (struct sockaddr *)&mltmask, RTF_UP,
- (struct rtentry **)0, RT_DEFAULT_FIB);
- if (error)
- log(LOG_INFO, "%s: node-local all-nodes"
- "multicast address deletion error\n",
- __func__);
- } else {
- /*
- * Replace the gateway of the route.
- */
- memcpy(rt->rt_gateway, &sin6, sizeof(sin6));
- RTFREE_LOCKED(rt);
- }
- } else {
- if (rt != NULL)
- RTFREE_LOCKED(rt);
- }
-
- return (0);
}
void
@@ -1455,26 +1238,11 @@ in6_purgeaddr(struct ifaddr *ifa)
{
struct ifnet *ifp = ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
+ struct in6_multi_mship *imm;
int plen, error;
- struct ifaddr *ifa0;
- /*
- * find another IPv6 address as the gateway for the
- * link-local and node-local all-nodes multicast
- * address routes
- */
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) {
- if ((ifa0->ifa_addr->sa_family != AF_INET6) ||
- memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr,
- &ia->ia_addr.sin6_addr, sizeof(struct in6_addr)) == 0)
- continue;
- else
- break;
- }
- if (ifa0 != NULL)
- ifa_ref(ifa0);
- IF_ADDR_RUNLOCK(ifp);
+ if (ifa->ifa_carp)
+ (*carp_detach_p)(ifa);
/*
* Remove the loopback route to the interface address.
@@ -1491,32 +1259,30 @@ in6_purgeaddr(struct ifaddr *ifa)
/* stop DAD processing */
nd6_dad_stop(ifa);
- /* Remove local address entry from lltable. */
- in6_ifremloop(ifa);
-
/* Leave multicast groups. */
- error = in6_purgeaddr_mc(ifp, ia, ifa0);
-
- if (ifa0 != NULL)
- ifa_free(ifa0);
-
+ while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
+ }
plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
if ((ia->ia_flags & IFA_ROUTE) && plen == 128) {
error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags |
- (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0);
+ (ia->ia_dstaddr.sin6_family == AF_INET6 ? RTF_HOST : 0));
if (error != 0)
log(LOG_INFO, "%s: err=%d, destination address delete "
"failed\n", __func__, error);
ia->ia_flags &= ~IFA_ROUTE;
}
+ in6_newaddrmsg(ia, RTM_DELETE);
in6_unlink_ifa(ia, ifp);
}
static void
in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
{
- int s = splnet();
+ char ip6buf[INET6_ADDRSTRLEN];
+ int remove_lle;
IF_ADDR_WLOCK(ifp);
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
@@ -1530,21 +1296,28 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
*/
IN6_IFADDR_WLOCK();
TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
+ LIST_REMOVE(ia, ia6_hash);
IN6_IFADDR_WUNLOCK();
/*
* Release the reference to the base prefix. There should be a
* positive reference.
*/
+ remove_lle = 0;
if (ia->ia6_ndpr == NULL) {
nd6log((LOG_NOTICE,
"in6_unlink_ifa: autoconf'ed address "
- "%p has no prefix\n", ia));
+ "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
} else {
ia->ia6_ndpr->ndpr_refcnt--;
+ /* Do not delete lles within prefix if refcont != 0 */
+ if (ia->ia6_ndpr->ndpr_refcnt == 0)
+ remove_lle = 1;
ia->ia6_ndpr = NULL;
}
+ nd6_rem_ifa_lle(ia, remove_lle);
+
/*
* Also, if the address being removed is autoconf'ed, call
* pfxlist_onlink_check() since the release might affect the status of
@@ -1554,335 +1327,63 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
pfxlist_onlink_check();
}
ifa_free(&ia->ia_ifa); /* in6_ifaddrhead */
- splx(s);
-}
-
-void
-in6_purgeif(struct ifnet *ifp)
-{
- struct ifaddr *ifa, *nifa;
-
- TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
- continue;
- in6_purgeaddr(ifa);
- }
-
- in6_ifdetach(ifp);
}
/*
- * SIOC[GAD]LIFADDR.
- * SIOCGLIFADDR: get first address. (?)
- * SIOCGLIFADDR with IFLR_PREFIX:
- * get first address that matches the specified prefix.
- * SIOCALIFADDR: add the specified address.
- * SIOCALIFADDR with IFLR_PREFIX:
- * add the specified prefix, filling hostid part from
- * the first link-local address. prefixlen must be <= 64.
- * SIOCDLIFADDR: delete the specified address.
- * SIOCDLIFADDR with IFLR_PREFIX:
- * delete the first address that matches the specified prefix.
- * return values:
- * EINVAL on invalid parameters
- * EADDRNOTAVAIL on prefix match failed/specified address not found
- * other values may be returned from in6_ioctl()
- *
- * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64.
- * this is to accomodate address naming scheme other than RFC2374,
- * in the future.
- * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
- * address encoding scheme. (see figure on page 8)
+ * Notifies other subsystems about address change/arrival:
+ * 1) Notifies device handler on the first IPv6 address assignment
+ * 2) Handle routing table changes for P2P links and route
+ * 3) Handle routing table changes for address host route
*/
static int
-in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
- struct ifnet *ifp, struct thread *td)
+in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia,
+ struct in6_aliasreq *ifra, int hostIsNew)
{
- struct if_laddrreq *iflr = (struct if_laddrreq *)data;
+ int error = 0, plen, ifacount = 0;
struct ifaddr *ifa;
- struct sockaddr *sa;
-
- /* sanity checks */
- if (!data || !ifp) {
- panic("invalid argument to in6_lifaddr_ioctl");
- /* NOTREACHED */
- }
-
- switch (cmd) {
- case SIOCGLIFADDR:
- /* address must be specified on GET with IFLR_PREFIX */
- if ((iflr->flags & IFLR_PREFIX) == 0)
- break;
- /* FALLTHROUGH */
- case SIOCALIFADDR:
- case SIOCDLIFADDR:
- /* address must be specified on ADD and DELETE */
- sa = (struct sockaddr *)&iflr->addr;
- if (sa->sa_family != AF_INET6)
- return EINVAL;
- if (sa->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
- /* XXX need improvement */
- sa = (struct sockaddr *)&iflr->dstaddr;
- if (sa->sa_family && sa->sa_family != AF_INET6)
- return EINVAL;
- if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6))
- return EINVAL;
- break;
- default: /* shouldn't happen */
-#if 0
- panic("invalid cmd to in6_lifaddr_ioctl");
- /* NOTREACHED */
-#else
- return EOPNOTSUPP;
-#endif
- }
- if (sizeof(struct in6_addr) * 8 < iflr->prefixlen)
- return EINVAL;
-
- switch (cmd) {
- case SIOCALIFADDR:
- {
- struct in6_aliasreq ifra;
- struct in6_addr *hostid = NULL;
- int prefixlen;
-
- ifa = NULL;
- if ((iflr->flags & IFLR_PREFIX) != 0) {
- struct sockaddr_in6 *sin6;
-
- /*
- * hostid is to fill in the hostid part of the
- * address. hostid points to the first link-local
- * address attached to the interface.
- */
- ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
- if (!ifa)
- return EADDRNOTAVAIL;
- hostid = IFA_IN6(ifa);
-
- /* prefixlen must be <= 64. */
- if (64 < iflr->prefixlen) {
- if (ifa != NULL)
- ifa_free(ifa);
- return EINVAL;
- }
- prefixlen = iflr->prefixlen;
-
- /* hostid part must be zero. */
- sin6 = (struct sockaddr_in6 *)&iflr->addr;
- if (sin6->sin6_addr.s6_addr32[2] != 0 ||
- sin6->sin6_addr.s6_addr32[3] != 0) {
- if (ifa != NULL)
- ifa_free(ifa);
- return EINVAL;
- }
- } else
- prefixlen = iflr->prefixlen;
-
- /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
- bzero(&ifra, sizeof(ifra));
- bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name));
-
- bcopy(&iflr->addr, &ifra.ifra_addr,
- ((struct sockaddr *)&iflr->addr)->sa_len);
- if (hostid) {
- /* fill in hostid part */
- ifra.ifra_addr.sin6_addr.s6_addr32[2] =
- hostid->s6_addr32[2];
- ifra.ifra_addr.sin6_addr.s6_addr32[3] =
- hostid->s6_addr32[3];
- }
-
- if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */
- bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
- ((struct sockaddr *)&iflr->dstaddr)->sa_len);
- if (hostid) {
- ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] =
- hostid->s6_addr32[2];
- ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] =
- hostid->s6_addr32[3];
- }
- }
- if (ifa != NULL)
- ifa_free(ifa);
-
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);
-
- ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
- return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td);
- }
- case SIOCGLIFADDR:
- case SIOCDLIFADDR:
- {
- struct in6_ifaddr *ia;
- struct in6_addr mask, candidate, match;
- struct sockaddr_in6 *sin6;
- int cmp;
-
- bzero(&mask, sizeof(mask));
- if (iflr->flags & IFLR_PREFIX) {
- /* lookup a prefix rather than address. */
- in6_prefixlen2mask(&mask, iflr->prefixlen);
-
- sin6 = (struct sockaddr_in6 *)&iflr->addr;
- bcopy(&sin6->sin6_addr, &match, sizeof(match));
- match.s6_addr32[0] &= mask.s6_addr32[0];
- match.s6_addr32[1] &= mask.s6_addr32[1];
- match.s6_addr32[2] &= mask.s6_addr32[2];
- match.s6_addr32[3] &= mask.s6_addr32[3];
-
- /* if you set extra bits, that's wrong */
- if (bcmp(&match, &sin6->sin6_addr, sizeof(match)))
- return EINVAL;
-
- cmp = 1;
- } else {
- if (cmd == SIOCGLIFADDR) {
- /* on getting an address, take the 1st match */
- cmp = 0; /* XXX */
- } else {
- /* on deleting an address, do exact match */
- in6_prefixlen2mask(&mask, 128);
- sin6 = (struct sockaddr_in6 *)&iflr->addr;
- bcopy(&sin6->sin6_addr, &match, sizeof(match));
-
- cmp = 1;
- }
- }
+ struct sockaddr_in6 *pdst;
+ char ip6buf[INET6_ADDRSTRLEN];
+ /*
+ * Give the interface a chance to initialize
+ * if this is its first address,
+ */
+ if (hostIsNew != 0) {
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
- if (!cmp)
- break;
-
- /*
- * XXX: this is adhoc, but is necessary to allow
- * a user to specify fe80::/64 (not /10) for a
- * link-local address.
- */
- bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
- in6_clearscope(&candidate);
- candidate.s6_addr32[0] &= mask.s6_addr32[0];
- candidate.s6_addr32[1] &= mask.s6_addr32[1];
- candidate.s6_addr32[2] &= mask.s6_addr32[2];
- candidate.s6_addr32[3] &= mask.s6_addr32[3];
- if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
- break;
+ ifacount++;
}
- if (ifa != NULL)
- ifa_ref(ifa);
IF_ADDR_RUNLOCK(ifp);
- if (!ifa)
- return EADDRNOTAVAIL;
- ia = ifa2ia6(ifa);
-
- if (cmd == SIOCGLIFADDR) {
- int error;
-
- /* fill in the if_laddrreq structure */
- bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
- error = sa6_recoverscope(
- (struct sockaddr_in6 *)&iflr->addr);
- if (error != 0) {
- ifa_free(ifa);
- return (error);
- }
-
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
- bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
- ia->ia_dstaddr.sin6_len);
- error = sa6_recoverscope(
- (struct sockaddr_in6 *)&iflr->dstaddr);
- if (error != 0) {
- ifa_free(ifa);
- return (error);
- }
- } else
- bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
-
- iflr->prefixlen =
- in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
-
- iflr->flags = ia->ia6_flags; /* XXX */
- ifa_free(ifa);
-
- return 0;
- } else {
- struct in6_aliasreq ifra;
-
- /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
- bzero(&ifra, sizeof(ifra));
- bcopy(iflr->iflr_name, ifra.ifra_name,
- sizeof(ifra.ifra_name));
-
- bcopy(&ia->ia_addr, &ifra.ifra_addr,
- ia->ia_addr.sin6_len);
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
- bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
- ia->ia_dstaddr.sin6_len);
- } else {
- bzero(&ifra.ifra_dstaddr,
- sizeof(ifra.ifra_dstaddr));
- }
- bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr,
- ia->ia_prefixmask.sin6_len);
-
- ifra.ifra_flags = ia->ia6_flags;
- ifa_free(ifa);
- return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra,
- ifp, td);
- }
- }
}
- return EOPNOTSUPP; /* just for safety */
-}
-
-/*
- * Initialize an interface's IPv6 address and routing table entry.
- */
-static int
-in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
- struct sockaddr_in6 *sin6, int newhost)
-{
- int error = 0, plen, ifacount = 0;
- int s = splimp();
- struct ifaddr *ifa;
-
- /*
- * Give the interface a chance to initialize
- * if this is its first address,
- * and to validate the address if necessary.
- */
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
- continue;
- ifacount++;
- }
- IF_ADDR_RUNLOCK(ifp);
-
- ia->ia_addr = *sin6;
-
if (ifacount <= 1 && ifp->if_ioctl) {
error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
- if (error) {
- splx(s);
+ if (error)
return (error);
- }
}
- splx(s);
-
- ia->ia_ifa.ifa_metric = ifp->if_metric;
- /* we could do in(6)_socktrim here, but just omit it at this moment. */
+ /*
+ * If a new destination address is specified, scrub the old one and
+ * install the new destination. Note that the interface must be
+ * p2p or loopback.
+ */
+ pdst = &ifra->ifra_dstaddr;
+ if (pdst->sin6_family == AF_INET6 &&
+ !IN6_ARE_ADDR_EQUAL(&pdst->sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
+ if ((ia->ia_flags & IFA_ROUTE) != 0 &&
+ (rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST) != 0)) {
+ nd6log((LOG_ERR, "in6_update_ifa_internal: failed to "
+ "remove a route to the old destination: %s\n",
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
+ /* proceed anyway... */
+ } else
+ ia->ia_flags &= ~IFA_ROUTE;
+ ia->ia_dstaddr = *pdst;
+ }
/*
- * Special case:
* If a new destination address is specified for a point-to-point
* interface, install a route to the destination as an interface
* direct route.
@@ -1893,19 +1394,19 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
ia->ia_dstaddr.sin6_family == AF_INET6) {
int rtflags = RTF_UP | RTF_HOST;
- error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags);
- if (error)
- return (error);
- ia->ia_flags |= IFA_ROUTE;
/*
* Handle the case for ::1 .
*/
if (ifp->if_flags & IFF_LOOPBACK)
ia->ia_flags |= IFA_RTSELF;
+ error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags);
+ if (error)
+ return (error);
+ ia->ia_flags |= IFA_ROUTE;
}
/*
- * add a loopback route to self
+ * add a loopback route to self if not exists
*/
if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) {
error = ifa_add_loopback_route((struct ifaddr *)ia,
@@ -1914,10 +1415,6 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
ia->ia_flags |= IFA_RTSELF;
}
- /* Add local address to lltable, if necessary (ex. on p2p link). */
- if (newhost)
- in6_ifaddloop(&(ia->ia_ifa));
-
return (error);
}
@@ -1949,11 +1446,35 @@ in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
/*
+ * find the internet address corresponding to a given address.
+ * ifaddr is returned referenced.
+ */
+struct in6_ifaddr *
+in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid)
+{
+ struct rm_priotracker in6_ifa_tracker;
+ struct in6_ifaddr *ia;
+
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
+ LIST_FOREACH(ia, IN6ADDR_HASH(addr), ia6_hash) {
+ if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), addr)) {
+ if (zoneid != 0 &&
+ zoneid != ia->ia_addr.sin6_scope_id)
+ continue;
+ ifa_ref(&ia->ia_ifa);
+ break;
+ }
+ }
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+ return (ia);
+}
+
+/*
* find the internet address corresponding to a given interface and address.
* ifaddr is returned referenced.
*/
struct in6_ifaddr *
-in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
+in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr)
{
struct ifaddr *ifa;
@@ -1982,7 +1503,7 @@ in6ifa_llaonifp(struct ifnet *ifp)
if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
return (NULL);
- if_addr_rlock(ifp);
+ IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
@@ -1992,7 +1513,7 @@ in6ifa_llaonifp(struct ifnet *ifp)
IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr))
break;
}
- if_addr_runlock(ifp);
+ IF_ADDR_RUNLOCK(ifp);
return ((struct in6_ifaddr *)ifa);
}
@@ -2082,20 +1603,21 @@ ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
int
in6_localaddr(struct in6_addr *in6)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_ifaddr *ia;
if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
return 1;
- IN6_IFADDR_RLOCK();
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
&ia->ia_prefixmask.sin6_addr)) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return 1;
}
}
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0);
}
@@ -2107,37 +1629,67 @@ in6_localaddr(struct in6_addr *in6)
int
in6_localip(struct in6_addr *in6)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_ifaddr *ia;
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
+ LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) {
if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (1);
}
}
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0);
}
+
+/*
+ * Return 1 if an internet address is configured on an interface.
+ */
+int
+in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr)
+{
+ struct in6_addr in6;
+ struct ifaddr *ifa;
+ struct in6_ifaddr *ia6;
+ in6 = *addr;
+ if (in6_clearscope(&in6))
+ return (0);
+ in6_setscope(&in6, ifp, NULL);
+
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ ia6 = (struct in6_ifaddr *)ifa;
+ if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) {
+ IF_ADDR_RUNLOCK(ifp);
+ return (1);
+ }
+ }
+ IF_ADDR_RUNLOCK(ifp);
+
+ return (0);
+}
int
in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_ifaddr *ia;
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
- &sa6->sin6_addr) &&
- (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
- IN6_IFADDR_RUNLOCK();
- return (1); /* true */
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
+ LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) {
+ if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr)) {
+ if (ia->ia6_flags & IN6_IFF_DEPRECATED) {
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
+ return (1); /* true */
+ }
+ break;
}
-
- /* XXX: do we still have to go thru the rest of the list? */
}
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0); /* false */
}
@@ -2222,7 +1774,7 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
{
int dst_scope = in6_addrscope(dst), blen = -1, tlen;
struct ifaddr *ifa;
- struct in6_ifaddr *besta = 0;
+ struct in6_ifaddr *besta = NULL;
struct in6_ifaddr *dep[2]; /* last-resort: deprecated */
dep[0] = dep[1] = NULL;
@@ -2347,37 +1899,24 @@ in6if_do_dad(struct ifnet *ifp)
if ((ifp->if_flags & IFF_LOOPBACK) != 0)
return (0);
- if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) ||
+ (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD))
return (0);
- switch (ifp->if_type) {
-#ifdef IFT_DUMMY
- case IFT_DUMMY:
-#endif
- case IFT_FAITH:
- /*
- * These interfaces do not have the IFF_LOOPBACK flag,
- * but loop packets back. We do not have to do DAD on such
- * interfaces. We should even omit it, because loop-backed
- * NS would confuse the DAD procedure.
- */
- return (0);
- default:
- /*
- * Our DAD routine requires the interface up and running.
- * However, some interfaces can be up before the RUNNING
- * status. Additionaly, users may try to assign addresses
- * before the interface becomes up (or running).
- * We simply skip DAD in such a case as a work around.
- * XXX: we should rather mark "tentative" on such addresses,
- * and do DAD after the interface becomes ready.
- */
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)))
- return (0);
+ /*
+ * Our DAD routine requires the interface up and running.
+ * However, some interfaces can be up before the RUNNING
+ * status. Additionally, users may try to assign addresses
+ * before the interface becomes up (or running).
+ * This function returns EAGAIN in that case.
+ * The caller should mark "tentative" on the address instead of
+ * performing DAD immediately.
+ */
+ if (!((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)))
+ return (EAGAIN);
- return (1);
- }
+ return (1);
}
/*
@@ -2391,7 +1930,7 @@ in6_setmaxmtu(void)
struct ifnet *ifp;
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
/* this function can be called during ifnet initialization */
if (!ifp->if_afdata[AF_INET6])
continue;
@@ -2417,18 +1956,10 @@ in6_if2idlen(struct ifnet *ifp)
{
switch (ifp->if_type) {
case IFT_ETHER: /* RFC2464 */
-#ifdef IFT_PROPVIRTUAL
case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */
-#endif
-#ifdef IFT_L2VLAN
case IFT_L2VLAN: /* ditto */
-#endif
-#ifdef IFT_IEEE80211
case IFT_IEEE80211: /* ditto */
-#endif
-#ifdef IFT_MIP
- case IFT_MIP: /* ditto */
-#endif
+ case IFT_BRIDGE: /* bridge(4) only does Ethernet-like links */
case IFT_INFINIBAND:
return (64);
case IFT_FDDI: /* RFC2467 */
@@ -2468,25 +1999,38 @@ in6_if2idlen(struct ifnet *ifp)
struct in6_llentry {
struct llentry base;
- struct sockaddr_in6 l3_addr6;
};
+#define IN6_LLTBL_DEFAULT_HSIZE 32
+#define IN6_LLTBL_HASH(k, h) \
+ (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1))
+
/*
- * Deletes an address from the address table.
- * This function is called by the timer functions
- * such as arptimer() and nd6_llinfo_timer(), and
- * the caller does the locking.
+ * Do actual deallocation of @lle.
*/
static void
-in6_lltable_free(struct lltable *llt, struct llentry *lle)
+in6_lltable_destroy_lle_unlocked(struct llentry *lle)
{
- LLE_WUNLOCK(lle);
+
LLE_LOCK_DESTROY(lle);
+ LLE_REQ_DESTROY(lle);
free(lle, M_LLTABLE);
}
+/*
+ * Called by LLE_FREE_LOCKED when number of references
+ * drops to zero.
+ */
+static void
+in6_lltable_destroy_lle(struct llentry *lle)
+{
+
+ LLE_WUNLOCK(lle);
+ in6_lltable_destroy_lle_unlocked(lle);
+}
+
static struct llentry *
-in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
+in6_lltable_new(const struct in6_addr *addr6, u_int flags)
{
struct in6_llentry *lle;
@@ -2494,45 +2038,69 @@ in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
if (lle == NULL) /* NB: caller generates msg */
return NULL;
- lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
+ lle->base.r_l3addr.addr6 = *addr6;
lle->base.lle_refcnt = 1;
- lle->base.lle_free = in6_lltable_free;
+ lle->base.lle_free = in6_lltable_destroy_lle;
LLE_LOCK_INIT(&lle->base);
- callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
- CALLOUT_RETURNUNLOCKED);
+ LLE_REQ_INIT(&lle->base);
+ callout_init(&lle->base.lle_timer, 1);
return (&lle->base);
}
+static int
+in6_lltable_match_prefix(const struct sockaddr *saddr,
+ const struct sockaddr *smask, u_int flags, struct llentry *lle)
+{
+ const struct in6_addr *addr, *mask, *lle_addr;
+
+ addr = &((const struct sockaddr_in6 *)saddr)->sin6_addr;
+ mask = &((const struct sockaddr_in6 *)smask)->sin6_addr;
+ lle_addr = &lle->r_l3addr.addr6;
+
+ if (IN6_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0)
+ return (0);
+
+ if (lle->la_flags & LLE_IFADDR) {
+
+ /*
+ * Delete LLE_IFADDR records IFF address & flag matches.
+ * Note that addr is the interface address within prefix
+ * being matched.
+ */
+ if (IN6_ARE_ADDR_EQUAL(addr, lle_addr) &&
+ (flags & LLE_STATIC) != 0)
+ return (1);
+ return (0);
+ }
+
+ /* flags & LLE_STATIC means deleting both dynamic and static entries */
+ if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))
+ return (1);
+
+ return (0);
+}
+
static void
-in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
- const struct sockaddr *mask, u_int flags)
+in6_lltable_free_entry(struct lltable *llt, struct llentry *lle)
{
- const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix;
- const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask;
- struct llentry *lle, *next;
- int i;
+ struct ifnet *ifp;
- /*
- * (flags & LLE_STATIC) means deleting all entries
- * including static ND6 entries.
- */
- IF_AFDATA_WLOCK(llt->llt_ifp);
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
- if (IN6_ARE_MASKED_ADDR_EQUAL(
- &satosin6(L3_ADDR(lle))->sin6_addr,
- &pfx->sin6_addr, &msk->sin6_addr) &&
- ((flags & LLE_STATIC) ||
- !(lle->la_flags & LLE_STATIC))) {
- LLE_WLOCK(lle);
- if (callout_stop(&lle->la_timer))
- LLE_REMREF(lle);
- llentry_free(lle);
- }
- }
+ LLE_WLOCK_ASSERT(lle);
+ KASSERT(llt != NULL, ("lltable is NULL"));
+
+ /* Unlink entry from table */
+ if ((lle->la_flags & LLE_LINKED) != 0) {
+
+ ifp = llt->llt_ifp;
+ IF_AFDATA_WLOCK_ASSERT(ifp);
+ lltable_unlink_entry(llt, lle);
}
- IF_AFDATA_WUNLOCK(llt->llt_ifp);
+
+ if (callout_stop(&lle->lle_timer) > 0)
+ LLE_REMREF(lle);
+
+ llentry_free(lle);
}
static int
@@ -2540,122 +2108,178 @@ in6_lltable_rtcheck(struct ifnet *ifp,
u_int flags,
const struct sockaddr *l3addr)
{
- struct rtentry *rt;
+ const struct sockaddr_in6 *sin6;
+ struct nhop6_basic nh6;
+ struct in6_addr dst;
+ uint32_t scopeid;
+ int error;
char ip6buf[INET6_ADDRSTRLEN];
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
/* Our local addresses are always only installed on the default FIB. */
- /* XXX rtalloc1 should take a const param */
- rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0,
- RT_DEFAULT_FIB);
- if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) {
+
+ sin6 = (const struct sockaddr_in6 *)l3addr;
+ in6_splitscope(&sin6->sin6_addr, &dst, &scopeid);
+ error = fib6_lookup_nh_basic(RT_DEFAULT_FIB, &dst, scopeid, 0, 0, &nh6);
+ if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) {
struct ifaddr *ifa;
/*
* Create an ND6 cache for an IPv6 neighbor
* that is not covered by our own prefix.
*/
- /* XXX ifaof_ifpforaddr should take a const param */
- ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp);
+ ifa = ifaof_ifpforaddr(l3addr, ifp);
if (ifa != NULL) {
ifa_free(ifa);
- if (rt != NULL)
- RTFREE_LOCKED(rt);
return 0;
}
log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
- ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr));
- if (rt != NULL)
- RTFREE_LOCKED(rt);
+ ip6_sprintf(ip6buf, &sin6->sin6_addr));
return EINVAL;
}
- RTFREE_LOCKED(rt);
return 0;
}
-static struct llentry *
-in6_lltable_lookup(struct lltable *llt, u_int flags,
- const struct sockaddr *l3addr)
+static inline uint32_t
+in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize)
+{
+
+ return (IN6_LLTBL_HASH(dst->s6_addr32[3], hsize));
+}
+
+static uint32_t
+in6_lltable_hash(const struct llentry *lle, uint32_t hsize)
+{
+
+ return (in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize));
+}
+
+static void
+in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
+{
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_addr = lle->r_l3addr.addr6;
+}
+
+static inline struct llentry *
+in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst)
{
- const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
- struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
struct llentries *lleh;
- u_int hashkey;
-
- IF_AFDATA_LOCK_ASSERT(ifp);
- KASSERT(l3addr->sa_family == AF_INET6,
- ("sin_family %d", l3addr->sa_family));
+ u_int hashidx;
- hashkey = sin6->sin6_addr.s6_addr32[3];
- lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
+ hashidx = in6_lltable_hash_dst(dst, llt->llt_hsize);
+ lleh = &llt->lle_head[hashidx];
LIST_FOREACH(lle, lleh, lle_next) {
- struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle);
if (lle->la_flags & LLE_DELETED)
continue;
- if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr,
- sizeof(struct in6_addr)) == 0)
+ if (IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst))
break;
}
- if (lle == NULL) {
- if (!(flags & LLE_CREATE))
- return (NULL);
- IF_AFDATA_WLOCK_ASSERT(ifp);
- /*
- * A route that covers the given address must have
- * been installed 1st because we are doing a resolution,
- * verify this.
- */
- if (!(flags & LLE_IFADDR) &&
- in6_lltable_rtcheck(ifp, flags, l3addr) != 0)
- return NULL;
-
- lle = in6_lltable_new(l3addr, flags);
- if (lle == NULL) {
- log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
- return NULL;
- }
- lle->la_flags = flags & ~LLE_CREATE;
- if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
- bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
- lle->la_flags |= (LLE_VALID | LLE_STATIC);
- }
+ return (lle);
+}
- lle->lle_tbl = llt;
- lle->lle_head = lleh;
- lle->la_flags |= LLE_LINKED;
- LIST_INSERT_HEAD(lleh, lle, lle_next);
- } else if (flags & LLE_DELETE) {
- if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
- LLE_WLOCK(lle);
- lle->la_flags |= LLE_DELETED;
+static void
+in6_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
+{
+
+ lle->la_flags |= LLE_DELETED;
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
#ifdef DIAGNOSTIC
- log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
#endif
- if ((lle->la_flags &
- (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
- llentry_free(lle);
- else
- LLE_WUNLOCK(lle);
- }
- lle = (void *)-1;
+ llentry_free(lle);
+}
+
+static struct llentry *
+in6_lltable_alloc(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
+ struct ifnet *ifp = llt->llt_ifp;
+ struct llentry *lle;
+ char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
+
+ KASSERT(l3addr->sa_family == AF_INET6,
+ ("sin_family %d", l3addr->sa_family));
+
+ /*
+ * A route that covers the given address must have
+ * been installed 1st because we are doing a resolution,
+ * verify this.
+ */
+ if (!(flags & LLE_IFADDR) &&
+ in6_lltable_rtcheck(ifp, flags, l3addr) != 0)
+ return (NULL);
+
+ lle = in6_lltable_new(&sin6->sin6_addr, flags);
+ if (lle == NULL) {
+ log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
+ return (NULL);
}
- if (LLE_IS_VALID(lle)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WLOCK(lle);
- else
- LLE_RLOCK(lle);
+ lle->la_flags = flags;
+ if ((flags & LLE_IFADDR) == LLE_IFADDR) {
+ linkhdrsize = LLE_MAX_LINKHDR;
+ if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp),
+ linkhdr, &linkhdrsize, &lladdr_off) != 0) {
+ in6_lltable_destroy_lle_unlocked(lle);
+ return (NULL);
+ }
+ lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
+ lladdr_off);
+ lle->la_flags |= LLE_STATIC;
}
+
+ if ((lle->la_flags & LLE_STATIC) != 0)
+ lle->ln_state = ND6_LLINFO_REACHABLE;
+
+ return (lle);
+}
+
+static struct llentry *
+in6_lltable_lookup(struct lltable *llt, u_int flags,
+ const struct sockaddr *l3addr)
+{
+ const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
+ struct llentry *lle;
+
+ IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
+ KASSERT(l3addr->sa_family == AF_INET6,
+ ("sin_family %d", l3addr->sa_family));
+
+ lle = in6_lltable_find_dst(llt, &sin6->sin6_addr);
+
+ if (lle == NULL)
+ return (NULL);
+
+ KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
+ (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
+ flags));
+
+ if (flags & LLE_UNLOCKED)
+ return (lle);
+
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WLOCK(lle);
+ else
+ LLE_RLOCK(lle);
return (lle);
}
static int
-in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
+in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
+ struct sysctl_req *wr)
{
struct ifnet *ifp = llt->llt_ifp;
- struct llentry *lle;
/* XXX stack use */
struct {
struct rt_msghdr rtm;
@@ -2668,39 +2292,32 @@ in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
#endif
struct sockaddr_dl sdl;
} ndpc;
- int i, error;
-
- if (ifp->if_flags & IFF_LOOPBACK)
- return 0;
-
- LLTABLE_LOCK_ASSERT();
-
- error = 0;
- for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
- LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
- struct sockaddr_dl *sdl;
+ struct sockaddr_dl *sdl;
+ int error;
- /* skip deleted or invalid entries */
- if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID)
- continue;
+ bzero(&ndpc, sizeof(ndpc));
+ /* skip deleted entries */
+ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
+ return (0);
/* Skip if jailed and not a valid IP of the prison. */
- if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
- continue;
+ lltable_fill_sa_entry(lle,
+ (struct sockaddr *)&ndpc.sin6);
+ if (prison_if(wr->td->td_ucred,
+ (struct sockaddr *)&ndpc.sin6) != 0)
+ return (0);
/*
* produce a msg made of:
* struct rt_msghdr;
* struct sockaddr_in6 (IPv6)
* struct sockaddr_dl;
*/
- bzero(&ndpc, sizeof(ndpc));
ndpc.rtm.rtm_msglen = sizeof(ndpc);
ndpc.rtm.rtm_version = RTM_VERSION;
ndpc.rtm.rtm_type = RTM_GET;
ndpc.rtm.rtm_flags = RTF_UP;
ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
- ndpc.sin6.sin6_family = AF_INET6;
- ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
- bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));
+ if (V_deembed_scopeid)
+ sa6_recoverscope(&ndpc.sin6);
/* publish */
if (lle->la_flags & LLE_PUB)
@@ -2709,22 +2326,56 @@ in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
sdl = &ndpc.sdl;
sdl->sdl_family = AF_LINK;
sdl->sdl_len = sizeof(*sdl);
- sdl->sdl_alen = ifp->if_addrlen;
sdl->sdl_index = ifp->if_index;
sdl->sdl_type = ifp->if_type;
- bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
- ndpc.rtm.rtm_rmx.rmx_expire =
- lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
+ if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
+ sdl->sdl_alen = ifp->if_addrlen;
+ bcopy(lle->ll_addr, LLADDR(sdl),
+ ifp->if_addrlen);
+ } else {
+ sdl->sdl_alen = 0;
+ bzero(LLADDR(sdl), ifp->if_addrlen);
+ }
+ if (lle->la_expire != 0)
+ ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire +
+ lle->lle_remtime / hz +
+ time_second - time_uptime;
ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
if (lle->la_flags & LLE_STATIC)
ndpc.rtm.rtm_flags |= RTF_STATIC;
+ if (lle->la_flags & LLE_IFADDR)
+ ndpc.rtm.rtm_flags |= RTF_PINNED;
+ if (lle->ln_router != 0)
+ ndpc.rtm.rtm_flags |= RTF_GATEWAY;
+ ndpc.rtm.rtm_rmx.rmx_pksent = lle->la_asked;
+ /* Store state in rmx_weight value */
+ ndpc.rtm.rtm_rmx.rmx_state = lle->ln_state;
ndpc.rtm.rtm_index = ifp->if_index;
error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
- if (error)
- break;
- }
- }
- return error;
+
+ return (error);
+}
+
+static struct lltable *
+in6_lltattach(struct ifnet *ifp)
+{
+ struct lltable *llt;
+
+ llt = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE);
+ llt->llt_af = AF_INET6;
+ llt->llt_ifp = ifp;
+
+ llt->llt_lookup = in6_lltable_lookup;
+ llt->llt_alloc_entry = in6_lltable_alloc;
+ llt->llt_delete_entry = in6_lltable_delete_entry;
+ llt->llt_dump_entry = in6_lltable_dump_entry;
+ llt->llt_hash = in6_lltable_hash;
+ llt->llt_fill_sa_entry = in6_lltable_fill_sa_entry;
+ llt->llt_free_entry = in6_lltable_free_entry;
+ llt->llt_match_prefix = in6_lltable_match_prefix;
+ lltable_link(llt);
+
+ return (llt);
}
void *
@@ -2732,32 +2383,45 @@ in6_domifattach(struct ifnet *ifp)
{
struct in6_ifextra *ext;
+ /* There are not IPv6-capable interfaces. */
+ switch (ifp->if_type) {
+ case IFT_PFLOG:
+ case IFT_PFSYNC:
+ case IFT_USB:
+ return (NULL);
+ }
ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
bzero(ext, sizeof(*ext));
- ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat),
- M_IFADDR, M_WAITOK);
- bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat));
+ ext->in6_ifstat = malloc(sizeof(counter_u64_t) *
+ sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK);
+ COUNTER_ARRAY_ALLOC(ext->in6_ifstat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK);
- ext->icmp6_ifstat =
- (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat),
- M_IFADDR, M_WAITOK);
- bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat));
+ ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) *
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR,
+ M_WAITOK);
+ COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK);
ext->nd_ifinfo = nd6_ifattach(ifp);
ext->scope6_id = scope6_ifattach(ifp);
- ext->lltable = lltable_init(ifp, AF_INET6);
- if (ext->lltable != NULL) {
- ext->lltable->llt_prefix_free = in6_lltable_prefix_free;
- ext->lltable->llt_lookup = in6_lltable_lookup;
- ext->lltable->llt_dump = in6_lltable_dump;
- }
+ ext->lltable = in6_lltattach(ifp);
ext->mld_ifinfo = mld_domifattach(ifp);
return ext;
}
+int
+in6_domifmtu(struct ifnet *ifp)
+{
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return ifp->if_mtu;
+
+ return (IN6_LINKMTU(ifp));
+}
+
void
in6_domifdetach(struct ifnet *ifp, void *aux)
{
@@ -2765,9 +2429,13 @@ in6_domifdetach(struct ifnet *ifp, void *aux)
mld_domifdetach(ifp);
scope6_ifdetach(ext->scope6_id);
- nd6_ifdetach(ext->nd_ifinfo);
+ nd6_ifdetach(ifp, ext->nd_ifinfo);
lltable_free(ext->lltable);
+ COUNTER_ARRAY_FREE(ext->in6_ifstat,
+ sizeof(struct in6_ifstat) / sizeof(uint64_t));
free(ext->in6_ifstat, M_IFADDR);
+ COUNTER_ARRAY_FREE(ext->icmp6_ifstat,
+ sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
free(ext->icmp6_ifstat, M_IFADDR);
free(ext, M_IFADDR);
}
diff --git a/freebsd/sys/netinet6/in6.h b/freebsd/sys/netinet6/in6.h
index 616f1009..62c5e0b0 100644
--- a/freebsd/sys/netinet6/in6.h
+++ b/freebsd/sys/netinet6/in6.h
@@ -361,11 +361,11 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
#define IFA6_IS_DEPRECATED(a) \
((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
- (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
(a)->ia6_lifetime.ia6t_pltime)
#define IFA6_IS_INVALID(a) \
((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \
- (u_int32_t)((time_second - (a)->ia6_updatetime)) > \
+ (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
(a)->ia6_lifetime.ia6t_vltime)
#endif /* _KERNEL */
@@ -376,12 +376,24 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
struct route_in6 {
struct rtentry *ro_rt;
struct llentry *ro_lle;
- struct in6_addr *ro_ia6;
- int ro_flags;
+ /*
+ * ro_prepend and ro_plen are only used for bpf to pass in a
+ * preformed header. They are not cacheable.
+ */
+ char *ro_prepend;
+ uint16_t ro_plen;
+ uint16_t ro_flags;
+ uint16_t ro_mtu; /* saved ro_rt mtu */
+ uint16_t spare;
struct sockaddr_in6 ro_dst;
};
#endif
+#ifdef _KERNEL
+#define MTAG_ABI_IPV6 1444287380 /* IPv6 ABI */
+#define IPV6_TAG_DIRECT 0 /* direct-dispatch IPv6 */
+#endif /* _KERNEL */
+
/*
* Options for use with [gs]etsockopt at the IPV6 level.
* First word of comment is data type; bool is stored in int.
@@ -424,8 +436,7 @@ struct route_in6 {
#define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */
#endif /* IPSEC */
-#define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */
-
+ /* 29; unused; was IPV6_FAITH */
#if 1 /* IPV6FIREWALL */
#define IPV6_FW_ADD 30 /* add a firewall rule to chain */
#define IPV6_FW_DEL 31 /* delete a firewall rule from chain */
@@ -481,6 +492,14 @@ struct route_in6 {
#define IPV6_BINDANY 64 /* bool: allow bind to any address */
+#define IPV6_BINDMULTI 65 /* bool; allow multibind to same addr/port */
+#define IPV6_RSS_LISTEN_BUCKET 66 /* int; set RSS listen bucket */
+#define IPV6_FLOWID 67 /* int; flowid of given socket */
+#define IPV6_FLOWTYPE 68 /* int; flowtype of given socket */
+#define IPV6_RSSBUCKETID 69 /* int; RSS bucket ID of given socket */
+#define IPV6_RECVFLOWID 70 /* bool; receive IP6 flowid/flowtype w/ datagram */
+#define IPV6_RECVRSSBUCKETID 71 /* bool; receive IP6 RSS bucket id w/ datagram */
+
/*
* The following option is private; do not use it from user applications.
* It is deliberately defined to the same value as IP_MSFILTER.
@@ -574,7 +593,7 @@ struct ip6_mtuinfo {
#define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */
#define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */
#define IPV6CTL_ACCEPT_RTADV 12
-#define IPV6CTL_KEEPFAITH 13
+ /* 13; unused; was: IPV6CTL_KEEPFAITH */
#define IPV6CTL_LOG_INTERVAL 14
#define IPV6CTL_HDRNESTLIMIT 15
#define IPV6CTL_DAD_COUNT 16
@@ -588,9 +607,9 @@ struct ip6_mtuinfo {
#define IPV6CTL_MAPPED_ADDR 23
#endif
#define IPV6CTL_V6ONLY 24
-#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */
-#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */
-#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */
+/* IPV6CTL_RTEXPIRE 25 deprecated */
+/* IPV6CTL_RTMINEXPIRE 26 deprecated */
+/* IPV6CTL_RTMAXCACHE 27 deprecated */
#define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */
#define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */
@@ -618,17 +637,25 @@ struct ip6_mtuinfo {
* receiving IF. */
#define IPV6CTL_RFC6204W3 50 /* Accept defroute even when forwarding
enabled */
-#define IPV6CTL_MAXID 51
+#define IPV6CTL_INTRQMAXLEN 51 /* max length of IPv6 netisr queue */
+#define IPV6CTL_INTRDQMAXLEN 52 /* max length of direct IPv6 netisr
+ * queue */
+#define IPV6CTL_MAXID 53
#endif /* __BSD_VISIBLE */
/*
- * Redefinition of mbuf flags
- */
-#define M_AUTHIPHDR M_PROTO2
-#define M_DECRYPTED M_PROTO3
-#define M_LOOP M_PROTO4
-#define M_AUTHIPDGM M_PROTO5
-#define M_RTALERT_MLD M_PROTO6
+ * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/,
+ * the protocol specific mbuf flags are shared between them.
+ */
+#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
+#define M_IP6_NEXTHOP M_PROTO2 /* explicit ip nexthop */
+#define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */
+#define M_SKIP_FIREWALL M_PROTO3 /* skip firewall processing */
+#define M_AUTHIPHDR M_PROTO4
+#define M_DECRYPTED M_PROTO5
+#define M_LOOP M_PROTO6
+#define M_AUTHIPDGM M_PROTO7
+#define M_RTALERT_MLD M_PROTO8
#ifdef _KERNEL
struct cmsghdr;
@@ -636,9 +663,13 @@ struct ip6_hdr;
int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t);
int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t);
+int in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t,
+ u_int32_t);
int in6_localaddr(struct in6_addr *);
int in6_localip(struct in6_addr *);
-int in6_addrscope(struct in6_addr *);
+int in6_ifhasaddr(struct ifnet *, struct in6_addr *);
+int in6_addrscope(const struct in6_addr *);
+char *ip6_sprintf(char *, const struct in6_addr *);
struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *);
extern void in6_if_up(struct ifnet *);
struct sockaddr;
@@ -656,7 +687,6 @@ extern void addrsel_policy_init(void);
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
-extern int (*faithprefix_p)(struct in6_addr *);
#endif /* _KERNEL */
#ifndef _SIZE_T_DECLARED
diff --git a/freebsd/sys/netinet6/in6_cksum.c b/freebsd/sys/netinet6/in6_cksum.c
index e129ca71..6eebdadc 100644
--- a/freebsd/sys/netinet6/in6_cksum.c
+++ b/freebsd/sys/netinet6/in6_cksum.c
@@ -147,9 +147,11 @@ in6_cksum_pseudo(struct ip6_hdr *ip6, uint32_t len, uint8_t nxt, uint16_t csum)
* off is an offset where TCP/UDP/ICMP6 header starts.
* len is a total length of a transport segment.
* (e.g. TCP header + TCP payload)
+ * cov is the number of bytes to be taken into account for the checksum
*/
int
-in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+in6_cksum_partial(struct mbuf *m, u_int8_t nxt, u_int32_t off,
+ u_int32_t len, u_int32_t cov)
{
struct ip6_hdr *ip6;
u_int16_t *w, scope;
@@ -217,9 +219,9 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
}
w = (u_int16_t *)(mtod(m, u_char *) + off);
mlen = m->m_len - off;
- if (len < mlen)
- mlen = len;
- len -= mlen;
+ if (cov < mlen)
+ mlen = cov;
+ cov -= mlen;
/*
* Force to even boundary.
*/
@@ -275,7 +277,7 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
* Lastly calculate a summary of the rest of mbufs.
*/
- for (;m && len; m = m->m_next) {
+ for (;m && cov; m = m->m_next) {
if (m->m_len == 0)
continue;
w = mtod(m, u_int16_t *);
@@ -292,12 +294,12 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
sum += s_util.s;
w = (u_int16_t *)((char *)w + 1);
mlen = m->m_len - 1;
- len--;
+ cov--;
} else
mlen = m->m_len;
- if (len < mlen)
- mlen = len;
- len -= mlen;
+ if (cov < mlen)
+ mlen = cov;
+ cov -= mlen;
/*
* Force to even boundary.
*/
@@ -345,7 +347,7 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
} else if (mlen == -1)
s_util.c[0] = *(char *)w;
}
- if (len)
+ if (cov)
panic("in6_cksum: out of data");
if (mlen == -1) {
/* The last mbuf has odd # of bytes. Follow the
@@ -357,3 +359,9 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
REDUCE;
return (~sum & 0xffff);
}
+
+int
+in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
+{
+ return (in6_cksum_partial(m, nxt, off, len, len));
+}
diff --git a/freebsd/sys/netinet6/in6_fib.c b/freebsd/sys/netinet6/in6_fib.c
new file mode 100644
index 00000000..824db1fc
--- /dev/null
+++ b/freebsd/sys/netinet6/in6_fib.c
@@ -0,0 +1,278 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/*-
+ * Copyright (c) 2015
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_mpath.h>
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/systm.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/vnet.h>
+
+#ifdef RADIX_MPATH
+#include <net/radix_mpath.h>
+#endif
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_mroute.h>
+#include <netinet/ip6.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
+
+#include <net/if_types.h>
+
+#ifdef INET6
+static void fib6_rte_to_nh_extended(struct rtentry *rte,
+ const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6);
+static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+ uint32_t flags, struct nhop6_basic *pnh6);
+static struct ifnet *fib6_get_ifaifp(struct rtentry *rte);
+#define RNTORT(p) ((struct rtentry *)(p))
+
+/*
+ * Gets real interface for the @rte.
+ * Returns rt_ifp for !IFF_LOOPBACK routers.
+ * Extracts "real" address interface from interface address
+ * loopback routes.
+ */
+static struct ifnet *
+fib6_get_ifaifp(struct rtentry *rte)
+{
+ struct ifnet *ifp;
+ struct sockaddr_dl *sdl;
+
+ ifp = rte->rt_ifp;
+ if ((ifp->if_flags & IFF_LOOPBACK) &&
+ rte->rt_gateway->sa_family == AF_LINK) {
+ sdl = (struct sockaddr_dl *)rte->rt_gateway;
+ return (ifnet_byindex(sdl->sdl_index));
+ }
+
+ return (ifp);
+}
+
+static void
+fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+ uint32_t flags, struct nhop6_basic *pnh6)
+{
+ struct sockaddr_in6 *gw;
+
+ /* Do explicit nexthop zero unless we're copying it */
+ memset(pnh6, 0, sizeof(*pnh6));
+
+ if ((flags & NHR_IFAIF) != 0)
+ pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ else
+ pnh6->nh_ifp = rte->rt_ifp;
+
+ pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
+ if (rte->rt_flags & RTF_GATEWAY) {
+ gw = (struct sockaddr_in6 *)rte->rt_gateway;
+ pnh6->nh_addr = gw->sin6_addr;
+ in6_clearscope(&pnh6->nh_addr);
+ } else
+ pnh6->nh_addr = *dst;
+ /* Set flags */
+ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
+ gw = (struct sockaddr_in6 *)rt_key(rte);
+ if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
+ pnh6->nh_flags |= NHF_DEFAULT;
+}
+
+static void
+fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst,
+ uint32_t flags, struct nhop6_extended *pnh6)
+{
+ struct sockaddr_in6 *gw;
+
+ /* Do explicit nexthop zero unless we're copying it */
+ memset(pnh6, 0, sizeof(*pnh6));
+
+ if ((flags & NHR_IFAIF) != 0)
+ pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ else
+ pnh6->nh_ifp = rte->rt_ifp;
+
+ pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
+ if (rte->rt_flags & RTF_GATEWAY) {
+ gw = (struct sockaddr_in6 *)rte->rt_gateway;
+ pnh6->nh_addr = gw->sin6_addr;
+ in6_clearscope(&pnh6->nh_addr);
+ } else
+ pnh6->nh_addr = *dst;
+ /* Set flags */
+ pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
+ gw = (struct sockaddr_in6 *)rt_key(rte);
+ if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
+ pnh6->nh_flags |= NHF_DEFAULT;
+}
+
+/*
+ * Performs IPv6 route table lookup on @dst. Returns 0 on success.
+ * Stores basic nexthop info into provided @pnh6 structure.
+ * Note that
+ * - nh_ifp represents logical transmit interface (rt_ifp) by default
+ * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
+ * - mtu from logical transmit interface will be returned.
+ * - nh_ifp cannot be safely dereferenced
+ * - nh_ifp represents rt_ifp (e.g. if looking up address on
+ * interface "ix0" pointer to "ix0" interface will be returned instead
+ * of "lo0")
+ * - howewer mtu from "transmit" interface will be returned.
+ * - scope will be embedded in nh_addr
+ */
+int
+fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid,
+ uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct sockaddr_in6 sin6;
+ struct rtentry *rte;
+
+ KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET6);
+ if (rh == NULL)
+ return (ENOENT);
+
+ /* Prepare lookup key */
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_addr = *dst;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ /* Assume scopeid is valid and embed it directly */
+ if (IN6_IS_SCOPE_LINKLOCAL(dst))
+ sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rte = RNTORT(rn);
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(rte->rt_ifp)) {
+ fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6);
+ RIB_RUNLOCK(rh);
+ return (0);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+/*
+ * Performs IPv6 route table lookup on @dst. Returns 0 on success.
+ * Stores extended nexthop info into provided @pnh6 structure.
+ * Note that
+ * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
+ * - in that case you need to call fib6_free_nh_ext()
+ * - nh_ifp represents logical transmit interface (rt_ifp) by default
+ * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
+ * - mtu from logical transmit interface will be returned.
+ * - scope will be embedded in nh_addr
+ */
+int
+fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
+ uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6)
+{
+ struct rib_head *rh;
+ struct radix_node *rn;
+ struct sockaddr_in6 sin6;
+ struct rtentry *rte;
+
+ KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum"));
+ rh = rt_tables_get_rnh(fibnum, AF_INET6);
+ if (rh == NULL)
+ return (ENOENT);
+
+ /* Prepare lookup key */
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr = *dst;
+ /* Assume scopeid is valid and embed it directly */
+ if (IN6_IS_SCOPE_LINKLOCAL(dst))
+ sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+ RIB_RLOCK(rh);
+ rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+ if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+ rte = RNTORT(rn);
+#ifdef RADIX_MPATH
+ rte = rt_mpath_select(rte, flowid);
+ if (rte == NULL) {
+ RIB_RUNLOCK(rh);
+ return (ENOENT);
+ }
+#endif
+ /* Ensure route & ifp is UP */
+ if (RT_LINK_IS_UP(rte->rt_ifp)) {
+ fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags,
+ pnh6);
+ if ((flags & NHR_REF) != 0) {
+ /* TODO: Do lwref on egress ifp's */
+ }
+ RIB_RUNLOCK(rh);
+
+ return (0);
+ }
+ }
+ RIB_RUNLOCK(rh);
+
+ return (ENOENT);
+}
+
+void
+fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6)
+{
+
+}
+
+#endif
+
diff --git a/freebsd/sys/netinet6/in6_fib.h b/freebsd/sys/netinet6/in6_fib.h
new file mode 100644
index 00000000..3d58cd22
--- /dev/null
+++ b/freebsd/sys/netinet6/in6_fib.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2015
+ * Alexander V. Chernikov <melifaro@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IN6_FIB_H_
+#define _NETINET6_IN6_FIB_H_
+
+/* Basic nexthop info used for uRPF/mtu checks */
+struct nhop6_basic {
+ struct ifnet *nh_ifp; /* Logical egress interface */
+ uint16_t nh_mtu; /* nexthop mtu */
+ uint16_t nh_flags; /* nhop flags */
+ uint8_t spare[4];
+ struct in6_addr nh_addr; /* GW/DST IPv4 address */
+};
+
+/* Does not differ from nhop6_basic */
+struct nhop6_extended {
+ struct ifnet *nh_ifp; /* Logical egress interface */
+ uint16_t nh_mtu; /* nexthop mtu */
+ uint16_t nh_flags; /* nhop flags */
+ uint8_t spare[4];
+ struct in6_addr nh_addr; /* GW/DST IPv6 address */
+ uint64_t spare2[2];
+};
+
+int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst,
+ uint32_t scopeid, uint32_t flags, uint32_t flowid,struct nhop6_basic *pnh6);
+int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,
+ uint32_t scopeid, uint32_t flags, uint32_t flowid,
+ struct nhop6_extended *pnh6);
+void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6);
+#endif
+
diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c
index 9e0f37f0..6e1fb8b1 100644
--- a/freebsd/sys/netinet6/in6_gif.c
+++ b/freebsd/sys/netinet6/in6_gif.c
@@ -38,6 +38,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/sys/param.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -51,7 +53,9 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -62,29 +66,28 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
-#include <netinet6/in6_gif.h>
#include <netinet6/in6_var.h>
#endif
-#include <netinet6/ip6protosw.h>
#include <netinet/ip_ecn.h>
#ifdef INET6
#include <netinet6/ip6_ecn.h>
+#include <netinet6/in6_fib.h>
#endif
#include <net/if_gif.h>
-VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
+#define GIF_HLIM 30
+static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM;
#define V_ip6_gif_hlim VNET(ip6_gif_hlim)
SYSCTL_DECL(_net_inet6_ip6);
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW,
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip6_gif_hlim), 0, "");
-static int gif_validate6(const struct ip6_hdr *, struct gif_softc *,
- struct ifnet *);
+static int in6_gif_input(struct mbuf **, int *, int);
extern struct domain inet6domain;
-struct ip6protosw in6_gif_protosw = {
+static struct protosw in6_gif_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = 0, /* IPPROTO_IPV[46] */
@@ -96,112 +99,24 @@ struct ip6protosw in6_gif_protosw = {
};
int
-in6_gif_output(struct ifnet *ifp,
- int family, /* family of the packet to be encapsulate */
- struct mbuf *m)
+in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
+ GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
- struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst;
- struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)sc->gif_psrc;
- struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)sc->gif_pdst;
struct ip6_hdr *ip6;
- struct etherip_header eiphdr;
- int error, len, proto;
- u_int8_t itos, otos;
-
- GIF_LOCK_ASSERT(sc);
-
- if (sin6_src == NULL || sin6_dst == NULL ||
- sin6_src->sin6_family != AF_INET6 ||
- sin6_dst->sin6_family != AF_INET6) {
- m_freem(m);
- return EAFNOSUPPORT;
- }
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- {
- struct ip *ip;
-
- proto = IPPROTO_IPV4;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return ENOBUFS;
- }
- ip = mtod(m, struct ip *);
- itos = ip->ip_tos;
- break;
- }
-#endif
-#ifdef INET6
- case AF_INET6:
- {
- struct ip6_hdr *ip6;
- proto = IPPROTO_IPV6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return ENOBUFS;
- }
- ip6 = mtod(m, struct ip6_hdr *);
- itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- break;
- }
-#endif
- case AF_LINK:
- proto = IPPROTO_ETHERIP;
-
- /*
- * GIF_SEND_REVETHIP (disabled by default) intentionally
- * sends an EtherIP packet with revered version field in
- * the header. This is a knob for backward compatibility
- * with FreeBSD 7.2R or prior.
- */
- if ((sc->gif_options & GIF_SEND_REVETHIP)) {
- eiphdr.eip_ver = 0;
- eiphdr.eip_resvl = ETHERIP_VERSION;
- eiphdr.eip_resvh = 0;
- } else {
- eiphdr.eip_ver = ETHERIP_VERSION;
- eiphdr.eip_resvl = 0;
- eiphdr.eip_resvh = 0;
- }
- /* prepend Ethernet-in-IP header */
- M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct etherip_header))
- m = m_pullup(m, sizeof(struct etherip_header));
- if (m == NULL)
- return ENOBUFS;
- bcopy(&eiphdr, mtod(m, struct etherip_header *),
- sizeof(struct etherip_header));
- break;
-
- default:
-#ifdef DEBUG
- printf("in6_gif_output: warning: unknown family %d passed\n",
- family);
-#endif
- m_freem(m);
- return EAFNOSUPPORT;
- }
+ int len;
/* prepend new IP header */
len = sizeof(struct ip6_hdr);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK)
+ if (proto == IPPROTO_ETHERIP)
len += ETHERIP_ALIGN;
#endif
- M_PREPEND(m, len, M_DONTWAIT);
- if (m != NULL && m->m_len < len)
- m = m_pullup(m, len);
- if (m == NULL) {
- printf("ENOBUFS in in6_gif_output %d\n", __LINE__);
- return ENOBUFS;
- }
+ M_PREPEND(m, len, M_NOWAIT);
+ if (m == NULL)
+ return (ENOBUFS);
#ifndef __NO_STRICT_ALIGNMENT
- if (family == AF_LINK) {
+ if (proto == IPPROTO_ETHERIP) {
len = mtod(m, vm_offset_t) & 3;
KASSERT(len == 0 || len == ETHERIP_ALIGN,
("in6_gif_output: unexpected misalignment"));
@@ -211,261 +126,107 @@ in6_gif_output(struct ifnet *ifp,
#endif
ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = 0;
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
- ip6->ip6_plen = htons((u_short)m->m_pkthdr.len);
- ip6->ip6_nxt = proto;
- ip6->ip6_hlim = V_ip6_gif_hlim;
- ip6->ip6_src = sin6_src->sin6_addr;
- /* bidirectional configured tunnel mode */
- if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr))
- ip6->ip6_dst = sin6_dst->sin6_addr;
- else {
+ GIF_RLOCK(sc);
+ if (sc->gif_family != AF_INET6) {
m_freem(m);
- return ENETUNREACH;
+ GIF_RUNLOCK(sc);
+ return (ENETDOWN);
}
- ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
- &otos, &itos);
- ip6->ip6_flow &= ~htonl(0xff << 20);
- ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
+ bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
+ GIF_RUNLOCK(sc);
- M_SETFIB(m, sc->gif_fibnum);
-
- if (dst->sin6_family != sin6_dst->sin6_family ||
- !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) {
- /* cache route doesn't match */
- bzero(dst, sizeof(*dst));
- dst->sin6_family = sin6_dst->sin6_family;
- dst->sin6_len = sizeof(struct sockaddr_in6);
- dst->sin6_addr = sin6_dst->sin6_addr;
- if (sc->gif_ro6.ro_rt) {
- RTFREE(sc->gif_ro6.ro_rt);
- sc->gif_ro6.ro_rt = NULL;
- }
-#if 0
- GIF2IFP(sc)->if_mtu = GIF_MTU;
-#endif
- }
-
- if (sc->gif_ro6.ro_rt == NULL) {
- in6_rtalloc(&sc->gif_ro6, sc->gif_fibnum);
- if (sc->gif_ro6.ro_rt == NULL) {
- m_freem(m);
- return ENETUNREACH;
- }
-
- /* if it constitutes infinite encapsulation, punt. */
- if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
- m_freem(m);
- return ENETUNREACH; /*XXX*/
- }
-#if 0
- ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu
- - sizeof(struct ip6_hdr);
-#endif
- }
-
- m_addr_changed(m);
-
-#ifdef IPV6_MINMTU
+ ip6->ip6_flow |= htonl((uint32_t)ecn << 20);
+ ip6->ip6_nxt = proto;
+ ip6->ip6_hlim = V_ip6_gif_hlim;
/*
* force fragmentation to minimum MTU, to avoid path MTU discovery.
* it is too painful to ask for resend of inner packet, to achieve
* path MTU discovery for encapsulated packets.
*/
- error = ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL);
-#else
- error = ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL);
-#endif
-
- if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) &&
- sc->gif_ro6.ro_rt != NULL) {
- RTFREE(sc->gif_ro6.ro_rt);
- sc->gif_ro6.ro_rt = NULL;
- }
-
- return (error);
+ return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL));
}
-int
+static int
in6_gif_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
- struct ifnet *gifp = NULL;
+ struct ifnet *gifp;
struct gif_softc *sc;
struct ip6_hdr *ip6;
- int af = 0;
- u_int32_t otos;
+ uint8_t ecn;
- ip6 = mtod(m, struct ip6_hdr *);
-
- sc = (struct gif_softc *)encap_getarg(m);
+ sc = encap_getarg(m);
if (sc == NULL) {
m_freem(m);
IP6STAT_INC(ip6s_nogif);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
-
gifp = GIF2IFP(sc);
- if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) {
- m_freem(m);
- IP6STAT_INC(ip6s_nogif);
- return IPPROTO_DONE;
- }
-
- otos = ip6->ip6_flow;
- m_adj(m, *offp);
-
- switch (proto) {
-#ifdef INET
- case IPPROTO_IPV4:
- {
- struct ip *ip;
- u_int8_t otos8;
- af = AF_INET;
- otos8 = (ntohl(otos) >> 20) & 0xff;
- if (m->m_len < sizeof(*ip)) {
- m = m_pullup(m, sizeof(*ip));
- if (!m)
- return IPPROTO_DONE;
- }
- ip = mtod(m, struct ip *);
- if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos8, &ip->ip_tos) == 0) {
- m_freem(m);
- return IPPROTO_DONE;
- }
- break;
- }
-#endif /* INET */
-#ifdef INET6
- case IPPROTO_IPV6:
- {
- struct ip6_hdr *ip6;
- af = AF_INET6;
- if (m->m_len < sizeof(*ip6)) {
- m = m_pullup(m, sizeof(*ip6));
- if (!m)
- return IPPROTO_DONE;
- }
+ if ((gifp->if_flags & IFF_UP) != 0) {
ip6 = mtod(m, struct ip6_hdr *);
- if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ?
- ECN_ALLOWED : ECN_NOCARE,
- &otos, &ip6->ip6_flow) == 0) {
- m_freem(m);
- return IPPROTO_DONE;
- }
- break;
- }
-#endif
- case IPPROTO_ETHERIP:
- af = AF_LINK;
- break;
-
- default:
- IP6STAT_INC(ip6s_nogif);
+ ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ m_adj(m, *offp);
+ gif_input(m, gifp, proto, ecn);
+ } else {
m_freem(m);
- return IPPROTO_DONE;
+ IP6STAT_INC(ip6s_nogif);
}
-
- gif_input(m, af, gifp);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
/*
- * validate outer address.
+ * we know that we are in IFF_UP, outer address available, and outer family
+ * matched the physical addr family. see gif_encapcheck().
*/
-static int
-gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc,
- struct ifnet *ifp)
+int
+in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
{
- struct sockaddr_in6 *src, *dst;
+ const struct ip6_hdr *ip6;
+ struct gif_softc *sc;
+ int ret;
- src = (struct sockaddr_in6 *)sc->gif_psrc;
- dst = (struct sockaddr_in6 *)sc->gif_pdst;
+ /* sanity check done in caller */
+ sc = (struct gif_softc *)arg;
+ GIF_RLOCK_ASSERT(sc);
/*
* Check for address match. Note that the check is for an incoming
* packet. We should compare the *source* address in our configuration
* and the *destination* address of the packet, and vice versa.
*/
- if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
- !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
- return 0;
-
- /* martian filters on outer source - done in ip6_input */
+ ip6 = mtod(m, const struct ip6_hdr *);
+ if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
+ return (0);
+ ret = 128;
+ if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
+ if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
+ return (0);
+ } else
+ ret += 128;
/* ingress filters on outer source */
- if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) {
- struct sockaddr_in6 sin6;
- struct rtentry *rt;
+ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
+ struct nhop6_basic nh6;
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_family = AF_INET6;
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_addr = ip6->ip6_src;
- sin6.sin6_scope_id = 0; /* XXX */
+ /* XXX empty scope id */
+ if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0,
+ &nh6) != 0)
+ return (0);
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL,
- sc->gif_fibnum);
- if (!rt || rt->rt_ifp != ifp) {
-#if 0
- char ip6buf[INET6_ADDRSTRLEN];
- log(LOG_WARNING, "%s: packet from %s dropped "
- "due to ingress filter\n", if_name(GIF2IFP(sc)),
- ip6_sprintf(ip6buf, &sin6.sin6_addr));
-#endif
- if (rt)
- RTFREE_LOCKED(rt);
- return 0;
- }
- RTFREE_LOCKED(rt);
+ if (nh6.nh_ifp != m->m_pkthdr.rcvif)
+ return (0);
}
-
- return 128 * 2;
-}
-
-/*
- * we know that we are in IFF_UP, outer address available, and outer family
- * matched the physical addr family. see gif_encapcheck().
- * sanity check for arg should have been done in the caller.
- */
-int
-gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg)
-{
- struct ip6_hdr ip6;
- struct gif_softc *sc;
- struct ifnet *ifp;
-
- /* sanity check done in caller */
- sc = (struct gif_softc *)arg;
-
- /* LINTED const cast */
- m_copydata(m, 0, sizeof(ip6), (caddr_t)&ip6);
- ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL;
-
- return gif_validate6(&ip6, sc, ifp);
+ return (ret);
}
int
in6_gif_attach(struct gif_softc *sc)
{
- sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck,
- (void *)&in6_gif_protosw, sc);
- if (sc->encap_cookie6 == NULL)
- return EEXIST;
- return 0;
-}
-
-int
-in6_gif_detach(struct gif_softc *sc)
-{
- int error;
- error = encap_detach(sc->encap_cookie6);
- if (error == 0)
- sc->encap_cookie6 = NULL;
- return error;
+ KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
+ sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck,
+ (void *)&in6_gif_protosw, sc);
+ if (sc->gif_ecookie == NULL)
+ return (EEXIST);
+ return (0);
}
diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c
index a8f03017..791e9e27 100644
--- a/freebsd/sys/netinet6/in6_ifattach.c
+++ b/freebsd/sys/netinet6/in6_ifattach.c
@@ -41,11 +41,14 @@ __FBSDID("$FreeBSD$");
#include <sys/sockio.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
#include <sys/proc.h>
+#include <sys/rmlock.h>
#include <sys/syslog.h>
#include <sys/md5.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -279,9 +282,7 @@ found:
case IFT_ISO88025:
case IFT_ATM:
case IFT_IEEE1394:
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
/* IEEE802/EUI64 cases - what others? */
/* IEEE1394 uses 16byte length address starting with EUI64 */
if (addrlen > 8)
@@ -343,9 +344,7 @@ found:
break;
case IFT_GIF:
-#ifdef IFT_STF
case IFT_STF:
-#endif
/*
* RFC2893 says: "SHOULD use IPv4 address as ifid source".
* however, IPv4 address is not very suitable as unique
@@ -412,7 +411,7 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
/* next, try to get it from some other hardware interface */
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if (ifp == ifp0)
continue;
if (in6_get_hw_ifid(ifp, in6) != 0)
@@ -460,21 +459,13 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
struct in6_ifaddr *ia;
struct in6_aliasreq ifra;
struct nd_prefixctl pr0;
- int i, error;
+ int error;
/*
* configure link-local address.
*/
- bzero(&ifra, sizeof(ifra));
-
- /*
- * in6_update_ifa() does not use ifra_name, but we accurately set it
- * for safety.
- */
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
+ in6_prepare_ifra(&ifra, NULL, &in6mask64);
- ifra.ifra_addr.sin6_family = AF_INET6;
- ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
ifra.ifra_addr.sin6_addr.s6_addr32[0] = htonl(0xfe800000);
ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0;
if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
@@ -490,9 +481,6 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL))
return (-1);
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_prefixmask.sin6_family = AF_INET6;
- ifra.ifra_prefixmask.sin6_addr = in6mask64;
/* link-local addresses should NEVER expire. */
ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
@@ -537,10 +525,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL);
pr0.ndpr_prefix = ifra.ifra_addr;
/* apply the mask for safety. (nd6_prelist_add will apply it again) */
- for (i = 0; i < 4; i++) {
- pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
- in6mask64.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr, &in6mask64);
/*
* Initialize parameters. The link-local prefix must always be
* on-link, and its lifetimes never expire.
@@ -573,17 +558,7 @@ in6_ifattach_loopback(struct ifnet *ifp)
struct in6_aliasreq ifra;
int error;
- bzero(&ifra, sizeof(ifra));
-
- /*
- * in6_update_ifa() does not use ifra_name, but we accurately set it
- * for safety.
- */
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
-
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_prefixmask.sin6_family = AF_INET6;
- ifra.ifra_prefixmask.sin6_addr = in6mask128;
+ in6_prepare_ifra(&ifra, &in6addr_loopback, &in6mask128);
/*
* Always initialize ia_dstaddr (= broadcast address) to loopback
@@ -593,20 +568,10 @@ in6_ifattach_loopback(struct ifnet *ifp)
ifra.ifra_dstaddr.sin6_family = AF_INET6;
ifra.ifra_dstaddr.sin6_addr = in6addr_loopback;
- ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_addr.sin6_family = AF_INET6;
- ifra.ifra_addr.sin6_addr = in6addr_loopback;
-
/* the loopback address should NEVER expire. */
ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
- /* we don't need to perform DAD on loopback interfaces. */
- ifra.ifra_flags |= IN6_IFF_NODAD;
-
- /* skip registration to the prefix list. XXX should be temporary. */
- ifra.ifra_flags |= IN6_IFF_NOPFX;
-
/*
* We are sure that this is a newly assigned address, so we can set
* NULL to the 3rd arg.
@@ -734,15 +699,8 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
struct in6_ifaddr *ia;
struct in6_addr in6;
- /* some of the interfaces are inherently not IPv6 capable */
- switch (ifp->if_type) {
- case IFT_PFLOG:
- case IFT_PFSYNC:
- ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
- ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
+ if (ifp->if_afdata[AF_INET6] == NULL)
return;
- }
-
/*
* quirks based on interface type
*/
@@ -813,64 +771,45 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
/*
* NOTE: in6_ifdetach() does not support loopback if at this moment.
- * We don't need this function in bsdi, because interfaces are never removed
- * from the ifnet list in bsdi.
+ *
+ * When shutting down a VNET we clean up layers top-down. In that case
+ * upper layer protocols (ulp) are cleaned up already and locks are destroyed
+ * and we must not call into these cleanup functions anymore, thus purgeulp
+ * is set to 0 in that case by in6_ifdetach_destroy().
+ * The normal case of destroying a (cloned) interface still needs to cleanup
+ * everything related to the interface and will have purgeulp set to 1.
*/
-void
-in6_ifdetach(struct ifnet *ifp)
+static void
+_in6_ifdetach(struct ifnet *ifp, int purgeulp)
{
- struct in6_ifaddr *ia;
struct ifaddr *ifa, *next;
- struct radix_node_head *rnh;
- struct rtentry *rt;
- struct sockaddr_in6 sin6;
- struct in6_multi_mship *imm;
- /* remove neighbor management table */
- nd6_purge(ifp);
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return;
- /* nuke any of IPv6 addresses we have */
+ /*
+ * Remove neighbor management table.
+ * Enabling the nd6_purge will panic on vmove for interfaces on VNET
+ * teardown as the IPv6 layer is cleaned up already and the locks
+ * are destroyed.
+ */
+ if (purgeulp)
+ nd6_purge(ifp);
+
+ /*
+ * nuke any of IPv6 addresses we have
+ * XXX: all addresses should be already removed
+ */
TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
in6_purgeaddr(ifa);
}
-
- /* undo everything done by in6_ifattach(), just in case */
- TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
- if (ifa->ifa_addr->sa_family != AF_INET6
- || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) {
- continue;
- }
-
- ia = (struct in6_ifaddr *)ifa;
-
- /*
- * leave from multicast groups we have joined for the interface
- */
- while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
- LIST_REMOVE(imm, i6mm_chain);
- in6_leavegroup(imm);
- }
-
- /* Remove link-local from the routing table. */
- if (ia->ia_flags & IFA_ROUTE)
- (void)rtinit(&ia->ia_ifa, RTM_DELETE, ia->ia_flags);
-
- /* remove from the linked list */
- IF_ADDR_WLOCK(ifp);
- TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
- IF_ADDR_WUNLOCK(ifp);
- ifa_free(ifa); /* if_addrhead */
-
- IN6_IFADDR_WLOCK();
- TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
- IN6_IFADDR_WUNLOCK();
- ifa_free(ifa);
+ if (purgeulp) {
+ in6_pcbpurgeif0(&V_udbinfo, ifp);
+ in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
+ in6_pcbpurgeif0(&V_ripcbinfo, ifp);
}
-
- in6_pcbpurgeif0(&V_udbinfo, ifp);
- in6_pcbpurgeif0(&V_ripcbinfo, ifp);
/* leave from all multicast groups joined */
in6_purgemaddrs(ifp);
@@ -882,32 +821,22 @@ in6_ifdetach(struct ifnet *ifp)
* prefixes after removing all addresses above.
* (Or can we just delay calling nd6_purge until at this point?)
*/
- nd6_purge(ifp);
+ if (purgeulp)
+ nd6_purge(ifp);
+}
- /*
- * Remove route to link-local allnodes multicast (ff02::1).
- * These only get automatically installed for the default FIB.
- */
- bzero(&sin6, sizeof(sin6));
- sin6.sin6_len = sizeof(struct sockaddr_in6);
- sin6.sin6_family = AF_INET6;
- sin6.sin6_addr = in6addr_linklocal_allnodes;
- if (in6_setscope(&sin6.sin6_addr, ifp, NULL))
- /* XXX: should not fail */
- return;
- /* XXX grab lock first to avoid LOR */
- rnh = rt_tables_get_rnh(RT_DEFAULT_FIB, AF_INET6);
- if (rnh != NULL) {
- RADIX_NODE_HEAD_LOCK(rnh);
- rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED,
- RT_DEFAULT_FIB);
- if (rt) {
- if (rt->rt_ifp == ifp)
- rtexpunge(rt);
- RTFREE_LOCKED(rt);
- }
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
+void
+in6_ifdetach(struct ifnet *ifp)
+{
+
+ _in6_ifdetach(ifp, 1);
+}
+
+void
+in6_ifdetach_destroy(struct ifnet *ifp)
+{
+
+ _in6_ifdetach(ifp, 0);
}
int
@@ -948,7 +877,9 @@ in6_tmpaddrtimer(void *arg)
V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet);
bzero(nullbuf, sizeof(nullbuf));
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ continue;
ndi = ND_IFINFO(ifp);
if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
/*
@@ -997,3 +928,29 @@ in6_purgemaddrs(struct ifnet *ifp)
IN6_MULTI_UNLOCK();
}
+
+void
+in6_ifattach_destroy(void)
+{
+
+ callout_drain(&V_in6_tmpaddrtimer_ch);
+}
+
+static void
+in6_ifattach_init(void *dummy)
+{
+
+ /* Timer for regeneranation of temporary addresses randomize ID. */
+ callout_init(&V_in6_tmpaddrtimer_ch, 0);
+ callout_reset(&V_in6_tmpaddrtimer_ch,
+ (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
+ V_ip6_temp_regen_advance) * hz,
+ in6_tmpaddrtimer, curvnet);
+}
+
+/*
+ * Cheat.
+ * This must be after route_init(), which is now SI_ORDER_THIRD.
+ */
+SYSINIT(in6_ifattach_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
+ in6_ifattach_init, NULL);
diff --git a/freebsd/sys/netinet6/in6_ifattach.h b/freebsd/sys/netinet6/in6_ifattach.h
index af627313..a34530db 100644
--- a/freebsd/sys/netinet6/in6_ifattach.h
+++ b/freebsd/sys/netinet6/in6_ifattach.h
@@ -35,7 +35,9 @@
#ifdef _KERNEL
void in6_ifattach(struct ifnet *, struct ifnet *);
+void in6_ifattach_destroy(void);
void in6_ifdetach(struct ifnet *);
+void in6_ifdetach_destroy(struct ifnet *);
int in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int);
void in6_tmpaddrtimer(void *);
int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c
index d32d57c6..174f1109 100644
--- a/freebsd/sys/netinet6/in6_mcast.c
+++ b/freebsd/sys/netinet6/in6_mcast.c
@@ -54,12 +54,14 @@ __FBSDID("$FreeBSD$");
#include <sys/tree.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -159,21 +161,18 @@ static SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0,
static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
- CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxgrpsrc, 0,
+ CTLFLAG_RWTUN, &in6_mcast_maxgrpsrc, 0,
"Max source filters per group");
-TUNABLE_ULONG("net.inet6.ip6.mcast.maxgrpsrc", &in6_mcast_maxgrpsrc);
static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
- CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxsocksrc, 0,
+ CTLFLAG_RWTUN, &in6_mcast_maxsocksrc, 0,
"Max source filters per socket");
-TUNABLE_ULONG("net.inet6.ip6.mcast.maxsocksrc", &in6_mcast_maxsocksrc);
/* TODO Virtualize this switch. */
int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
-SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
+SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
&in6_mcast_loop, 0, "Loopback multicast datagrams by default");
-TUNABLE_INT("net.inet6.ip6.mcast.loop", &in6_mcast_loop);
static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters,
@@ -473,9 +472,9 @@ in6_mc_get(struct ifnet *ifp, const struct in6_addr *group,
*/
inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO);
if (inm == NULL) {
+ IF_ADDR_WUNLOCK(ifp);
if_delmulti_ifma(ifma);
- error = ENOMEM;
- goto out_locked;
+ return (ENOMEM);
}
inm->in6m_addr = *group;
inm->in6m_ifp = ifp;
@@ -483,7 +482,7 @@ in6_mc_get(struct ifnet *ifp, const struct in6_addr *group,
inm->in6m_ifma = ifma;
inm->in6m_refcount = 1;
inm->in6m_state = MLD_NOT_MEMBER;
- IFQ_SET_MAXLEN(&inm->in6m_scq, MLD_MAX_STATE_CHANGES);
+ mbufq_init(&inm->in6m_scq, MLD_MAX_STATE_CHANGES);
inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED;
inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
@@ -577,7 +576,7 @@ in6m_clear_recorded(struct in6_multi *inm)
*
* Return 0 if the source didn't exist or was already marked as recorded.
* Return 1 if the source was marked as recorded by this function.
- * Return <0 if any error occured (negated errno code).
+ * Return <0 if any error occurred (negated errno code).
*/
int
in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr)
@@ -1078,7 +1077,7 @@ in6m_purge(struct in6_multi *inm)
inm->in6m_nsrc--;
}
/* Free state-change requests that might be queued. */
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
}
/*
@@ -1187,7 +1186,7 @@ in6_mc_join_locked(struct ifnet *ifp, const struct in6_addr *mcaddr,
IN6_MULTI_LOCK_ASSERT();
CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__,
- ip6_sprintf(ip6tbuf, mcaddr), ifp, ifp->if_xname);
+ ip6_sprintf(ip6tbuf, mcaddr), ifp, if_name(ifp));
error = 0;
inm = NULL;
@@ -1278,7 +1277,7 @@ in6_mc_leave_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
CTR5(KTR_MLD, "%s: leave inm %p, %s/%s, imf %p", __func__,
inm, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_xname),
+ (in6m_is_ifp_detached(inm) ? "null" : if_name(inm->in6m_ifp)),
imf);
/*
@@ -1776,28 +1775,22 @@ static struct ifnet *
in6p_lookup_mcast_ifp(const struct inpcb *in6p,
const struct sockaddr_in6 *gsin6)
{
- struct route_in6 ro6;
- struct ifnet *ifp;
+ struct nhop6_basic nh6;
+ struct in6_addr dst;
+ uint32_t scopeid;
+ uint32_t fibnum;
KASSERT(in6p->inp_vflag & INP_IPV6,
("%s: not INP_IPV6 inpcb", __func__));
KASSERT(gsin6->sin6_family == AF_INET6,
("%s: not AF_INET6 group", __func__));
- KASSERT(IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr),
- ("%s: not multicast", __func__));
- ifp = NULL;
- memset(&ro6, 0, sizeof(struct route_in6));
- memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6));
- rtalloc_ign_fib((struct route *)&ro6, 0,
- in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB);
- if (ro6.ro_rt != NULL) {
- ifp = ro6.ro_rt->rt_ifp;
- KASSERT(ifp != NULL, ("%s: null ifp", __func__));
- RTFREE(ro6.ro_rt);
- }
+ in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid);
+ fibnum = in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
+ if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0)
+ return (NULL);
- return (ifp);
+ return (nh6.nh_ifp);
}
/*
@@ -1853,8 +1846,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
if (mreq.ipv6mr_interface == 0) {
ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
} else {
- if (mreq.ipv6mr_interface < 0 ||
- V_if_index < mreq.ipv6mr_interface)
+ if (V_if_index < mreq.ipv6mr_interface)
return (EADDRNOTAVAIL);
ifp = ifnet_byindex(mreq.ipv6mr_interface);
}
@@ -2198,7 +2190,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
* XXX SCOPE6 lock potentially taken here.
*/
if (ifindex != 0) {
- if (ifindex < 0 || V_if_index < ifindex)
+ if (V_if_index < ifindex)
return (EADDRNOTAVAIL);
ifp = ifnet_byindex(ifindex);
if (ifp == NULL)
@@ -2353,13 +2345,17 @@ in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
if (error)
return (error);
- if (ifindex < 0 || V_if_index < ifindex)
+ if (V_if_index < ifindex)
return (EINVAL);
-
- ifp = ifnet_byindex(ifindex);
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
- return (EADDRNOTAVAIL);
-
+ if (ifindex == 0)
+ ifp = NULL;
+ else {
+ ifp = ifnet_byindex(ifindex);
+ if (ifp == NULL)
+ return (EINVAL);
+ if ((ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+ }
imo = in6p_findmoptions(inp);
imo->im6o_multicast_ifp = ifp;
INP_WUNLOCK(inp);
@@ -2805,13 +2801,13 @@ in6m_print(const struct in6_multi *inm)
printf("addr %s ifp %p(%s) ifma %p\n",
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
inm->in6m_ifp,
- inm->in6m_ifp->if_xname,
+ if_name(inm->in6m_ifp),
inm->in6m_ifma);
printf("timer %u state %s refcount %u scq.len %u\n",
inm->in6m_timer,
in6m_state_str(inm->in6m_state),
inm->in6m_refcount,
- inm->in6m_scq.ifq_len);
+ mbufq_len(&inm->in6m_scq));
printf("mli %p nsrc %lu sctimer %u scrv %u\n",
inm->in6m_mli,
inm->in6m_nsrc,
diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c
index bf69996d..95e376c7 100644
--- a/freebsd/sys/netinet6/in6_pcb.c
+++ b/freebsd/sys/netinet6/in6_pcb.c
@@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_pcbgroup.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
@@ -96,6 +97,8 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -112,7 +115,8 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_pcb.h>
#include <netinet6/scope6_var.h>
-struct in6_addr zeroin6_addr;
+static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *,
+ struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *);
int
in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
@@ -208,6 +212,7 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
&sin6->sin6_addr, lport,
INPLOOKUP_WILDCARD, cred);
if (t &&
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
((t->inp_flags & INP_TIMEWAIT) == 0) &&
(so->so_type != SOCK_STREAM ||
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
@@ -221,6 +226,16 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
0)
#endif /* __rtems__ */
return (EADDRINUSE);
+
+ /*
+ * If the socket is a BINDMULTI socket, then
+ * the credentials need to match and the
+ * original socket also has to have been bound
+ * with BINDMULTI.
+ */
+ if (t && (! in_pcbbind_check_bindmulti(inp, t)))
+ return (EADDRINUSE);
+
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
@@ -231,6 +246,7 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
sin.sin_addr, lport,
INPLOOKUP_WILDCARD, cred);
if (t &&
+ ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
((t->inp_flags &
INP_TIMEWAIT) == 0) &&
(so->so_type != SOCK_STREAM ||
@@ -243,6 +259,9 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
0)
#endif /* __rtems__ */
return (EADDRINUSE);
+
+ if (t && (! in_pcbbind_check_bindmulti(inp, t)))
+ return (EADDRINUSE);
}
#endif
}
@@ -318,13 +337,12 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
* a bit of a kludge, but cleaning up the internal interfaces would
* have forced minor changes in every protocol).
*/
-int
+static int
in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
struct in6_addr *plocal_addr6)
{
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
int error = 0;
- struct ifnet *ifp = NULL;
int scope_ambiguous = 0;
struct in6_addr in6a;
@@ -354,20 +372,15 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0)
return (error);
- error = in6_selectsrc(sin6, inp->in6p_outputopts,
- inp, NULL, inp->inp_cred, &ifp, &in6a);
+ error = in6_selectsrc_socket(sin6, inp->in6p_outputopts,
+ inp, inp->inp_cred, scope_ambiguous, &in6a, NULL);
if (error)
return (error);
- if (ifp && scope_ambiguous &&
- (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) {
- return(error);
- }
-
/*
* Do not update this earlier, in case we return with an error.
*
- * XXX: this in6_selectsrc result might replace the bound local
+ * XXX: this in6_selectsrc_socket result might replace the bound local
* address with the address specified by setsockopt(IPV6_PKTINFO).
* Is it the intended behavior?
*/
@@ -702,8 +715,9 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
* Look for an unconnected (wildcard foreign addr) PCB that
* matches the local address and port we're looking for.
*/
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(&in6addr_any), lport, 0,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -784,7 +798,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
struct ip6_moptions *im6o;
int i, gap;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_WLOCK(pcbinfo);
LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(in6p);
im6o = in6p->in6p_moptions;
@@ -815,7 +829,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
}
INP_WUNLOCK(in6p);
}
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
}
/*
@@ -828,9 +842,12 @@ void
in6_losing(struct inpcb *in6p)
{
- /*
- * We don't store route pointers in the routing table anymore
- */
+ if (in6p->inp_route6.ro_rt) {
+ RTFREE(in6p->inp_route6.ro_rt);
+ in6p->inp_route6.ro_rt = (struct rtentry *)NULL;
+ }
+ if (in6p->inp_route.ro_lle)
+ LLE_FREE(in6p->inp_route.ro_lle); /* zeros ro_lle */
return;
}
@@ -841,9 +858,13 @@ in6_losing(struct inpcb *in6p)
struct inpcb *
in6_rtchange(struct inpcb *inp, int errno)
{
- /*
- * We don't store route pointers in the routing table anymore
- */
+
+ if (inp->inp_route6.ro_rt) {
+ RTFREE(inp->inp_route6.ro_rt);
+ inp->inp_route6.ro_rt = (struct rtentry *)NULL;
+ }
+ if (inp->inp_route.ro_lle)
+ LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */
return inp;
}
@@ -859,21 +880,14 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
struct inpcbhead *head;
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
- int faith;
-
- if (faithprefix_p != NULL)
- faith = (*faithprefix_p)(laddr);
- else
- faith = 0;
/*
* First look for an exact match.
*/
tmpinp = NULL;
INP_GROUP_LOCK(pcbgroup);
- head = &pcbgroup->ipg_hashbase[
- INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
- pcbgroup->ipg_hashmask)];
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)];
LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -899,7 +913,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
}
/*
- * Then look for a wildcard match, if requested.
+ * Then look for a wildcard match in the pcbgroup.
*/
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
struct inpcb *local_wild = NULL, *local_exact = NULL;
@@ -913,9 +927,9 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
* 3. non-jailed, non-wild.
* 4. non-jailed, wild.
*/
- head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_wildmask)];
- LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ head = &pcbgroup->ipg_hashbase[
+ INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -925,9 +939,67 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
continue;
}
+ injail = prison_flag(inp->inp_cred, PR_IP6);
+ if (injail) {
+ if (prison_check_ip6(inp->inp_cred,
+ laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+ if (inp != NULL)
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+ struct inpcb *jail_wild = NULL;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(&in6addr_any), lport, 0,
+ pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
/* XXX inp locking */
- if (faith && (inp->inp_flags & INP_FAITH) == 0)
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+ inp->inp_lport != lport) {
continue;
+ }
injail = prison_flag(inp->inp_cred, PR_IP6);
if (injail) {
@@ -985,7 +1057,7 @@ found:
/*
* Lookup PCB in hash list.
*/
-struct inpcb *
+static struct inpcb *
in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
int lookupflags, struct ifnet *ifp)
@@ -993,25 +1065,18 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
struct inpcbhead *head;
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
- int faith;
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
INP_HASH_LOCK_ASSERT(pcbinfo);
- if (faithprefix_p != NULL)
- faith = (*faithprefix_p)(laddr);
- else
- faith = 0;
-
/*
* First look for an exact match.
*/
tmpinp = NULL;
- head = &pcbinfo->ipi_hashbase[
- INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
- pcbinfo->ipi_hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -1049,8 +1114,9 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
* 3. non-jailed, non-wild.
* 4. non-jailed, wild.
*/
- head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
- 0, pcbinfo->ipi_hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
+ INP6_PCBHASHKEY(&in6addr_any), lport, 0,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -1061,10 +1127,6 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
continue;
}
- /* XXX inp locking */
- if (faith && (inp->inp_flags & INP_FAITH) == 0)
- continue;
-
injail = prison_flag(inp->inp_cred, PR_IP6);
if (injail) {
if (prison_check_ip6(inp->inp_cred,
@@ -1145,7 +1207,7 @@ struct inpcb *
in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
-#if defined(PCBGROUP)
+#if defined(PCBGROUP) && !defined(RSS)
struct inpcbgroup *pcbgroup;
#endif
@@ -1154,7 +1216,17 @@ in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
-#if defined(PCBGROUP)
+ /*
+ * When not using RSS, use connection groups in preference to the
+ * reservation table when looking up 4-tuples. When using RSS, just
+ * use the reservation table, due to the cost of the Toeplitz hash
+ * in software.
+ *
+ * XXXRW: This policy belongs in the pcbgroup code, as in principle
+ * we could be doing RSS with a non-Toeplitz hash that is affordable
+ * in software.
+ */
+#if defined(PCBGROUP) && !defined(RSS)
if (in_pcbgroup_enabled(pcbinfo)) {
pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
fport);
@@ -1181,16 +1253,27 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
("%s: LOCKPCB not set", __func__));
#ifdef PCBGROUP
- if (in_pcbgroup_enabled(pcbinfo)) {
+ /*
+ * If we can use a hardware-generated hash to look up the connection
+ * group, use that connection group to find the inpcb. Otherwise
+ * fall back on a software hash -- or the reservation table if we're
+ * using RSS.
+ *
+ * XXXRW: As above, that policy belongs in the pcbgroup code.
+ */
+ if (in_pcbgroup_enabled(pcbinfo) &&
+ M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) {
pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
m->m_pkthdr.flowid);
if (pcbgroup != NULL)
return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
fport, laddr, lport, lookupflags, ifp));
+#ifndef RSS
pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
fport);
return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
laddr, lport, lookupflags, ifp));
+#endif
}
#endif
return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h
index 19d151b7..e758dace 100644
--- a/freebsd/sys/netinet6/in6_pcb.h
+++ b/freebsd/sys/netinet6/in6_pcb.h
@@ -86,7 +86,6 @@ int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
int in6_pcbconnect_mbuf(struct inpcb *, struct sockaddr *,
struct ucred *, struct mbuf *);
void in6_pcbdisconnect(struct inpcb *);
-int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *);
struct inpcb *
in6_pcblookup_local(struct inpcbinfo *,
struct in6_addr *, u_short, int,
@@ -96,10 +95,6 @@ struct inpcb *
u_int, struct in6_addr *, u_int, int,
struct ifnet *);
struct inpcb *
- in6_pcblookup_hash_locked(struct inpcbinfo *, struct in6_addr *,
- u_int, struct in6_addr *, u_int, int,
- struct ifnet *);
-struct inpcb *
in6_pcblookup_mbuf(struct inpcbinfo *, struct in6_addr *,
u_int, struct in6_addr *, u_int, int,
struct ifnet *ifp, struct mbuf *);
diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c
index a6c3b4e8..8a9c1cd9 100644
--- a/freebsd/sys/netinet6/in6_proto.c
+++ b/freebsd/sys/netinet6/in6_proto.c
@@ -80,12 +80,14 @@ __FBSDID("$FreeBSD$");
#include <sys/protosw.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/radix.h>
#include <net/route.h>
#ifdef RADIX_MPATH
@@ -128,10 +130,6 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-#endif
-
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@@ -151,15 +149,12 @@ static struct pr_usrreqs nousrreqs;
.pr_usrreqs = &nousrreqs \
}
-struct ip6protosw inet6sw[] = {
+struct protosw inet6sw[] = {
{
.pr_type = 0,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPV6,
.pr_init = ip6_init,
-#ifdef VIMAGE
- .pr_destroy = ip6_destroy,
-#endif
.pr_slowtimo = frag6_slowtimo,
.pr_drain = frag6_drain,
.pr_usrreqs = &nousrreqs,
@@ -211,15 +206,28 @@ struct ip6protosw inet6sw[] = {
.pr_type = SOCK_STREAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_SCTP,
- .pr_flags = PR_WANTRCVD,
+ .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD,
.pr_input = sctp6_input,
- .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctlinput = sctp6_ctlinput,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp6_usrreqs
},
#endif /* SCTP */
{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_UDPLITE,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = udp6_input,
+ .pr_ctlinput = udplite6_ctlinput,
+ .pr_ctloutput = udp_ctloutput,
+#ifndef INET /* Do not call initialization twice. */
+ .pr_init = udplite_init,
+#endif
+ .pr_usrreqs = &udp6_usrreqs,
+},
+{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_RAW,
@@ -324,6 +332,17 @@ struct ip6protosw inet6sw[] = {
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_GRE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = encap6_input,
+ .pr_output = rip6_output,
+ .pr_ctloutput = rip6_ctloutput,
+ .pr_init = encap_init,
+ .pr_usrreqs = &rip6_usrreqs
+},
+{
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap6_input,
@@ -361,8 +380,7 @@ struct domain inet6domain = {
.dom_family = AF_INET6,
.dom_name = "internet6",
.dom_protosw = (struct protosw *)inet6sw,
- .dom_protoswNPROTOSW = (struct protosw *)
- &inet6sw[sizeof(inet6sw)/sizeof(inet6sw[0])],
+ .dom_protoswNPROTOSW = (struct protosw *)&inet6sw[nitems(inet6sw)],
#ifdef RADIX_MPATH
.dom_rtattach = rn6_mpath_inithead,
#else
@@ -371,10 +389,9 @@ struct domain inet6domain = {
#ifdef VIMAGE
.dom_rtdetach = in6_detachhead,
#endif
- .dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3,
- .dom_maxrtkey = sizeof(struct sockaddr_in6),
.dom_ifattach = in6_domifattach,
- .dom_ifdetach = in6_domifdetach
+ .dom_ifdetach = in6_domifdetach,
+ .dom_ifmtu = in6_domifmtu
};
VNET_DOMAIN_SET(inet6);
@@ -416,7 +433,6 @@ VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix
VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? */
VNET_DEFINE(int, ip6_v6only) = 1;
-VNET_DEFINE(int, ip6_keepfaith) = 0;
VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L;
#ifdef IPSTEALTH
VNET_DEFINE(int, ip6stealth) = 0;
@@ -433,16 +449,6 @@ VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS
VNET_DEFINE(int, pmtu_expire) = 60*10;
VNET_DEFINE(int, pmtu_probe) = 60*2;
-/* raw IP6 parameters */
-/*
- * Nominal space allocated to a raw ip socket.
- */
-#define RIPV6SNDQ 8192
-#define RIPV6RCVQ 8192
-
-VNET_DEFINE(u_long, rip6_sendspace) = RIPV6SNDQ;
-VNET_DEFINE(u_long, rip6_recvspace) = RIPV6RCVQ;
-
/* ICMPV6 parameters */
VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */
VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */
@@ -452,11 +458,6 @@ VNET_DEFINE(int, icmp6_nodeinfo) =
(ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1;
-/* UDP on IP6 parameters */
-VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */
-VNET_DEFINE(int, udp6_recvspace) = 40 * (1024 + sizeof(struct sockaddr_in6));
- /* 40 1K datagrams */
-
/*
* sysctl related items.
*/
@@ -479,158 +480,170 @@ SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6");
static int
sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS)
{
- int error = 0;
- int old;
-
- VNET_SYSCTL_ARG(req, arg1);
+ int error, val;
- error = SYSCTL_OUT(req, arg1, sizeof(int));
- if (error || !req->newptr)
+ val = V_ip6_temp_preferred_lifetime;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
return (error);
- old = V_ip6_temp_preferred_lifetime;
- error = SYSCTL_IN(req, arg1, sizeof(int));
- if (V_ip6_temp_preferred_lifetime <
- V_ip6_desync_factor + V_ip6_temp_regen_advance) {
- V_ip6_temp_preferred_lifetime = old;
+ if (val < V_ip6_desync_factor + V_ip6_temp_regen_advance)
return (EINVAL);
- }
- return (error);
+ V_ip6_temp_preferred_lifetime = val;
+ return (0);
}
static int
sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
{
- int error = 0;
- int old;
+ int error, val;
- VNET_SYSCTL_ARG(req, arg1);
-
- error = SYSCTL_OUT(req, arg1, sizeof(int));
- if (error || !req->newptr)
+ val = V_ip6_temp_valid_lifetime;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
return (error);
- old = V_ip6_temp_valid_lifetime;
- error = SYSCTL_IN(req, arg1, sizeof(int));
- if (V_ip6_temp_valid_lifetime < V_ip6_temp_preferred_lifetime) {
- V_ip6_temp_preferred_lifetime = old;
+ if (val < V_ip6_temp_preferred_lifetime)
return (EINVAL);
- }
- return (error);
+ V_ip6_temp_valid_lifetime = val;
+ return (0);
}
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_RW,
- &VNET_NAME(ip6_forwarding), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
- &VNET_NAME(ip6_sendredirects), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW,
- &VNET_NAME(ip6_defhlim), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(ip6stat), ip6stat, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
- CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
- CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
- "Default value of per-interface flag for accepting ICMPv6 Router"
- "Advertisement messages");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
- CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
+ "Enable IPv6 forwarding between interfaces");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0,
+ "Send a redirect message when forwarding back to a source link");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defhlim), 0,
+ "Default hop limit");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
+ ip6stat,
+ "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0,
+ "Maximum allowed number of outstanding fragmented IPv6 packets");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
+ "Default value of per-interface flag for accepting ICMPv6 RA messages");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
"Default value of per-interface flag to control whether routers "
"sending ICMPv6 RA messages on that interface are added into the "
- "default router list.");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_RW,
- &VNET_NAME(ip6_norbit_raif), 0,
- "Always set 0 to R flag in ICMPv6 NA messages when accepting RA"
- " on the interface.");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3,
- CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0,
+ "default router list");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_norbit_raif), 0,
+ "Always set clear the R flag in ICMPv6 NA messages when accepting RA "
+ "on the interface");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0,
"Accept the default router list from ICMPv6 RA messages even "
- "when packet forwarding enabled.");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
- &VNET_NAME(ip6_keepfaith), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
- CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit,
- CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_RW,
- &VNET_NAME(ip6_dad_count), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel,
- CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim,
- CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, "");
+ "when packet forwarding is enabled");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0,
+ "Frequency in seconds at which to log IPv6 forwarding errors");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0,
+ "Maximum allowed number of nested protocol headers");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0,
+ "Number of ICMPv6 NS messages sent during duplicate address detection");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0,
+ "Provide an IPv6 flowlabel in outbound packets");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0,
+ "Default hop limit for multicast packets");
SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version,
- CTLFLAG_RD, __KAME_VERSION, 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated,
- CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, CTLFLAG_RW,
- &VNET_NAME(ip6_rr_prune), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr,
- CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, "");
-SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_preferred_lifetime), 0,
- sysctl_ip6_temppltime, "I", "");
-SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
- CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_valid_lifetime), 0,
- sysctl_ip6_tempvltime, "I", "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, CTLFLAG_RW,
- &VNET_NAME(ip6_v6only), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal,
- CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
- "Default value of per-interface flag for automatically adding an IPv6"
- " link-local address to interfaces when attached");
-SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RW,
- &VNET_NAME(rip6stat), rip6stat, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
- CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
- CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,"");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, CTLFLAG_RW,
- &VNET_NAME(ip6_maxfrags), 0, "");
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_RW,
- &VNET_NAME(ip6_mcast_pmtu), 0, "");
+ CTLFLAG_RD, __KAME_VERSION, 0,
+ "KAME version string");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0,
+ "Allow the use of addresses whose preferred lifetimes have expired");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rr_prune), 0,
+ ""); /* XXX unused */
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0,
+ "Create RFC3041 temporary addresses for autoconfigured addresses");
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ NULL, 0, sysctl_ip6_temppltime, "I",
+ "Maximum preferred lifetime for temporary addresses");
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ NULL, 0, sysctl_ip6_tempvltime, "I",
+ "Maximum valid lifetime for temporary addresses");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_v6only), 0,
+ "Restrict AF_INET6 sockets to IPv6 addresses only");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
+ "Default value of per-interface flag for automatically adding an IPv6 "
+ "link-local address to interfaces when attached");
+SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats,
+ struct rip6stat, rip6stat,
+ "Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0,
+ "Prefer RFC3041 temporary addresses in source address selection");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
+ "Use the default scope zone when none is specified");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0,
+ "Maximum allowed number of outstanding IPv6 packet fragments");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
+ "Enable path MTU discovery for multicast packets");
#ifdef IPSTEALTH
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW,
- &VNET_NAME(ip6stealth), 0, "");
-#endif
-
-#ifdef FLOWTABLE
-VNET_DEFINE(int, ip6_output_flowtable_size) = 2048;
-VNET_DEFINE(struct flowtable *, ip6_ft);
-#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
-
-SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
- &VNET_NAME(ip6_output_flowtable_size), 2048,
- "number of entries in the per-cpu output flow caches");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(ip6stealth), 0,
+ "Forward IPv6 packets without decrementing their TTL");
#endif
/* net.inet6.icmp6 */
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
- CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
- CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, "");
-SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RW,
- &VNET_NAME(icmp6stat), icmp6stat, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW,
- &VNET_NAME(nd6_prune), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW,
- &VNET_NAME(nd6_delay), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries,
- CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries,
- CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback,
- CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW,
- &VNET_NAME(icmp6_nodeinfo), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
- nodeinfo_oldmcprefix, CTLFLAG_RW,
- &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
- "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup"
- " for compatibility with KAME implememtation.");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
- CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
- CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_RW,
- &VNET_NAME(nd6_debug), 0, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
- nd6_onlink_ns_rfc4861, CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861),
- 0, "Accept 'on-link' nd6 NS in compliance with RFC 4861.");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0,
+ "Accept ICMPv6 redirect messages");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0,
+ ""); /* XXX unused */
+SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats,
+ struct icmp6stat, icmp6stat,
+ "ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0,
+ "Frequency in seconds of checks for expired prefixes and routers");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_delay), 0,
+ "Delay in seconds before probing for reachability");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0,
+ "Number of ICMPv6 NS messages sent during reachability detection");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0,
+ "Number of ICMPv6 NS messages sent during address resolution");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0,
+ "Create a loopback route when configuring an IPv6 address");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo), 0,
+ "Mask of enabled RF4620 node information query types");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
+ nodeinfo_oldmcprefix, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
+ "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup "
+ "for compatibility with KAME implementation");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0,
+ "Maximum number of ICMPv6 error messages per second");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0,
+ ""); /* XXX unused */
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0,
+ "Log NDP debug messages");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
+ nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(nd6_onlink_ns_rfc4861), 0,
+ "Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861");
diff --git a/freebsd/sys/netinet6/in6_rmx.c b/freebsd/sys/netinet6/in6_rmx.c
index 4c59a1ad..f04e0058 100644
--- a/freebsd/sys/netinet6/in6_rmx.c
+++ b/freebsd/sys/netinet6/in6_rmx.c
@@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
-#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -78,7 +77,9 @@ __FBSDID("$FreeBSD$");
#include <sys/callout.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
@@ -104,14 +105,12 @@ extern int in6_detachhead(void **head, int off);
* Do what we need to do when inserting a route.
*/
static struct radix_node *
-in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
+in6_addroute(void *v_arg, void *n_arg, struct radix_head *head,
struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
- struct radix_node *ret;
- RADIX_NODE_HEAD_WLOCK_ASSERT(head);
if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
rt->rt_flags |= RTF_MULTICAST;
@@ -137,116 +136,69 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
}
}
- if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
- rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
+ if (rt->rt_ifp != NULL) {
- ret = rn_addroute(v_arg, n_arg, head, treenodes);
- if (ret == NULL) {
- struct rtentry *rt2;
/*
- * We are trying to add a net route, but can't.
- * The following case should be allowed, so we'll make a
- * special check for this:
- * Two IPv6 addresses with the same prefix is assigned
- * to a single interrface.
- * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
- * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
- * In this case, (*1) and (*2) want to add the same
- * net route entry, 3ffe:0501:: -> if0.
- * This case should not raise an error.
+ * Check route MTU:
+ * inherit interface MTU if not set or
+ * check if MTU is too large.
*/
- rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED,
- rt->rt_fibnum);
- if (rt2) {
- if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0)
- && rt2->rt_gateway
- && rt2->rt_gateway->sa_family == AF_LINK
- && rt2->rt_ifp == rt->rt_ifp) {
- ret = rt2->rt_nodes;
- }
- RTFREE_LOCKED(rt2);
- }
+ if (rt->rt_mtu == 0) {
+ rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
+ } else if (rt->rt_mtu > IN6_LINKMTU(rt->rt_ifp))
+ rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
}
- return (ret);
-}
-SYSCTL_DECL(_net_inet6_ip6);
-
-static VNET_DEFINE(int, rtq_toomany6) = 128;
- /* 128 cached routes is ``too many'' */
-#define V_rtq_toomany6 VNET(rtq_toomany6)
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
- &VNET_NAME(rtq_toomany6) , 0, "");
-
-struct rtqk_arg {
- struct radix_node_head *rnh;
- int mode;
- int updating;
- int draining;
- int killed;
- int found;
- time_t nextstop;
-};
+ return (rn_addroute(v_arg, n_arg, head, treenodes));
+}
/*
* Age old PMTUs.
*/
struct mtuex_arg {
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
time_t nextstop;
};
static VNET_DEFINE(struct callout, rtq_mtutimer);
#define V_rtq_mtutimer VNET(rtq_mtutimer)
static int
-in6_mtuexpire(struct radix_node *rn, void *rock)
+in6_mtuexpire(struct rtentry *rt, void *rock)
{
- struct rtentry *rt = (struct rtentry *)rn;
struct mtuex_arg *ap = rock;
- /* sanity */
- if (!rt)
- panic("rt == NULL in in6_mtuexpire");
-
- if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
- if (rt->rt_rmx.rmx_expire <= time_uptime) {
+ if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
+ if (rt->rt_expire <= time_uptime) {
rt->rt_flags |= RTF_PROBEMTU;
} else {
- ap->nextstop = lmin(ap->nextstop,
- rt->rt_rmx.rmx_expire);
+ ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
}
}
- return 0;
+ return (0);
}
#define MTUTIMO_DEFAULT (60*1)
static void
-in6_mtutimo_one(struct radix_node_head *rnh)
+in6_mtutimo_setwa(struct rib_head *rnh, uint32_t fibum, int af,
+ void *_arg)
{
- struct mtuex_arg arg;
+ struct mtuex_arg *arg;
- arg.rnh = rnh;
- arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ arg = (struct mtuex_arg *)_arg;
+
+ arg->rnh = rnh;
}
static void
in6_mtutimo(void *rock)
{
CURVNET_SET_QUIET((struct vnet *) rock);
- struct radix_node_head *rnh;
struct timeval atv;
- u_int fibnum;
+ struct mtuex_arg arg;
- for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
- rnh = rt_tables_get_rnh(fibnum, AF_INET6);
- if (rnh != NULL)
- in6_mtutimo_one(rnh);
- }
+ rt_foreach_fib_walk(AF_INET6, in6_mtutimo_setwa, in6_mtuexpire, &arg);
atv.tv_sec = MTUTIMO_DEFAULT;
atv.tv_usec = 0;
@@ -256,10 +208,6 @@ in6_mtutimo(void *rock)
/*
* Initialize our routing tree.
- * XXX MRT When off == 0, we are being called from vfs_export.c
- * so just set up their table and leave. (we know what the correct
- * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd
- * see also comments in in_inithead() vfs_export.c and domain.h
*/
static VNET_DEFINE(int, _in6_rt_was_here);
#define V__in6_rt_was_here VNET(_in6_rt_was_here)
@@ -267,24 +215,22 @@ static VNET_DEFINE(int, _in6_rt_was_here);
int
in6_inithead(void **head, int off)
{
- struct radix_node_head *rnh;
+ struct rib_head *rh;
- if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3))
- return 0; /* See above */
+ rh = rt_table_init(offsetof(struct sockaddr_in6, sin6_addr) << 3);
+ if (rh == NULL)
+ return (0);
- if (off == 0) /* See above */
- return 1; /* only do the rest for the real thing */
-
- rnh = *head;
- rnh->rnh_addaddr = in6_addroute;
+ rh->rnh_addaddr = in6_addroute;
+ *head = (void *)rh;
if (V__in6_rt_was_here == 0) {
- callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
+ callout_init(&V_rtq_mtutimer, 1);
in6_mtutimo(curvnet); /* kick off timeout first time */
V__in6_rt_was_here = 1;
}
- return 1;
+ return (1);
}
#ifdef VIMAGE
@@ -293,6 +239,8 @@ in6_detachhead(void **head, int off)
{
callout_drain(&V_rtq_mtutimer);
+ rt_table_destroy((struct rib_head *)(*head));
+
return (1);
}
#endif
diff --git a/freebsd/sys/netinet6/in6_rss.h b/freebsd/sys/netinet6/in6_rss.h
new file mode 100644
index 00000000..f5b48c71
--- /dev/null
+++ b/freebsd/sys/netinet6/in6_rss.h
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IN6_RSS_H_
+#define _NETINET6_IN6_RSS_H_
+
+#include <netinet/in.h> /* in_addr_t */
+
+/*
+ * Network stack interface to generate a hash for a protocol tuple.
+ */
+uint32_t rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport,
+ const struct in6_addr *dst, u_short dstport);
+uint32_t rss_hash_ip6_2tuple(const struct in6_addr *src,
+ const struct in6_addr *dst);
+
+/*
+ * Functions to calculate a software RSS hash for a given mbuf or
+ * packet detail.
+ */
+int rss_mbuf_software_hash_v6(const struct mbuf *m, int dir,
+ uint32_t *hashval, uint32_t *hashtype);
+int rss_proto_software_hash_v6(const struct in6_addr *src,
+ const struct in6_addr *dst, u_short src_port,
+ u_short dst_port, int proto, uint32_t *hashval,
+ uint32_t *hashtype);
+struct mbuf * rss_soft_m2cpuid_v6(struct mbuf *m, uintptr_t source,
+ u_int *cpuid);
+
+#endif /* !_NETINET6_IN6_RSS_H_ */
diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c
index a69ecf24..2a50a975 100644
--- a/freebsd/sys/netinet6/in6_src.c
+++ b/freebsd/sys/netinet6/in6_src.c
@@ -84,9 +84,11 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/rmlock.h>
#include <sys/sx.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/if_llatbl.h>
@@ -105,6 +107,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
@@ -133,8 +136,11 @@ static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct rtentry **, int, u_int);
static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
- struct ip6_moptions *, struct route_in6 *ro, struct ifnet **,
+ struct ip6_moptions *, struct ifnet **,
struct ifnet *, u_int);
+static int in6_selectsrc(uint32_t, struct sockaddr_in6 *,
+ struct ip6_pktopts *, struct inpcb *, struct ucred *,
+ struct ifnet **, struct in6_addr *);
static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
@@ -142,7 +148,7 @@ static void init_policy_queue(void);
static int add_addrsel_policyent(struct in6_addrpolicy *);
static int delete_addrsel_policyent(struct in6_addrpolicy *);
static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
- void *);
+ void *);
static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
@@ -174,11 +180,12 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
goto out; /* XXX: we can't use 'break' here */ \
} while(0)
-int
-in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
- struct inpcb *inp, struct route_in6 *ro, struct ucred *cred,
+static int
+in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
+ struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred,
struct ifnet **ifpp, struct in6_addr *srcp)
{
+ struct rm_priotracker in6_ifa_tracker;
struct in6_addr dst, tmp;
struct ifnet *ifp = NULL, *oifp = NULL;
struct in6_ifaddr *ia = NULL, *ia_best = NULL;
@@ -221,12 +228,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
*/
if (opts && (pi = opts->ip6po_pktinfo) &&
!IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
- struct sockaddr_in6 srcsock;
- struct in6_ifaddr *ia6;
-
/* get the outgoing interface */
- if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp,
- (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB))
+ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
+ fibnum))
!= 0)
return (error);
@@ -237,33 +241,36 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* the interface must be specified; otherwise, ifa_ifwithaddr()
* will fail matching the address.
*/
- bzero(&srcsock, sizeof(srcsock));
- srcsock.sin6_family = AF_INET6;
- srcsock.sin6_len = sizeof(srcsock);
- srcsock.sin6_addr = pi->ipi6_addr;
+ tmp = pi->ipi6_addr;
if (ifp) {
- error = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
+ error = in6_setscope(&tmp, ifp, &odstzone);
if (error)
return (error);
}
if (cred != NULL && (error = prison_local_ip6(cred,
- &srcsock.sin6_addr, (inp != NULL &&
- (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
+ &tmp, (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0)
return (error);
- ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(
- (struct sockaddr *)&srcsock);
- if (ia6 == NULL ||
- (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
- if (ia6 != NULL)
- ifa_free(&ia6->ia_ifa);
- return (EADDRNOTAVAIL);
- }
- pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
+ /*
+ * If IPV6_BINDANY socket option is set, we allow to specify
+ * non local addresses as source address in IPV6_PKTINFO
+ * ancillary data.
+ */
+ if ((inp->inp_flags & INP_BINDANY) == 0) {
+ ia = in6ifa_ifwithaddr(&tmp, 0 /* XXX */);
+ if (ia == NULL || (ia->ia6_flags & (IN6_IFF_ANYCAST |
+ IN6_IFF_NOTREADY))) {
+ if (ia != NULL)
+ ifa_free(&ia->ia_ifa);
+ return (EADDRNOTAVAIL);
+ }
+ bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp));
+ ifa_free(&ia->ia_ifa);
+ } else
+ bcopy(&tmp, srcp, sizeof(*srcp));
+ pi->ipi6_addr = tmp; /* XXX: this overrides pi */
if (ifpp)
*ifpp = ifp;
- bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp));
- ifa_free(&ia6->ia_ifa);
return (0);
}
@@ -291,7 +298,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* the outgoing interface and the destination address.
*/
/* get the outgoing interface */
- if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp,
+ if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
(inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0)
return (error);
@@ -304,7 +311,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
return (error);
rule = 0;
- IN6_IFADDR_RLOCK();
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
int new_scope = -1, new_matchlen = -1;
struct in6_addrpolicy *new_policy = NULL;
@@ -445,6 +452,14 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
REPLACE(8);
/*
+ * Rule 9: prefer address with better virtual status.
+ */
+ if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa))
+ REPLACE(9);
+ if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa))
+ NEXT(9);
+
+ /*
* Rule 10: prefer address with `prefer_source' flag.
*/
if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 &&
@@ -494,7 +509,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
}
if ((ia = ia_best) == NULL) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
IP6STAT_INC(ip6s_sources_none);
return (EADDRNOTAVAIL);
}
@@ -511,7 +526,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
tmp = ia->ia_addr.sin6_addr;
if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL &&
(inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
IP6STAT_INC(ip6s_sources_none);
return (EADDRNOTAVAIL);
}
@@ -530,11 +545,84 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
IP6STAT_INC(ip6s_sources_otherscope[best_scope]);
if (IFA6_IS_DEPRECATED(ia))
IP6STAT_INC(ip6s_sources_deprecated[best_scope]);
- IN6_IFADDR_RUNLOCK();
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
return (0);
}
/*
+ * Select source address based on @inp, @dstsock and @opts.
+ * Stores selected address to @srcp. If @scope_ambiguous is set,
+ * embed scope from selected outgoing interface. If @hlim pointer
+ * is provided, stores calculated hop limit there.
+ * Returns 0 on success.
+ */
+int
+in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct inpcb *inp, struct ucred *cred, int scope_ambiguous,
+ struct in6_addr *srcp, int *hlim)
+{
+ struct ifnet *retifp;
+ uint32_t fibnum;
+ int error;
+
+ fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
+ retifp = NULL;
+
+ error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp);
+ if (error != 0)
+ return (error);
+
+ if (hlim != NULL)
+ *hlim = in6_selecthlim(inp, retifp);
+
+ if (retifp == NULL || scope_ambiguous == 0)
+ return (0);
+
+ /*
+ * Application should provide a proper zone ID or the use of
+ * default zone IDs should be enabled. Unfortunately, some
+ * applications do not behave as it should, so we need a
+ * workaround. Even if an appropriate ID is not determined
+ * (when it's required), if we can determine the outgoing
+ * interface. determine the zone ID based on the interface.
+ */
+ error = in6_setscope(&dstsock->sin6_addr, retifp, NULL);
+
+ return (error);
+}
+
+/*
+ * Select source address based on @fibnum, @dst and @scopeid.
+ * Stores selected address to @srcp.
+ * Returns 0 on success.
+ *
+ * Used by non-socket based consumers (ND code mostly)
+ */
+int
+in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
+ uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp,
+ int *hlim)
+{
+ struct ifnet *retifp;
+ struct sockaddr_in6 dst_sa;
+ int error;
+
+ retifp = ifp;
+ bzero(&dst_sa, sizeof(dst_sa));
+ dst_sa.sin6_family = AF_INET6;
+ dst_sa.sin6_len = sizeof(dst_sa);
+ dst_sa.sin6_addr = *dst;
+ dst_sa.sin6_scope_id = scopeid;
+ sa6_embedscope(&dst_sa, 0);
+
+ error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp);
+ if (hlim != NULL)
+ *hlim = in6_selecthlim(NULL, retifp);
+
+ return (error);
+}
+
+/*
* clone - meaningful only for bsdi and freebsd
*/
static int
@@ -548,6 +636,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
struct sockaddr_in6 *sin6_next;
struct in6_pktinfo *pi = NULL;
struct in6_addr *dst = &dstsock->sin6_addr;
+ uint32_t zoneid;
#if 0
char ip6buf[INET6_ADDRSTRLEN];
@@ -578,7 +667,6 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
} else
goto getroute;
}
-
/*
* If the destination address is a multicast address and the outgoing
* interface for the address is specified by the caller, use it.
@@ -587,6 +675,18 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
goto done; /* we do not need a route for multicast. */
}
+ /*
+ * If destination address is LLA or link- or node-local multicast,
+ * use it's embedded scope zone id to determine outgoing interface.
+ */
+ if (IN6_IS_ADDR_MC_LINKLOCAL(dst) ||
+ IN6_IS_ADDR_MC_NODELOCAL(dst)) {
+ zoneid = ntohs(in6_getscope(dst));
+ if (zoneid > 0) {
+ ifp = in6_getlinkifnet(zoneid);
+ goto done;
+ }
+ }
getroute:
/*
@@ -595,81 +695,38 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
*/
if (opts && opts->ip6po_nexthop) {
struct route_in6 *ron;
- struct llentry *la;
-
- sin6_next = satosin6(opts->ip6po_nexthop);
-
- /* at this moment, we only support AF_INET6 next hops */
- if (sin6_next->sin6_family != AF_INET6) {
- error = EAFNOSUPPORT; /* or should we proceed? */
- goto done;
- }
-
- /*
- * If the next hop is an IPv6 address, then the node identified
- * by that address must be a neighbor of the sending host.
- */
- ron = &opts->ip6po_nextroute;
- /*
- * XXX what do we do here?
- * PLZ to be fixing
- */
-
- if (ron->ro_rt == NULL) {
- in6_rtalloc(ron, fibnum); /* multi path case? */
- if (ron->ro_rt == NULL) {
- if (ron->ro_rt) {
- RTFREE(ron->ro_rt);
- ron->ro_rt = NULL;
- }
- error = EHOSTUNREACH;
+ sin6_next = satosin6(opts->ip6po_nexthop);
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6_next->sin6_addr)) {
+ /*
+ * Next hop is LLA, thus it should be neighbor.
+ * Determine outgoing interface by zone index.
+ */
+ zoneid = ntohs(in6_getscope(&sin6_next->sin6_addr));
+ if (zoneid > 0) {
+ ifp = in6_getlinkifnet(zoneid);
goto done;
- }
- }
-
- rt = ron->ro_rt;
- ifp = rt->rt_ifp;
- IF_AFDATA_RLOCK(ifp);
- la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr);
- IF_AFDATA_RUNLOCK(ifp);
- if (la != NULL)
- LLE_RUNLOCK(la);
- else {
- error = EHOSTUNREACH;
- goto done;
- }
-#if 0
- if ((ron->ro_rt &&
- (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
- (RTF_UP | RTF_LLINFO)) ||
- !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
- &sin6_next->sin6_addr)) {
- if (ron->ro_rt) {
- RTFREE(ron->ro_rt);
- ron->ro_rt = NULL;
}
- *satosin6(&ron->ro_dst) = *sin6_next;
}
+ ron = &opts->ip6po_nextroute;
+ /* Use a cached route if it exists and is valid. */
+ if (ron->ro_rt != NULL && (
+ (ron->ro_rt->rt_flags & RTF_UP) == 0 ||
+ ron->ro_dst.sin6_family != AF_INET6 ||
+ !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr,
+ &sin6_next->sin6_addr)))
+ RO_RTFREE(ron);
if (ron->ro_rt == NULL) {
+ ron->ro_dst = *sin6_next;
in6_rtalloc(ron, fibnum); /* multi path case? */
- if (ron->ro_rt == NULL ||
- !(ron->ro_rt->rt_flags & RTF_LLINFO)) {
- if (ron->ro_rt) {
- RTFREE(ron->ro_rt);
- ron->ro_rt = NULL;
- }
- error = EHOSTUNREACH;
- goto done;
- }
}
-#endif
-
/*
- * When cloning is required, try to allocate a route to the
- * destination so that the caller can store path MTU
- * information.
+ * The node identified by that address must be a
+ * neighbor of the sending host.
*/
+ if (ron->ro_rt == NULL ||
+ (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0)
+ error = EHOSTUNREACH;
goto done;
}
@@ -782,24 +839,27 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
static int
in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
- struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp,
+ struct ip6_moptions *mopts, struct ifnet **retifp,
struct ifnet *oifp, u_int fibnum)
{
int error;
struct route_in6 sro;
struct rtentry *rt = NULL;
+ int rt_flags;
KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__));
- if (ro == NULL) {
- bzero(&sro, sizeof(sro));
- ro = &sro;
- }
+ bzero(&sro, sizeof(sro));
+ rt_flags = 0;
+
+ error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum);
- if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
- &rt, 1, fibnum)) != 0) {
- if (ro == &sro && rt && rt == sro.ro_rt)
- RTFREE(rt);
+ if (rt)
+ rt_flags = rt->rt_flags;
+ if (rt && rt == sro.ro_rt)
+ RTFREE(rt);
+
+ if (error != 0) {
/* Help ND. See oifp comment in in6_selectsrc(). */
if (oifp != NULL && fibnum == RT_DEFAULT_FIB) {
*retifp = oifp;
@@ -825,16 +885,12 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* Although this may not be very harmful, it should still be confusing.
* We thus reject the case here.
*/
- if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
- int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- if (ro == &sro && rt && rt == sro.ro_rt)
- RTFREE(rt);
- return (flags);
+ if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) {
+ error = (rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ return (error);
}
- if (ro == &sro && rt && rt == sro.ro_rt)
- RTFREE(rt);
return (0);
}
@@ -882,19 +938,16 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp)
else if (ifp)
return (ND_IFINFO(ifp)->chlim);
else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
- struct route_in6 ro6;
- struct ifnet *lifp;
-
- bzero(&ro6, sizeof(ro6));
- ro6.ro_dst.sin6_family = AF_INET6;
- ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
- ro6.ro_dst.sin6_addr = in6p->in6p_faddr;
- in6_rtalloc(&ro6, in6p->inp_inc.inc_fibnum);
- if (ro6.ro_rt) {
- lifp = ro6.ro_rt->rt_ifp;
- RTFREE(ro6.ro_rt);
- if (lifp)
- return (ND_IFINFO(lifp)->chlim);
+ struct nhop6_basic nh6;
+ struct in6_addr dst;
+ uint32_t fibnum, scopeid;
+ int hlim;
+
+ fibnum = in6p->inp_inc.inc_fibnum;
+ in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid);
+ if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){
+ hlim = ND_IFINFO(nh6.nh_ifp)->chlim;
+ return (hlim);
}
}
return (V_ip6_defhlim);
@@ -1005,7 +1058,6 @@ in6_src_sysctl(SYSCTL_HANDLER_ARGS)
int
in6_src_ioctl(u_long cmd, caddr_t data)
{
- int i;
struct in6_addrpolicy ent0;
if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
@@ -1019,10 +1071,7 @@ in6_src_ioctl(u_long cmd, caddr_t data)
if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
return (EINVAL);
/* clear trailing garbages (if any) of the prefix address. */
- for (i = 0; i < 4; i++) {
- ent0.addr.sin6_addr.s6_addr32[i] &=
- ent0.addrmask.sin6_addr.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr);
ent0.use = 0;
switch (cmd) {
@@ -1125,8 +1174,7 @@ delete_addrsel_policyent(struct in6_addrpolicy *key)
}
static int
-walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *),
- void *w)
+walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
{
struct addrsel_policyent *pol;
int error = 0;
diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h
index 90530a68..77e5920b 100644
--- a/freebsd/sys/netinet6/in6_var.h
+++ b/freebsd/sys/netinet6/in6_var.h
@@ -65,8 +65,10 @@
#define _NETINET6_IN6_VAR_H_
#include <sys/tree.h>
+#include <sys/counter.h>
#ifdef _KERNEL
+#include <sys/fnv_hash.h>
#include <sys/libkern.h>
#endif
@@ -95,19 +97,20 @@ struct in6_addrlifetime {
struct nd_ifinfo;
struct scope6_id;
struct lltable;
-struct mld_ifinfo;
+struct mld_ifsoftc;
struct in6_ifextra {
- struct in6_ifstat *in6_ifstat;
- struct icmp6_ifstat *icmp6_ifstat;
+ counter_u64_t *in6_ifstat;
+ counter_u64_t *icmp6_ifstat;
struct nd_ifinfo *nd_ifinfo;
struct scope6_id *scope6_id;
struct lltable *lltable;
- struct mld_ifinfo *mld_ifinfo;
+ struct mld_ifsoftc *mld_ifinfo;
};
#define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable)
+#ifdef _KERNEL
struct in6_ifaddr {
struct ifaddr ia_ifa; /* protocol-independent info */
#define ia_ifp ia_ifa.ifa_ifp
@@ -131,10 +134,14 @@ struct in6_ifaddr {
/* multicast addresses joined from the kernel */
LIST_HEAD(, in6_multi_mship) ia6_memberships;
+ /* entry in bucket of inet6 addresses */
+ LIST_ENTRY(in6_ifaddr) ia6_hash;
};
/* List of in6_ifaddr's. */
TAILQ_HEAD(in6_ifaddrhead, in6_ifaddr);
+LIST_HEAD(in6_ifaddrlisthead, in6_ifaddr);
+#endif /* _KERNEL */
/* control structure to manage address selection policy */
struct in6_addrpolicy {
@@ -149,37 +156,37 @@ struct in6_addrpolicy {
* IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12).
*/
struct in6_ifstat {
- u_quad_t ifs6_in_receive; /* # of total input datagram */
- u_quad_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */
- u_quad_t ifs6_in_toobig; /* # of datagrams exceeded MTU */
- u_quad_t ifs6_in_noroute; /* # of datagrams with no route */
- u_quad_t ifs6_in_addrerr; /* # of datagrams with invalid dst */
- u_quad_t ifs6_in_protounknown; /* # of datagrams with unknown proto */
+ uint64_t ifs6_in_receive; /* # of total input datagram */
+ uint64_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */
+ uint64_t ifs6_in_toobig; /* # of datagrams exceeded MTU */
+ uint64_t ifs6_in_noroute; /* # of datagrams with no route */
+ uint64_t ifs6_in_addrerr; /* # of datagrams with invalid dst */
+ uint64_t ifs6_in_protounknown; /* # of datagrams with unknown proto */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_in_truncated; /* # of truncated datagrams */
- u_quad_t ifs6_in_discard; /* # of discarded datagrams */
+ uint64_t ifs6_in_truncated; /* # of truncated datagrams */
+ uint64_t ifs6_in_discard; /* # of discarded datagrams */
/* NOTE: fragment timeout is not here */
- u_quad_t ifs6_in_deliver; /* # of datagrams delivered to ULP */
+ uint64_t ifs6_in_deliver; /* # of datagrams delivered to ULP */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_out_forward; /* # of datagrams forwarded */
+ uint64_t ifs6_out_forward; /* # of datagrams forwarded */
/* NOTE: increment on outgoing if */
- u_quad_t ifs6_out_request; /* # of outgoing datagrams from ULP */
+ uint64_t ifs6_out_request; /* # of outgoing datagrams from ULP */
/* NOTE: does not include forwrads */
- u_quad_t ifs6_out_discard; /* # of discarded datagrams */
- u_quad_t ifs6_out_fragok; /* # of datagrams fragmented */
- u_quad_t ifs6_out_fragfail; /* # of datagrams failed on fragment */
- u_quad_t ifs6_out_fragcreat; /* # of fragment datagrams */
+ uint64_t ifs6_out_discard; /* # of discarded datagrams */
+ uint64_t ifs6_out_fragok; /* # of datagrams fragmented */
+ uint64_t ifs6_out_fragfail; /* # of datagrams failed on fragment */
+ uint64_t ifs6_out_fragcreat; /* # of fragment datagrams */
/* NOTE: this is # after fragment */
- u_quad_t ifs6_reass_reqd; /* # of incoming fragmented packets */
+ uint64_t ifs6_reass_reqd; /* # of incoming fragmented packets */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_reass_ok; /* # of reassembled packets */
+ uint64_t ifs6_reass_ok; /* # of reassembled packets */
/* NOTE: this is # after reass */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_reass_fail; /* # of reass failures */
+ uint64_t ifs6_reass_fail; /* # of reass failures */
/* NOTE: may not be packet count */
/* NOTE: increment on final dst if */
- u_quad_t ifs6_in_mcast; /* # of inbound multicast datagrams */
- u_quad_t ifs6_out_mcast; /* # of outbound multicast datagrams */
+ uint64_t ifs6_in_mcast; /* # of inbound multicast datagrams */
+ uint64_t ifs6_out_mcast; /* # of outbound multicast datagrams */
};
/*
@@ -191,77 +198,77 @@ struct icmp6_ifstat {
* Input statistics
*/
/* ipv6IfIcmpInMsgs, total # of input messages */
- u_quad_t ifs6_in_msg;
+ uint64_t ifs6_in_msg;
/* ipv6IfIcmpInErrors, # of input error messages */
- u_quad_t ifs6_in_error;
+ uint64_t ifs6_in_error;
/* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */
- u_quad_t ifs6_in_dstunreach;
+ uint64_t ifs6_in_dstunreach;
/* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */
- u_quad_t ifs6_in_adminprohib;
+ uint64_t ifs6_in_adminprohib;
/* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */
- u_quad_t ifs6_in_timeexceed;
+ uint64_t ifs6_in_timeexceed;
/* ipv6IfIcmpInParmProblems, # of input parameter problem errors */
- u_quad_t ifs6_in_paramprob;
+ uint64_t ifs6_in_paramprob;
/* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */
- u_quad_t ifs6_in_pkttoobig;
+ uint64_t ifs6_in_pkttoobig;
/* ipv6IfIcmpInEchos, # of input echo requests */
- u_quad_t ifs6_in_echo;
+ uint64_t ifs6_in_echo;
/* ipv6IfIcmpInEchoReplies, # of input echo replies */
- u_quad_t ifs6_in_echoreply;
+ uint64_t ifs6_in_echoreply;
/* ipv6IfIcmpInRouterSolicits, # of input router solicitations */
- u_quad_t ifs6_in_routersolicit;
+ uint64_t ifs6_in_routersolicit;
/* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */
- u_quad_t ifs6_in_routeradvert;
+ uint64_t ifs6_in_routeradvert;
/* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */
- u_quad_t ifs6_in_neighborsolicit;
+ uint64_t ifs6_in_neighborsolicit;
/* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */
- u_quad_t ifs6_in_neighboradvert;
+ uint64_t ifs6_in_neighboradvert;
/* ipv6IfIcmpInRedirects, # of input redirects */
- u_quad_t ifs6_in_redirect;
+ uint64_t ifs6_in_redirect;
/* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */
- u_quad_t ifs6_in_mldquery;
+ uint64_t ifs6_in_mldquery;
/* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */
- u_quad_t ifs6_in_mldreport;
+ uint64_t ifs6_in_mldreport;
/* ipv6IfIcmpInGroupMembReductions, # of input MLD done */
- u_quad_t ifs6_in_mlddone;
+ uint64_t ifs6_in_mlddone;
/*
* Output statistics. We should solve unresolved routing problem...
*/
/* ipv6IfIcmpOutMsgs, total # of output messages */
- u_quad_t ifs6_out_msg;
+ uint64_t ifs6_out_msg;
/* ipv6IfIcmpOutErrors, # of output error messages */
- u_quad_t ifs6_out_error;
+ uint64_t ifs6_out_error;
/* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */
- u_quad_t ifs6_out_dstunreach;
+ uint64_t ifs6_out_dstunreach;
/* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */
- u_quad_t ifs6_out_adminprohib;
+ uint64_t ifs6_out_adminprohib;
/* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */
- u_quad_t ifs6_out_timeexceed;
+ uint64_t ifs6_out_timeexceed;
/* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */
- u_quad_t ifs6_out_paramprob;
+ uint64_t ifs6_out_paramprob;
/* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */
- u_quad_t ifs6_out_pkttoobig;
+ uint64_t ifs6_out_pkttoobig;
/* ipv6IfIcmpOutEchos, # of output echo requests */
- u_quad_t ifs6_out_echo;
+ uint64_t ifs6_out_echo;
/* ipv6IfIcmpOutEchoReplies, # of output echo replies */
- u_quad_t ifs6_out_echoreply;
+ uint64_t ifs6_out_echoreply;
/* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */
- u_quad_t ifs6_out_routersolicit;
+ uint64_t ifs6_out_routersolicit;
/* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */
- u_quad_t ifs6_out_routeradvert;
+ uint64_t ifs6_out_routeradvert;
/* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */
- u_quad_t ifs6_out_neighborsolicit;
+ uint64_t ifs6_out_neighborsolicit;
/* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */
- u_quad_t ifs6_out_neighboradvert;
+ uint64_t ifs6_out_neighboradvert;
/* ipv6IfIcmpOutRedirects, # of output redirects */
- u_quad_t ifs6_out_redirect;
+ uint64_t ifs6_out_redirect;
/* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */
- u_quad_t ifs6_out_mldquery;
+ uint64_t ifs6_out_mldquery;
/* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */
- u_quad_t ifs6_out_mldreport;
+ uint64_t ifs6_out_mldreport;
/* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */
- u_quad_t ifs6_out_mlddone;
+ uint64_t ifs6_out_mlddone;
};
struct in6_ifreq {
@@ -287,6 +294,17 @@ struct in6_aliasreq {
struct sockaddr_in6 ifra_prefixmask;
int ifra_flags;
struct in6_addrlifetime ifra_lifetime;
+ int ifra_vhid;
+};
+
+/* pre-10.x compat */
+struct oin6_aliasreq {
+ char ifra_name[IFNAMSIZ];
+ struct sockaddr_in6 ifra_addr;
+ struct sockaddr_in6 ifra_dstaddr;
+ struct sockaddr_in6 ifra_prefixmask;
+ int ifra_flags;
+ struct in6_addrlifetime ifra_lifetime;
};
/* prefix type macro */
@@ -391,6 +409,12 @@ struct in6_rrenumreq {
(((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
(((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
(((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
+#define IN6_MASK_ADDR(a, m) do { \
+ (a)->s6_addr32[0] &= (m)->s6_addr32[0]; \
+ (a)->s6_addr32[1] &= (m)->s6_addr32[1]; \
+ (a)->s6_addr32[2] &= (m)->s6_addr32[2]; \
+ (a)->s6_addr32[3] &= (m)->s6_addr32[3]; \
+} while (0)
#endif
#define SIOCSIFADDR_IN6 _IOW('i', 12, struct in6_ifreq)
@@ -409,7 +433,8 @@ struct in6_rrenumreq {
#define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq)
#define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq)
-#define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq)
+#define OSIOCAIFADDR_IN6 _IOW('i', 26, struct oin6_aliasreq)
+#define SIOCAIFADDR_IN6 _IOW('i', 27, struct in6_aliasreq)
#define SIOCSIFPHYADDR_IN6 _IOW('i', 70, struct in6_aliasreq)
#define SIOCGIFPSRCADDR_IN6 _IOWR('i', 71, struct in6_ifreq)
@@ -417,11 +442,6 @@ struct in6_rrenumreq {
#define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq)
-#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist)
-#ifdef _KERNEL
-/* XXX: SIOCGPRLST_IN6 is exposed in KAME but in6_oprlist is not. */
-#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_oprlist)
-#endif
#ifdef _KERNEL
#define OSIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq)
#endif
@@ -433,7 +453,6 @@ struct in6_rrenumreq {
#define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq)
#define SIOCGIFALIFETIME_IN6 _IOWR('i', 81, struct in6_ifreq)
-#define SIOCSIFALIFETIME_IN6 _IOWR('i', 82, struct in6_ifreq)
#define SIOCGIFSTAT_IN6 _IOWR('i', 83, struct in6_ifreq)
#define SIOCGIFSTAT_ICMP6 _IOWR('i', 84, struct in6_ifreq)
@@ -469,14 +488,11 @@ struct in6_rrenumreq {
#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */
#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */
#define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address
- * (used only at first SIOC* call)
+ * (obsolete)
*/
#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */
#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */
#define IN6_IFF_PREFER_SOURCE 0x0100 /* preferred address for SAS */
-#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management.
- * XXX: this should be temporary.
- */
/* do not input/output */
#define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)
@@ -488,26 +504,45 @@ struct in6_rrenumreq {
#ifdef _KERNEL
VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DECLARE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DECLARE(u_long, in6_ifaddrhmask);
#define V_in6_ifaddrhead VNET(in6_ifaddrhead)
+#define V_in6_ifaddrhashtbl VNET(in6_ifaddrhashtbl)
+#define V_in6_ifaddrhmask VNET(in6_ifaddrhmask)
+
+#define IN6ADDR_NHASH_LOG2 8
+#define IN6ADDR_NHASH (1 << IN6ADDR_NHASH_LOG2)
+#define IN6ADDR_HASHVAL(x) (in6_addrhash(x))
+#define IN6ADDR_HASH(x) \
+ (&V_in6_ifaddrhashtbl[IN6ADDR_HASHVAL(x) & V_in6_ifaddrhmask])
+
+static __inline uint32_t
+in6_addrhash(const struct in6_addr *in6)
+{
+ uint32_t x;
+
+ x = in6->s6_addr32[0] ^ in6->s6_addr32[1] ^ in6->s6_addr32[2] ^
+ in6->s6_addr32[3];
+ return (fnv_32_buf(&x, sizeof(x), FNV1_32_INIT));
+}
+
+extern struct rmlock in6_ifaddr_lock;
+#define IN6_IFADDR_LOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_LOCKED)
+#define IN6_IFADDR_RLOCK(t) rm_rlock(&in6_ifaddr_lock, (t))
+#define IN6_IFADDR_RLOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_RLOCKED)
+#define IN6_IFADDR_RUNLOCK(t) rm_runlock(&in6_ifaddr_lock, (t))
+#define IN6_IFADDR_WLOCK() rm_wlock(&in6_ifaddr_lock)
+#define IN6_IFADDR_WLOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_WLOCKED)
+#define IN6_IFADDR_WUNLOCK() rm_wunlock(&in6_ifaddr_lock)
-extern struct rwlock in6_ifaddr_lock;
-#define IN6_IFADDR_LOCK_ASSERT( ) rw_assert(&in6_ifaddr_lock, RA_LOCKED)
-#define IN6_IFADDR_RLOCK() rw_rlock(&in6_ifaddr_lock)
-#define IN6_IFADDR_RLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_RLOCKED)
-#define IN6_IFADDR_RUNLOCK() rw_runlock(&in6_ifaddr_lock)
-#define IN6_IFADDR_WLOCK() rw_wlock(&in6_ifaddr_lock)
-#define IN6_IFADDR_WLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_WLOCKED)
-#define IN6_IFADDR_WUNLOCK() rw_wunlock(&in6_ifaddr_lock)
-
-VNET_DECLARE(struct icmp6stat, icmp6stat);
-#define V_icmp6stat VNET(icmp6stat)
#define in6_ifstat_inc(ifp, tag) \
do { \
if (ifp) \
- ((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->in6_ifstat->tag++; \
+ counter_u64_add(((struct in6_ifextra *) \
+ ((ifp)->if_afdata[AF_INET6]))->in6_ifstat[ \
+ offsetof(struct in6_ifstat, tag) / sizeof(uint64_t)], 1);\
} while (/*CONSTCOND*/ 0)
-extern struct in6_addr zeroin6_addr;
extern u_char inet6ctlerrmap[];
VNET_DECLARE(unsigned long, in6_maxmtu);
#define V_in6_maxmtu VNET(in6_maxmtu)
@@ -552,7 +587,6 @@ ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b)
return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr)));
}
RB_PROTOTYPE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
-#endif /* _KERNEL */
/*
* IPv6 multicast PCB-layer group filter descriptor.
@@ -603,12 +637,12 @@ struct in6_multi {
u_int in6m_timer; /* MLD6 listener report timer */
/* New fields for MLDv2 follow. */
- struct mld_ifinfo *in6m_mli; /* MLD info */
+ struct mld_ifsoftc *in6m_mli; /* MLD info */
SLIST_ENTRY(in6_multi) in6m_nrele; /* to-be-released by MLD */
struct ip6_msource_tree in6m_srcs; /* tree of sources */
u_long in6m_nsrc; /* # of tree entries */
- struct ifqueue in6m_scq; /* queue of pending
+ struct mbufq in6m_scq; /* queue of pending
* state-change packets */
struct timeval in6m_lastgsrtv; /* last G-S-R query */
uint16_t in6m_sctimer; /* state-change timer */
@@ -652,8 +686,6 @@ im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims,
return (MCAST_UNDEFINED);
}
-#ifdef _KERNEL
-
/*
* Lock macros for IPv6 layer multicast address lists. IPv6 lock goes
* before link layer multicast locks in the lock order. In most cases,
@@ -756,18 +788,20 @@ int in6_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *);
int in6_update_ifa(struct ifnet *, struct in6_aliasreq *,
struct in6_ifaddr *, int);
+void in6_prepare_ifra(struct in6_aliasreq *, const struct in6_addr *,
+ const struct in6_addr *);
void in6_purgeaddr(struct ifaddr *);
int in6if_do_dad(struct ifnet *);
-void in6_purgeif(struct ifnet *);
void in6_savemkludge(struct in6_ifaddr *);
void *in6_domifattach(struct ifnet *);
void in6_domifdetach(struct ifnet *, void *);
+int in6_domifmtu(struct ifnet *);
void in6_setmaxmtu(void);
int in6_if2idlen(struct ifnet *);
struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int);
-struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *);
+struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, const struct in6_addr *);
+struct in6_ifaddr *in6ifa_ifwithaddr(const struct in6_addr *, uint32_t);
struct in6_ifaddr *in6ifa_llaonifp(struct ifnet *);
-char *ip6_sprintf(char *, const struct in6_addr *);
int in6_addr2zoneid(struct ifnet *, struct in6_addr *, u_int32_t *);
int in6_matchlen(struct in6_addr *, struct in6_addr *);
int in6_are_prefix_equal(struct in6_addr *, struct in6_addr *, int);
@@ -777,12 +811,11 @@ int in6_prefix_ioctl(struct socket *, u_long, caddr_t,
int in6_prefix_add_ifid(int, struct in6_ifaddr *);
void in6_prefix_remove_ifid(int, struct in6_ifaddr *);
void in6_purgeprefix(struct ifnet *);
-void in6_ifremloop(struct ifaddr *);
-void in6_ifaddloop(struct ifaddr *);
int in6_is_addr_deprecated(struct sockaddr_in6 *);
int in6_src_ioctl(u_long, caddr_t);
+void in6_newaddrmsg(struct in6_ifaddr *, int);
/*
* Extended API for IPv6 FIB support.
*/
diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c
index 6efae91a..50583537 100644
--- a/freebsd/sys/netinet6/ip6_forward.c
+++ b/freebsd/sys/netinet6/ip6_forward.c
@@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_ipstealth.h>
@@ -53,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/pfil.h>
@@ -72,13 +72,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_pcb.h>
#ifdef IPSEC
+#include <netinet6/ip6_ipsec.h>
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
#endif /* IPSEC */
-#include <netinet6/ip6protosw.h>
-
/*
* Forward a packet. If some error occurs return the sender
* an icmp packet. Note we can't always generate a meaningful
@@ -105,29 +104,10 @@ ip6_forward(struct mbuf *m, int srcrt)
struct in6_addr src_in6, dst_in6, odst;
#ifdef IPSEC
struct secpolicy *sp = NULL;
- int ipsecrt = 0;
-#endif
-#ifdef SCTP
- int sw_csum;
#endif
struct m_tag *fwd_tag;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
-#ifdef IPSEC
- /*
- * Check AH/ESP integrity.
- */
- /*
- * Don't increment ip6s_cantforward because this is the check
- * before forwarding packet actually.
- */
- if (ipsec6_in_reject(m, NULL)) {
- IPSEC6STAT_INC(in_polvio);
- m_freem(m);
- return;
- }
-#endif /* IPSEC */
-
/*
* Do not forward packets to multicast destination (should be handled
* by ip6_mforward().
@@ -139,8 +119,8 @@ ip6_forward(struct mbuf *m, int srcrt)
IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
IP6STAT_INC(ip6s_cantforward);
/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
@@ -152,6 +132,17 @@ ip6_forward(struct mbuf *m, int srcrt)
m_freem(m);
return;
}
+#ifdef IPSEC
+ /*
+ * Check if this packet has an active SA and needs to be dropped
+ * instead of forwarded.
+ */
+ if (ip6_ipsec_fwd(m) != 0) {
+ IP6STAT_INC(ip6s_cantforward);
+ m_freem(m);
+ return;
+ }
+#endif /* IPSEC */
#ifdef IPSTEALTH
if (!V_ip6stealth) {
@@ -181,10 +172,9 @@ ip6_forward(struct mbuf *m, int srcrt)
#ifdef IPSEC
/* get a security policy for this packet */
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
- IP_FORWARDING, &error);
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error);
if (sp == NULL) {
- IPSEC6STAT_INC(out_inval);
+ IPSEC6STAT_INC(ips_out_inval);
IP6STAT_INC(ip6s_cantforward);
if (mcopy) {
#if 0
@@ -205,7 +195,7 @@ ip6_forward(struct mbuf *m, int srcrt)
/*
* This packet is just discarded.
*/
- IPSEC6STAT_INC(out_polvio);
+ IPSEC6STAT_INC(ips_out_polvio);
IP6STAT_INC(ip6s_cantforward);
KEY_FREESP(&sp);
if (mcopy) {
@@ -253,12 +243,10 @@ ip6_forward(struct mbuf *m, int srcrt)
{
struct ipsecrequest *isr = NULL;
- struct ipsec_output_state state;
/*
* when the kernel forwards a packet, it is not proper to apply
- * IPsec transport mode to the packet is not proper. this check
- * avoid from this.
+ * IPsec transport mode to the packet. This check avoid from this.
* at present, if there is even a transport mode SA request in the
* security policy, the kernel does not apply IPsec to the packet.
* this check is not enough because the following case is valid.
@@ -286,18 +274,27 @@ ip6_forward(struct mbuf *m, int srcrt)
*
* IPv6 [ESP|AH] IPv6 [extension headers] payload
*/
- bzero(&state, sizeof(state));
- state.m = m;
- state.ro = NULL; /* update at ipsec6_output_tunnel() */
- state.dst = NULL; /* update at ipsec6_output_tunnel() */
-
- error = ipsec6_output_tunnel(&state, sp, 0);
- m = state.m;
- KEY_FREESP(&sp);
+ /*
+ * If we need to encapsulate the packet, do it here
+ * ipsec6_proces_packet will send the packet using ip6_output
+ */
+ error = ipsec6_process_packet(m, sp->req);
+ /* Release SP if an error occurred */
+ if (error != 0)
+ KEY_FREESP(&sp);
+ if (error == EJUSTRETURN) {
+ /*
+ * We had a SP with a level of 'use' and no SA. We
+ * will just continue to process the packet without
+ * IPsec processing.
+ */
+ error = 0;
+ goto skip_ipsec;
+ }
if (error) {
- /* mbuf is already reclaimed in ipsec6_output_tunnel. */
+ /* mbuf is already reclaimed in ipsec6_process_packet. */
switch (error) {
case EHOSTUNREACH:
case ENETUNREACH:
@@ -320,7 +317,6 @@ ip6_forward(struct mbuf *m, int srcrt)
m_freem(mcopy);
#endif
}
- m_freem(m);
return;
} else {
/*
@@ -332,25 +328,7 @@ ip6_forward(struct mbuf *m, int srcrt)
m = NULL;
goto freecopy;
}
-
- if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){
- /*
- * now tunnel mode headers are added. we are originating
- * packet instead of forwarding the packet.
- */
- ip6_output(m, NULL, NULL, IPV6_FORWARDING/*XXX*/, NULL, NULL,
- NULL);
- goto freecopy;
- }
-
- /* adjust pointer */
- dst = (struct sockaddr_in6 *)state.dst;
- rt = state.ro ? state.ro->ro_rt : NULL;
- if (dst != NULL && rt != NULL)
- ipsecrt = 1;
}
- if (ipsecrt)
- goto skip_routing;
skip_ipsec:
#endif
again:
@@ -361,6 +339,7 @@ again:
dst->sin6_addr = ip6->ip6_dst;
again2:
rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m));
+ rt = rin6.ro_rt;
if (rin6.ro_rt != NULL)
RT_UNLOCK(rin6.ro_rt);
else {
@@ -372,10 +351,6 @@ again2:
}
goto bad;
}
- rt = rin6.ro_rt;
-#ifdef IPSEC
-skip_routing:
-#endif
/*
* Source scope check: if a packet can't be delivered to its
@@ -398,17 +373,13 @@ skip_routing:
IP6STAT_INC(ip6s_badscope);
goto bad;
}
- if (inzone != outzone
-#ifdef IPSEC
- && !ipsecrt
-#endif
- ) {
+ if (inzone != outzone) {
IP6STAT_INC(ip6s_cantforward);
IP6STAT_INC(ip6s_badscope);
in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
@@ -439,46 +410,6 @@ skip_routing:
goto bad;
}
- if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
- in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
- if (mcopy) {
- u_long mtu;
-#ifdef IPSEC
- struct secpolicy *sp;
- int ipsecerror;
- size_t ipsechdrsiz;
-#endif /* IPSEC */
-
- mtu = IN6_LINKMTU(rt->rt_ifp);
-#ifdef IPSEC
- /*
- * When we do IPsec tunnel ingress, we need to play
- * with the link value (decrement IPsec header size
- * from mtu value). The code is much simpler than v4
- * case, as we have the outgoing interface for
- * encapsulated packet as "rt->rt_ifp".
- */
- sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
- IP_FORWARDING, &ipsecerror);
- if (sp) {
- ipsechdrsiz = ipsec_hdrsiz(mcopy,
- IPSEC_DIR_OUTBOUND, NULL);
- if (ipsechdrsiz < mtu)
- mtu -= ipsechdrsiz;
- }
-
- /*
- * if mtu becomes less than minimum MTU,
- * tell minimum MTU (and I'll need to fragment it).
- */
- if (mtu < IPV6_MMTU)
- mtu = IPV6_MMTU;
-#endif /* IPSEC */
- icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
- }
- goto bad;
- }
-
if (rt->rt_flags & RTF_GATEWAY)
dst = (struct sockaddr_in6 *)rt->rt_gateway;
@@ -492,9 +423,6 @@ skip_routing:
* modified by a redirect.
*/
if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
-#ifdef IPSEC
- !ipsecrt &&
-#endif /* IPSEC */
(rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
/*
@@ -573,23 +501,12 @@ skip_routing:
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
m->m_flags |= M_SKIP_FIREWALL;
/* If destination is now ourself drop to ip6_input(). */
- if (in6_localip(&ip6->ip6_dst)) {
+ if (in6_localip(&ip6->ip6_dst))
m->m_flags |= M_FASTFWD_OURS;
- if (m->m_pkthdr.rcvif == NULL)
- m->m_pkthdr.rcvif = V_loif;
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
- m->m_pkthdr.csum_flags |=
- CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
- m->m_pkthdr.csum_data = 0xffff;
- }
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
- m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
-#endif
- error = netisr_queue(NETISR_IPV6, m);
- goto out;
- } else
+ else {
+ RTFREE(rt);
goto again; /* Redo the routing table lookup. */
+ }
}
/* See if local, if yes, send it to netisr. */
@@ -616,11 +533,46 @@ skip_routing:
m->m_flags |= M_SKIP_FIREWALL;
m->m_flags &= ~M_IP6_NEXTHOP;
m_tag_delete(m, fwd_tag);
+ RTFREE(rt);
goto again2;
}
pass:
- error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
+ /* See if the size was changed by the packet filter. */
+ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) {
+ in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
+ if (mcopy) {
+ u_long mtu;
+#ifdef IPSEC
+ size_t ipsechdrsiz;
+#endif /* IPSEC */
+
+ mtu = IN6_LINKMTU(rt->rt_ifp);
+#ifdef IPSEC
+ /*
+ * When we do IPsec tunnel ingress, we need to play
+ * with the link value (decrement IPsec header size
+ * from mtu value). The code is much simpler than v4
+ * case, as we have the outgoing interface for
+ * encapsulated packet as "rt->rt_ifp".
+ */
+ ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND,
+ NULL);
+ if (ipsechdrsiz < mtu)
+ mtu -= ipsechdrsiz;
+ /*
+ * if mtu becomes less than minimum MTU,
+ * tell minimum MTU (and I'll need to fragment it).
+ */
+ if (mtu < IPV6_MMTU)
+ mtu = IPV6_MMTU;
+#endif /* IPSEC */
+ icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
+ }
+ goto bad;
+ }
+
+ error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL);
if (error) {
in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
IP6STAT_INC(ip6s_cantforward);
@@ -671,10 +623,6 @@ pass:
bad:
m_freem(m);
out:
- if (rt != NULL
-#ifdef IPSEC
- && !ipsecrt
-#endif
- )
+ if (rt != NULL)
RTFREE(rt);
}
diff --git a/freebsd/sys/netinet6/ip6_id.c b/freebsd/sys/netinet6/ip6_id.c
index e277def6..4e1a74e6 100644
--- a/freebsd/sys/netinet6/ip6_id.c
+++ b/freebsd/sys/netinet6/ip6_id.c
@@ -99,6 +99,7 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/route.h>
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -109,7 +110,7 @@ __FBSDID("$FreeBSD$");
struct randomtab {
const int ru_bits; /* resulting bits */
- const long ru_out; /* Time after wich will be reseeded */
+ const long ru_out; /* Time after which will be reseeded */
const u_int32_t ru_max; /* Uniq cycle, avoid blackjack prediction */
const u_int32_t ru_gen; /* Starting generator */
const u_int32_t ru_n; /* ru_n: prime, ru_n - 1: product of pfacts[] */
@@ -129,7 +130,7 @@ struct randomtab {
static struct randomtab randomtab_32 = {
32, /* resulting bits */
- 180, /* Time after wich will be reseeded */
+ 180, /* Time after which will be reseeded */
1000000000, /* Uniq cycle, avoid blackjack prediction */
2, /* Starting generator */
2147483629, /* RU_N-1 = 2^2*3^2*59652323 */
@@ -140,7 +141,7 @@ static struct randomtab randomtab_32 = {
static struct randomtab randomtab_20 = {
20, /* resulting bits */
- 180, /* Time after wich will be reseeded */
+ 180, /* Time after which will be reseeded */
200000, /* Uniq cycle, avoid blackjack prediction */
2, /* Starting generator */
524269, /* RU_N-1 = 2^2*3^2*14563 */
@@ -223,7 +224,7 @@ initid(struct randomtab *p)
p->ru_g = pmod(p->ru_gen, j, p->ru_n);
p->ru_counter = 0;
- p->ru_reseed = time_second + p->ru_out;
+ p->ru_reseed = time_uptime + p->ru_out;
p->ru_msb = p->ru_msb ? 0 : (1U << (p->ru_bits - 1));
}
@@ -233,7 +234,7 @@ randomid(struct randomtab *p)
int i, n;
u_int32_t tmp;
- if (p->ru_counter >= p->ru_max || time_second > p->ru_reseed)
+ if (p->ru_counter >= p->ru_max || time_uptime > p->ru_reseed)
initid(p);
tmp = arc4random();
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c
index 10536316..c7ffe759 100644
--- a/freebsd/sys/netinet6/ip6_input.c
+++ b/freebsd/sys/netinet6/ip6_input.c
@@ -67,33 +67,41 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
+#include <sys/hhook.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/rmlock.h>
#include <sys/syslog.h>
+#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
+#include <net/rss_config.h>
#include <net/pfil.h>
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/ip_var.h>
#include <netinet/in_systm.h>
#include <net/if_llatbl.h>
@@ -108,7 +116,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_ifattach.h>
+#include <netinet6/mld6_var.h>
#include <netinet6/nd6.h>
+#include <netinet6/in6_rss.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -118,39 +128,84 @@ __FBSDID("$FreeBSD$");
#include <netinet6/ip6protosw.h>
-#ifdef FLOWTABLE
-#include <net/flowtable.h>
-VNET_DECLARE(int, ip6_output_flowtable_size);
-#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size)
-#endif
-
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead);
+VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl);
+VNET_DEFINE(u_long, in6_ifaddrhmask);
static struct netisr_handler ip6_nh = {
.nh_name = "ip6",
.nh_handler = ip6_input,
.nh_proto = NETISR_IPV6,
+#ifdef RSS
+ .nh_m2cpuid = rss_soft_m2cpuid_v6,
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_HYBRID,
+#else
.nh_policy = NETISR_POLICY_FLOW,
+#endif
};
-VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch);
-#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch)
+static int
+sysctl_netinet6_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&ip6_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&ip6_nh, qlimit));
+}
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRQMAXLEN, intr_queue_maxlen,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_queue_maxlen, "I",
+ "Maximum size of the IPv6 input queue");
+
+#ifdef RSS
+static struct netisr_handler ip6_direct_nh = {
+ .nh_name = "ip6_direct",
+ .nh_handler = ip6_direct_input,
+ .nh_proto = NETISR_IPV6_DIRECT,
+ .nh_m2cpuid = rss_soft_m2cpuid_v6,
+ .nh_policy = NETISR_POLICY_CPU,
+ .nh_dispatch = NETISR_DISPATCH_HYBRID,
+};
+
+static int
+sysctl_netinet6_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
+{
+ int error, qlimit;
+
+ netisr_getqlimit(&ip6_direct_nh, &qlimit);
+ error = sysctl_handle_int(oidp, &qlimit, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ if (qlimit < 1)
+ return (EINVAL);
+ return (netisr_setqlimit(&ip6_direct_nh, qlimit));
+}
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_direct_queue_maxlen,
+ "I", "Maximum size of the IPv6 direct input queue");
+
+#endif
VNET_DEFINE(struct pfil_head, inet6_pfil_hook);
-VNET_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat);
+VNET_PCPUSTAT_SYSINIT(ip6stat);
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(ip6stat);
+#endif /* VIMAGE */
-struct rwlock in6_ifaddr_lock;
-RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
+struct rmlock in6_ifaddr_lock;
+RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
-static void ip6_init2(void *);
-static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
-static struct ip6aux *ip6_addaux(struct mbuf *);
-static struct ip6aux *ip6_findaux(struct mbuf *m);
-static void ip6_delaux (struct mbuf *);
static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
#ifdef PULLDOWN_TEST
static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
@@ -163,7 +218,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
void
ip6_init(void)
{
- struct ip6protosw *pr;
+ struct protosw *pr;
int i;
TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal",
@@ -172,6 +227,8 @@ ip6_init(void)
TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr);
TAILQ_INIT(&V_in6_ifaddrhead);
+ V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR,
+ &V_in6_ifaddrhmask);
/* Initialize packet filter hooks. */
V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
@@ -180,40 +237,36 @@ ip6_init(void)
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
+ if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6,
+ &V_ipsec_hhh_in[HHOOK_IPSEC_INET6],
+ HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
+ printf("%s: WARNING: unable to register input helper hook\n",
+ __func__);
+ if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET6,
+ &V_ipsec_hhh_out[HHOOK_IPSEC_INET6],
+ HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
+ printf("%s: WARNING: unable to register output helper hook\n",
+ __func__);
+
scope6_init();
addrsel_policy_init();
nd6_init();
frag6_init();
-#ifdef FLOWTABLE
- if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size",
- &V_ip6_output_flowtable_size)) {
- if (V_ip6_output_flowtable_size < 256)
- V_ip6_output_flowtable_size = 256;
- if (!powerof2(V_ip6_output_flowtable_size)) {
- printf("flowtable must be power of 2 size\n");
- V_ip6_output_flowtable_size = 2048;
- }
- } else {
- /*
- * round up to the next power of 2
- */
- V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
- }
- V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU);
-#endif
-
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
/* Skip global initialization stuff for non-default instances. */
- if (!IS_DEFAULT_VNET(curvnet))
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VNET(curvnet)) {
+ netisr_register_vnet(&ip6_nh);
+#ifdef RSS
+ netisr_register_vnet(&ip6_direct_nh);
+#endif
return;
-
-#ifdef DIAGNOSTIC
- if (sizeof(struct protosw) != sizeof(struct ip6protosw))
- panic("sizeof(protosw) != sizeof(ip6protosw)");
+ }
#endif
- pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
+
+ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
panic("ip6_init");
@@ -224,8 +277,8 @@ ip6_init(void)
* Cycle through IP protocols and put them into the appropriate place
* in ip6_protox[].
*/
- for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
- pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
+ for (pr = inet6domain.dom_protosw;
+ pr < inet6domain.dom_protoswNPROTOSW; pr++)
if (pr->pr_domain->dom_family == PF_INET6 &&
pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
/* Be careful to only index valid IP protocols. */
@@ -234,6 +287,9 @@ ip6_init(void)
}
netisr_register(&ip6_nh);
+#ifdef RSS
+ netisr_register(&ip6_direct_nh);
+#endif
}
/*
@@ -243,7 +299,7 @@ ip6_init(void)
int
ip6proto_register(short ip6proto)
{
- struct ip6protosw *pr;
+ struct protosw *pr;
/* Sanity checks. */
if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX)
@@ -253,7 +309,7 @@ ip6proto_register(short ip6proto)
* The protocol slot must not be occupied by another protocol
* already. An index pointing to IPPROTO_RAW is unused.
*/
- pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
+ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
return (EPFNOSUPPORT);
if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */
@@ -262,8 +318,8 @@ ip6proto_register(short ip6proto)
/*
* Find the protocol position in inet6sw[] and set the index.
*/
- for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
- pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) {
+ for (pr = inet6domain.dom_protosw;
+ pr < inet6domain.dom_protoswNPROTOSW; pr++) {
if (pr->pr_domain->dom_family == PF_INET6 &&
pr->pr_protocol && pr->pr_protocol == ip6proto) {
ip6_protox[pr->pr_protocol] = pr - inet6sw;
@@ -276,14 +332,14 @@ ip6proto_register(short ip6proto)
int
ip6proto_unregister(short ip6proto)
{
- struct ip6protosw *pr;
+ struct protosw *pr;
/* Sanity checks. */
if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX)
return (EPROTONOSUPPORT);
/* Check if the protocol was indeed registered. */
- pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
+ pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == NULL)
return (EPFNOSUPPORT);
if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */
@@ -295,43 +351,61 @@ ip6proto_unregister(short ip6proto)
}
#ifdef VIMAGE
-void
-ip6_destroy()
+static void
+ip6_destroy(void *unused __unused)
{
+ struct ifaddr *ifa, *nifa;
+ struct ifnet *ifp;
+ int error;
- nd6_destroy();
- callout_drain(&V_in6_tmpaddrtimer_ch);
-}
+#ifdef RSS
+ netisr_unregister_vnet(&ip6_direct_nh);
#endif
+ netisr_unregister_vnet(&ip6_nh);
+
+ if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0)
+ printf("%s: WARNING: unable to unregister pfil hook, "
+ "error %d\n", __func__, error);
+ error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister input helper hook "
+ "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET6: "
+ "error %d returned\n", __func__, error);
+ }
+ error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET6]);
+ if (error != 0) {
+ printf("%s: WARNING: unable to deregister output helper hook "
+ "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: "
+ "error %d returned\n", __func__, error);
+ }
-static int
-ip6_init2_vnet(const void *unused __unused)
-{
-
- /* nd6_timer_init */
- callout_init(&V_nd6_timer_ch, 0);
- callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
-
- /* timer for regeneranation of temporary addresses randomize ID */
- callout_init(&V_in6_tmpaddrtimer_ch, 0);
- callout_reset(&V_in6_tmpaddrtimer_ch,
- (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
- V_ip6_temp_regen_advance) * hz,
- in6_tmpaddrtimer, curvnet);
+ /* Cleanup addresses. */
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ /* Cannot lock here - lock recursion. */
+ /* IF_ADDR_LOCK(ifp); */
+ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
- return (0);
-}
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ in6_purgeaddr(ifa);
+ }
+ /* IF_ADDR_UNLOCK(ifp); */
+ in6_ifdetach_destroy(ifp);
+ mld_domifdetach(ifp);
+ /* Make sure any routes are gone as well. */
+ rt_flushifroutes_af(ifp, AF_INET6);
+ }
+ IFNET_RUNLOCK();
-static void
-ip6_init2(void *dummy)
-{
+ nd6_destroy();
+ in6_ifattach_destroy();
- ip6_init2_vnet(NULL);
+ hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask);
}
-/* cheat */
-/* This must be after route_init(), which is now SI_ORDER_THIRD */
-SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
+VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL);
+#endif
static int
ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off,
@@ -410,22 +484,78 @@ out:
return (1);
}
+#ifdef RSS
+/*
+ * IPv6 direct input routine.
+ *
+ * This is called when reinjecting completed fragments where
+ * all of the previous checking and book-keeping has been done.
+ */
+void
+ip6_direct_input(struct mbuf *m)
+{
+ int off, nxt;
+ int nest;
+ struct m_tag *mtag;
+ struct ip6_direct_ctx *ip6dc;
+
+ mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL);
+ KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!"));
+
+ ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
+ nxt = ip6dc->ip6dc_nxt;
+ off = ip6dc->ip6dc_off;
+
+ nest = 0;
+
+ m_tag_delete(m, mtag);
+
+ while (nxt != IPPROTO_DONE) {
+ if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
+ IP6STAT_INC(ip6s_toomanyhdr);
+ goto bad;
+ }
+
+ /*
+ * protection against faulty packet - there should be
+ * more sanity checks in header chain processing.
+ */
+ if (m->m_pkthdr.len < off) {
+ IP6STAT_INC(ip6s_tooshort);
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
+ goto bad;
+ }
+
+#ifdef IPSEC
+ /*
+ * enforce IPsec policy checking if we are seeing last header.
+ * note that we do not visit this with protocols with pcb layer
+ * code - like udp/tcp/raw ip.
+ */
+ if (ip6_ipsec_input(m, nxt))
+ goto bad;
+#endif /* IPSEC */
+
+ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
+ }
+ return;
+bad:
+ m_freem(m);
+}
+#endif
+
void
ip6_input(struct mbuf *m)
{
+ struct in6_addr odst;
struct ip6_hdr *ip6;
- int off = sizeof(struct ip6_hdr), nest;
+ struct in6_ifaddr *ia;
u_int32_t plen;
u_int32_t rtalert = ~0;
+ int off = sizeof(struct ip6_hdr), nest;
int nxt, ours = 0;
- struct ifnet *deliverifp = NULL, *ifp = NULL;
- struct in6_addr odst;
- struct route_in6 rin6;
int srcrt = 0;
- struct llentry *lle = NULL;
- struct sockaddr_in6 dst6, *dst;
- bzero(&rin6, sizeof(struct route_in6));
#ifdef IPSEC
/*
* should the inner packet be considered authentic?
@@ -438,18 +568,12 @@ ip6_input(struct mbuf *m)
#endif /* IPSEC */
- /*
- * make sure we don't have onion peering information into m_tag.
- */
- ip6_delaux(m);
-
if (m->m_flags & M_FASTFWD_OURS) {
/*
* Firewall changed destination to local.
*/
m->m_flags &= ~M_FASTFWD_OURS;
ours = 1;
- deliverifp = m->m_pkthdr.rcvif;
ip6 = mtod(m, struct ip6_hdr *);
goto hbhcheck;
}
@@ -476,10 +600,8 @@ ip6_input(struct mbuf *m)
}
/* drop the packet if IPv6 operation is disabled on the IF */
- if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
- m_freem(m);
- return;
- }
+ if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED))
+ goto bad;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
IP6STAT_INC(ip6s_total);
@@ -493,21 +615,16 @@ ip6_input(struct mbuf *m)
if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
struct mbuf *n;
- MGETHDR(n, M_DONTWAIT, MT_HEADER);
- if (n)
- M_MOVE_PKTHDR(n, m);
- if (n && n->m_pkthdr.len > MHLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_freem(n);
- n = NULL;
- }
- }
+ if (m->m_pkthdr.len > MHLEN)
+ n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ n = m_gethdr(M_NOWAIT, MT_DATA);
if (n == NULL) {
m_freem(m);
return; /* ENOBUFS */
}
+ m_move_pkthdr(n, m);
m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
n->m_len = n->m_pkthdr.len;
m_freem(m);
@@ -536,6 +653,8 @@ ip6_input(struct mbuf *m)
IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]);
+ IP_PROBE(receive, NULL, NULL, ip6, m->m_pkthdr.rcvif, NULL, ip6);
+
/*
* Check against address spoofing/corruption.
*/
@@ -643,7 +762,6 @@ ip6_input(struct mbuf *m)
if (m->m_flags & M_FASTFWD_OURS) {
m->m_flags &= ~M_FASTFWD_OURS;
ours = 1;
- deliverifp = m->m_pkthdr.rcvif;
goto hbhcheck;
}
if ((m->m_flags & M_IP6_NEXTHOP) &&
@@ -654,7 +772,7 @@ ip6_input(struct mbuf *m)
* connected host.
*/
ip6_forward(m, 1);
- goto out;
+ return;
}
passin:
@@ -677,7 +795,6 @@ passin:
IP6STAT_INC(ip6s_badscope);
goto bad;
}
-
/*
* Multicast check. Assume packet is for us to avoid
* prematurely taking locks.
@@ -685,167 +802,16 @@ passin:
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
ours = 1;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
- deliverifp = m->m_pkthdr.rcvif;
goto hbhcheck;
}
-
- /*
- * Unicast check
- */
-
- bzero(&dst6, sizeof(dst6));
- dst6.sin6_family = AF_INET6;
- dst6.sin6_len = sizeof(struct sockaddr_in6);
- dst6.sin6_addr = ip6->ip6_dst;
- ifp = m->m_pkthdr.rcvif;
- IF_AFDATA_RLOCK(ifp);
- lle = lla_lookup(LLTABLE6(ifp), 0,
- (struct sockaddr *)&dst6);
- IF_AFDATA_RUNLOCK(ifp);
- if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) {
- struct ifaddr *ifa;
- struct in6_ifaddr *ia6;
- int bad;
-
- bad = 1;
-#define sa_equal(a1, a2) \
- (bcmp((a1), (a2), ((a1))->sin6_len) == 0)
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != dst6.sin6_family)
- continue;
- if (sa_equal(&dst6, ifa->ifa_addr))
- break;
- }
- KASSERT(ifa != NULL, ("%s: ifa not found for lle %p",
- __func__, lle));
-#undef sa_equal
-
- ia6 = (struct in6_ifaddr *)ifa;
- if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
- /* Count the packet in the ip address stats */
- ia6->ia_ifa.if_ipackets++;
- ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
-
- /*
- * record address information into m_tag.
- */
- (void)ip6_setdstifaddr(m, ia6);
-
- bad = 0;
- } else {
- char ip6bufs[INET6_ADDRSTRLEN];
- char ip6bufd[INET6_ADDRSTRLEN];
- /* address is not ready, so discard the packet. */
- nd6log((LOG_INFO,
- "ip6_input: packet to an unready address %s->%s\n",
- ip6_sprintf(ip6bufs, &ip6->ip6_src),
- ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
- }
- IF_ADDR_RUNLOCK(ifp);
- LLE_RUNLOCK(lle);
- if (bad)
- goto bad;
- else {
- ours = 1;
- deliverifp = ifp;
- goto hbhcheck;
- }
- }
- if (lle != NULL)
- LLE_RUNLOCK(lle);
-
- dst = &rin6.ro_dst;
- dst->sin6_len = sizeof(struct sockaddr_in6);
- dst->sin6_family = AF_INET6;
- dst->sin6_addr = ip6->ip6_dst;
- rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m));
- if (rin6.ro_rt)
- RT_UNLOCK(rin6.ro_rt);
-
-#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
-
/*
- * Accept the packet if the forwarding interface to the destination
- * according to the routing table is the loopback interface,
- * unless the associated route has a gateway.
- * Note that this approach causes to accept a packet if there is a
- * route to the loopback interface for the destination of the packet.
- * But we think it's even useful in some situations, e.g. when using
- * a special daemon which wants to intercept the packet.
- *
- * XXX: some OSes automatically make a cloned route for the destination
- * of an outgoing packet. If the outgoing interface of the packet
- * is a loopback one, the kernel would consider the packet to be
- * accepted, even if we have no such address assinged on the interface.
- * We check the cloned flag of the route entry to reject such cases,
- * assuming that route entries for our own addresses are not made by
- * cloning (it should be true because in6_addloop explicitly installs
- * the host route). However, we might have to do an explicit check
- * while it would be less efficient. Or, should we rather install a
- * reject route for such a case?
+ * Unicast check
+ * XXX: For now we keep link-local IPv6 addresses with embedded
+ * scope zone id, therefore we use zero zoneid here.
*/
- if (rin6.ro_rt &&
- (rin6.ro_rt->rt_flags &
- (RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
-#ifdef RTF_WASCLONED
- !(rin6.ro_rt->rt_flags & RTF_WASCLONED) &&
-#endif
-#ifdef RTF_CLONED
- !(rin6.ro_rt->rt_flags & RTF_CLONED) &&
-#endif
-#if 0
- /*
- * The check below is redundant since the comparison of
- * the destination and the key of the rtentry has
- * already done through looking up the routing table.
- */
- IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
- &rt6_key(rin6.ro_rt)->sin6_addr)
-#endif
- rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) {
- int free_ia6 = 0;
- struct in6_ifaddr *ia6;
-
- /*
- * found the loopback route to the interface address
- */
- if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) {
- struct sockaddr_in6 dest6;
-
- bzero(&dest6, sizeof(dest6));
- dest6.sin6_family = AF_INET6;
- dest6.sin6_len = sizeof(dest6);
- dest6.sin6_addr = ip6->ip6_dst;
- ia6 = (struct in6_ifaddr *)
- ifa_ifwithaddr((struct sockaddr *)&dest6);
- if (ia6 == NULL)
- goto bad;
- free_ia6 = 1;
- }
- else
- ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa;
-
- /*
- * record address information into m_tag.
- */
- (void)ip6_setdstifaddr(m, ia6);
-
- /*
- * packets to a tentative, duplicated, or somehow invalid
- * address must not be accepted.
- */
- if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
- /* this address is ready */
- ours = 1;
- deliverifp = ia6->ia_ifp; /* correct? */
- /* Count the packet in the ip address stats */
- ia6->ia_ifa.if_ipackets++;
- ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
- if (ia6 != NULL && free_ia6 != 0)
- ifa_free(&ia6->ia_ifa);
- goto hbhcheck;
- } else {
+ ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia != NULL) {
+ if (ia->ia6_flags & IN6_IFF_NOTREADY) {
char ip6bufs[INET6_ADDRSTRLEN];
char ip6bufd[INET6_ADDRSTRLEN];
/* address is not ready, so discard the packet. */
@@ -853,24 +819,15 @@ passin:
"ip6_input: packet to an unready address %s->%s\n",
ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
-
- if (ia6 != NULL && free_ia6 != 0)
- ifa_free(&ia6->ia_ifa);
+ ifa_free(&ia->ia_ifa);
goto bad;
}
- }
-
- /*
- * FAITH (Firewall Aided Internet Translator)
- */
- if (V_ip6_keepfaith) {
- if (rin6.ro_rt && rin6.ro_rt->rt_ifp &&
- rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) {
- /* XXX do we need more sanity checks? */
- ours = 1;
- deliverifp = rin6.ro_rt->rt_ifp; /* faith */
- goto hbhcheck;
- }
+ /* Count the packet in the ip address stats */
+ counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len);
+ ifa_free(&ia->ia_ifa);
+ ours = 1;
+ goto hbhcheck;
}
/*
@@ -885,47 +842,25 @@ passin:
hbhcheck:
/*
- * record address information into m_tag, if we don't have one yet.
- * note that we are unable to record it, if the address is not listed
- * as our interface address (e.g. multicast addresses, addresses
- * within FAITH prefixes and such).
- */
- if (deliverifp) {
- struct in6_ifaddr *ia6;
-
- if ((ia6 = ip6_getdstifaddr(m)) != NULL) {
- ifa_free(&ia6->ia_ifa);
- } else {
- ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
- if (ia6) {
- if (!ip6_setdstifaddr(m, ia6)) {
- /*
- * XXX maybe we should drop the packet here,
- * as we could not provide enough information
- * to the upper layers.
- */
- }
- ifa_free(&ia6->ia_ifa);
- }
- }
- }
-
- /*
* Process Hop-by-Hop options header if it's contained.
* m may be modified in ip6_hopopts_input().
* If a JumboPayload option is included, plen will also be modified.
*/
plen = (u_int32_t)ntohs(ip6->ip6_plen);
if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
- int error;
-
- error = ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours);
- if (error != 0)
- goto out;
+ if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0)
+ return;
} else
nxt = ip6->ip6_nxt;
/*
+ * Use mbuf flags to propagate Router Alert option to
+ * ICMPv6 layer, as hop-by-hop options have been stripped.
+ */
+ if (rtalert != ~0)
+ m->m_flags |= M_RTALERT_MLD;
+
+ /*
* Check that the amount of data in the buffers
* is as at least much as the IPv6 header would have us expect.
* Trim mbufs if longer than we expect.
@@ -968,7 +903,7 @@ passin:
}
} else if (!ours) {
ip6_forward(m, srcrt);
- goto out;
+ return;
}
ip6 = mtod(m, struct ip6_hdr *);
@@ -993,7 +928,7 @@ passin:
* Tell launch routine the next header
*/
IP6STAT_INC(ip6s_delivered);
- in6_ifstat_inc(deliverifp, ifs6_in_deliver);
+ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_deliver);
nest = 0;
while (nxt != IPPROTO_DONE) {
@@ -1022,56 +957,11 @@ passin:
goto bad;
#endif /* IPSEC */
- /*
- * Use mbuf flags to propagate Router Alert option to
- * ICMPv6 layer, as hop-by-hop options have been stripped.
- */
- if (nxt == IPPROTO_ICMPV6 && rtalert != ~0)
- m->m_flags |= M_RTALERT_MLD;
-
nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
}
- goto out;
+ return;
bad:
m_freem(m);
-out:
- if (rin6.ro_rt)
- RTFREE(rin6.ro_rt);
-}
-
-/*
- * set/grab in6_ifaddr correspond to IPv6 destination address.
- * XXX backward compatibility wrapper
- *
- * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag,
- * and then bump it when the tag is copied, and release it when the tag is
- * freed. Unfortunately, m_tags don't support deep copies (yet), so instead
- * we just bump the ia refcount when we receive it. This should be fixed.
- */
-static struct ip6aux *
-ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
-{
- struct ip6aux *ip6a;
-
- ip6a = ip6_addaux(m);
- if (ip6a)
- ip6a->ip6a_dstia6 = ia6;
- return ip6a; /* NULL if failed to set */
-}
-
-struct in6_ifaddr *
-ip6_getdstifaddr(struct mbuf *m)
-{
- struct ip6aux *ip6a;
- struct in6_ifaddr *ia;
-
- ip6a = ip6_findaux(m);
- if (ip6a) {
- ia = ip6a->ip6a_dstia6;
- ifa_ref(&ia->ia_ifa);
- return ia;
- } else
- return NULL;
}
/*
@@ -1601,6 +1491,44 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
loopend:
;
}
+
+ if (in6p->inp_flags2 & INP_RECVFLOWID) {
+ uint32_t flowid, flow_type;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ /*
+ * XXX should handle the failure of one or the
+ * other - don't populate both?
+ */
+ *mp = sbcreatecontrol((caddr_t) &flowid,
+ sizeof(uint32_t), IPV6_FLOWID, IPPROTO_IPV6);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ *mp = sbcreatecontrol((caddr_t) &flow_type,
+ sizeof(uint32_t), IPV6_FLOWTYPE, IPPROTO_IPV6);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+
+#ifdef RSS
+ if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) {
+ uint32_t flowid, flow_type;
+ uint32_t rss_bucketid;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
+ *mp = sbcreatecontrol((caddr_t) &rss_bucketid,
+ sizeof(uint32_t), IPV6_RSSBUCKETID, IPPROTO_IPV6);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ }
+#endif
+
}
#undef IS2292
@@ -1674,22 +1602,12 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
else
elen = (ip6e.ip6e_len + 1) << 3;
- MGET(n, M_DONTWAIT, MT_DATA);
- if (n && elen >= MLEN) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_free(n);
- n = NULL;
- }
- }
- if (!n)
- return NULL;
-
- n->m_len = 0;
- if (elen >= M_TRAILINGSPACE(n)) {
- m_free(n);
+ if (elen > MLEN)
+ n = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ n = m_get(M_NOWAIT, MT_DATA);
+ if (n == NULL)
return NULL;
- }
m_copydata(m, off, elen, mtod(n, caddr_t));
n->m_len = elen;
@@ -1710,7 +1628,7 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
* we develop `neater' mechanism to process extension headers.
*/
char *
-ip6_get_prevhdr(struct mbuf *m, int off)
+ip6_get_prevhdr(const struct mbuf *m, int off)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -1749,7 +1667,7 @@ ip6_get_prevhdr(struct mbuf *m, int off)
* get next header offset. m will be retained.
*/
int
-ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
+ip6_nexthdr(const struct mbuf *m, int off, int proto, int *nxtp)
{
struct ip6_hdr ip6;
struct ip6_ext ip6e;
@@ -1817,14 +1735,14 @@ ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
return -1;
}
- return -1;
+ /* NOTREACHED */
}
/*
* get offset for the last header in the chain. m will be kept untainted.
*/
int
-ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
+ip6_lasthdr(const struct mbuf *m, int off, int proto, int *nxtp)
{
int newoff;
int nxt;
@@ -1847,42 +1765,6 @@ ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
}
}
-static struct ip6aux *
-ip6_addaux(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
- if (!mtag) {
- mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
- M_NOWAIT);
- if (mtag) {
- m_tag_prepend(m, mtag);
- bzero(mtag + 1, sizeof(struct ip6aux));
- }
- }
- return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
-}
-
-static struct ip6aux *
-ip6_findaux(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
- return mtag ? (struct ip6aux *)(mtag + 1) : NULL;
-}
-
-static void
-ip6_delaux(struct mbuf *m)
-{
- struct m_tag *mtag;
-
- mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
- if (mtag)
- m_tag_delete(m, mtag);
-}
-
/*
* System control for IP6
*/
diff --git a/freebsd/sys/netinet6/ip6_ipsec.h b/freebsd/sys/netinet6/ip6_ipsec.h
index 86d1b005..e335d850 100644
--- a/freebsd/sys/netinet6/ip6_ipsec.h
+++ b/freebsd/sys/netinet6/ip6_ipsec.h
@@ -35,8 +35,7 @@
int ip6_ipsec_filtertunnel(struct mbuf *);
int ip6_ipsec_fwd(struct mbuf *);
int ip6_ipsec_input(struct mbuf *, int);
-int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
- struct ifnet **, struct secpolicy **sp);
+int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *);
#if 0
int ip6_ipsec_mtu(struct mbuf *);
#endif
diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c
index 02a98026..f74b71c3 100644
--- a/freebsd/sys/netinet6/ip6_mroute.c
+++ b/freebsd/sys/netinet6/ip6_mroute.c
@@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -106,6 +107,7 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/raw_cb.h>
#include <net/vnet.h>
@@ -116,19 +118,16 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_encap.h>
#include <netinet/ip6.h>
+#include <netinet/in_kdtrace.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_mroute.h>
-#include <netinet6/ip6protosw.h>
#include <netinet6/pim6.h>
#include <netinet6/pim6_var.h>
static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
-/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? */
-#define M_HASCL(m) ((m)->m_flags & M_EXT)
-
static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
@@ -140,7 +139,7 @@ extern int in6_mcast_loop;
extern struct domain inet6domain;
static const struct encaptab *pim6_encap_cookie;
-static const struct ip6protosw in6_pim_protosw = {
+static const struct protosw in6_pim_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
@@ -199,9 +198,34 @@ static struct mtx mfc6_mtx;
static u_char n6expire[MF6CTBLSIZ];
static struct mif6 mif6table[MAXMIFS];
-SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
- &mif6table, sizeof(mif6table), "S,mif6[MAXMIFS]",
- "IPv6 Multicast Interfaces (struct mif6[MAXMIFS], netinet6/ip6_mroute.h)");
+static int
+sysctl_mif6table(SYSCTL_HANDLER_ARGS)
+{
+ struct mif6_sctl *out;
+ int error;
+
+ out = malloc(sizeof(struct mif6_sctl) * MAXMIFS, M_TEMP, M_WAITOK);
+ for (int i = 0; i < MAXMIFS; i++) {
+ out[i].m6_flags = mif6table[i].m6_flags;
+ out[i].m6_rate_limit = mif6table[i].m6_rate_limit;
+ out[i].m6_lcl_addr = mif6table[i].m6_lcl_addr;
+ if (mif6table[i].m6_ifp != NULL)
+ out[i].m6_ifp = mif6table[i].m6_ifp->if_index;
+ else
+ out[i].m6_ifp = 0;
+ out[i].m6_pkt_in = mif6table[i].m6_pkt_in;
+ out[i].m6_pkt_out = mif6table[i].m6_pkt_out;
+ out[i].m6_bytes_in = mif6table[i].m6_bytes_in;
+ out[i].m6_bytes_out = mif6table[i].m6_bytes_out;
+ }
+ error = SYSCTL_OUT(req, out, sizeof(struct mif6_sctl) * MAXMIFS);
+ free(out, M_TEMP);
+ return (error);
+}
+SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mif6table, CTLTYPE_OPAQUE | CTLFLAG_RD,
+ NULL, 0, sysctl_mif6table, "S,mif6_sctl[MAXMIFS]",
+ "IPv6 Multicast Interfaces (struct mif6_sctl[MAXMIFS], "
+ "netinet6/ip6_mroute.h)");
static struct mtx mif6_mtx;
#define MIF6_LOCK() mtx_lock(&mif6_mtx)
@@ -359,7 +383,7 @@ X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt)
mifi_t mifi;
if (so != V_ip6_mrouter && sopt->sopt_name != MRT6_INIT)
- return (EACCES);
+ return (EPERM);
switch (sopt->sopt_name) {
case MRT6_INIT:
@@ -614,7 +638,7 @@ X_ip6_mrouter_done(void)
for (rte = rt->mf6c_stall; rte != NULL; ) {
struct rtdetq *n = rte->next;
- m_free(rte->m);
+ m_freem(rte->m);
free(rte, M_MRTABLE6);
rte = n;
}
@@ -1078,8 +1102,8 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
*/
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
IP6STAT_INC(ip6s_cantforward);
- if (V_ip6_log_time + V_ip6_log_interval < time_second) {
- V_ip6_log_time = time_second;
+ if (V_ip6_log_time + V_ip6_log_interval < time_uptime) {
+ V_ip6_log_time = time_uptime;
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
@@ -1128,7 +1152,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
* Pullup packet header if needed before storing it,
* as other references may modify it in the meantime.
*/
- if (mb0 && (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
+ if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
if (mb0 == NULL) {
free(rte, M_MRTABLE6);
@@ -1397,7 +1421,7 @@ ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
mm = m_copy(m, 0, sizeof(struct ip6_hdr));
if (mm &&
- (M_HASCL(mm) ||
+ (!M_WRITABLE(mm) ||
mm->m_len < sizeof(struct ip6_hdr)))
mm = m_pullup(mm, sizeof(struct ip6_hdr));
if (mm == NULL)
@@ -1527,7 +1551,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
*/
mb_copy = m_copy(m, 0, M_COPYALL);
if (mb_copy &&
- (M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
+ (!M_WRITABLE(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr));
if (mb_copy == NULL) {
return;
@@ -1561,15 +1585,8 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
* If configured to loop back multicasts by default,
* loop back a copy now.
*/
- if (in6_mcast_loop) {
- struct sockaddr_in6 dst6;
-
- bzero(&dst6, sizeof(dst6));
- dst6.sin6_len = sizeof(struct sockaddr_in6);
- dst6.sin6_family = AF_INET6;
- dst6.sin6_addr = ip6->ip6_dst;
- ip6_mloopback(ifp, m, &dst6);
- }
+ if (in6_mcast_loop)
+ ip6_mloopback(ifp, m);
/*
* Put the packet into the sending queue of the outgoing interface
@@ -1583,10 +1600,13 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
dst6.sin6_len = sizeof(struct sockaddr_in6);
dst6.sin6_family = AF_INET6;
dst6.sin6_addr = ip6->ip6_dst;
+
+ IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6);
/*
* We just call if_output instead of nd6_output here, since
* we need no ND for a multicast forwarded packet...right?
*/
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
error = (*ifp->if_output)(ifp, mb_copy,
(struct sockaddr *)&dst6, NULL);
MRT6_DLOG(DEBUG_XMIT, "mif %u err %d",
@@ -1626,11 +1646,10 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
ip6_sprintf(ip6bufd, &ip6->ip6_dst));
PIM6STAT_INC(pim6s_snd_registers);
- /* Make a copy of the packet to send to the user level process */
- MGETHDR(mm, M_DONTWAIT, MT_HEADER);
+ /* Make a copy of the packet to send to the user level process. */
+ mm = m_gethdr(M_NOWAIT, MT_DATA);
if (mm == NULL)
return (ENOBUFS);
- mm->m_pkthdr.rcvif = NULL;
mm->m_data += max_linkhdr;
mm->m_len = sizeof(struct ip6_hdr);
@@ -1949,4 +1968,4 @@ static moduledata_t ip6_mroutemod = {
0
};
-DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_ANY);
diff --git a/freebsd/sys/netinet6/ip6_mroute.h b/freebsd/sys/netinet6/ip6_mroute.h
index 33b41310..51e1d496 100644
--- a/freebsd/sys/netinet6/ip6_mroute.h
+++ b/freebsd/sys/netinet6/ip6_mroute.h
@@ -121,19 +121,19 @@ struct mf6cctl {
* The kernel's multicast routing statistics.
*/
struct mrt6stat {
- u_quad_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
- u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
- u_quad_t mrt6s_upcalls; /* # calls to multicast routing daemon */
- u_quad_t mrt6s_no_route; /* no route for packet's origin */
- u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */
- u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */
- u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */
- u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */
- u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */
- u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */
- u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
- u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
- u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
+ uint64_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
+ uint64_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
+ uint64_t mrt6s_upcalls; /* # calls to multicast routing daemon */
+ uint64_t mrt6s_no_route; /* no route for packet's origin */
+ uint64_t mrt6s_bad_tunnel; /* malformed tunnel options */
+ uint64_t mrt6s_cant_tunnel; /* no room for tunnel options */
+ uint64_t mrt6s_wrong_if; /* arrived on wrong interface */
+ uint64_t mrt6s_upq_ovflw; /* upcall Q overflow */
+ uint64_t mrt6s_cache_cleanups; /* # entries with no upcalls */
+ uint64_t mrt6s_drop_sel; /* pkts dropped selectively */
+ uint64_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
+ uint64_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
+ uint64_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
};
#ifdef MRT6_OINIT
@@ -194,6 +194,20 @@ struct sioc_mif_req6 {
u_quad_t obytes; /* Output byte count on mif */
};
+/*
+ * Structure to export 'struct mif6' to userland via sysctl.
+ */
+struct mif6_sctl {
+ u_char m6_flags; /* MIFF_ flags defined above */
+ u_int m6_rate_limit; /* max rate */
+ struct in6_addr m6_lcl_addr; /* local interface address */
+ uint32_t m6_ifp; /* interface index */
+ u_quad_t m6_pkt_in; /* # pkts in on interface */
+ u_quad_t m6_pkt_out; /* # pkts out on interface */
+ u_quad_t m6_bytes_in; /* # bytes in on interface */
+ u_quad_t m6_bytes_out; /* # bytes out on interface */
+};
+
#if defined(_KERNEL) || defined(KERNEL)
/*
* The kernel's multicast-interface structure.
diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c
index 95231631..d3dc973e 100644
--- a/freebsd/sys/netinet6/ip6_output.c
+++ b/freebsd/sys/netinet6/ip6_output.c
@@ -67,10 +67,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_sctp.h>
#include <rtems/bsd/local/opt_route.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/kernel.h>
@@ -88,14 +88,17 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/pfil.h>
+#include <net/rss_config.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
+#include <netinet6/in6_fib.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -103,6 +106,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_pcb.h>
#include <netinet/tcp_var.h>
#include <netinet6/nd6.h>
+#include <netinet6/in6_rss.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -132,6 +136,8 @@ struct ip6_exthdrs {
struct mbuf *ip6e_dest2;
};
+static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
+
static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
struct ucred *, int);
static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
@@ -145,8 +151,12 @@ static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
struct ip6_frag **);
static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
-static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
- struct ifnet *, struct in6_addr *, u_long *, int *, u_int);
+static int ip6_getpmtu(struct route_in6 *, int,
+ struct ifnet *, const struct in6_addr *, u_long *, int *, u_int,
+ u_int);
+static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long,
+ u_long *, int *, u_int);
+static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *);
static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
@@ -186,7 +196,7 @@ static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
}\
} while (/*CONSTCOND*/ 0)
-static void
+void
in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
{
u_short csum;
@@ -198,8 +208,8 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
if (offset + sizeof(u_short) > m->m_len) {
printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
- "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset,
- m->m_pkthdr.csum_flags);
+ "csum_flags=%b\n", __func__, m->m_len, plen, offset,
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
/*
* XXX this should not happen, but if it does, the correct
* behavior may be to insert the checksum in the appropriate
@@ -210,6 +220,64 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
*(u_short *)(m->m_data + offset) = csum;
}
+int
+ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
+ int mtu, uint32_t id)
+{
+ struct mbuf *m, **mnext, *m_frgpart;
+ struct ip6_hdr *ip6, *mhip6;
+ struct ip6_frag *ip6f;
+ int off;
+ int error;
+ int tlen = m0->m_pkthdr.len;
+
+ m = m0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ mnext = &m->m_nextpkt;
+
+ for (off = hlen; off < tlen; off += mtu) {
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (!m) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m->m_flags = m0->m_flags & M_COPYFLAGS;
+ *mnext = m;
+ mnext = &m->m_nextpkt;
+ m->m_data += max_linkhdr;
+ mhip6 = mtod(m, struct ip6_hdr *);
+ *mhip6 = *ip6;
+ m->m_len = sizeof(*mhip6);
+ error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
+ if (error) {
+ IP6STAT_INC(ip6s_odropped);
+ return (error);
+ }
+ ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
+ if (off + mtu >= tlen)
+ mtu = tlen - off;
+ else
+ ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
+ mhip6->ip6_plen = htons((u_short)(mtu + hlen +
+ sizeof(*ip6f) - sizeof(struct ip6_hdr)));
+ if ((m_frgpart = m_copy(m0, off, mtu)) == NULL) {
+ IP6STAT_INC(ip6s_odropped);
+ return (ENOBUFS);
+ }
+ m_cat(m, m_frgpart);
+ m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f);
+ m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
+ m->m_pkthdr.rcvif = NULL;
+ ip6f->ip6f_reserved = 0;
+ ip6f->ip6f_ident = id;
+ ip6f->ip6f_nxt = nextproto;
+ IP6STAT_INC(ip6s_ofragments);
+ in6_ifstat_inc(ifp, ifs6_out_fragcreat);
+ }
+
+ return (0);
+}
+
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@@ -220,22 +288,25 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
* skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
* then result of route lookup is stored in ro->ro_rt.
*
- * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
+ * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
- * which is rt_rmx.rmx_mtu.
+ * which is rt_mtu.
*
* ifpp - XXX: just for statistics
*/
+/*
+ * XXX TODO: no flowid is assigned for outbound flows?
+ */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
struct ifnet **ifpp, struct inpcb *inp)
{
- struct ip6_hdr *ip6, *mhip6;
+ struct ip6_hdr *ip6;
struct ifnet *ifp, *origifp;
struct mbuf *m = m0;
struct mbuf *mprev = NULL;
- int hlen, tlen, len, off;
+ int hlen, tlen, len;
struct route_in6 ip6route;
struct rtentry *rt = NULL;
struct sockaddr_in6 *dst, src_sa, dst_sa;
@@ -246,31 +317,25 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
int alwaysfrag, dontfrag;
u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
struct ip6_exthdrs exthdrs;
- struct in6_addr finaldst, src0, dst0;
+ struct in6_addr src0, dst0;
u_int32_t zone;
struct route_in6 *ro_pmtu = NULL;
int hdrsplit = 0;
- int needipsec = 0;
int sw_csum, tso;
-#ifdef IPSEC
- struct ipsec_output_state state;
- struct ip6_rthdr *rh = NULL;
- int needipsectun = 0;
- int segleft_org = 0;
- struct secpolicy *sp = NULL;
-#endif /* IPSEC */
+ int needfiblookup;
+ uint32_t fibnum;
struct m_tag *fwd_tag = NULL;
+ uint32_t id;
- ip6 = mtod(m, struct ip6_hdr *);
- if (ip6 == NULL) {
- printf ("ip6 is NULL");
- goto bad;
- }
-
- if (inp != NULL)
+ if (inp != NULL) {
M_SETFIB(m, inp->inp_inc.inc_fibnum);
+ if ((flags & IP_NODEFAULTFLOWID) == 0) {
+ /* unconditionally set flowid */
+ m->m_pkthdr.flowid = inp->inp_flowid;
+ M_HASHTYPE_SET(m, inp->inp_flowtype);
+ }
+ }
- finaldst = ip6->ip6_dst;
bzero(&exthdrs, sizeof(exthdrs));
if (opt) {
/* Hop-by-Hop options header */
@@ -299,27 +364,14 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
/*
* IPSec checking which handles several cases.
* FAST IPSEC: We re-injected the packet.
+ * XXX: need scope argument.
*/
- switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
+ switch(ip6_ipsec_output(&m, inp, &error))
{
case 1: /* Bad packet */
goto freehdrs;
- case -1: /* Do IPSec */
- needipsec = 1;
- /*
- * Do delayed checksums now, as we may send before returning.
- */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
- plen = m->m_pkthdr.len - sizeof(*ip6);
- in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
- }
-#ifdef SCTP
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
- sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
- m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
- }
-#endif
+ case -1: /* IPSec done */
+ goto done;
case 0: /* No IPSec */
default:
break;
@@ -339,15 +391,15 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
optlen += exthdrs.ip6e_rthdr->m_len;
unfragpartlen = optlen + sizeof(struct ip6_hdr);
- /* NOTE: we don't add AH/ESP length here. do that later. */
+ /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */
if (exthdrs.ip6e_dest2)
optlen += exthdrs.ip6e_dest2->m_len;
/*
- * If we need IPsec, or there is at least one extension header,
+ * If there is at least one extension header,
* separate IP6 header from the payload.
*/
- if ((needipsec || optlen) && !hdrsplit) {
+ if (optlen && !hdrsplit) {
if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
m = NULL;
goto freehdrs;
@@ -356,7 +408,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
hdrsplit++;
}
- /* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
/* adjust mbuf packet header length */
@@ -422,72 +473,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
IPPROTO_ROUTING);
-#ifdef IPSEC
- if (!needipsec)
- goto skip_ipsec2;
-
- /*
- * pointers after IPsec headers are not valid any more.
- * other pointers need a great care too.
- * (IPsec routines should not mangle mbufs prior to AH/ESP)
- */
- exthdrs.ip6e_dest2 = NULL;
-
- if (exthdrs.ip6e_rthdr) {
- rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
- segleft_org = rh->ip6r_segleft;
- rh->ip6r_segleft = 0;
- }
-
- bzero(&state, sizeof(state));
- state.m = m;
- error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
- &needipsectun);
- m = state.m;
- if (error == EJUSTRETURN) {
- /*
- * We had a SP with a level of 'use' and no SA. We
- * will just continue to process the packet without
- * IPsec processing.
- */
- ;
- } else if (error) {
- /* mbuf is already reclaimed in ipsec6_output_trans. */
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("[%s:%d] (ipsec): error code %d\n",
- __func__, __LINE__, error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- } else if (!needipsectun) {
- /*
- * In the FAST IPSec case we have already
- * re-injected the packet and it has been freed
- * by the ipsec_done() function. So, just clean
- * up after ourselves.
- */
- m = NULL;
- goto done;
- }
- if (exthdrs.ip6e_rthdr) {
- /* ah6_output doesn't modify mbuf chain */
- rh->ip6r_segleft = segleft_org;
- }
-skip_ipsec2:;
-#endif /* IPSEC */
-
/*
* If there is a routing header, discard the packet.
*/
@@ -514,29 +499,20 @@ skip_ipsec2:;
/*
* Route packet.
*/
- if (ro == 0) {
+ if (ro == NULL) {
ro = &ip6route;
bzero((caddr_t)ro, sizeof(*ro));
- }
+ } else
+ ro->ro_flags |= RT_LLE_CACHE;
ro_pmtu = ro;
if (opt && opt->ip6po_rthdr)
ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst;
#ifdef FLOWTABLE
- if (ro->ro_rt == NULL) {
- struct flentry *fle;
-
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
- if (fle != NULL)
- flow_to_route_in6(fle, ro);
- }
+ if (ro->ro_rt == NULL)
+ (void )flowtable_lookup(AF_INET6, m, (struct route *)ro);
#endif
+ fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
again:
/*
* if specified, try to fill in the traffic class field.
@@ -563,82 +539,18 @@ again:
else
ip6->ip6_hlim = V_ip6_defmcasthlim;
}
-
-#ifdef IPSEC
/*
- * We may re-inject packets into the stack here.
+ * Validate route against routing table additions;
+ * a better/more specific route might have been added.
+ * Make sure address family is set in route.
*/
- if (needipsec && needipsectun) {
- struct ipsec_output_state state;
-
- /*
- * All the extension headers will become inaccessible
- * (since they can be encrypted).
- * Don't panic, we need no more updates to extension headers
- * on inner IPv6 packet (since they are now encapsulated).
- *
- * IPv6 [ESP|AH] IPv6 [extension headers] payload
- */
- bzero(&exthdrs, sizeof(exthdrs));
- exthdrs.ip6e_ip6 = m;
-
- bzero(&state, sizeof(state));
- state.m = m;
- state.ro = (struct route *)ro;
- state.dst = (struct sockaddr *)dst;
-
- error = ipsec6_output_tunnel(&state, sp, flags);
-
- m = state.m;
- ro = (struct route_in6 *)state.ro;
- dst = (struct sockaddr_in6 *)state.dst;
- if (error == EJUSTRETURN) {
- /*
- * We had a SP with a level of 'use' and no SA. We
- * will just continue to process the packet without
- * IPsec processing.
- */
- ;
- } else if (error) {
- /* mbuf is already reclaimed in ipsec6_output_tunnel. */
- m0 = m = NULL;
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("[%s:%d] (ipsec): error code %d\n",
- __func__, __LINE__, error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- } else {
- /*
- * In the FAST IPSec case we have already
- * re-injected the packet and it has been freed
- * by the ipsec_done() function. So, just clean
- * up after ourselves.
- */
- m = NULL;
- goto done;
- }
-
- exthdrs.ip6e_ip6 = m;
+ if (inp) {
+ ro->ro_dst.sin6_family = AF_INET6;
+ RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum);
}
-#endif /* IPSEC */
-
- /* adjust pointer */
- ip6 = mtod(m, struct ip6_hdr *);
-
- if (ro->ro_rt && fwd_tag == NULL) {
+ if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) &&
+ ro->ro_dst.sin6_family == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
rt = ro->ro_rt;
ifp = ro->ro_rt->rt_ifp;
} else {
@@ -649,7 +561,7 @@ again:
dst_sa.sin6_addr = ip6->ip6_dst;
}
error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp,
- &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
+ &rt, fibnum);
if (error != 0) {
if (ifp != NULL)
in6_ifstat_inc(ifp, ifs6_out_discard);
@@ -673,7 +585,7 @@ again:
}
if (rt != NULL) {
ia = (struct in6_ifaddr *)(rt->rt_ifa);
- rt->rt_use++;
+ counter_u64_add(rt->rt_pksent, 1);
}
@@ -758,7 +670,7 @@ again:
* thus deferring a hash lookup and lock acquisition
* at the expense of an m_copym().
*/
- ip6_mloopback(ifp, m, dst);
+ ip6_mloopback(ifp, m);
} else {
/*
* If we are acting as a multicast router, perform
@@ -776,9 +688,7 @@ again:
/*
* XXX: ip6_mforward expects that rcvif is NULL
* when it is called from the originating path.
- * However, it is not always the case, since
- * some versions of MGETHDR() does not
- * initialize the field.
+ * However, it may not always be the case.
*/
m->m_pkthdr.rcvif = NULL;
if (ip6_mforward(ip6, ifp, m) != 0) {
@@ -810,8 +720,8 @@ again:
*ifpp = ifp;
/* Determine path MTU. */
- if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
- &alwaysfrag, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0)
+ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst,
+ &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0)
goto bad;
/*
@@ -887,8 +797,10 @@ again:
error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
if (error != 0 || m == NULL)
goto done;
+ /* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
+ needfiblookup = 0;
/* See if destination IP address was changed by packet filter. */
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
m->m_flags |= M_SKIP_FIREWALL;
@@ -908,9 +820,20 @@ again:
#endif
error = netisr_queue(NETISR_IPV6, m);
goto done;
- } else
- goto again; /* Redo the routing table lookup. */
+ } else {
+ RO_RTFREE(ro);
+ needfiblookup = 1; /* Redo the routing table lookup. */
+ }
}
+ /* See if fib was changed by packet filter. */
+ if (fibnum != M_GETFIB(m)) {
+ m->m_flags |= M_SKIP_FIREWALL;
+ fibnum = M_GETFIB(m);
+ RO_RTFREE(ro);
+ needfiblookup = 1;
+ }
+ if (needfiblookup)
+ goto again;
/* See if local, if yes, send it to netisr. */
if (m->m_flags & M_FASTFWD_OURS) {
@@ -1018,11 +941,13 @@ passout:
ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
if (ia6) {
/* Record statistics for this interface address. */
- ia6->ia_ifa.if_opackets++;
- ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
+ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1);
+ counter_u64_add(ia6->ia_ifa.ifa_obytes,
+ m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
- error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
+ error = nd6_output_ifp(ifp, origifp, m, dst,
+ (struct route *)ro);
goto done;
}
@@ -1040,13 +965,8 @@ passout:
in6_ifstat_inc(ifp, ifs6_out_fragfail);
goto bad;
} else {
- struct mbuf **mnext, *m_frgpart;
- struct ip6_frag *ip6f;
- u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
- int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
-
/*
* Too large for the destination or interface;
* fragment if possible.
@@ -1064,18 +984,6 @@ passout:
}
/*
- * Verify that we have any chance at all of being able to queue
- * the packet or packet fragments
- */
- if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
- < tlen /* - hlen */)) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto bad;
- }
-
-
- /*
* If the interface will not calculate checksums on
* fragmented packets, then do it here.
* XXX-BZ handle the hw offloading case. Need flags.
@@ -1090,8 +998,6 @@ passout:
m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
}
#endif
- mnext = &m->m_nextpkt;
-
/*
* Change the next header field of the last header in the
* unfragmentable part.
@@ -1116,47 +1022,9 @@ passout:
* chain.
*/
m0 = m;
- for (off = hlen; off < tlen; off += len) {
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
- if (!m) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m->m_pkthdr.rcvif = NULL;
- m->m_flags = m0->m_flags & M_COPYFLAGS; /* incl. FIB */
- *mnext = m;
- mnext = &m->m_nextpkt;
- m->m_data += max_linkhdr;
- mhip6 = mtod(m, struct ip6_hdr *);
- *mhip6 = *ip6;
- m->m_len = sizeof(*mhip6);
- error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
- if (error) {
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
- if (off + len >= tlen)
- len = tlen - off;
- else
- ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
- mhip6->ip6_plen = htons((u_short)(len + hlen +
- sizeof(*ip6f) - sizeof(struct ip6_hdr)));
- if ((m_frgpart = m_copy(m0, off, len)) == 0) {
- error = ENOBUFS;
- IP6STAT_INC(ip6s_odropped);
- goto sendorfree;
- }
- m_cat(m, m_frgpart);
- m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
- m->m_pkthdr.rcvif = NULL;
- ip6f->ip6f_reserved = 0;
- ip6f->ip6f_ident = id;
- ip6f->ip6f_nxt = nextproto;
- IP6STAT_INC(ip6s_ofragments);
- in6_ifstat_inc(ifp, ifs6_out_fragcreat);
- }
+ id = htonl(ip6_randomid());
+ if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id)))
+ goto sendorfree;
in6_ifstat_inc(ifp, ifs6_out_fragok);
}
@@ -1174,10 +1042,12 @@ sendorfree:
if (error == 0) {
/* Record statistics for this interface address. */
if (ia) {
- ia->ia_ifa.if_opackets++;
- ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
+ counter_u64_add(ia->ia_ifa.ifa_obytes,
+ m->m_pkthdr.len);
}
- error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
+ error = nd6_output_ifp(ifp, origifp, m, dst,
+ (struct route *)ro);
} else
m_freem(m);
}
@@ -1186,15 +1056,13 @@ sendorfree:
IP6STAT_INC(ip6s_fragmented);
done:
- if (ro == &ip6route)
+ /*
+ * Release the route if using our private route, or if
+ * (with flowtable) we don't have our own reference.
+ */
+ if (ro == &ip6route ||
+ (ro != NULL && ro->ro_flags & RT_NORTREF))
RO_RTFREE(ro);
- if (ro_pmtu == &ip6route)
- RO_RTFREE(ro_pmtu);
-#ifdef IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif
-
return (error);
freehdrs:
@@ -1217,17 +1085,12 @@ ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
if (hlen > MCLBYTES)
return (ENOBUFS); /* XXX */
- MGET(m, M_DONTWAIT, MT_DATA);
- if (!m)
+ if (hlen > MLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, 0);
+ else
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
return (ENOBUFS);
-
- if (hlen > MLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- return (ENOBUFS);
- }
- }
m->m_len = hlen;
if (hdr)
bcopy(hdr, mtod(m, caddr_t), hlen);
@@ -1254,9 +1117,9 @@ ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
* jumbo payload option, allocate a cluster to store the whole options.
* Otherwise, use it to store the options.
*/
- if (exthdrs->ip6e_hbh == 0) {
- MGET(mopt, M_DONTWAIT, MT_DATA);
- if (mopt == 0)
+ if (exthdrs->ip6e_hbh == NULL) {
+ mopt = m_get(M_NOWAIT, MT_DATA);
+ if (mopt == NULL)
return (ENOBUFS);
mopt->m_len = JUMBOOPTLEN;
optbuf = mtod(mopt, u_char *);
@@ -1287,15 +1150,8 @@ ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
* As a consequence, we must always prepare a cluster
* at this point.
*/
- MGET(n, M_DONTWAIT, MT_DATA);
- if (n) {
- MCLGET(n, M_DONTWAIT);
- if ((n->m_flags & M_EXT) == 0) {
- m_freem(n);
- n = NULL;
- }
- }
- if (!n)
+ n = m_getcl(M_NOWAIT, MT_DATA, 0);
+ if (n == NULL)
return (ENOBUFS);
n->m_len = oldoptlen + JUMBOOPTLEN;
bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
@@ -1342,8 +1198,8 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
if (hlen > sizeof(struct ip6_hdr)) {
n = m_copym(m0, sizeof(struct ip6_hdr),
- hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
- if (n == 0)
+ hlen - sizeof(struct ip6_hdr), M_NOWAIT);
+ if (n == NULL)
return (ENOBUFS);
m->m_next = n;
} else
@@ -1353,7 +1209,7 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
for (mlast = n; mlast->m_next; mlast = mlast->m_next)
;
- if ((mlast->m_flags & M_EXT) == 0 &&
+ if (M_WRITABLE(mlast) &&
M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
/* use the trailing space of the last mbuf for the fragment hdr */
*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
@@ -1364,8 +1220,8 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
/* allocate a new mbuf for the fragment header */
struct mbuf *mfrg;
- MGET(mfrg, M_DONTWAIT, MT_DATA);
- if (mfrg == 0)
+ mfrg = m_get(M_NOWAIT, MT_DATA);
+ if (mfrg == NULL)
return (ENOBUFS);
mfrg->m_len = sizeof(struct ip6_frag);
*frghdrp = mtod(mfrg, struct ip6_frag *);
@@ -1375,35 +1231,105 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
return (0);
}
+/*
+ * Calculates IPv6 path mtu for destination @dst.
+ * Resulting MTU is stored in @mtup.
+ *
+ * Returns 0 on success.
+ */
static int
-ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
- struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
- int *alwaysfragp, u_int fibnum)
+ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup)
{
- u_int32_t mtu = 0;
- int alwaysfrag = 0;
- int error = 0;
+ struct nhop6_extended nh6;
+ struct in6_addr kdst;
+ uint32_t scopeid;
+ struct ifnet *ifp;
+ u_long mtu;
+ int error;
- if (ro_pmtu != ro) {
- /* The first hop and the final destination may differ. */
- struct sockaddr_in6 *sa6_dst =
- (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
- if (ro_pmtu->ro_rt &&
- ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
- !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
- RTFREE(ro_pmtu->ro_rt);
- ro_pmtu->ro_rt = (struct rtentry *)NULL;
- }
- if (ro_pmtu->ro_rt == NULL) {
+ in6_splitscope(dst, &kdst, &scopeid);
+ if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0)
+ return (EHOSTUNREACH);
+
+ ifp = nh6.nh_ifp;
+ mtu = nh6.nh_mtu;
+
+ error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0);
+ fib6_free_nh_ext(fibnum, &nh6);
+
+ return (error);
+}
+
+/*
+ * Calculates IPv6 path MTU for @dst based on transmit @ifp,
+ * and cached data in @ro_pmtu.
+ * MTU from (successful) route lookup is saved (along with dst)
+ * inside @ro_pmtu to avoid subsequent route lookups after packet
+ * filter processing.
+ *
+ * Stores mtu and always-frag value into @mtup and @alwaysfragp.
+ * Returns 0 on success.
+ */
+static int
+ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
+ struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup,
+ int *alwaysfragp, u_int fibnum, u_int proto)
+{
+ struct nhop6_basic nh6;
+ struct in6_addr kdst;
+ uint32_t scopeid;
+ struct sockaddr_in6 *sa6_dst;
+ u_long mtu;
+
+ mtu = 0;
+ if (do_lookup) {
+
+ /*
+ * Here ro_pmtu has final destination address, while
+ * ro might represent immediate destination.
+ * Use ro_pmtu destination since mtu might differ.
+ */
+ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
+ if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
+ ro_pmtu->ro_mtu = 0;
+
+ if (ro_pmtu->ro_mtu == 0) {
bzero(sa6_dst, sizeof(*sa6_dst));
sa6_dst->sin6_family = AF_INET6;
sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
sa6_dst->sin6_addr = *dst;
- in6_rtalloc(ro_pmtu, fibnum);
+ in6_splitscope(dst, &kdst, &scopeid);
+ if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0,
+ &nh6) == 0)
+ ro_pmtu->ro_mtu = nh6.nh_mtu;
}
+
+ mtu = ro_pmtu->ro_mtu;
}
- if (ro_pmtu->ro_rt) {
+
+ if (ro_pmtu->ro_rt)
+ mtu = ro_pmtu->ro_rt->rt_mtu;
+
+ return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto));
+}
+
+/*
+ * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and
+ * hostcache data for @dst.
+ * Stores mtu and always-frag value into @mtup and @alwaysfragp.
+ *
+ * Returns 0 on success.
+ */
+static int
+ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu,
+ u_long *mtup, int *alwaysfragp, u_int proto)
+{
+ u_long mtu = 0;
+ int alwaysfrag = 0;
+ int error = 0;
+
+ if (rt_mtu > 0) {
u_int32_t ifmtu;
struct in_conninfo inc;
@@ -1411,14 +1337,16 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
inc.inc_flags |= INC_ISIPV6;
inc.inc6_faddr = *dst;
- if (ifp == NULL)
- ifp = ro_pmtu->ro_rt->rt_ifp;
ifmtu = IN6_LINKMTU(ifp);
- mtu = tcp_hc_getmtu(&inc);
+
+ /* TCP is known to react to pmtu changes so skip hc */
+ if (proto != IPPROTO_TCP)
+ mtu = tcp_hc_getmtu(&inc);
+
if (mtu)
- mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
+ mtu = min(mtu, rt_mtu);
else
- mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
+ mtu = rt_mtu;
if (mtu == 0)
mtu = ifmtu;
else if (mtu < IPV6_MMTU) {
@@ -1432,17 +1360,6 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
*/
alwaysfrag = 1;
mtu = IPV6_MMTU;
- } else if (mtu > ifmtu) {
- /*
- * The MTU on the route is larger than the MTU on
- * the interface! This shouldn't happen, unless the
- * MTU of the interface has been changed after the
- * interface was brought up. Change the MTU in the
- * route to match the interface MTU (as long as the
- * field isn't locked).
- */
- mtu = ifmtu;
- ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
}
} else if (ifp) {
mtu = IN6_LINKMTU(ifp);
@@ -1468,6 +1385,10 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
int level, op, optname;
int optlen;
struct thread *td;
+#ifdef RSS
+ uint32_t rss_bucket;
+ int retval;
+#endif
level = sopt->sopt_level;
op = sopt->sopt_dir;
@@ -1561,16 +1482,23 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
/* FALLTHROUGH */
case IPV6_UNICAST_HOPS:
case IPV6_HOPLIMIT:
- case IPV6_FAITH:
case IPV6_RECVPKTINFO:
case IPV6_RECVHOPLIMIT:
case IPV6_RECVRTHDR:
case IPV6_RECVPATHMTU:
case IPV6_RECVTCLASS:
+ case IPV6_RECVFLOWID:
+#ifdef RSS
+ case IPV6_RECVRSSBUCKETID:
+#endif
case IPV6_V6ONLY:
case IPV6_AUTOFLOWLABEL:
case IPV6_BINDANY:
+ case IPV6_BINDMULTI:
+#ifdef RSS
+ case IPV6_RSS_LISTEN_BUCKET:
+#endif
if (optname == IPV6_BINDANY && td != NULL) {
error = priv_check(td,
PRIV_NETINET_BINDANY);
@@ -1620,6 +1548,16 @@ do { \
} while (/*CONSTCOND*/ 0)
#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
+#define OPTSET2(bit, val) do { \
+ INP_WLOCK(in6p); \
+ if (val) \
+ in6p->inp_flags2 |= bit; \
+ else \
+ in6p->inp_flags2 &= ~bit; \
+ INP_WUNLOCK(in6p); \
+} while (0)
+#define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0)
+
case IPV6_RECVPKTINFO:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
@@ -1691,10 +1629,6 @@ do { \
OPTSET(IN6P_RTHDR);
break;
- case IPV6_FAITH:
- OPTSET(INP_FAITH);
- break;
-
case IPV6_RECVPATHMTU:
/*
* We ignore this option for TCP
@@ -1706,6 +1640,16 @@ do { \
OPTSET(IN6P_MTU);
break;
+ case IPV6_RECVFLOWID:
+ OPTSET2(INP_RECVFLOWID, optval);
+ break;
+
+#ifdef RSS
+ case IPV6_RECVRSSBUCKETID:
+ OPTSET2(INP_RECVRSSBUCKETID, optval);
+ break;
+#endif
+
case IPV6_V6ONLY:
/*
* make setsockopt(IPV6_V6ONLY)
@@ -1738,6 +1682,21 @@ do { \
case IPV6_BINDANY:
OPTSET(INP_BINDANY);
break;
+
+ case IPV6_BINDMULTI:
+ OPTSET2(INP_BINDMULTI, optval);
+ break;
+#ifdef RSS
+ case IPV6_RSS_LISTEN_BUCKET:
+ if ((optval >= 0) &&
+ (optval < rss_getnumbuckets())) {
+ in6p->inp_rss_listen_bucket = optval;
+ OPTSET2(INP_RSS_BUCKET_SET, 1);
+ } else {
+ error = EINVAL;
+ }
+ break;
+#endif
}
break;
@@ -1947,12 +1906,19 @@ do { \
case IPV6_RECVRTHDR:
case IPV6_RECVPATHMTU:
- case IPV6_FAITH:
case IPV6_V6ONLY:
case IPV6_PORTRANGE:
case IPV6_RECVTCLASS:
case IPV6_AUTOFLOWLABEL:
case IPV6_BINDANY:
+ case IPV6_FLOWID:
+ case IPV6_FLOWTYPE:
+ case IPV6_RECVFLOWID:
+#ifdef RSS
+ case IPV6_RSSBUCKETID:
+ case IPV6_RECVRSSBUCKETID:
+#endif
+ case IPV6_BINDMULTI:
switch (optname) {
case IPV6_RECVHOPOPTS:
@@ -1987,10 +1953,6 @@ do { \
optval = OPTBIT(IN6P_MTU);
break;
- case IPV6_FAITH:
- optval = OPTBIT(INP_FAITH);
- break;
-
case IPV6_V6ONLY:
optval = OPTBIT(IN6P_IPV6_V6ONLY);
break;
@@ -2018,6 +1980,39 @@ do { \
case IPV6_BINDANY:
optval = OPTBIT(INP_BINDANY);
break;
+
+ case IPV6_FLOWID:
+ optval = in6p->inp_flowid;
+ break;
+
+ case IPV6_FLOWTYPE:
+ optval = in6p->inp_flowtype;
+ break;
+
+ case IPV6_RECVFLOWID:
+ optval = OPTBIT2(INP_RECVFLOWID);
+ break;
+#ifdef RSS
+ case IPV6_RSSBUCKETID:
+ retval =
+ rss_hash2bucket(in6p->inp_flowid,
+ in6p->inp_flowtype,
+ &rss_bucket);
+ if (retval == 0)
+ optval = rss_bucket;
+ else
+ error = EINVAL;
+ break;
+
+ case IPV6_RECVRSSBUCKETID:
+ optval = OPTBIT2(INP_RECVRSSBUCKETID);
+ break;
+#endif
+
+ case IPV6_BINDMULTI:
+ optval = OPTBIT2(INP_BINDMULTI);
+ break;
+
}
if (error)
break;
@@ -2029,9 +2024,6 @@ do { \
{
u_long pmtu = 0;
struct ip6_mtuinfo mtuinfo;
- struct route_in6 sro;
-
- bzero(&sro, sizeof(sro));
if (!(so->so_state & SS_ISCONNECTED))
return (ENOTCONN);
@@ -2040,11 +2032,8 @@ do { \
* routing, or optional information to specify
* the outgoing interface.
*/
- error = ip6_getpmtu(&sro, NULL, NULL,
- &in6p->in6p_faddr, &pmtu, NULL,
- so->so_fibnum);
- if (sro.ro_rt)
- RTFREE(sro.ro_rt);
+ error = ip6_getpmtu_ctl(so->so_fibnum,
+ &in6p->in6p_faddr, &pmtu);
if (error)
break;
if (pmtu > IPV6_MAXPACKET)
@@ -2307,12 +2296,14 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
switch (optname) {
case IPV6_PKTINFO:
- if (pktopt && pktopt->ip6po_pktinfo)
- optdata = (void *)pktopt->ip6po_pktinfo;
- else {
+ optdata = (void *)&null_pktinfo;
+ if (pktopt && pktopt->ip6po_pktinfo) {
+ bcopy(pktopt->ip6po_pktinfo, &null_pktinfo,
+ sizeof(null_pktinfo));
+ in6_clearscope(&null_pktinfo.ipi6_addr);
+ } else {
/* XXX: we don't have to do this every time... */
bzero(&null_pktinfo, sizeof(null_pktinfo));
- optdata = (void *)&null_pktinfo;
}
optdatalen = sizeof(struct in6_pktinfo);
break;
@@ -2529,7 +2520,7 @@ int
ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
{
- struct cmsghdr *cm = 0;
+ struct cmsghdr *cm = NULL;
if (control == NULL || opt == NULL)
return (EINVAL);
@@ -2666,18 +2657,30 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
return (EINVAL);
}
-
+ if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr))
+ return (EINVAL);
/* validate the interface index if specified. */
- if (pktinfo->ipi6_ifindex > V_if_index ||
- pktinfo->ipi6_ifindex < 0) {
+ if (pktinfo->ipi6_ifindex > V_if_index)
return (ENXIO);
- }
if (pktinfo->ipi6_ifindex) {
ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
if (ifp == NULL)
return (ENXIO);
}
-
+ if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL ||
+ (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0))
+ return (ENETDOWN);
+
+ if (ifp != NULL &&
+ !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
+ struct in6_ifaddr *ia;
+
+ in6_setscope(&pktinfo->ipi6_addr, ifp, NULL);
+ ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr);
+ if (ia == NULL)
+ return (EADDRNOTAVAIL);
+ ifa_free(&ia->ia_ifa);
+ }
/*
* We store the address anyway, and let in6_selectsrc()
* validate the specified address. This is because ipi6_addr
@@ -2987,7 +2990,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
* pointer that might NOT be &loif -- easier than replicating that code here.
*/
void
-ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
+ip6_mloopback(struct ifnet *ifp, struct mbuf *m)
{
struct mbuf *copym;
struct ip6_hdr *ip6;
@@ -3001,20 +3004,12 @@ ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
* is in an mbuf cluster, so that we can safely override the IPv6
* header portion later.
*/
- if ((copym->m_flags & M_EXT) != 0 ||
+ if (!M_WRITABLE(copym) ||
copym->m_len < sizeof(struct ip6_hdr)) {
copym = m_pullup(copym, sizeof(struct ip6_hdr));
if (copym == NULL)
return;
}
-
-#ifdef DIAGNOSTIC
- if (copym->m_len < sizeof(*ip6)) {
- m_freem(copym);
- return;
- }
-#endif
-
ip6 = mtod(copym, struct ip6_hdr *);
/*
* clear embedded scope identifiers if necessary.
@@ -3022,8 +3017,12 @@ ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
*/
in6_clearscope(&ip6->ip6_src);
in6_clearscope(&ip6->ip6_dst);
-
- (void)if_simloop(ifp, copym, dst->sin6_family, 0);
+ if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
+ copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 |
+ CSUM_PSEUDO_HDR;
+ copym->m_pkthdr.csum_data = 0xffff;
+ }
+ if_simloop(ifp, copym, AF_INET6, 0);
}
/*
@@ -3037,13 +3036,13 @@ ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
ip6 = mtod(m, struct ip6_hdr *);
if (m->m_len > sizeof(*ip6)) {
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
- if (mh == 0) {
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
+ if (mh == NULL) {
m_freem(m);
return ENOBUFS;
}
- M_MOVE_PKTHDR(mh, m);
- MH_ALIGN(mh, sizeof(*ip6));
+ m_move_pkthdr(mh, m);
+ M_ALIGN(mh, sizeof(*ip6));
m->m_len -= sizeof(*ip6);
m->m_data += sizeof(*ip6);
mh->m_next = m;
diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h
index 4e8c42bd..e52a3206 100644
--- a/freebsd/sys/netinet6/ip6_var.h
+++ b/freebsd/sys/netinet6/ip6_var.h
@@ -99,6 +99,14 @@ struct ip6asfrag {
#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
/*
+ * IP6 reinjecting structure.
+ */
+struct ip6_direct_ctx {
+ uint32_t ip6dc_nxt; /* next header to process */
+ uint32_t ip6dc_off; /* offset to next header */
+};
+
+/*
* Structure attached to inpcb.in6p_moptions and
* passed to ip6_output when IPv6 multicast options are in use.
* This structure is lazy-allocated.
@@ -181,39 +189,39 @@ struct ip6_pktopts {
*/
struct ip6stat {
- u_quad_t ip6s_total; /* total packets received */
- u_quad_t ip6s_tooshort; /* packet too short */
- u_quad_t ip6s_toosmall; /* not enough data */
- u_quad_t ip6s_fragments; /* fragments received */
- u_quad_t ip6s_fragdropped; /* frags dropped(dups, out of space) */
- u_quad_t ip6s_fragtimeout; /* fragments timed out */
- u_quad_t ip6s_fragoverflow; /* fragments that exceeded limit */
- u_quad_t ip6s_forward; /* packets forwarded */
- u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */
- u_quad_t ip6s_redirectsent; /* packets forwarded on same net */
- u_quad_t ip6s_delivered; /* datagrams delivered to upper level*/
- u_quad_t ip6s_localout; /* total ip packets generated here */
- u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. */
- u_quad_t ip6s_reassembled; /* total packets reassembled ok */
- u_quad_t ip6s_fragmented; /* datagrams successfully fragmented */
- u_quad_t ip6s_ofragments; /* output fragments created */
- u_quad_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
- u_quad_t ip6s_badoptions; /* error in option processing */
- u_quad_t ip6s_noroute; /* packets discarded due to no route */
- u_quad_t ip6s_badvers; /* ip6 version != 6 */
- u_quad_t ip6s_rawout; /* total raw ip packets generated */
- u_quad_t ip6s_badscope; /* scope error */
- u_quad_t ip6s_notmember; /* don't join this multicast group */
+ uint64_t ip6s_total; /* total packets received */
+ uint64_t ip6s_tooshort; /* packet too short */
+ uint64_t ip6s_toosmall; /* not enough data */
+ uint64_t ip6s_fragments; /* fragments received */
+ uint64_t ip6s_fragdropped; /* frags dropped(dups, out of space) */
+ uint64_t ip6s_fragtimeout; /* fragments timed out */
+ uint64_t ip6s_fragoverflow; /* fragments that exceeded limit */
+ uint64_t ip6s_forward; /* packets forwarded */
+ uint64_t ip6s_cantforward; /* packets rcvd for unreachable dest */
+ uint64_t ip6s_redirectsent; /* packets forwarded on same net */
+ uint64_t ip6s_delivered; /* datagrams delivered to upper level*/
+ uint64_t ip6s_localout; /* total ip packets generated here */
+ uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */
+ uint64_t ip6s_reassembled; /* total packets reassembled ok */
+ uint64_t ip6s_fragmented; /* datagrams successfully fragmented */
+ uint64_t ip6s_ofragments; /* output fragments created */
+ uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
+ uint64_t ip6s_badoptions; /* error in option processing */
+ uint64_t ip6s_noroute; /* packets discarded due to no route */
+ uint64_t ip6s_badvers; /* ip6 version != 6 */
+ uint64_t ip6s_rawout; /* total raw ip packets generated */
+ uint64_t ip6s_badscope; /* scope error */
+ uint64_t ip6s_notmember; /* don't join this multicast group */
#define IP6S_HDRCNT 256 /* headers count */
- u_quad_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
- u_quad_t ip6s_m1; /* one mbuf */
+ uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
+ uint64_t ip6s_m1; /* one mbuf */
#define IP6S_M2MMAX 32
- u_quad_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */
- u_quad_t ip6s_mext1; /* one ext mbuf */
- u_quad_t ip6s_mext2m; /* two or more ext mbuf */
- u_quad_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */
- u_quad_t ip6s_nogif; /* no match gif found */
- u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */
+ uint64_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */
+ uint64_t ip6s_mext1; /* one ext mbuf */
+ uint64_t ip6s_mext2m; /* two or more ext mbuf */
+ uint64_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */
+ uint64_t ip6s_nogif; /* no match gif found */
+ uint64_t ip6s_toomanyhdr; /* discarded due to too many headers */
/*
* statistics for improvement of the source address selection
@@ -223,81 +231,51 @@ struct ip6stat {
#define IP6S_RULESMAX 16
#define IP6S_SCOPECNT 16
/* number of times that address selection fails */
- u_quad_t ip6s_sources_none;
+ uint64_t ip6s_sources_none;
/* number of times that an address on the outgoing I/F is chosen */
- u_quad_t ip6s_sources_sameif[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_sameif[IP6S_SCOPECNT];
/* number of times that an address on a non-outgoing I/F is chosen */
- u_quad_t ip6s_sources_otherif[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_otherif[IP6S_SCOPECNT];
/*
* number of times that an address that has the same scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_samescope[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_samescope[IP6S_SCOPECNT];
/*
* number of times that an address that has a different scope
* from the destination is chosen.
*/
- u_quad_t ip6s_sources_otherscope[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT];
/* number of times that a deprecated address is chosen */
- u_quad_t ip6s_sources_deprecated[IP6S_SCOPECNT];
+ uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT];
/* number of times that each rule of source selection is applied. */
- u_quad_t ip6s_sources_rule[IP6S_RULESMAX];
+ uint64_t ip6s_sources_rule[IP6S_RULESMAX];
};
#ifdef _KERNEL
-#define IP6STAT_ADD(name, val) V_ip6stat.name += (val)
-#define IP6STAT_SUB(name, val) V_ip6stat.name -= (val)
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat);
+#define IP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val))
+#define IP6STAT_SUB(name, val) IP6STAT_ADD(name, -(val))
#define IP6STAT_INC(name) IP6STAT_ADD(name, 1)
#define IP6STAT_DEC(name) IP6STAT_SUB(name, 1)
#endif
#ifdef _KERNEL
-/*
- * IPv6 onion peeling state.
- * it will be initialized when we come into ip6_input().
- * XXX do not make it a kitchen sink!
- */
-struct ip6aux {
- u_int32_t ip6a_flags;
-#define IP6A_SWAP 0x01 /* swapped home/care-of on packet */
-#define IP6A_HASEEN 0x02 /* HA was present */
-#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */
-#define IP6A_RTALERTSEEN 0x08 /* rtalert present */
-
- /* ip6.ip6_src */
- struct in6_addr ip6a_careof; /* care-of address of the peer */
- struct in6_addr ip6a_home; /* home address of the peer */
- u_int16_t ip6a_bruid; /* BR unique identifier */
-
- /* ip6.ip6_dst */
- struct in6_ifaddr *ip6a_dstia6; /* my ifaddr that matches ip6_dst */
-
- /* rtalert */
- u_int16_t ip6a_rtalert; /* rtalert option value */
-
- /*
- * decapsulation history will be here.
- * with IPsec it may not be accurate.
- */
-};
-#endif
-
-#ifdef _KERNEL
/* flags passed to ip6_output as last parameter */
#define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */
#define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */
#define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */
-#define M_IP6_NEXTHOP M_PROTO7 /* explicit ip nexthop */
-
#ifdef __NO_STRICT_ALIGNMENT
#define IP6_HDR_ALIGNED_P(ip) 1
#else
#define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0)
#endif
-VNET_DECLARE(struct ip6stat, ip6stat); /* statistics */
VNET_DECLARE(int, ip6_defhlim); /* default hop limit */
VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */
VNET_DECLARE(int, ip6_forwarding); /* act as router? */
@@ -306,7 +284,6 @@ VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix
* walk list every 5 sec. */
VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */
VNET_DECLARE(int, ip6_v6only);
-#define V_ip6stat VNET(ip6stat)
#define V_ip6_defhlim VNET(ip6_defhlim)
#define V_ip6_defmcasthlim VNET(ip6_defmcasthlim)
#define V_ip6_forwarding VNET(ip6_forwarding)
@@ -327,7 +304,6 @@ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
* receiving IF. */
VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when
forwarding enabled */
-VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */
VNET_DECLARE(int, ip6_log_interval);
VNET_DECLARE(time_t, ip6_log_time);
VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension
@@ -341,7 +317,6 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_no_radr VNET(ip6_no_radr)
#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
#define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3)
-#define V_ip6_keepfaith VNET(ip6_keepfaith)
#define V_ip6_log_interval VNET(ip6_log_interval)
#define V_ip6_log_time VNET(ip6_log_time)
#define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit)
@@ -379,24 +354,17 @@ int icmp6_ctloutput(struct socket *, struct sockopt *sopt);
struct in6_ifaddr;
void ip6_init(void);
-#ifdef VIMAGE
-void ip6_destroy(void);
-#endif
int ip6proto_register(short);
int ip6proto_unregister(short);
void ip6_input(struct mbuf *);
-struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *);
+void ip6_direct_input(struct mbuf *);
void ip6_freepcbopts(struct ip6_pktopts *);
int ip6_unknown_opt(u_int8_t *, struct mbuf *, int);
-char * ip6_get_prevhdr(struct mbuf *, int);
-int ip6_nexthdr(struct mbuf *, int, int, int *);
-int ip6_lasthdr(struct mbuf *, int, int, int *);
-
-#ifdef __notyet__
-struct ip6aux *ip6_findaux(struct mbuf *);
-#endif
+char * ip6_get_prevhdr(const struct mbuf *, int);
+int ip6_nexthdr(const struct mbuf *, int, int, int *);
+int ip6_lasthdr(const struct mbuf *, int, int, int *);
extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *,
struct mbuf *);
@@ -411,7 +379,7 @@ int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
void ip6_forward(struct mbuf *, int);
-void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
+void ip6_mloopback(struct ifnet *, struct mbuf *);
int ip6_output(struct mbuf *, struct ip6_pktopts *,
struct route_in6 *,
int,
@@ -425,6 +393,9 @@ int ip6_setpktopts(struct mbuf *, struct ip6_pktopts *,
void ip6_clearpktopts(struct ip6_pktopts *, int);
struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
int ip6_optlen(struct inpcb *);
+int ip6_deletefraghdr(struct mbuf *, int, int);
+int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
+ uint32_t);
int route6_input(struct mbuf **, int *, int);
@@ -437,16 +408,17 @@ void rip6_init(void);
int rip6_input(struct mbuf **, int *, int);
void rip6_ctlinput(int, struct sockaddr *, void *);
int rip6_ctloutput(struct socket *, struct sockopt *);
-int rip6_output(struct mbuf *, ...);
+int rip6_output(struct mbuf *, struct socket *, ...);
int rip6_usrreq(struct socket *,
int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *);
int dest6_input(struct mbuf **, int *, int);
int none_input(struct mbuf **, int *, int);
-int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *,
- struct inpcb *inp, struct route_in6 *, struct ucred *cred,
- struct ifnet **, struct in6_addr *);
+int in6_selectsrc_socket(struct sockaddr_in6 *, struct ip6_pktopts *,
+ struct inpcb *, struct ucred *, int, struct in6_addr *, int *);
+int in6_selectsrc_addr(uint32_t, const struct in6_addr *,
+ uint32_t, struct ifnet *, struct in6_addr *, int *);
int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct rtentry **);
@@ -455,6 +427,7 @@ int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *,
struct rtentry **, u_int);
u_int32_t ip6_randomid(void);
u_int32_t ip6_randomflowlabel(void);
+void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset);
#endif /* _KERNEL */
#endif /* !_NETINET6_IP6_VAR_H_ */
diff --git a/freebsd/sys/netinet6/ip6protosw.h b/freebsd/sys/netinet6/ip6protosw.h
index ec802a51..9e80a698 100644
--- a/freebsd/sys/netinet6/ip6protosw.h
+++ b/freebsd/sys/netinet6/ip6protosw.h
@@ -92,7 +92,7 @@ struct pr_usrreqs;
*
* ip6c_finaldst usually points to ip6c_ip6->ip6_dst. if the original
* (internal) packet carries a routing header, it may point the final
- * dstination address in the routing header.
+ * destination address in the routing header.
*
* ip6c_src: ip6c_ip6->ip6_src + scope info + flowlabel in ip6c_ip6
* (beware of flowlabel, if you try to compare it against others)
@@ -110,39 +110,8 @@ struct ip6ctlparam {
u_int8_t ip6c_nxt; /* final next header field */
};
-struct ip6protosw {
- short pr_type; /* socket type used for */
- struct domain *pr_domain; /* domain protocol a member of */
- short pr_protocol; /* protocol number */
- short pr_flags; /* see below */
-
-/* protocol-protocol hooks */
- int (*pr_input) /* input to protocol (from below) */
- (struct mbuf **, int *, int);
- int (*pr_output) /* output to protocol (from above) */
- (struct mbuf *, ...);
- void (*pr_ctlinput) /* control input (from below) */
- (int, struct sockaddr *, void *);
- int (*pr_ctloutput) /* control output (from above) */
- (struct socket *, struct sockopt *);
-
-/* utility hooks */
- void (*pr_init) /* initialization hook */
- (void);
- void (*pr_destroy) /* cleanup hook */
- (void);
-
- void (*pr_fasttimo) /* fast timeout (200ms) */
- (void);
- void (*pr_slowtimo) /* slow timeout (500ms) */
- (void);
- void (*pr_drain) /* flush any excess space possible */
- (void);
- struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */
-};
-
#ifdef _KERNEL
-extern struct ip6protosw inet6sw[];
+extern struct protosw inet6sw[];
#endif
#endif /* !_NETINET6_IP6PROTOSW_H_ */
diff --git a/freebsd/sys/netinet6/ip_fw_nat64.h b/freebsd/sys/netinet6/ip_fw_nat64.h
new file mode 100644
index 00000000..a5c38b2a
--- /dev/null
+++ b/freebsd/sys/netinet6/ip_fw_nat64.h
@@ -0,0 +1,154 @@
+/*-
+ * Copyright (c) 2015 Yandex LLC
+ * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IP_FW_NAT64_H_
+#define _NETINET6_IP_FW_NAT64_H_
+
+struct ipfw_nat64stl_stats {
+ uint64_t opcnt64; /* 6to4 of packets translated */
+ uint64_t opcnt46; /* 4to6 of packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4;
+ uint64_t noroute6;
+ uint64_t noproto; /* Protocol not supported */
+ uint64_t nomem; /* mbuf allocation filed */
+ uint64_t dropped; /* dropped due to some errors */
+};
+
+struct ipfw_nat64lsn_stats {
+ uint64_t opcnt64; /* 6to4 of packets translated */
+ uint64_t opcnt46; /* 4to6 of packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4;
+ uint64_t noroute6;
+ uint64_t noproto; /* Protocol not supported */
+ uint64_t nomem; /* mbuf allocation filed */
+ uint64_t dropped; /* dropped due to some errors */
+
+ uint64_t nomatch4; /* No addr/port match */
+ uint64_t jcalls; /* Number of job handler calls */
+ uint64_t jrequests; /* Number of job requests */
+ uint64_t jhostsreq; /* Number of job host requests */
+ uint64_t jportreq; /* Number of portgroup requests */
+ uint64_t jhostfails; /* Number of failed host allocs */
+ uint64_t jportfails; /* Number of failed portgroup allocs */
+ uint64_t jreinjected; /* Number of packets reinjected to q */
+ uint64_t jmaxlen; /* Max queue length reached */
+ uint64_t jnomem; /* No memory to alloc queue item */
+
+ uint64_t screated; /* Number of states created */
+ uint64_t sdeleted; /* Number of states deleted */
+ uint64_t spgcreated; /* Number of portgroups created */
+ uint64_t spgdeleted; /* Number of portgroups deleted */
+ uint64_t hostcount; /* Number of hosts */
+ uint64_t tcpchunks; /* Number of TCP chunks */
+ uint64_t udpchunks; /* Number of UDP chunks */
+ uint64_t icmpchunks; /* Number of ICMP chunks */
+
+ uint64_t _reserved[4];
+};
+
+#define NAT64_LOG 0x0001 /* Enable logging via BPF */
+
+typedef struct _ipfw_nat64stl_cfg {
+ char name[64]; /* NAT name */
+ ipfw_obj_ntlv ntlv6; /* object name tlv */
+ ipfw_obj_ntlv ntlv4; /* object name tlv */
+ struct in6_addr prefix6; /* NAT64 prefix */
+ uint8_t plen6; /* Prefix length */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare[2];
+ uint32_t flags;
+} ipfw_nat64stl_cfg;
+
+/*
+ * NAT64LSN default configuration values
+ */
+#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */
+#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */
+#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */
+#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */
+#define NAT64LSN_TCP_FIN_AGE 180 /* State's TTL after FIN/RST received */
+#define NAT64LSN_UDP_AGE 120 /* TTL for UDP states */
+#define NAT64LSN_ICMP_AGE 60 /* TTL for ICMP states */
+#define NAT64LSN_HOST_AGE 3600 /* TTL for stale host entry */
+#define NAT64LSN_PG_AGE 900 /* TTL for stale ports groups */
+
+typedef struct _ipfw_nat64lsn_cfg {
+ char name[64]; /* NAT name */
+ uint32_t flags;
+ uint32_t max_ports; /* Max ports per client */
+ uint32_t agg_prefix_len; /* Prefix length to count */
+ uint32_t agg_prefix_max; /* Max hosts per agg prefix */
+ struct in_addr prefix4;
+ uint16_t plen4; /* Prefix length */
+ uint16_t plen6; /* Prefix length */
+ struct in6_addr prefix6; /* NAT64 prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint16_t min_port; /* Min port group # to use */
+ uint16_t max_port; /* Max port group # to use */
+ uint16_t nh_delete_delay;/* Stale host delete delay */
+ uint16_t pg_delete_delay;/* Stale portgroup delete delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare;
+} ipfw_nat64lsn_cfg;
+
+typedef struct _ipfw_nat64lsn_state {
+ struct in_addr daddr; /* Remote IPv4 address */
+ uint16_t dport; /* Remote destination port */
+ uint16_t aport; /* Local alias port */
+ uint16_t sport; /* Source port */
+ uint8_t flags; /* State flags */
+ uint8_t spare[3];
+ uint16_t idle; /* Last used time */
+} ipfw_nat64lsn_state;
+
+typedef struct _ipfw_nat64lsn_stg {
+ uint64_t next_idx; /* next state index */
+ struct in_addr alias4; /* IPv4 alias address */
+ uint8_t proto; /* protocol */
+ uint8_t flags;
+ uint16_t spare;
+ struct in6_addr host6; /* Bound IPv6 host */
+ uint32_t count; /* Number of states */
+ uint32_t spare2;
+} ipfw_nat64lsn_stg;
+
+#endif /* _NETINET6_IP_FW_NAT64_H_ */
+
diff --git a/freebsd/sys/netinet6/ip_fw_nptv6.h b/freebsd/sys/netinet6/ip_fw_nptv6.h
new file mode 100644
index 00000000..e2357eff
--- /dev/null
+++ b/freebsd/sys/netinet6/ip_fw_nptv6.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2016 Yandex LLC
+ * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IP_FW_NPTV6_H_
+#define _NETINET6_IP_FW_NPTV6_H_
+
+struct ipfw_nptv6_stats {
+ uint64_t in2ex; /* Int->Ext packets translated */
+ uint64_t ex2in; /* Ext->Int packets translated */
+ uint64_t dropped; /* dropped due to some errors */
+ uint64_t reserved[5];
+};
+
+typedef struct _ipfw_nptv6_cfg {
+ char name[64]; /* NPTv6 instance name */
+ struct in6_addr internal; /* NPTv6 internal prefix */
+ struct in6_addr external; /* NPTv6 external prefix */
+ uint8_t plen; /* Prefix length */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare[2];
+ uint32_t flags;
+} ipfw_nptv6_cfg;
+
+#endif /* _NETINET6_IP_FW_NPTV6_H_ */
+
diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c
index 25f03411..26efa852 100644
--- a/freebsd/sys/netinet6/mld6.c
+++ b/freebsd/sys/netinet6/mld6.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ktr.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -103,49 +104,49 @@ __FBSDID("$FreeBSD$");
#define KTR_MLD KTR_INET6
#endif
-static struct mld_ifinfo *
+static struct mld_ifsoftc *
mli_alloc_locked(struct ifnet *);
static void mli_delete_locked(const struct ifnet *);
static void mld_dispatch_packet(struct mbuf *);
-static void mld_dispatch_queue(struct ifqueue *, int);
-static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *);
+static void mld_dispatch_queue(struct mbufq *, int);
+static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *);
static void mld_fasttimo_vnet(void);
static int mld_handle_state_change(struct in6_multi *,
- struct mld_ifinfo *);
-static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
+ struct mld_ifsoftc *);
+static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *,
const int);
#ifdef KTR
static char * mld_rec_type_to_str(const int);
#endif
-static void mld_set_version(struct mld_ifinfo *, const int);
+static void mld_set_version(struct mld_ifsoftc *, const int);
static void mld_slowtimo_vnet(void);
static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
/*const*/ struct mld_hdr *);
static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
/*const*/ struct mld_hdr *);
-static void mld_v1_process_group_timer(struct mld_ifinfo *,
+static void mld_v1_process_group_timer(struct mld_ifsoftc *,
struct in6_multi *);
-static void mld_v1_process_querier_timers(struct mld_ifinfo *);
+static void mld_v1_process_querier_timers(struct mld_ifsoftc *);
static int mld_v1_transmit_report(struct in6_multi *, const int);
static void mld_v1_update_group(struct in6_multi *, const int);
-static void mld_v2_cancel_link_timers(struct mld_ifinfo *);
-static void mld_v2_dispatch_general_query(struct mld_ifinfo *);
+static void mld_v2_cancel_link_timers(struct mld_ifsoftc *);
+static void mld_v2_dispatch_general_query(struct mld_ifsoftc *);
static struct mbuf *
mld_v2_encap_report(struct ifnet *, struct mbuf *);
-static int mld_v2_enqueue_filter_change(struct ifqueue *,
+static int mld_v2_enqueue_filter_change(struct mbufq *,
struct in6_multi *);
-static int mld_v2_enqueue_group_record(struct ifqueue *,
+static int mld_v2_enqueue_group_record(struct mbufq *,
struct in6_multi *, const int, const int, const int,
const int);
static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
struct mbuf *, const int, const int);
static int mld_v2_merge_state_changes(struct in6_multi *,
- struct ifqueue *);
-static void mld_v2_process_group_timers(struct mld_ifinfo *,
- struct ifqueue *, struct ifqueue *,
+ struct mbufq *);
+static void mld_v2_process_group_timers(struct mld_ifsoftc *,
+ struct mbufq *, struct mbufq *,
struct in6_multi *, const int);
static int mld_v2_process_group_query(struct in6_multi *,
- struct mld_ifinfo *mli, int, struct mbuf *, const int);
+ struct mld_ifsoftc *mli, int, struct mbuf *, const int);
static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);
@@ -207,7 +208,7 @@ static MALLOC_DEFINE(M_MLD, "mld", "mld state");
* VIMAGE-wide globals.
*/
static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0};
-static VNET_DEFINE(LIST_HEAD(, mld_ifinfo), mli_head);
+static VNET_DEFINE(LIST_HEAD(, mld_ifsoftc), mli_head);
static VNET_DEFINE(int, interface_timers_running6);
static VNET_DEFINE(int, state_change_timers_running6);
static VNET_DEFINE(int, current_state_timers_running6);
@@ -226,8 +227,8 @@ SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW, 0,
/*
* Virtualized sysctls.
*/
-SYSCTL_VNET_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
"Rate limit for MLDv2 Group-and-Source queries in seconds");
@@ -239,14 +240,12 @@ static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
"Per-interface MLDv2 state");
static int mld_v1enable = 1;
-SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW,
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
&mld_v1enable, 0, "Enable fallback to MLDv1");
-TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable);
static int mld_use_allow = 1;
-SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW,
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
&mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
-TUNABLE_INT("net.inet6.mld.use_allow", &mld_use_allow);
/*
* Packed Router Alert option structure declaration.
@@ -277,7 +276,7 @@ mld_save_context(struct mbuf *m, struct ifnet *ifp)
{
#ifdef VIMAGE
- m->m_pkthdr.header = ifp->if_vnet;
+ m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
m->m_pkthdr.flowid = ifp->if_index;
}
@@ -286,7 +285,7 @@ static __inline void
mld_scrub_context(struct mbuf *m)
{
- m->m_pkthdr.header = NULL;
+ m->m_pkthdr.PH_loc.ptr = NULL;
m->m_pkthdr.flowid = 0;
}
@@ -302,8 +301,9 @@ mld_restore_context(struct mbuf *m)
{
#if defined(VIMAGE) && defined(INVARIANTS)
- KASSERT(curvnet == m->m_pkthdr.header,
- ("%s: called when curvnet was not restored", __func__));
+ KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
+ ("%s: called when curvnet was not restored: cuvnet %p m ptr %p",
+ __func__, curvnet, m->m_pkthdr.PH_loc.ptr));
#endif
return (m->m_pkthdr.flowid);
}
@@ -347,7 +347,7 @@ out_locked:
}
/*
- * Expose struct mld_ifinfo to userland, keyed by ifindex.
+ * Expose struct mld_ifsoftc to userland, keyed by ifindex.
* For use by ifmcstat(8).
*
* SMPng: NOTE: Does an unlocked ifindex space read.
@@ -361,7 +361,7 @@ sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
int error;
u_int namelen;
struct ifnet *ifp;
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
name = (int *)arg1;
namelen = arg2;
@@ -392,8 +392,17 @@ sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
LIST_FOREACH(mli, &V_mli_head, mli_link) {
if (ifp == mli->mli_ifp) {
- error = SYSCTL_OUT(req, mli,
- sizeof(struct mld_ifinfo));
+ struct mld_ifinfo info;
+
+ info.mli_version = mli->mli_version;
+ info.mli_v1_timer = mli->mli_v1_timer;
+ info.mli_v2_timer = mli->mli_v2_timer;
+ info.mli_flags = mli->mli_flags;
+ info.mli_rv = mli->mli_rv;
+ info.mli_qi = mli->mli_qi;
+ info.mli_qri = mli->mli_qri;
+ info.mli_uri = mli->mli_uri;
+ error = SYSCTL_OUT(req, &info, sizeof(info));
break;
}
}
@@ -409,15 +418,12 @@ out_locked:
* VIMAGE: Assumes the vnet pointer has been set.
*/
static void
-mld_dispatch_queue(struct ifqueue *ifq, int limit)
+mld_dispatch_queue(struct mbufq *mq, int limit)
{
struct mbuf *m;
- for (;;) {
- _IF_DEQUEUE(ifq, m);
- if (m == NULL)
- break;
- CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, ifq, m);
+ while ((m = mbufq_dequeue(mq)) != NULL) {
+ CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, mq, m);
mld_dispatch_packet(m);
if (--limit == 0)
break;
@@ -460,13 +466,13 @@ mld_is_addr_reported(const struct in6_addr *addr)
*
* SMPng: Normally called with IF_AFDATA_LOCK held.
*/
-struct mld_ifinfo *
+struct mld_ifsoftc *
mld_domifattach(struct ifnet *ifp)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
- __func__, ifp, ifp->if_xname);
+ __func__, ifp, if_name(ifp));
MLD_LOCK();
@@ -484,14 +490,14 @@ mld_domifattach(struct ifnet *ifp)
/*
* VIMAGE: assume curvnet set by caller.
*/
-static struct mld_ifinfo *
+static struct mld_ifsoftc *
mli_alloc_locked(/*const*/ struct ifnet *ifp)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
MLD_LOCK_ASSERT();
- mli = malloc(sizeof(struct mld_ifinfo), M_MLD, M_NOWAIT|M_ZERO);
+ mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_NOWAIT|M_ZERO);
if (mli == NULL)
goto out;
@@ -502,18 +508,13 @@ mli_alloc_locked(/*const*/ struct ifnet *ifp)
mli->mli_qi = MLD_QI_INIT;
mli->mli_qri = MLD_QRI_INIT;
mli->mli_uri = MLD_URI_INIT;
-
SLIST_INIT(&mli->mli_relinmhead);
-
- /*
- * Responses to general queries are subject to bounds.
- */
- IFQ_SET_MAXLEN(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
+ mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
LIST_INSERT_HEAD(&V_mli_head, mli, mli_link);
- CTR2(KTR_MLD, "allocate mld_ifinfo for ifp %p(%s)",
- ifp, ifp->if_xname);
+ CTR2(KTR_MLD, "allocate mld_ifsoftc for ifp %p(%s)",
+ ifp, if_name(ifp));
out:
return (mli);
@@ -533,12 +534,12 @@ out:
void
mld_ifdetach(struct ifnet *ifp)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct ifmultiaddr *ifma;
struct in6_multi *inm, *tinm;
CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
- ifp->if_xname);
+ if_name(ifp));
IN6_MULTI_LOCK_ASSERT();
MLD_LOCK();
@@ -579,7 +580,7 @@ mld_domifdetach(struct ifnet *ifp)
{
CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
- __func__, ifp, ifp->if_xname);
+ __func__, ifp, if_name(ifp));
MLD_LOCK();
mli_delete_locked(ifp);
@@ -589,10 +590,10 @@ mld_domifdetach(struct ifnet *ifp)
static void
mli_delete_locked(const struct ifnet *ifp)
{
- struct mld_ifinfo *mli, *tmli;
+ struct mld_ifsoftc *mli, *tmli;
- CTR3(KTR_MLD, "%s: freeing mld_ifinfo for ifp %p(%s)",
- __func__, ifp, ifp->if_xname);
+ CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)",
+ __func__, ifp, if_name(ifp));
MLD_LOCK_ASSERT();
@@ -601,7 +602,7 @@ mli_delete_locked(const struct ifnet *ifp)
/*
* Free deferred General Query responses.
*/
- _IF_DRAIN(&mli->mli_gq);
+ mbufq_drain(&mli->mli_gq);
LIST_REMOVE(mli, mli_link);
@@ -613,9 +614,6 @@ mli_delete_locked(const struct ifnet *ifp)
return;
}
}
-#ifdef INVARIANTS
- panic("%s: mld_ifinfo not found for ifp %p\n", __func__, ifp);
-#endif
}
/*
@@ -630,7 +628,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
/*const*/ struct mld_hdr *mld)
{
struct ifmultiaddr *ifma;
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct in6_multi *inm;
int is_general_query;
uint16_t timer;
@@ -643,7 +641,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!mld_v1enable) {
CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
@@ -654,7 +652,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
@@ -689,7 +687,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* Switch to MLDv1 host compatibility mode.
*/
mli = MLD_IFINFO(ifp);
- KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
mld_set_version(mli, MLD_VERSION_1);
timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE;
@@ -703,7 +701,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* interface, kick the report timer.
*/
CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET6 ||
ifma->ifma_protospec == NULL)
@@ -721,7 +719,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (inm != NULL) {
CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
mld_v1_update_group(inm, timer);
}
/* XXX Clear embedded scope ID as userland won't expect it. */
@@ -759,7 +757,7 @@ mld_v1_update_group(struct in6_multi *inm, const int timer)
CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname, timer);
+ if_name(inm->in6m_ifp), timer);
IN6_MULTI_LOCK_ASSERT();
@@ -806,7 +804,7 @@ static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
struct mbuf *m, const int off, const int icmp6len)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct mldv2_query *mld;
struct in6_multi *inm;
uint32_t maxdelay, nsrc, qqi;
@@ -826,11 +824,11 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
- CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname);
+ CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));
mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
@@ -888,7 +886,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
MLD_LOCK();
mli = MLD_IFINFO(ifp);
- KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
/*
* Discard the v2 query if we're in Compatibility Mode.
@@ -919,7 +917,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
* Otherwise, reset the interface timer.
*/
CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
V_interface_timers_running6 = 1;
@@ -949,7 +947,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
}
}
CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
/*
* If there is a pending General Query response
* scheduled sooner than the selected delay, no
@@ -973,12 +971,12 @@ out_locked:
}
/*
- * Process a recieved MLDv2 group-specific or group-and-source-specific
+ * Process a received MLDv2 group-specific or group-and-source-specific
* query.
- * Return <0 if any error occured. Currently this is ignored.
+ * Return <0 if any error occurred. Currently this is ignored.
*/
static int
-mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifinfo *mli,
+mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
int timer, struct mbuf *m0, const int off)
{
struct mldv2_query *mld;
@@ -1106,7 +1104,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!mld_v1enable) {
CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (0);
}
@@ -1122,7 +1120,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_src),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (EINVAL);
}
@@ -1136,7 +1134,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
!IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &ip6->ip6_dst),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
return (EINVAL);
}
@@ -1161,7 +1159,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
ifa_free(&ia->ia_ifa);
CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)",
- ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, ifp->if_xname);
+ ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp));
/*
* Embed scope ID of receiving interface in MLD query for lookup
@@ -1182,7 +1180,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
*/
inm = in6m_lookup_locked(ifp, &mld->mld_addr);
if (inm != NULL) {
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
mli = inm->in6m_mli;
KASSERT(mli != NULL,
@@ -1208,7 +1206,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
CTR3(KTR_MLD,
"report suppressed for %s on ifp %p(%s)",
ip6_sprintf(ip6tbuf, &mld->mld_addr),
- ifp, ifp->if_xname);
+ ifp, if_name(ifp));
case MLD_LAZY_MEMBER:
inm->in6m_state = MLD_LAZY_MEMBER;
break;
@@ -1329,10 +1327,10 @@ mld_fasttimo(void)
static void
mld_fasttimo_vnet(void)
{
- struct ifqueue scq; /* State-change packets */
- struct ifqueue qrq; /* Query response packets */
+ struct mbufq scq; /* State-change packets */
+ struct mbufq qrq; /* Query response packets */
struct ifnet *ifp;
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct ifmultiaddr *ifma;
struct in6_multi *inm, *tinm;
int uri_fasthz;
@@ -1389,12 +1387,8 @@ mld_fasttimo_vnet(void)
if (mli->mli_version == MLD_VERSION_2) {
uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri *
PR_FASTHZ);
-
- memset(&qrq, 0, sizeof(struct ifqueue));
- IFQ_SET_MAXLEN(&qrq, MLD_MAX_G_GS_PACKETS);
-
- memset(&scq, 0, sizeof(struct ifqueue));
- IFQ_SET_MAXLEN(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
+ mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS);
+ mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
}
IF_ADDR_RLOCK(ifp);
@@ -1461,7 +1455,7 @@ out_locked:
* Will update the global pending timer flags.
*/
static void
-mld_v1_process_group_timer(struct mld_ifinfo *mli, struct in6_multi *inm)
+mld_v1_process_group_timer(struct mld_ifsoftc *mli, struct in6_multi *inm)
{
int report_timer_expired;
@@ -1505,8 +1499,8 @@ mld_v1_process_group_timer(struct mld_ifinfo *mli, struct in6_multi *inm)
* Note: Unlocked read from mli.
*/
static void
-mld_v2_process_group_timers(struct mld_ifinfo *mli,
- struct ifqueue *qrq, struct ifqueue *scq,
+mld_v2_process_group_timers(struct mld_ifsoftc *mli,
+ struct mbufq *qrq, struct mbufq *scq,
struct in6_multi *inm, const int uri_fasthz)
{
int query_response_timer_expired;
@@ -1601,7 +1595,7 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli,
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
/*
* If we are leaving the group for good, make sure
@@ -1626,14 +1620,14 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli,
* as per Section 9.12.
*/
static void
-mld_set_version(struct mld_ifinfo *mli, const int version)
+mld_set_version(struct mld_ifsoftc *mli, const int version)
{
int old_version_timer;
MLD_LOCK_ASSERT();
CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__,
- version, mli->mli_ifp, mli->mli_ifp->if_xname);
+ version, mli->mli_ifp, if_name(mli->mli_ifp));
if (version == MLD_VERSION_1) {
/*
@@ -1656,14 +1650,14 @@ mld_set_version(struct mld_ifinfo *mli, const int version)
* joined on it; state-change, general-query, and group-query timers.
*/
static void
-mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
+mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
{
struct ifmultiaddr *ifma;
struct ifnet *ifp;
struct in6_multi *inm, *tinm;
CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
- mli->mli_ifp, mli->mli_ifp->if_xname);
+ mli->mli_ifp, if_name(mli->mli_ifp));
IN6_MULTI_LOCK_ASSERT();
MLD_LOCK_ASSERT();
@@ -1714,7 +1708,7 @@ mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
/*
* Free any pending MLDv2 state-change records.
*/
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
break;
}
}
@@ -1749,7 +1743,7 @@ mld_slowtimo(void)
static void
mld_slowtimo_vnet(void)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
MLD_LOCK();
@@ -1765,7 +1759,7 @@ mld_slowtimo_vnet(void)
* See Section 9.12 of RFC 3810.
*/
static void
-mld_v1_process_querier_timers(struct mld_ifinfo *mli)
+mld_v1_process_querier_timers(struct mld_ifsoftc *mli)
{
MLD_LOCK_ASSERT();
@@ -1777,7 +1771,7 @@ mld_v1_process_querier_timers(struct mld_ifinfo *mli)
CTR5(KTR_MLD,
"%s: transition from v%d -> v%d on %p(%s)",
__func__, mli->mli_version, MLD_VERSION_2,
- mli->mli_ifp, mli->mli_ifp->if_xname);
+ mli->mli_ifp, if_name(mli->mli_ifp));
mli->mli_version = MLD_VERSION_2;
}
}
@@ -1801,13 +1795,13 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
/* ia may be NULL if link-local address is tentative. */
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL) {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (ENOMEM);
}
- MGET(md, M_DONTWAIT, MT_DATA);
+ md = m_get(M_NOWAIT, MT_DATA);
if (md == NULL) {
m_free(mh);
if (ia != NULL)
@@ -1821,7 +1815,7 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
* that ether_output() does not need to allocate another mbuf
* for the header in the most common case.
*/
- MH_ALIGN(mh, sizeof(struct ip6_hdr));
+ M_ALIGN(mh, sizeof(struct ip6_hdr));
mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
mh->m_len = sizeof(struct ip6_hdr);
@@ -1881,7 +1875,7 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type)
int
mld_change_state(struct in6_multi *inm, const int delay)
{
- struct mld_ifinfo *mli;
+ struct mld_ifsoftc *mli;
struct ifnet *ifp;
int error;
@@ -1906,7 +1900,7 @@ mld_change_state(struct in6_multi *inm, const int delay)
MLD_LOCK();
mli = MLD_IFINFO(ifp);
- KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp));
+ KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
/*
* If we detect a state transition to or from MCAST_UNDEFINED
@@ -1949,11 +1943,11 @@ out_locked:
* initial state change for delay ticks (in units of PR_FASTHZ).
*/
static int
-mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
+mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli,
const int delay)
{
struct ifnet *ifp;
- struct ifqueue *ifq;
+ struct mbufq *mq;
int error, retval, syncstates;
int odelay;
#ifdef KTR
@@ -1962,7 +1956,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp, inm->in6m_ifp->if_xname);
+ inm->in6m_ifp, if_name(inm->in6m_ifp));
error = 0;
syncstates = 1;
@@ -2040,9 +2034,9 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
* Don't kick the timers if there is nothing to do,
* or if an error occurred.
*/
- ifq = &inm->in6m_scq;
- _IF_DRAIN(ifq);
- retval = mld_v2_enqueue_group_record(ifq, inm, 1,
+ mq = &inm->in6m_scq;
+ mbufq_drain(mq);
+ retval = mld_v2_enqueue_group_record(mq, inm, 1,
0, 0, (mli->mli_flags & MLIF_USEALLOW));
CTR2(KTR_MLD, "%s: enqueue record = %d",
__func__, retval);
@@ -2088,7 +2082,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
}
return (error);
@@ -2098,7 +2092,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
* Issue an intermediate state change during the life-cycle.
*/
static int
-mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
+mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli)
{
struct ifnet *ifp;
int retval;
@@ -2108,7 +2102,7 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp, inm->in6m_ifp->if_xname);
+ inm->in6m_ifp, if_name(inm->in6m_ifp));
ifp = inm->in6m_ifp;
@@ -2130,11 +2124,11 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
return (0);
}
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
(mli->mli_flags & MLIF_USEALLOW));
@@ -2162,7 +2156,7 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
* to INCLUDE {} for immediate transmission.
*/
static void
-mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
+mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli)
{
int syncstates;
#ifdef KTR
@@ -2173,7 +2167,7 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp, inm->in6m_ifp->if_xname);
+ inm->in6m_ifp, if_name(inm->in6m_ifp));
IN6_MULTI_LOCK_ASSERT();
MLD_LOCK_ASSERT();
@@ -2207,13 +2201,13 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
* TO_IN {} to be sent on the next fast timeout,
* giving us an opportunity to merge reports.
*/
- _IF_DRAIN(&inm->in6m_scq);
+ mbufq_drain(&inm->in6m_scq);
inm->in6m_timer = 0;
inm->in6m_scrv = mli->mli_rv;
CTR4(KTR_MLD, "%s: Leaving %s/%s with %d "
"pending retransmissions.", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname, inm->in6m_scrv);
+ if_name(inm->in6m_ifp), inm->in6m_scrv);
if (inm->in6m_scrv == 0) {
inm->in6m_state = MLD_NOT_MEMBER;
inm->in6m_sctimer = 0;
@@ -2248,10 +2242,10 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
in6m_commit(inm);
CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s",
- __func__, &inm->in6m_addr, inm->in6m_ifp->if_xname);
+ __func__, &inm->in6m_addr, if_name(inm->in6m_ifp));
}
}
@@ -2283,7 +2277,7 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
* no record(s) were appended.
*/
static int
-mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
+mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm,
const int is_state_change, const int is_group_query,
const int is_source_query, const int use_block_allow)
{
@@ -2398,12 +2392,12 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
* Generate the filter list changes using a separate function.
*/
if (is_filter_list_change)
- return (mld_v2_enqueue_filter_change(ifq, inm));
+ return (mld_v2_enqueue_filter_change(mq, inm));
if (type == MLD_DO_NOTHING) {
CTR3(KTR_MLD, "%s: nothing to do for %s/%s",
__func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
return (0);
}
@@ -2419,7 +2413,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__,
mld_rec_type_to_str(type),
ip6_sprintf(ip6tbuf, &inm->in6m_addr),
- inm->in6m_ifp->if_xname);
+ if_name(inm->in6m_ifp));
/*
* Check if we have a packet in the tail of the queue for this
@@ -2429,7 +2423,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
* Note: Group records for G/GSR query responses MUST be sent
* in their own packet.
*/
- m0 = ifq->ifq_tail;
+ m0 = mbufq_last(mq);
if (!is_group_query &&
m0 != NULL &&
(m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
@@ -2441,7 +2435,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
m = m0;
CTR1(KTR_MLD, "%s: use existing packet", __func__);
} else {
- if (_IF_QFULL(ifq)) {
+ if (mbufq_full(mq)) {
CTR1(KTR_MLD, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
@@ -2449,9 +2443,9 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
if (!is_state_change && !is_group_query)
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (-ENOMEM);
@@ -2554,7 +2548,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
if (m != m0) {
CTR1(KTR_MLD, "%s: enqueueing first packet", __func__);
m->m_pkthdr.PH_vt.vt_nrecs = 1;
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
} else
m->m_pkthdr.PH_vt.vt_nrecs++;
@@ -2570,13 +2564,13 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
* Always try for a cluster first.
*/
while (nims != NULL) {
- if (_IF_QFULL(ifq)) {
+ if (mbufq_full(mq)) {
CTR1(KTR_MLD, "%s: outbound queue full", __func__);
return (-ENOMEM);
}
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return (-ENOMEM);
mld_save_context(m, ifp);
@@ -2629,7 +2623,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
nbytes += (msrcs * sizeof(struct in6_addr));
CTR1(KTR_MLD, "%s: enqueueing next packet", __func__);
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
}
return (nbytes);
@@ -2669,7 +2663,7 @@ typedef enum {
* no record(s) were appended.
*/
static int
-mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
+mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm)
{
static const int MINRECLEN =
sizeof(struct mldv2_record) + sizeof(struct in6_addr);
@@ -2715,7 +2709,7 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
*/
while (drt != REC_FULL) {
do {
- m0 = ifq->ifq_tail;
+ m0 = mbufq_last(mq);
if (m0 != NULL &&
(m0->m_pkthdr.PH_vt.vt_nrecs + 1 <=
MLD_V2_REPORT_MAXRECS) &&
@@ -2728,9 +2722,9 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
CTR1(KTR_MLD,
"%s: use previous packet", __func__);
} else {
- m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
- m = m_gethdr(M_DONTWAIT, MT_DATA);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
CTR1(KTR_MLD,
"%s: m_get*() failed", __func__);
@@ -2859,7 +2853,7 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
*/
m->m_pkthdr.PH_vt.vt_nrecs++;
if (m != m0)
- _IF_ENQUEUE(ifq, m);
+ mbufq_enqueue(mq, m);
nbytes += npbytes;
} while (nims != NULL);
drt |= crt;
@@ -2873,9 +2867,9 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
}
static int
-mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
+mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq)
{
- struct ifqueue *gq;
+ struct mbufq *gq;
struct mbuf *m; /* pending state-change */
struct mbuf *m0; /* copy of pending state-change */
struct mbuf *mt; /* last state-change in packet */
@@ -2898,13 +2892,13 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
gq = &inm->in6m_scq;
#ifdef KTR
- if (gq->ifq_head == NULL) {
+ if (mbufq_first(gq) == NULL) {
CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
__func__, inm);
}
#endif
- m = gq->ifq_head;
+ m = mbufq_first(gq);
while (m != NULL) {
/*
* Only merge the report into the current packet if
@@ -2915,7 +2909,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
* allocated clusters.
*/
domerge = 0;
- mt = ifscq->ifq_tail;
+ mt = mbufq_last(scq);
if (mt != NULL) {
recslen = m_length(m, NULL);
@@ -2927,7 +2921,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
domerge = 1;
}
- if (!domerge && _IF_QFULL(gq)) {
+ if (!domerge && mbufq_full(gq)) {
CTR2(KTR_MLD,
"%s: outbound queue full, skipping whole packet %p",
__func__, m);
@@ -2940,7 +2934,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
if (!docopy) {
CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
- _IF_DEQUEUE(gq, m0);
+ m0 = mbufq_dequeue(gq);
m = m0->m_nextpkt;
} else {
CTR2(KTR_MLD, "%s: copying %p", __func__, m);
@@ -2952,9 +2946,9 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
}
if (!domerge) {
- CTR3(KTR_MLD, "%s: queueing %p to ifscq %p)",
- __func__, m0, ifscq);
- _IF_ENQUEUE(ifscq, m0);
+ CTR3(KTR_MLD, "%s: queueing %p to scq %p)",
+ __func__, m0, scq);
+ mbufq_enqueue(scq, m0);
} else {
struct mbuf *mtl; /* last mbuf of packet mt */
@@ -2978,7 +2972,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
* Respond to a pending MLDv2 General Query.
*/
static void
-mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
+mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
{
struct ifmultiaddr *ifma;
struct ifnet *ifp;
@@ -2991,6 +2985,15 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
KASSERT(mli->mli_version == MLD_VERSION_2,
("%s: called when version %d", __func__, mli->mli_version));
+ /*
+ * Check that there are some packets queued. If so, send them first.
+ * For large number of groups the reply to general query can take
+ * many packets, we should finish sending them before starting of
+ * queuing the new reply.
+ */
+ if (mbufq_len(&mli->mli_gq) != 0)
+ goto send;
+
ifp = mli->mli_ifp;
IF_ADDR_RLOCK(ifp);
@@ -3026,12 +3029,13 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
}
IF_ADDR_RUNLOCK(ifp);
+send:
mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
/*
* Slew transmission of bursts over 500ms intervals.
*/
- if (mli->mli_gq.ifq_head != NULL) {
+ if (mbufq_first(&mli->mli_gq) != NULL) {
mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
MLD_RESPONSE_BURST_INTERVAL);
V_interface_timers_running6 = 1;
@@ -3100,7 +3104,7 @@ mld_dispatch_packet(struct mbuf *m)
}
mld_scrub_context(m0);
- m->m_flags &= ~(M_PROTOFLAGS);
+ m_clrprotoflags(m);
m0->m_pkthdr.rcvif = V_loif;
ip6 = mtod(m0, struct ip6_hdr *);
@@ -3175,14 +3179,14 @@ mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
if (ia == NULL)
CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__);
- MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+ mh = m_gethdr(M_NOWAIT, MT_DATA);
if (mh == NULL) {
if (ia != NULL)
ifa_free(&ia->ia_ifa);
m_freem(m);
return (NULL);
}
- MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
+ M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
mldreclen = m_length(m, NULL);
CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen);
@@ -3260,7 +3264,7 @@ mld_init(void *unused __unused)
mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
mld_po.ip6po_flags = IP6PO_DONTFRAG;
}
-SYSINIT(mld_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_init, NULL);
+SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL);
static void
mld_uninit(void *unused __unused)
@@ -3269,7 +3273,7 @@ mld_uninit(void *unused __unused)
CTR1(KTR_MLD, "%s: tearing down", __func__);
MLD_LOCK_DESTROY();
}
-SYSUNINIT(mld_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_uninit, NULL);
+SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL);
static void
vnet_mld_init(const void *unused __unused)
@@ -3279,19 +3283,17 @@ vnet_mld_init(const void *unused __unused)
LIST_INIT(&V_mli_head);
}
-VNET_SYSINIT(vnet_mld_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_init,
+VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init,
NULL);
static void
vnet_mld_uninit(const void *unused __unused)
{
+ /* This can happen if we shutdown the network stack. */
CTR1(KTR_MLD, "%s: tearing down", __func__);
-
- KASSERT(LIST_EMPTY(&V_mli_head),
- ("%s: mli list not empty; ifnets not detached?", __func__));
}
-VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_uninit,
+VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit,
NULL);
static int
@@ -3313,4 +3315,4 @@ static moduledata_t mld_mod = {
mld_modevent,
0
};
-DECLARE_MODULE(mld, mld_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY);
diff --git a/freebsd/sys/netinet6/mld6_var.h b/freebsd/sys/netinet6/mld6_var.h
index e62ec236..be7e9035 100644
--- a/freebsd/sys/netinet6/mld6_var.h
+++ b/freebsd/sys/netinet6/mld6_var.h
@@ -35,31 +35,6 @@
* implementation-specific definitions.
*/
-#ifdef _KERNEL
-
-/*
- * Per-link MLD state.
- */
-struct mld_ifinfo {
- LIST_ENTRY(mld_ifinfo) mli_link;
- struct ifnet *mli_ifp; /* interface this instance belongs to */
- uint32_t mli_version; /* MLDv1 Host Compatibility Mode */
- uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */
- uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/
- uint32_t mli_flags; /* MLD per-interface flags */
- uint32_t mli_rv; /* MLDv2 Robustness Variable */
- uint32_t mli_qi; /* MLDv2 Query Interval (s) */
- uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */
- uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */
- SLIST_HEAD(,in6_multi) mli_relinmhead; /* released groups */
- struct ifqueue mli_gq; /* queue of general query responses */
-};
-#define MLIF_SILENT 0x00000001 /* Do not use MLD on this ifp */
-#define MLIF_USEALLOW 0x00000002 /* Use ALLOW/BLOCK for joins/leaves */
-
-#define MLD_RANDOM_DELAY(X) (arc4random() % (X) + 1)
-#define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */
-
/*
* MLD per-group states.
*/
@@ -129,6 +104,44 @@ struct mld_ifinfo {
sizeof(struct icmp6_hdr))
/*
+ * Structure returned by net.inet6.mld.ifinfo.
+ */
+struct mld_ifinfo {
+ uint32_t mli_version; /* MLDv1 Host Compatibility Mode */
+ uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */
+ uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/
+ uint32_t mli_flags; /* MLD per-interface flags */
+#define MLIF_SILENT 0x00000001 /* Do not use MLD on this ifp */
+#define MLIF_USEALLOW 0x00000002 /* Use ALLOW/BLOCK for joins/leaves */
+ uint32_t mli_rv; /* MLDv2 Robustness Variable */
+ uint32_t mli_qi; /* MLDv2 Query Interval (s) */
+ uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */
+ uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */
+};
+
+#ifdef _KERNEL
+/*
+ * Per-link MLD state.
+ */
+struct mld_ifsoftc {
+ LIST_ENTRY(mld_ifsoftc) mli_link;
+ struct ifnet *mli_ifp; /* interface this instance belongs to */
+ uint32_t mli_version; /* MLDv1 Host Compatibility Mode */
+ uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */
+ uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/
+ uint32_t mli_flags; /* MLD per-interface flags */
+ uint32_t mli_rv; /* MLDv2 Robustness Variable */
+ uint32_t mli_qi; /* MLDv2 Query Interval (s) */
+ uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */
+ uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */
+ SLIST_HEAD(,in6_multi) mli_relinmhead; /* released groups */
+ struct mbufq mli_gq; /* queue of general query responses */
+};
+
+#define MLD_RANDOM_DELAY(X) (arc4random() % (X) + 1)
+#define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */
+
+/*
* Subsystem lock macros.
* The MLD lock is only taken with MLD. Currently it is system-wide.
* VIMAGE: The lock could be pushed to per-VIMAGE granularity in future.
@@ -147,7 +160,7 @@ struct mld_ifinfo {
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->mld_ifinfo)
int mld_change_state(struct in6_multi *, const int);
-struct mld_ifinfo *
+struct mld_ifsoftc *
mld_domifattach(struct ifnet *);
void mld_domifdetach(struct ifnet *);
void mld_fasttimo(void);
diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c
index 4369ebac..d1c7036d 100644
--- a/freebsd/sys/netinet6/nd6.c
+++ b/freebsd/sys/netinet6/nd6.c
@@ -52,9 +52,11 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
+#include <sys/sdt.h>
#include <sys/sysctl.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_arc.h>
#include <net/if_dl.h>
#include <net/if_types.h>
@@ -64,8 +66,8 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <net/if_llatbl.h>
-#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
#include <netinet/if_ether.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
@@ -83,7 +85,9 @@ __FBSDID("$FreeBSD$");
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
-#define SIN6(s) ((struct sockaddr_in6 *)s)
+#define SIN6(s) ((const struct sockaddr_in6 *)(s))
+
+MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
/* timer values */
VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */
@@ -111,54 +115,124 @@ VNET_DEFINE(int, nd6_debug) = 1;
VNET_DEFINE(int, nd6_debug) = 0;
#endif
-/* for debugging? */
-#if 0
-static int nd6_inuse, nd6_allocated;
-#endif
+static eventhandler_tag lle_event_eh, iflladdr_event_eh;
VNET_DEFINE(struct nd_drhead, nd_defrouter);
VNET_DEFINE(struct nd_prhead, nd_prefix);
+VNET_DEFINE(struct rwlock, nd6_lock);
VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
-static struct sockaddr_in6 all1_sa;
-
int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
-static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *,
+static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *,
struct ifnet *);
static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
static void nd6_slowtimo(void *);
static int regen_tmpaddr(struct in6_ifaddr *);
-static struct llentry *nd6_free(struct llentry *, int);
+static void nd6_free(struct llentry **, int);
+static void nd6_free_redirect(const struct llentry *);
static void nd6_llinfo_timer(void *);
+static void nd6_llinfo_settimer_locked(struct llentry *, long);
static void clear_llinfo_pqueue(struct llentry *);
+static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
+ const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **);
+static int nd6_need_cache(struct ifnet *);
+
static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch)
VNET_DEFINE(struct callout, nd6_timer_ch);
+#define V_nd6_timer_ch VNET(nd6_timer_ch)
+
+static void
+nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
+{
+ struct rt_addrinfo rtinfo;
+ struct sockaddr_in6 dst;
+ struct sockaddr_dl gw;
+ struct ifnet *ifp;
+ int type;
+
+ LLE_WLOCK_ASSERT(lle);
+
+ if (lltable_get_af(lle->lle_tbl) != AF_INET6)
+ return;
+
+ switch (evt) {
+ case LLENTRY_RESOLVED:
+ type = RTM_ADD;
+ KASSERT(lle->la_flags & LLE_VALID,
+ ("%s: %p resolved but not valid?", __func__, lle));
+ break;
+ case LLENTRY_EXPIRED:
+ type = RTM_DELETE;
+ break;
+ default:
+ return;
+ }
+
+ ifp = lltable_get_ifp(lle->lle_tbl);
+
+ bzero(&dst, sizeof(dst));
+ bzero(&gw, sizeof(gw));
+ bzero(&rtinfo, sizeof(rtinfo));
+ lltable_fill_sa_entry(lle, (struct sockaddr *)&dst);
+ dst.sin6_scope_id = in6_getscopezone(ifp,
+ in6_addrscope(&dst.sin6_addr));
+ gw.sdl_len = sizeof(struct sockaddr_dl);
+ gw.sdl_family = AF_LINK;
+ gw.sdl_alen = ifp->if_addrlen;
+ gw.sdl_index = ifp->if_index;
+ gw.sdl_type = ifp->if_type;
+ if (evt == LLENTRY_RESOLVED)
+ bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
+ rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
+ rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
+ rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
+ rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | (
+ type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB);
+}
+
+/*
+ * A handler for interface link layer address change event.
+ */
+static void
+nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
+{
+
+ lltable_update_ifaddr(LLTABLE6(ifp));
+}
void
nd6_init(void)
{
- int i;
- LIST_INIT(&V_nd_prefix);
+ rw_init(&V_nd6_lock, "nd6");
- all1_sa.sin6_family = AF_INET6;
- all1_sa.sin6_len = sizeof(struct sockaddr_in6);
- for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
- all1_sa.sin6_addr.s6_addr[i] = 0xff;
+ LIST_INIT(&V_nd_prefix);
/* initialization of the default router list */
TAILQ_INIT(&V_nd_defrouter);
- /* start timer */
+ /* Start timers. */
callout_init(&V_nd6_slowtimo_ch, 0);
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, curvnet);
+
+ callout_init(&V_nd6_timer_ch, 0);
+ callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
+
+ nd6_dad_init();
+ if (IS_DEFAULT_VNET(curvnet)) {
+ lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
+ NULL, EVENTHANDLER_PRI_ANY);
+ iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
+ nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
+ }
}
#ifdef VIMAGE
@@ -168,6 +242,11 @@ nd6_destroy()
callout_drain(&V_nd6_slowtimo_ch);
callout_drain(&V_nd6_timer_ch);
+ if (IS_DEFAULT_VNET(curvnet)) {
+ EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+ EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
+ }
+ rw_destroy(&V_nd6_lock);
}
#endif
@@ -176,7 +255,7 @@ nd6_ifattach(struct ifnet *ifp)
{
struct nd_ifinfo *nd;
- nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO);
+ nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
nd->initialized = 1;
nd->chlim = IPV6_DEFHLIM;
@@ -215,8 +294,19 @@ nd6_ifattach(struct ifnet *ifp)
}
void
-nd6_ifdetach(struct nd_ifinfo *nd)
+nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd)
{
+ struct ifaddr *ifa, *next;
+
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+
+ /* stop DAD processing */
+ nd6_dad_stop(ifa);
+ }
+ IF_ADDR_RUNLOCK(ifp);
free(nd, M_IP6NDP);
}
@@ -228,6 +318,8 @@ nd6_ifdetach(struct nd_ifinfo *nd)
void
nd6_setmtu(struct ifnet *ifp)
{
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return;
nd6_setmtu0(ifp, ND_IFINFO(ifp));
}
@@ -372,6 +464,7 @@ nd6_options(union nd_opts *ndopts)
case ND_OPT_TARGET_LINKADDR:
case ND_OPT_MTU:
case ND_OPT_REDIRECTED_HEADER:
+ case ND_OPT_NONCE:
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
nd6log((LOG_INFO,
"duplicated ND6 option found (type=%d)\n",
@@ -401,7 +494,7 @@ nd6_options(union nd_opts *ndopts)
default:
/*
* Unknown options must be silently ignored,
- * to accomodate future extension to the protocol.
+ * to accommodate future extension to the protocol.
*/
nd6log((LOG_DEBUG,
"nd6_options: unsupported option %d - "
@@ -426,7 +519,7 @@ skip1:
/*
* ND6 timer routine to handle ND6 entries
*/
-void
+static void
nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
{
int canceled;
@@ -436,48 +529,257 @@ nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
if (tick < 0) {
ln->la_expire = 0;
ln->ln_ntick = 0;
- canceled = callout_stop(&ln->ln_timer_ch);
+ canceled = callout_stop(&ln->lle_timer);
} else {
- ln->la_expire = time_second + tick / hz;
+ ln->la_expire = time_uptime + tick / hz;
LLE_ADDREF(ln);
if (tick > INT_MAX) {
ln->ln_ntick = tick - INT_MAX;
- canceled = callout_reset(&ln->ln_timer_ch, INT_MAX,
+ canceled = callout_reset(&ln->lle_timer, INT_MAX,
nd6_llinfo_timer, ln);
} else {
ln->ln_ntick = 0;
- canceled = callout_reset(&ln->ln_timer_ch, tick,
+ canceled = callout_reset(&ln->lle_timer, tick,
nd6_llinfo_timer, ln);
}
}
- if (canceled)
+ if (canceled > 0)
LLE_REMREF(ln);
}
-void
-nd6_llinfo_settimer(struct llentry *ln, long tick)
+/*
+ * Gets source address of the first packet in hold queue
+ * and stores it in @src.
+ * Returns pointer to @src (if hold queue is not empty) or NULL.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline struct in6_addr *
+nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src)
{
+ struct ip6_hdr hdr;
+ struct mbuf *m;
- LLE_WLOCK(ln);
- nd6_llinfo_settimer_locked(ln, tick);
- LLE_WUNLOCK(ln);
+ if (ln->la_hold == NULL)
+ return (NULL);
+
+ /*
+ * assume every packet in la_hold has the same IP header
+ */
+ m = ln->la_hold;
+ if (sizeof(hdr) > m->m_len)
+ return (NULL);
+
+ m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr);
+ *src = hdr.ip6_src;
+
+ return (src);
}
-static void
+/*
+ * Checks if we need to switch from STALE state.
+ *
+ * RFC 4861 requires switching from STALE to DELAY state
+ * on first packet matching entry, waiting V_nd6_delay and
+ * transition to PROBE state (if upper layer confirmation was
+ * not received).
+ *
+ * This code performs a bit differently:
+ * On packet hit we don't change state (but desired state
+ * can be guessed by control plane). However, after V_nd6_delay
+ * seconds code will transition to PROBE state (so DELAY state
+ * is kinda skipped in most situations).
+ *
+ * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
+ * we perform the following upon entering STALE state:
+ *
+ * 1) Arm timer to run each V_nd6_delay seconds to make sure that
+ * if packet was transmitted at the start of given interval, we
+ * would be able to switch to PROBE state in V_nd6_delay seconds
+ * as user expects.
+ *
+ * 2) Reschedule timer until original V_nd6_gctimer expires keeping
+ * lle in STALE state (remaining timer value stored in lle_remtime).
+ *
+ * 3) Reschedule timer if packet was transmitted less that V_nd6_delay
+ * seconds ago.
+ *
+ * Returns non-zero value if the entry is still STALE (storing
+ * the next timer interval in @pdelay).
+ *
+ * Returns zero value if original timer expired or we need to switch to
+ * PROBE (store that in @do_switch variable).
+ */
+static int
+nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
+{
+ int nd_delay, nd_gctimer, r_skip_req;
+ time_t lle_hittime;
+ long delay;
+
+ *do_switch = 0;
+ nd_gctimer = V_nd6_gctimer;
+ nd_delay = V_nd6_delay;
+
+ LLE_REQ_LOCK(lle);
+ r_skip_req = lle->r_skip_req;
+ lle_hittime = lle->lle_hittime;
+ LLE_REQ_UNLOCK(lle);
+
+ if (r_skip_req > 0) {
+
+ /*
+ * Nonzero r_skip_req value was set upon entering
+ * STALE state. Since value was not changed, no
+ * packets were passed using this lle. Ask for
+ * timer reschedule and keep STALE state.
+ */
+ delay = (long)(MIN(nd_gctimer, nd_delay));
+ delay *= hz;
+ if (lle->lle_remtime > delay)
+ lle->lle_remtime -= delay;
+ else {
+ delay = lle->lle_remtime;
+ lle->lle_remtime = 0;
+ }
+
+ if (delay == 0) {
+
+ /*
+ * The original ng6_gctime timeout ended,
+ * no more rescheduling.
+ */
+ return (0);
+ }
+
+ *pdelay = delay;
+ return (1);
+ }
+
+ /*
+ * Packet received. Verify timestamp
+ */
+ delay = (long)(time_uptime - lle_hittime);
+ if (delay < nd_delay) {
+
+ /*
+ * V_nd6_delay still not passed since the first
+ * hit in STALE state.
+ * Reshedule timer and return.
+ */
+ *pdelay = (long)(nd_delay - delay) * hz;
+ return (1);
+ }
+
+ /* Request switching to probe */
+ *do_switch = 1;
+ return (0);
+}
+
+
+/*
+ * Switch @lle state to new state optionally arming timers.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+__noinline void
+nd6_llinfo_setstate(struct llentry *lle, int newstate)
+{
+ struct ifnet *ifp;
+ int nd_gctimer, nd_delay;
+ long delay, remtime;
+
+ delay = 0;
+ remtime = 0;
+
+ switch (newstate) {
+ case ND6_LLINFO_INCOMPLETE:
+ ifp = lle->lle_tbl->llt_ifp;
+ delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
+ break;
+ case ND6_LLINFO_REACHABLE:
+ if (!ND6_LLINFO_PERMANENT(lle)) {
+ ifp = lle->lle_tbl->llt_ifp;
+ delay = (long)ND_IFINFO(ifp)->reachable * hz;
+ }
+ break;
+ case ND6_LLINFO_STALE:
+
+ /*
+ * Notify fast path that we want to know if any packet
+ * is transmitted by setting r_skip_req.
+ */
+ LLE_REQ_LOCK(lle);
+ lle->r_skip_req = 1;
+ LLE_REQ_UNLOCK(lle);
+ nd_delay = V_nd6_delay;
+ nd_gctimer = V_nd6_gctimer;
+
+ delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
+ remtime = (long)nd_gctimer * hz - delay;
+ break;
+ case ND6_LLINFO_DELAY:
+ lle->la_asked = 0;
+ delay = (long)V_nd6_delay * hz;
+ break;
+ }
+
+ if (delay > 0)
+ nd6_llinfo_settimer_locked(lle, delay);
+
+ lle->lle_remtime = remtime;
+ lle->ln_state = newstate;
+}
+
+/*
+ * Timer-dependent part of nd state machine.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline void
nd6_llinfo_timer(void *arg)
{
struct llentry *ln;
- struct in6_addr *dst;
+ struct in6_addr *dst, *pdst, *psrc, src;
struct ifnet *ifp;
- struct nd_ifinfo *ndi = NULL;
+ struct nd_ifinfo *ndi;
+ int do_switch, send_ns;
+ long delay;
KASSERT(arg != NULL, ("%s: arg NULL", __func__));
ln = (struct llentry *)arg;
- LLE_WLOCK_ASSERT(ln);
- ifp = ln->lle_tbl->llt_ifp;
-
+ ifp = lltable_get_ifp(ln->lle_tbl);
CURVNET_SET(ifp->if_vnet);
+ ND6_RLOCK();
+ LLE_WLOCK(ln);
+ if (callout_pending(&ln->lle_timer)) {
+ /*
+ * Here we are a bit odd here in the treatment of
+ * active/pending. If the pending bit is set, it got
+ * rescheduled before I ran. The active
+ * bit we ignore, since if it was stopped
+ * in ll_tablefree() and was currently running
+ * it would have return 0 so the code would
+ * not have deleted it since the callout could
+ * not be stopped so we want to go through
+ * with the delete here now. If the callout
+ * was restarted, the pending bit will be back on and
+ * we just want to bail since the callout_reset would
+ * return 1 and our reference would have been removed
+ * by nd6_llinfo_settimer_locked above since canceled
+ * would have been 1.
+ */
+ LLE_WUNLOCK(ln);
+ ND6_RUNLOCK();
+ CURVNET_RESTORE();
+ return;
+ }
+ ndi = ND_IFINFO(ifp);
+ send_ns = 0;
+ dst = &ln->r_l3addr.addr6;
+ pdst = dst;
+
if (ln->ln_ntick > 0) {
if (ln->ln_ntick > INT_MAX) {
ln->ln_ntick -= INT_MAX;
@@ -489,15 +791,12 @@ nd6_llinfo_timer(void *arg)
goto done;
}
- ndi = ND_IFINFO(ifp);
- dst = &L3_ADDR_SIN6(ln)->sin6_addr;
if (ln->la_flags & LLE_STATIC) {
goto done;
}
if (ln->la_flags & LLE_DELETED) {
- (void)nd6_free(ln, 0);
- ln = NULL;
+ nd6_free(&ln, 0);
goto done;
}
@@ -505,10 +804,9 @@ nd6_llinfo_timer(void *arg)
case ND6_LLINFO_INCOMPLETE:
if (ln->la_asked < V_nd6_mmaxtries) {
ln->la_asked++;
- nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, NULL, dst, ln, 0);
- LLE_WLOCK(ln);
+ send_ns = 1;
+ /* Send NS to multicast address */
+ pdst = NULL;
} else {
struct mbuf *m = ln->la_hold;
if (m) {
@@ -523,55 +821,59 @@ nd6_llinfo_timer(void *arg)
ln->la_hold = m0;
clear_llinfo_pqueue(ln);
}
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT);
- (void)nd6_free(ln, 0);
- ln = NULL;
+ nd6_free(&ln, 0);
if (m != NULL)
icmp6_error2(m, ICMP6_DST_UNREACH,
ICMP6_DST_UNREACH_ADDR, 0, ifp);
}
break;
case ND6_LLINFO_REACHABLE:
- if (!ND6_LLINFO_PERMANENT(ln)) {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
- }
+ if (!ND6_LLINFO_PERMANENT(ln))
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
break;
case ND6_LLINFO_STALE:
- /* Garbage Collection(RFC 2461 5.3) */
- if (!ND6_LLINFO_PERMANENT(ln)) {
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
- (void)nd6_free(ln, 1);
- ln = NULL;
+ if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
+
+ /*
+ * No packet has used this entry and GC timeout
+ * has not been passed. Reshedule timer and
+ * return.
+ */
+ nd6_llinfo_settimer_locked(ln, delay);
+ break;
}
- break;
+
+ if (do_switch == 0) {
+
+ /*
+ * GC timer has ended and entry hasn't been used.
+ * Run Garbage collector (RFC 4861, 5.3)
+ */
+ if (!ND6_LLINFO_PERMANENT(ln))
+ nd6_free(&ln, 1);
+ break;
+ }
+
+ /* Entry has been used AND delay timer has ended. */
+
+ /* FALLTHROUGH */
case ND6_LLINFO_DELAY:
if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
/* We need NUD */
ln->la_asked = 1;
- ln->ln_state = ND6_LLINFO_PROBE;
- nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, dst, dst, ln, 0);
- LLE_WLOCK(ln);
- } else {
- ln->ln_state = ND6_LLINFO_STALE; /* XXX */
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
- }
+ nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE);
+ send_ns = 1;
+ } else
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */
break;
case ND6_LLINFO_PROBE:
if (ln->la_asked < V_nd6_umaxtries) {
ln->la_asked++;
- nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, dst, dst, ln, 0);
- LLE_WLOCK(ln);
+ send_ns = 1;
} else {
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
- (void)nd6_free(ln, 0);
- ln = NULL;
+ nd6_free(&ln, 0);
}
break;
default:
@@ -580,6 +882,16 @@ nd6_llinfo_timer(void *arg)
}
done:
if (ln != NULL)
+ ND6_RUNLOCK();
+ if (send_ns != 0) {
+ nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
+ psrc = nd6_llinfo_get_holdsrc(ln, &src);
+ LLE_FREE_LOCKED(ln);
+ ln = NULL;
+ nd6_ns_output(ifp, psrc, pdst, dst, NULL);
+ }
+
+ if (ln != NULL)
LLE_FREE_LOCKED(ln);
CURVNET_RESTORE();
}
@@ -592,19 +904,23 @@ void
nd6_timer(void *arg)
{
CURVNET_SET((struct vnet *) arg);
- int s;
+ struct nd_drhead drq;
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
struct in6_ifaddr *ia6, *nia6;
- callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
- nd6_timer, curvnet);
+ TAILQ_INIT(&drq);
/* expire default router list */
- s = splnet();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
- if (dr->expire && dr->expire < time_second)
- defrtrlist_del(dr);
+ ND6_WLOCK();
+ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
+ if (dr->expire && dr->expire < time_uptime)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
/*
@@ -670,8 +986,31 @@ nd6_timer(void *arg)
goto addrloop;
}
}
+ } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
+ /*
+ * Schedule DAD for a tentative address. This happens
+ * if the interface was down or not running
+ * when the address was configured.
+ */
+ int delay;
+
+ delay = arc4random() %
+ (MAX_RTR_SOLICITATION_DELAY * hz);
+ nd6_dad_start((struct ifaddr *)ia6, delay);
} else {
/*
+ * Check status of the interface. If it is down,
+ * mark the address as tentative for future DAD.
+ */
+ if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
+ (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
+ == 0 ||
+ (ND_IFINFO(ia6->ia_ifp)->flags &
+ ND6_IFF_IFDISABLED) != 0) {
+ ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
+ ia6->ia6_flags |= IN6_IFF_TENTATIVE;
+ }
+ /*
* A new RA might have made a deprecated address
* preferred.
*/
@@ -687,7 +1026,7 @@ nd6_timer(void *arg)
* prefix is not necessary.
*/
if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
- time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
+ time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) {
/*
* address expiration and prefix expiration are
@@ -696,7 +1035,10 @@ nd6_timer(void *arg)
prelist_remove(pr);
}
}
- splx(s);
+
+ callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
+ nd6_timer, curvnet);
+
CURVNET_RESTORE();
}
@@ -748,11 +1090,10 @@ regen_tmpaddr(struct in6_ifaddr *ia6)
* address with the prefix.
*/
if (!IFA6_IS_DEPRECATED(it6))
- public_ifa6 = it6;
-
- if (public_ifa6 != NULL)
- ifa_ref(&public_ifa6->ia_ifa);
+ public_ifa6 = it6;
}
+ if (public_ifa6 != NULL)
+ ifa_ref(&public_ifa6->ia_ifa);
IF_ADDR_RUNLOCK(ifp);
if (public_ifa6 != NULL) {
@@ -772,35 +1113,43 @@ regen_tmpaddr(struct in6_ifaddr *ia6)
}
/*
- * Nuke neighbor cache/prefix/default router management table, right before
- * ifp goes away.
+ * Remove prefix and default router list entries corresponding to ifp. Neighbor
+ * cache entries are freed in in6_domifdetach().
*/
void
nd6_purge(struct ifnet *ifp)
{
+ struct nd_drhead drq;
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
+ TAILQ_INIT(&drq);
+
/*
* Nuke default router list entries toward ifp.
* We defer removal of default router list entries that is installed
* in the routing table, in order to keep additional side effects as
* small as possible.
*/
+ ND6_WLOCK();
TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
if (dr->installed)
continue;
-
if (dr->ifp == ifp)
- defrtrlist_del(dr);
+ defrouter_unlink(dr, &drq);
}
TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
if (!dr->installed)
continue;
-
if (dr->ifp == ifp)
- defrtrlist_del(dr);
+ defrouter_unlink(dr, &drq);
+ }
+ ND6_WUNLOCK();
+
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
/* Nuke prefix list entries toward ifp */
@@ -814,14 +1163,6 @@ nd6_purge(struct ifnet *ifp)
*/
pr->ndpr_refcnt = 0;
- /*
- * Previously, pr->ndpr_addr is removed as well,
- * but I strongly believe we don't have to do it.
- * nd6_purge() is only called from in6_ifdetach(),
- * which removes all the associated interface addresses
- * by itself.
- * (jinmei@kame.net 20010129)
- */
prelist_remove(pr);
}
}
@@ -834,14 +1175,6 @@ nd6_purge(struct ifnet *ifp)
/* Refresh default router list. */
defrouter_select();
}
-
- /* XXXXX
- * We do not nuke the neighbor cache entries here any more
- * because the neighbor cache is kept in if_afdata[AF_INET6].
- * nd6_purge() is invoked by in6_ifdetach() which is called
- * from if_detach() where everything gets purged. So let
- * in6_domifdetach() do the actual L2 table purging work.
- */
}
/*
@@ -849,11 +1182,10 @@ nd6_purge(struct ifnet *ifp)
* Returns the llentry locked
*/
struct llentry *
-nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
+nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
{
struct sockaddr_in6 sin6;
struct llentry *ln;
- int llflags;
bzero(&sin6, sizeof(sin6));
sin6.sin6_len = sizeof(struct sockaddr_in6);
@@ -862,16 +1194,26 @@ nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
IF_AFDATA_LOCK_ASSERT(ifp);
- llflags = 0;
- if (flags & ND6_CREATE)
- llflags |= LLE_CREATE;
- if (flags & ND6_EXCLUSIVE)
- llflags |= LLE_EXCLUSIVE;
-
- ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
- if ((ln != NULL) && (llflags & LLE_CREATE))
+ ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);
+
+ return (ln);
+}
+
+struct llentry *
+nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
+{
+ struct sockaddr_in6 sin6;
+ struct llentry *ln;
+
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = *addr6;
+
+ ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6);
+ if (ln != NULL)
ln->ln_state = ND6_LLINFO_NOSTATE;
-
+
return (ln);
}
@@ -881,10 +1223,14 @@ nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
* to not reenter the routing code from within itself.
*/
static int
-nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
+nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
{
struct nd_prefix *pr;
struct ifaddr *dstaddr;
+ struct rt_addrinfo info;
+ struct sockaddr_in6 rt_key;
+ struct sockaddr *dst6;
+ int fibnum;
/*
* A link-local address is always a neighbor.
@@ -909,6 +1255,13 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
return (0);
}
+ bzero(&rt_key, sizeof(rt_key));
+ bzero(&info, sizeof(info));
+ info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
+
+ /* Always use the default FIB here. XXME - why? */
+ fibnum = RT_DEFAULT_FIB;
+
/*
* If the address matches one of our addresses,
* it should be a neighbor.
@@ -920,12 +1273,13 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
continue;
if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
- struct rtentry *rt;
/* Always use the default FIB here. */
- rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix,
- 0, 0, RT_DEFAULT_FIB);
- if (rt == NULL)
+ dst6 = (struct sockaddr *)&pr->ndpr_prefix;
+
+ /* Restore length field before retrying lookup */
+ rt_key.sin6_len = sizeof(rt_key);
+ if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0)
continue;
/*
* This is the case where multiple interfaces
@@ -938,11 +1292,8 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* differ.
*/
if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
- &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) {
- RTFREE_LOCKED(rt);
+ &rt_key.sin6_addr))
continue;
- }
- RTFREE_LOCKED(rt);
}
if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
@@ -954,7 +1305,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* If the address is assigned on the node of the other side of
* a p2p interface, the address should be a neighbor.
*/
- dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
+ dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS);
if (dstaddr != NULL) {
if (dstaddr->ifa_ifp == ifp) {
ifa_free(dstaddr);
@@ -982,7 +1333,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* XXX: should take care of the destination of a p2p link?
*/
int
-nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
+nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
{
struct llentry *lle;
int rc = 0;
@@ -1009,15 +1360,31 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
* Since the function would cause significant changes in the kernel, DO NOT
* make it global, unless you have a strong reason for the change, and are sure
* that the change is safe.
+ *
+ * Set noinline to be dtrace-friendly
*/
-static struct llentry *
-nd6_free(struct llentry *ln, int gc)
+static __noinline void
+nd6_free(struct llentry **lnp, int gc)
{
- struct llentry *next;
- struct nd_defrouter *dr;
struct ifnet *ifp;
+ struct llentry *ln;
+ struct nd_defrouter *dr;
+
+ ln = *lnp;
+ *lnp = NULL;
LLE_WLOCK_ASSERT(ln);
+ ND6_RLOCK_ASSERT();
+
+ ifp = lltable_get_ifp(ln->lle_tbl);
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
+ dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp);
+ else
+ dr = NULL;
+ ND6_RUNLOCK();
+
+ if ((ln->la_flags & LLE_DELETED) == 0)
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
/*
* we used to have pfctlinput(PRC_HOSTDEAD) here.
@@ -1027,11 +1394,7 @@ nd6_free(struct llentry *ln, int gc)
/* cancel timer */
nd6_llinfo_settimer_locked(ln, -1);
- ifp = ln->lle_tbl->llt_ifp;
-
if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
- dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
-
if (dr != NULL && dr->expire &&
ln->ln_state == ND6_LLINFO_STALE && gc) {
/*
@@ -1046,17 +1409,17 @@ nd6_free(struct llentry *ln, int gc)
* XXX: the check for ln_state would be redundant,
* but we intentionally keep it just in case.
*/
- if (dr->expire > time_second)
+ if (dr->expire > time_uptime)
nd6_llinfo_settimer_locked(ln,
- (dr->expire - time_second) * hz);
+ (dr->expire - time_uptime) * hz);
else
nd6_llinfo_settimer_locked(ln,
(long)V_nd6_gctimer * hz);
- next = LIST_NEXT(ln, lle_next);
LLE_REMREF(ln);
LLE_WUNLOCK(ln);
- return (next);
+ defrouter_rele(dr);
+ return;
}
if (dr) {
@@ -1091,7 +1454,7 @@ nd6_free(struct llentry *ln, int gc)
* is in the Default Router List.
* See a corresponding comment in nd6_na_input().
*/
- rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
+ rt6_flush(&ln->r_l3addr.addr6, ifp);
}
if (dr) {
@@ -1109,83 +1472,66 @@ nd6_free(struct llentry *ln, int gc)
defrouter_select();
}
+ /*
+ * If this entry was added by an on-link redirect, remove the
+ * corresponding host route.
+ */
+ if (ln->la_flags & LLE_REDIRECT)
+ nd6_free_redirect(ln);
+
if (ln->ln_router || dr)
LLE_WLOCK(ln);
}
/*
- * Before deleting the entry, remember the next entry as the
- * return value. We need this because pfxlist_onlink_check() above
- * might have freed other entries (particularly the old next entry) as
- * a side effect (XXX).
- */
- next = LIST_NEXT(ln, lle_next);
-
- /*
* Save to unlock. We still hold an extra reference and will not
* free(9) in llentry_free() if someone else holds one as well.
*/
LLE_WUNLOCK(ln);
IF_AFDATA_LOCK(ifp);
LLE_WLOCK(ln);
-
/* Guard against race with other llentry_free(). */
if (ln->la_flags & LLE_LINKED) {
+ /* Remove callout reference */
LLE_REMREF(ln);
- llentry_free(ln);
- } else
- LLE_FREE_LOCKED(ln);
-
+ lltable_unlink_entry(ln->lle_tbl, ln);
+ }
IF_AFDATA_UNLOCK(ifp);
- return (next);
+ llentry_free(ln);
+ if (dr != NULL)
+ defrouter_rele(dr);
}
-/*
- * Upper-layer reachability hint for Neighbor Unreachability Detection.
- *
- * XXX cost-effective methods?
- */
-void
-nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
+static int
+nd6_isdynrte(const struct rtentry *rt, void *xap)
{
- struct llentry *ln;
- struct ifnet *ifp;
- if ((dst6 == NULL) || (rt == NULL))
- return;
-
- ifp = rt->rt_ifp;
- IF_AFDATA_RLOCK(ifp);
- ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
- IF_AFDATA_RUNLOCK(ifp);
- if (ln == NULL)
- return;
+ if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC))
+ return (1);
- if (ln->ln_state < ND6_LLINFO_REACHABLE)
- goto done;
+ return (0);
+}
+/*
+ * Remove the rtentry for the given llentry,
+ * both of which were installed by a redirect.
+ */
+static void
+nd6_free_redirect(const struct llentry *ln)
+{
+ int fibnum;
+ struct sockaddr_in6 sin6;
+ struct rt_addrinfo info;
- /*
- * if we get upper-layer reachability confirmation many times,
- * it is possible we have false information.
- */
- if (!force) {
- ln->ln_byhint++;
- if (ln->ln_byhint > V_nd6_maxnudhint) {
- goto done;
- }
- }
+ lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6);
+ memset(&info, 0, sizeof(info));
+ info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6;
+ info.rti_filter = nd6_isdynrte;
- ln->ln_state = ND6_LLINFO_REACHABLE;
- if (!ND6_LLINFO_PERMANENT(ln)) {
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
- }
-done:
- LLE_WUNLOCK(ln);
+ for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
+ rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
}
-
/*
* Rejuvenate this function for routing operations related
* processing.
@@ -1197,7 +1543,6 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
struct nd_defrouter *dr;
struct ifnet *ifp;
- RT_LOCK_ASSERT(rt);
gateway = (struct sockaddr_in6 *)rt->rt_gateway;
ifp = rt->rt_ifp;
@@ -1216,12 +1561,13 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
/*
* check for default route
*/
- if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
- &SIN6(rt_key(rt))->sin6_addr)) {
-
+ if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
+ &SIN6(rt_key(rt))->sin6_addr)) {
dr = defrouter_lookup(&gateway->sin6_addr, ifp);
- if (dr != NULL)
+ if (dr != NULL) {
dr->installed = 0;
+ defrouter_rele(dr);
+ }
}
break;
}
@@ -1231,100 +1577,14 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
int
nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
- struct in6_drlist *drl = (struct in6_drlist *)data;
- struct in6_oprlist *oprl = (struct in6_oprlist *)data;
struct in6_ndireq *ndi = (struct in6_ndireq *)data;
struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
- struct nd_defrouter *dr;
- struct nd_prefix *pr;
- int i = 0, error = 0;
- int s;
+ int error = 0;
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ return (EPFNOSUPPORT);
switch (cmd) {
- case SIOCGDRLST_IN6:
- /*
- * obsolete API, use sysctl under net.inet6.icmp6
- */
- bzero(drl, sizeof(*drl));
- s = splnet();
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- if (i >= DRLSTSIZ)
- break;
- drl->defrouter[i].rtaddr = dr->rtaddr;
- in6_clearscope(&drl->defrouter[i].rtaddr);
-
- drl->defrouter[i].flags = dr->flags;
- drl->defrouter[i].rtlifetime = dr->rtlifetime;
- drl->defrouter[i].expire = dr->expire;
- drl->defrouter[i].if_index = dr->ifp->if_index;
- i++;
- }
- splx(s);
- break;
- case SIOCGPRLST_IN6:
- /*
- * obsolete API, use sysctl under net.inet6.icmp6
- *
- * XXX the structure in6_prlist was changed in backward-
- * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6,
- * in6_prlist is used for nd6_sysctl() - fill_prlist().
- */
- /*
- * XXX meaning of fields, especialy "raflags", is very
- * differnet between RA prefix list and RR/static prefix list.
- * how about separating ioctls into two?
- */
- bzero(oprl, sizeof(*oprl));
- s = splnet();
- LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
- struct nd_pfxrouter *pfr;
- int j;
-
- if (i >= PRLSTSIZ)
- break;
- oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
- oprl->prefix[i].raflags = pr->ndpr_raf;
- oprl->prefix[i].prefixlen = pr->ndpr_plen;
- oprl->prefix[i].vltime = pr->ndpr_vltime;
- oprl->prefix[i].pltime = pr->ndpr_pltime;
- oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
- if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
- oprl->prefix[i].expire = 0;
- else {
- time_t maxexpire;
-
- /* XXX: we assume time_t is signed. */
- maxexpire = (-1) &
- ~((time_t)1 <<
- ((sizeof(maxexpire) * 8) - 1));
- if (pr->ndpr_vltime <
- maxexpire - pr->ndpr_lastupdate) {
- oprl->prefix[i].expire =
- pr->ndpr_lastupdate +
- pr->ndpr_vltime;
- } else
- oprl->prefix[i].expire = maxexpire;
- }
-
- j = 0;
- LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
- if (j < DRLSTSIZ) {
-#define RTRADDR oprl->prefix[i].advrtr[j]
- RTRADDR = pfr->router->rtaddr;
- in6_clearscope(&RTRADDR);
-#undef RTRADDR
- }
- j++;
- }
- oprl->prefix[i].advrtrs = j;
- oprl->prefix[i].origin = PR_ORIG_RA;
-
- i++;
- }
- splx(s);
-
- break;
case OSIOCGIFINFO_IN6:
#define ND ndi->ndi
/* XXX: old ndp(8) assumes a positive value for linkmtu. */
@@ -1344,7 +1604,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
case SIOCSIFINFO_IN6:
/*
* used to change host variables from userland.
- * intented for a use on router to reflect RA configurations.
+ * intended for a use on router to reflect RA configurations.
*/
/* 0 means 'unspecified' */
if (ND.linkmtu != 0) {
@@ -1384,22 +1644,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
* do not clear ND6_IFF_IFDISABLED.
* See RFC 4862, Section 5.4.5.
*/
- int duplicated_linklocal = 0;
-
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ia = (struct in6_ifaddr *)ifa;
if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
- IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
- duplicated_linklocal = 1;
+ IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
- }
}
IF_ADDR_RUNLOCK(ifp);
- if (duplicated_linklocal) {
+ if (ifa != NULL) {
+ /* LLA is duplicated. */
ND.flags |= ND6_IFF_IFDISABLED;
log(LOG_ERR, "Cannot enable an interface"
" with a link-local address marked"
@@ -1415,14 +1672,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
/* Mark all IPv6 address as tentative. */
ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
- IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
- continue;
- ia = (struct in6_ifaddr *)ifa;
- ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ if (V_ip6_dad_count > 0 &&
+ (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
+ IF_ADDR_RLOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+ ifa_link) {
+ if (ifa->ifa_addr->sa_family !=
+ AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
+ }
+ IF_ADDR_RUNLOCK(ifp);
}
- IF_ADDR_RUNLOCK(ifp);
}
if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
@@ -1440,20 +1702,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
* address is assigned, and IFF_UP, try to
* assign one.
*/
- int haslinklocal = 0;
-
IF_ADDR_RLOCK(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family != AF_INET6)
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead,
+ ifa_link) {
+ if (ifa->ifa_addr->sa_family !=
+ AF_INET6)
continue;
ia = (struct in6_ifaddr *)ifa;
- if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
- haslinklocal = 1;
+ if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
break;
- }
}
IF_ADDR_RUNLOCK(ifp);
- if (!haslinklocal)
+ if (ifa != NULL)
+ /* No LLA is configured. */
in6_ifattach(ifp, NULL);
}
}
@@ -1471,7 +1732,6 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
/* flush all the prefix advertised by routers */
struct nd_prefix *pr, *next;
- s = splnet();
LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
struct in6_ifaddr *ia, *ia_next;
@@ -1490,21 +1750,28 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
}
prelist_remove(pr);
}
- splx(s);
break;
}
case SIOCSRTRFLUSH_IN6:
{
/* flush all the default routers */
- struct nd_defrouter *dr, *next;
+ struct nd_drhead drq;
+ struct nd_defrouter *dr;
+
+ TAILQ_INIT(&drq);
- s = splnet();
defrouter_reset();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) {
- defrtrlist_del(dr);
+
+ ND6_WLOCK();
+ while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
}
+
defrouter_select();
- splx(s);
break;
}
case SIOCGNBRINFO_IN6:
@@ -1526,7 +1793,11 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
nbi->state = ln->ln_state;
nbi->asked = ln->la_asked;
nbi->isrouter = ln->ln_router;
- nbi->expire = ln->la_expire;
+ if (ln->la_expire == 0)
+ nbi->expire = 0;
+ else
+ nbi->expire = ln->la_expire + ln->lle_remtime / hz +
+ (time_second - time_uptime);
LLE_RUNLOCK(ln);
break;
}
@@ -1540,31 +1811,108 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
}
/*
+ * Calculates new isRouter value based on provided parameters and
+ * returns it.
+ */
+static int
+nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr,
+ int ln_router)
+{
+
+ /*
+ * ICMP6 type dependent behavior.
+ *
+ * NS: clear IsRouter if new entry
+ * RS: clear IsRouter
+ * RA: set IsRouter if there's lladdr
+ * redir: clear IsRouter if new entry
+ *
+ * RA case, (1):
+ * The spec says that we must set IsRouter in the following cases:
+ * - If lladdr exist, set IsRouter. This means (1-5).
+ * - If it is old entry (!newentry), set IsRouter. This means (7).
+ * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
+ * A quetion arises for (1) case. (1) case has no lladdr in the
+ * neighbor cache, this is similar to (6).
+ * This case is rare but we figured that we MUST NOT set IsRouter.
+ *
+ * is_new old_addr new_addr NS RS RA redir
+ * D R
+ * 0 n n (1) c ? s
+ * 0 y n (2) c s s
+ * 0 n y (3) c s s
+ * 0 y y (4) c s s
+ * 0 y y (5) c s s
+ * 1 -- n (6) c c c s
+ * 1 -- y (7) c c s c s
+ *
+ * (c=clear s=set)
+ */
+ switch (type & 0xff) {
+ case ND_NEIGHBOR_SOLICIT:
+ /*
+ * New entry must have is_router flag cleared.
+ */
+ if (is_new) /* (6-7) */
+ ln_router = 0;
+ break;
+ case ND_REDIRECT:
+ /*
+ * If the icmp is a redirect to a better router, always set the
+ * is_router flag. Otherwise, if the entry is newly created,
+ * clear the flag. [RFC 2461, sec 8.3]
+ */
+ if (code == ND_REDIRECT_ROUTER)
+ ln_router = 1;
+ else {
+ if (is_new) /* (6-7) */
+ ln_router = 0;
+ }
+ break;
+ case ND_ROUTER_SOLICIT:
+ /*
+ * is_router flag must always be cleared.
+ */
+ ln_router = 0;
+ break;
+ case ND_ROUTER_ADVERT:
+ /*
+ * Mark an entry with lladdr as a router.
+ */
+ if ((!is_new && (old_addr || new_addr)) || /* (2-5) */
+ (is_new && new_addr)) { /* (7) */
+ ln_router = 1;
+ }
+ break;
+ }
+
+ return (ln_router);
+}
+
+/*
* Create neighbor cache entry and cache link-layer address,
* on reception of inbound ND6 packets. (RS/RA/NS/redirect)
*
* type - ICMP6 type
* code - type dependent information
*
- * XXXXX
- * The caller of this function already acquired the ndp
- * cache table lock because the cache entry is returned.
*/
-struct llentry *
+void
nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
int lladdrlen, int type, int code)
{
- struct llentry *ln = NULL;
+ struct llentry *ln = NULL, *ln_tmp;
int is_newentry;
int do_update;
int olladdr;
int llchange;
int flags;
- int newstate = 0;
uint16_t router = 0;
struct sockaddr_in6 sin6;
struct mbuf *chain = NULL;
- int static_route = 0;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
IF_AFDATA_UNLOCK_ASSERT(ifp);
@@ -1573,7 +1921,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
/* nothing must be updated for unspecified address */
if (IN6_IS_ADDR_UNSPECIFIED(from))
- return NULL;
+ return;
/*
* Validation about ifp->if_addrlen and lladdrlen must be done in
@@ -1584,197 +1932,122 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* Spec says nothing in sections for RA, RS and NA. There's small
* description on it in NS section (RFC 2461 7.2.3).
*/
- flags = lladdr ? ND6_EXCLUSIVE : 0;
+ flags = lladdr ? LLE_EXCLUSIVE : 0;
IF_AFDATA_RLOCK(ifp);
ln = nd6_lookup(from, flags, ifp);
IF_AFDATA_RUNLOCK(ifp);
+ is_newentry = 0;
if (ln == NULL) {
- flags |= ND6_EXCLUSIVE;
- IF_AFDATA_LOCK(ifp);
- ln = nd6_lookup(from, flags | ND6_CREATE, ifp);
- IF_AFDATA_UNLOCK(ifp);
- is_newentry = 1;
- } else {
- /* do nothing if static ndp is set */
- if (ln->la_flags & LLE_STATIC) {
- static_route = 1;
- goto done;
+ flags |= LLE_EXCLUSIVE;
+ ln = nd6_alloc(from, 0, ifp);
+ if (ln == NULL)
+ return;
+
+ /*
+ * Since we already know all the data for the new entry,
+ * fill it before insertion.
+ */
+ if (lladdr != NULL) {
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+ lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off);
}
- is_newentry = 0;
+
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(ln);
+ /* Prefer any existing lle over newly-created one */
+ ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp);
+ if (ln_tmp == NULL)
+ lltable_link_entry(LLTABLE6(ifp), ln);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (ln_tmp == NULL) {
+ /* No existing lle, mark as new entry (6,7) */
+ is_newentry = 1;
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
+ if (lladdr != NULL) /* (7) */
+ EVENTHANDLER_INVOKE(lle_event, ln,
+ LLENTRY_RESOLVED);
+ } else {
+ lltable_free_entry(LLTABLE6(ifp), ln);
+ ln = ln_tmp;
+ ln_tmp = NULL;
+ }
+ }
+ /* do nothing if static ndp is set */
+ if ((ln->la_flags & LLE_STATIC)) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+ return;
}
- if (ln == NULL)
- return (NULL);
olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
if (olladdr && lladdr) {
- llchange = bcmp(lladdr, &ln->ll_addr,
+ llchange = bcmp(lladdr, ln->ll_addr,
ifp->if_addrlen);
- } else
+ } else if (!olladdr && lladdr)
+ llchange = 1;
+ else
llchange = 0;
/*
* newentry olladdr lladdr llchange (*=record)
* 0 n n -- (1)
* 0 y n -- (2)
- * 0 n y -- (3) * STALE
+ * 0 n y y (3) * STALE
* 0 y y n (4) *
* 0 y y y (5) * STALE
* 1 -- n -- (6) NOSTATE(= PASSIVE)
* 1 -- y -- (7) * STALE
*/
- if (lladdr) { /* (3-5) and (7) */
+ do_update = 0;
+ if (is_newentry == 0 && llchange != 0) {
+ do_update = 1; /* (3,5) */
+
/*
* Record source link-layer address
* XXX is it dependent to ifp->if_type?
*/
- bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
- ln->la_flags |= LLE_VALID;
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
- }
-
- if (!is_newentry) {
- if ((!olladdr && lladdr != NULL) || /* (3) */
- (olladdr && lladdr != NULL && llchange)) { /* (5) */
- do_update = 1;
- newstate = ND6_LLINFO_STALE;
- } else /* (1-2,4) */
- do_update = 0;
- } else {
- do_update = 1;
- if (lladdr == NULL) /* (6) */
- newstate = ND6_LLINFO_NOSTATE;
- else /* (7) */
- newstate = ND6_LLINFO_STALE;
- }
-
- if (do_update) {
- /*
- * Update the state of the neighbor cache.
- */
- ln->ln_state = newstate;
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
- if (ln->ln_state == ND6_LLINFO_STALE) {
- /*
- * XXX: since nd6_output() below will cause
- * state tansition to DELAY and reset the timer,
- * we must set the timer now, although it is actually
- * meaningless.
- */
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+ if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off) == 0) {
+ /* Entry was deleted */
+ return;
+ }
- if (ln->la_hold) {
- struct mbuf *m_hold, *m_hold_next;
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
- /*
- * reset the la_hold in advance, to explicitly
- * prevent a la_hold lookup in nd6_output()
- * (wouldn't happen, though...)
- */
- for (m_hold = ln->la_hold, ln->la_hold = NULL;
- m_hold; m_hold = m_hold_next) {
- m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
- /*
- * we assume ifp is not a p2p here, so
- * just set the 2nd argument as the
- * 1st one.
- */
- nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
- }
- /*
- * If we have mbufs in the chain we need to do
- * deferred transmit. Copy the address from the
- * llentry before dropping the lock down below.
- */
- if (chain != NULL)
- memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
- }
- } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
- /* probe right away */
- nd6_llinfo_settimer_locked((void *)ln, 0);
- }
+ if (ln->la_hold != NULL)
+ nd6_grab_holdchain(ln, &chain, &sin6);
}
- /*
- * ICMP6 type dependent behavior.
- *
- * NS: clear IsRouter if new entry
- * RS: clear IsRouter
- * RA: set IsRouter if there's lladdr
- * redir: clear IsRouter if new entry
- *
- * RA case, (1):
- * The spec says that we must set IsRouter in the following cases:
- * - If lladdr exist, set IsRouter. This means (1-5).
- * - If it is old entry (!newentry), set IsRouter. This means (7).
- * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
- * A quetion arises for (1) case. (1) case has no lladdr in the
- * neighbor cache, this is similar to (6).
- * This case is rare but we figured that we MUST NOT set IsRouter.
- *
- * newentry olladdr lladdr llchange NS RS RA redir
- * D R
- * 0 n n -- (1) c ? s
- * 0 y n -- (2) c s s
- * 0 n y -- (3) c s s
- * 0 y y n (4) c s s
- * 0 y y y (5) c s s
- * 1 -- n -- (6) c c c s
- * 1 -- y -- (7) c c s c s
- *
- * (c=clear s=set)
- */
- switch (type & 0xff) {
- case ND_NEIGHBOR_SOLICIT:
- /*
- * New entry must have is_router flag cleared.
- */
- if (is_newentry) /* (6-7) */
- ln->ln_router = 0;
- break;
- case ND_REDIRECT:
- /*
- * If the icmp is a redirect to a better router, always set the
- * is_router flag. Otherwise, if the entry is newly created,
- * clear the flag. [RFC 2461, sec 8.3]
- */
- if (code == ND_REDIRECT_ROUTER)
- ln->ln_router = 1;
- else if (is_newentry) /* (6-7) */
- ln->ln_router = 0;
- break;
- case ND_ROUTER_SOLICIT:
- /*
- * is_router flag must always be cleared.
- */
- ln->ln_router = 0;
- break;
- case ND_ROUTER_ADVERT:
- /*
- * Mark an entry with lladdr as a router.
- */
- if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */
- (is_newentry && lladdr)) { /* (7) */
- ln->ln_router = 1;
- }
- break;
- }
+ /* Calculates new router status */
+ router = nd6_is_router(type, code, is_newentry, olladdr,
+ lladdr != NULL ? 1 : 0, ln->ln_router);
- if (ln != NULL) {
- static_route = (ln->la_flags & LLE_STATIC);
- router = ln->ln_router;
+ ln->ln_router = router;
+ /* Mark non-router redirects with special flag */
+ if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER)
+ ln->la_flags |= LLE_REDIRECT;
- if (flags & ND6_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- if (static_route)
- ln = NULL;
- }
- if (chain)
- nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+
+ if (chain != NULL)
+ nd6_flush_holdchain(ifp, ifp, chain, &sin6);
/*
* When the link-layer address of a router changes, select the
@@ -1791,25 +2064,13 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* for those are not autoconfigured hosts, we explicitly avoid such
* cases for safety.
*/
- if (do_update && router &&
+ if ((do_update || is_newentry) && router &&
ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
/*
* guaranteed recursion
*/
defrouter_select();
}
-
- return (ln);
-done:
- if (ln != NULL) {
- if (flags & ND6_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- if (static_route)
- ln = NULL;
- }
- return (ln);
}
static void
@@ -1822,7 +2083,9 @@ nd6_slowtimo(void *arg)
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, curvnet);
IFNET_RLOCK_NOSLEEP();
- TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_afdata[AF_INET6] == NULL)
+ continue;
nd6if = ND_IFINFO(ifp);
if (nd6if->basereachable && /* already initialized */
(nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
@@ -1840,55 +2103,176 @@ nd6_slowtimo(void *arg)
CURVNET_RESTORE();
}
-int
-nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
- struct sockaddr_in6 *dst, struct rtentry *rt0)
+void
+nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
+ struct sockaddr_in6 *sin6)
{
- return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL));
+ LLE_WLOCK_ASSERT(ln);
+
+ *chain = ln->la_hold;
+ ln->la_hold = NULL;
+ lltable_fill_sa_entry(ln, (struct sockaddr *)sin6);
+
+ if (ln->ln_state == ND6_LLINFO_STALE) {
+
+ /*
+ * The first time we send a packet to a
+ * neighbor whose entry is STALE, we have
+ * to change the state to DELAY and a sets
+ * a timer to expire in DELAY_FIRST_PROBE_TIME
+ * seconds to ensure do neighbor unreachability
+ * detection on expiration.
+ * (RFC 2461 7.3.3)
+ */
+ nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY);
+ }
}
+int
+nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
+ struct sockaddr_in6 *dst, struct route *ro)
+{
+ int error;
+ int ip6len;
+ struct ip6_hdr *ip6;
+ struct m_tag *mtag;
+
+#ifdef MAC
+ mac_netinet6_nd6_send(ifp, m);
+#endif
+
+ /*
+ * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
+ * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
+ * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
+ * to be diverted to user space. When re-injected into the kernel,
+ * send_output() will directly dispatch them to the outgoing interface.
+ */
+ if (send_sendso_input_hook != NULL) {
+ mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
+ if (mtag != NULL) {
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+ /* Use the SEND socket */
+ error = send_sendso_input_hook(m, ifp, SND_OUT,
+ ip6len);
+ /* -1 == no app on SEND socket */
+ if (error == 0 || error != -1)
+ return (error);
+ }
+ }
+
+ m_clrprotoflags(m); /* Avoid confusing lower layers. */
+ IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
+ mtod(m, struct ip6_hdr *));
+
+ if ((ifp->if_flags & IFF_LOOPBACK) == 0)
+ origifp = ifp;
+
+ error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro);
+ return (error);
+}
/*
- * Note that I'm not enforcing any global serialization
- * lle state or asked changes here as the logic is too
- * complicated to avoid having to always acquire an exclusive
- * lock
- * KMM
+ * Lookup link headerfor @sa_dst address. Stores found
+ * data in @desten buffer. Copy of lle ln_flags can be also
+ * saved in @pflags if @pflags is non-NULL.
+ *
+ * If destination LLE does not exists or lle state modification
+ * is required, call "slow" version.
*
+ * Return values:
+ * - 0 on success (address copied to buffer).
+ * - EWOULDBLOCK (no local error, but address is still unresolved)
+ * - other errors (alloc failure, etc)
*/
-#define senderr(e) { error = (e); goto bad;}
-
int
-nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
- struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle,
- struct mbuf **chain)
+nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
+ const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags,
+ struct llentry **plle)
{
- struct mbuf *m = m0;
- struct m_tag *mtag;
- struct llentry *ln = lle;
- struct ip6_hdr *ip6;
- int error = 0;
- int flags = 0;
- int ip6len;
+ struct llentry *ln = NULL;
+ const struct sockaddr_in6 *dst6;
-#ifdef INVARIANTS
- if (lle != NULL) {
-
- LLE_WLOCK_ASSERT(lle);
+ if (pflags != NULL)
+ *pflags = 0;
+
+ dst6 = (const struct sockaddr_in6 *)sa_dst;
- KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed"));
+ /* discard the packet if IPv6 operation is disabled on the interface */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
+ m_freem(m);
+ return (ENETDOWN); /* better error? */
}
-#endif
- if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
- goto sendpkt;
- if (nd6_need_cache(ifp) == 0)
- goto sendpkt;
+ if (m != NULL && m->m_flags & M_MCAST) {
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_FDDI:
+ case IFT_L2VLAN:
+ case IFT_IEEE80211:
+ case IFT_BRIDGE:
+ case IFT_ISO88025:
+ ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr,
+ desten);
+ return (0);
+ default:
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+ }
- /*
- * next hop determination. This routine is derived from ether_output.
- */
+ IF_AFDATA_RLOCK(ifp);
+ ln = nd6_lookup(&dst6->sin6_addr, plle ? LLE_EXCLUSIVE : LLE_UNLOCKED,
+ ifp);
+ if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
+ /* Entry found, let's copy lle info */
+ bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
+ if (pflags != NULL)
+ *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
+ /* Check if we have feedback request from nd6 timer */
+ if (ln->r_skip_req != 0) {
+ LLE_REQ_LOCK(ln);
+ ln->r_skip_req = 0; /* Notify that entry was used */
+ ln->lle_hittime = time_uptime;
+ LLE_REQ_UNLOCK(ln);
+ }
+ if (plle) {
+ LLE_ADDREF(ln);
+ *plle = ln;
+ LLE_WUNLOCK(ln);
+ }
+ IF_AFDATA_RUNLOCK(ifp);
+ return (0);
+ } else if (plle && ln)
+ LLE_WUNLOCK(ln);
+ IF_AFDATA_RUNLOCK(ifp);
+
+ return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle));
+}
+
+
+/*
+ * Do L2 address resolution for @sa_dst address. Stores found
+ * address in @desten buffer. Copy of lle ln_flags can be also
+ * saved in @pflags if @pflags is non-NULL.
+ *
+ * Heavy version.
+ * Function assume that destination LLE does not exist,
+ * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired.
+ *
+ * Set noinline to be dtrace-friendly
+ */
+static __noinline int
+nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
+ const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags,
+ struct llentry **plle)
+{
+ struct llentry *lle = NULL, *lle_tmp;
+ struct in6_addr *psrc, src;
+ int send_ns, ll_len;
+ char *lladdr;
/*
* Address resolution or Neighbor Unreachability Detection
@@ -1896,50 +2280,54 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
* At this point, the destination of the packet must be a unicast
* or an anycast address(i.e. not a multicast).
*/
-
- flags = (lle != NULL) ? LLE_EXCLUSIVE : 0;
- if (ln == NULL) {
- retry:
+ if (lle == NULL) {
IF_AFDATA_RLOCK(ifp);
- ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
+ lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
IF_AFDATA_RUNLOCK(ifp);
- if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
+ if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
/*
* Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
* the condition below is not very efficient. But we believe
* it is tolerable, because this should be a rare case.
*/
- flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
- IF_AFDATA_LOCK(ifp);
- ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
- IF_AFDATA_UNLOCK(ifp);
+ lle = nd6_alloc(&dst->sin6_addr, 0, ifp);
+ if (lle == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
+ log(LOG_DEBUG,
+ "nd6_output: can't allocate llinfo for %s "
+ "(ln=%p)\n",
+ ip6_sprintf(ip6buf, &dst->sin6_addr), lle);
+ m_freem(m);
+ return (ENOBUFS);
+ }
+
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(lle);
+ /* Prefer any existing entry over newly-created one */
+ lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
+ if (lle_tmp == NULL)
+ lltable_link_entry(LLTABLE6(ifp), lle);
+ IF_AFDATA_WUNLOCK(ifp);
+ if (lle_tmp != NULL) {
+ lltable_free_entry(LLTABLE6(ifp), lle);
+ lle = lle_tmp;
+ lle_tmp = NULL;
+ }
}
}
- if (ln == NULL) {
- if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
- !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
- char ip6buf[INET6_ADDRSTRLEN];
- log(LOG_DEBUG,
- "nd6_output: can't allocate llinfo for %s "
- "(ln=%p)\n",
- ip6_sprintf(ip6buf, &dst->sin6_addr), ln);
- senderr(EIO); /* XXX: good error? */
+ if (lle == NULL) {
+ if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
+ m_freem(m);
+ return (ENOBUFS);
}
- goto sendpkt; /* send anyway */
- }
- /* We don't have to do link-layer address resolution on a p2p link. */
- if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
- ln->ln_state < ND6_LLINFO_REACHABLE) {
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+ if (m != NULL)
+ m_freem(m);
+ return (ENOBUFS);
}
+ LLE_WLOCK_ASSERT(lle);
+
/*
* The first time we send a packet to a neighbor whose entry is
* STALE, we have to change the state to DELAY and a sets a timer to
@@ -1947,49 +2335,46 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
* neighbor unreachability detection on expiration.
* (RFC 2461 7.3.3)
*/
- if (ln->ln_state == ND6_LLINFO_STALE) {
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
- ln->la_asked = 0;
- ln->ln_state = ND6_LLINFO_DELAY;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz);
- }
+ if (lle->ln_state == ND6_LLINFO_STALE)
+ nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY);
/*
* If the neighbor cache entry has a state other than INCOMPLETE
* (i.e. its link-layer address is already resolved), just
* send the packet.
*/
- if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
- goto sendpkt;
+ if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
+ if (flags & LLE_ADDRONLY) {
+ lladdr = lle->ll_addr;
+ ll_len = ifp->if_addrlen;
+ } else {
+ lladdr = lle->r_linkdata;
+ ll_len = lle->r_hdrlen;
+ }
+ bcopy(lladdr, desten, ll_len);
+ if (pflags != NULL)
+ *pflags = lle->la_flags;
+ if (plle) {
+ LLE_ADDREF(lle);
+ *plle = lle;
+ }
+ LLE_WUNLOCK(lle);
+ return (0);
+ }
/*
* There is a neighbor cache entry, but no ethernet address
* response yet. Append this latest packet to the end of the
- * packet queue in the mbuf, unless the number of the packet
- * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen,
+ * packet queue in the mbuf. When it exceeds nd6_maxqueuelen,
* the oldest packet in the queue will be removed.
*/
- if (ln->ln_state == ND6_LLINFO_NOSTATE)
- ln->ln_state = ND6_LLINFO_INCOMPLETE;
-
- if ((flags & LLE_EXCLUSIVE) == 0) {
- flags |= LLE_EXCLUSIVE;
- LLE_RUNLOCK(ln);
- goto retry;
- }
- LLE_WLOCK_ASSERT(ln);
-
- if (ln->la_hold) {
+ if (lle->la_hold != NULL) {
struct mbuf *m_hold;
int i;
i = 0;
- for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
+ for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){
i++;
if (m_hold->m_nextpkt == NULL) {
m_hold->m_nextpkt = m;
@@ -1997,134 +2382,63 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
}
}
while (i >= V_nd6_maxqueuelen) {
- m_hold = ln->la_hold;
- ln->la_hold = ln->la_hold->m_nextpkt;
+ m_hold = lle->la_hold;
+ lle->la_hold = lle->la_hold->m_nextpkt;
m_freem(m_hold);
i--;
}
} else {
- ln->la_hold = m;
+ lle->la_hold = m;
}
/*
* If there has been no NS for the neighbor after entering the
* INCOMPLETE state, send the first solicitation.
+ * Note that for newly-created lle la_asked will be 0,
+ * so we will transition from ND6_LLINFO_NOSTATE to
+ * ND6_LLINFO_INCOMPLETE state here.
*/
- if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
- ln->la_asked++;
-
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(ifp)->retrans * hz / 1000);
- LLE_WUNLOCK(ln);
- nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
- if (lle != NULL && ln == lle)
- LLE_WLOCK(lle);
-
- } else if (lle == NULL || ln != lle) {
- /*
- * We did the lookup (no lle arg) so we
- * need to do the unlock here.
- */
- LLE_WUNLOCK(ln);
- }
-
- return (0);
-
- sendpkt:
- /* discard the packet if IPv6 operation is disabled on the interface */
- if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
- error = ENETDOWN; /* better error? */
- goto bad;
- }
- /*
- * ln is valid and the caller did not pass in
- * an llentry
- */
- if ((ln != NULL) && (lle == NULL)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- }
-
-#ifdef MAC
- mac_netinet6_nd6_send(ifp, m);
-#endif
+ psrc = NULL;
+ send_ns = 0;
+ if (lle->la_asked == 0) {
+ lle->la_asked++;
+ send_ns = 1;
+ psrc = nd6_llinfo_get_holdsrc(lle, &src);
- /*
- * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
- * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
- * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
- * to be diverted to user space. When re-injected into the kernel,
- * send_output() will directly dispatch them to the outgoing interface.
- */
- if (send_sendso_input_hook != NULL) {
- mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
- if (mtag != NULL) {
- ip6 = mtod(m, struct ip6_hdr *);
- ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
- /* Use the SEND socket */
- error = send_sendso_input_hook(m, ifp, SND_OUT,
- ip6len);
- /* -1 == no app on SEND socket */
- if (error == 0 || error != -1)
- return (error);
- }
+ nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE);
}
+ LLE_WUNLOCK(lle);
+ if (send_ns != 0)
+ nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL);
- /*
- * We were passed in a pointer to an lle with the lock held
- * this means that we can't call if_output as we will
- * recurse on the lle lock - so what we do is we create
- * a list of mbufs to send and transmit them in the caller
- * after the lock is dropped
- */
- if (lle != NULL) {
- if (*chain == NULL)
- *chain = m;
- else {
- struct mbuf *mb;
+ return (EWOULDBLOCK);
+}
- /*
- * append mbuf to end of deferred chain
- */
- mb = *chain;
- while (mb->m_nextpkt != NULL)
- mb = mb->m_nextpkt;
- mb->m_nextpkt = m;
- }
- return (error);
- }
- /* Reset layer specific mbuf flags to avoid confusing lower layers. */
- m->m_flags &= ~(M_PROTOFLAGS);
- if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
- return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
- NULL));
- }
- error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL);
- return (error);
+/*
+ * Do L2 address resolution for @sa_dst address. Stores found
+ * address in @desten buffer. Copy of lle ln_flags can be also
+ * saved in @pflags if @pflags is non-NULL.
+ *
+ * Return values:
+ * - 0 on success (address copied to buffer).
+ * - EWOULDBLOCK (no local error, but address is still unresolved)
+ * - other errors (alloc failure, etc)
+ */
+int
+nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags)
+{
+ int error;
- bad:
- /*
- * ln is valid and the caller did not pass in
- * an llentry
- */
- if ((ln != NULL) && (lle == NULL)) {
- if (flags & LLE_EXCLUSIVE)
- LLE_WUNLOCK(ln);
- else
- LLE_RUNLOCK(ln);
- }
- if (m)
- m_freem(m);
+ flags |= LLE_ADDRONLY;
+ error = nd6_resolve_slow(ifp, flags, NULL,
+ (const struct sockaddr_in6 *)dst, desten, pflags, NULL);
return (error);
}
-#undef senderr
-
int
-nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
- struct sockaddr_in6 *dst, struct route *ro)
+nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
+ struct sockaddr_in6 *dst)
{
struct mbuf *m, *m_head;
struct ifnet *outifp;
@@ -2139,20 +2453,17 @@ nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
while (m_head) {
m = m_head;
m_head = m_head->m_nextpkt;
- error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro);
+ error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
}
/*
* XXX
- * note that intermediate errors are blindly ignored - but this is
- * the same convention as used with nd6_output when called by
- * nd6_cache_lladdr
+ * note that intermediate errors are blindly ignored
*/
return (error);
}
-
-int
+static int
nd6_need_cache(struct ifnet *ifp)
{
/*
@@ -2167,19 +2478,9 @@ nd6_need_cache(struct ifnet *ifp)
case IFT_ETHER:
case IFT_FDDI:
case IFT_IEEE1394:
-#ifdef IFT_L2VLAN
case IFT_L2VLAN:
-#endif
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
- case IFT_GIF: /* XXX need more cases? */
- case IFT_PPP:
- case IFT_TUNNEL:
case IFT_BRIDGE:
case IFT_PROPVIRTUAL:
return (1);
@@ -2189,75 +2490,76 @@ nd6_need_cache(struct ifnet *ifp)
}
/*
- * the callers of this function need to be re-worked to drop
- * the lle lock, drop here for now
+ * Add pernament ND6 link-layer record for given
+ * interface address.
+ *
+ * Very similar to IPv4 arp_ifinit(), but:
+ * 1) IPv6 DAD is performed in different place
+ * 2) It is called by IPv6 protocol stack in contrast to
+ * arp_ifinit() which is typically called in SIOCSIFADDR
+ * driver ioctl handler.
+ *
*/
int
-nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, u_char *desten, struct llentry **lle)
+nd6_add_ifa_lle(struct in6_ifaddr *ia)
{
- struct llentry *ln;
+ struct ifnet *ifp;
+ struct llentry *ln, *ln_tmp;
+ struct sockaddr *dst;
- *lle = NULL;
- IF_AFDATA_UNLOCK_ASSERT(ifp);
- if (m != NULL && m->m_flags & M_MCAST) {
- int i;
+ ifp = ia->ia_ifa.ifa_ifp;
+ if (nd6_need_cache(ifp) == 0)
+ return (0);
- switch (ifp->if_type) {
- case IFT_ETHER:
- case IFT_FDDI:
-#ifdef IFT_L2VLAN
- case IFT_L2VLAN:
-#endif
-#ifdef IFT_IEEE80211
- case IFT_IEEE80211:
-#endif
- case IFT_BRIDGE:
- case IFT_ISO88025:
- ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
- desten);
- return (0);
- case IFT_IEEE1394:
- /*
- * netbsd can use if_broadcastaddr, but we don't do so
- * to reduce # of ifdef.
- */
- for (i = 0; i < ifp->if_addrlen; i++)
- desten[i] = ~0;
- return (0);
- case IFT_ARCNET:
- *desten = 0;
- return (0);
- default:
- m_freem(m);
- return (EAFNOSUPPORT);
- }
- }
+ ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
+ dst = (struct sockaddr *)&ia->ia_addr;
+ ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst);
+ if (ln == NULL)
+ return (ENOBUFS);
+ IF_AFDATA_WLOCK(ifp);
+ LLE_WLOCK(ln);
+ /* Unlink any entry if exists */
+ ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst);
+ if (ln_tmp != NULL)
+ lltable_unlink_entry(LLTABLE6(ifp), ln_tmp);
+ lltable_link_entry(LLTABLE6(ifp), ln);
+ IF_AFDATA_WUNLOCK(ifp);
- /*
- * the entry should have been created in nd6_store_lladdr
- */
- IF_AFDATA_RLOCK(ifp);
- ln = lla_lookup(LLTABLE6(ifp), 0, dst);
- IF_AFDATA_RUNLOCK(ifp);
- if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
- if (ln != NULL)
- LLE_RUNLOCK(ln);
- /* this could happen, if we could not allocate memory */
- m_freem(m);
- return (1);
- }
+ if (ln_tmp != NULL)
+ EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED);
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
+
+ LLE_WUNLOCK(ln);
+ if (ln_tmp != NULL)
+ llentry_free(ln_tmp);
- bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
- *lle = ln;
- LLE_RUNLOCK(ln);
- /*
- * A *small* use after free race exists here
- */
return (0);
}
+/*
+ * Removes either all lle entries for given @ia, or lle
+ * corresponding to @ia address.
+ */
+void
+nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all)
+{
+ struct sockaddr_in6 mask, addr;
+ struct sockaddr *saddr, *smask;
+ struct ifnet *ifp;
+
+ ifp = ia->ia_ifa.ifa_ifp;
+ memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
+ memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
+ saddr = (struct sockaddr *)&addr;
+ smask = (struct sockaddr *)&mask;
+
+ if (all != 0)
+ lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC);
+ else
+ lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr);
+}
+
static void
clear_llinfo_pqueue(struct llentry *ln)
{
@@ -2269,22 +2571,24 @@ clear_llinfo_pqueue(struct llentry *ln)
}
ln->la_hold = NULL;
- return;
}
static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
-#ifdef SYSCTL_DECL
+
SYSCTL_DECL(_net_inet6_icmp6);
-#endif
-SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
- CTLFLAG_RD, nd6_sysctl_drlist, "");
-SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
- CTLFLAG_RD, nd6_sysctl_prlist, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
- CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
-SYSCTL_VNET_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
- CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
+SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter",
+ "NDP default router list");
+SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, nd6_sysctl_prlist, "S,in6_prefix",
+ "NDP prefix list");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
+SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
static int
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
@@ -2293,30 +2597,33 @@ nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
struct nd_defrouter *dr;
int error;
- if (req->newptr)
+ if (req->newptr != NULL)
return (EPERM);
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
bzero(&d, sizeof(d));
d.rtaddr.sin6_family = AF_INET6;
d.rtaddr.sin6_len = sizeof(d.rtaddr);
- /*
- * XXX locking
- */
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
d.rtaddr.sin6_addr = dr->rtaddr;
error = sa6_recoverscope(&d.rtaddr);
if (error != 0)
- return (error);
- d.flags = dr->flags;
+ break;
+ d.flags = dr->raflags;
d.rtlifetime = dr->rtlifetime;
- d.expire = dr->expire;
+ d.expire = dr->expire + (time_second - time_uptime);
d.if_index = dr->ifp->if_index;
error = SYSCTL_OUT(req, &d, sizeof(d));
if (error != 0)
- return (error);
+ break;
}
- return (0);
+ ND6_RUNLOCK();
+ return (error);
}
static int
@@ -2333,15 +2640,17 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
if (req->newptr)
return (EPERM);
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
bzero(&p, sizeof(p));
p.origin = PR_ORIG_RA;
bzero(&s6, sizeof(s6));
s6.sin6_family = AF_INET6;
s6.sin6_len = sizeof(s6);
- /*
- * XXX locking
- */
+ ND6_RLOCK();
LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
p.prefix = pr->ndpr_prefix;
if (sa6_recoverscope(&p.prefix)) {
@@ -2362,7 +2671,8 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
p.expire = pr->ndpr_lastupdate +
- pr->ndpr_vltime;
+ pr->ndpr_vltime +
+ (time_second - time_uptime);
else
p.expire = maxexpire;
}
@@ -2373,7 +2683,7 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
p.advrtrs++;
error = SYSCTL_OUT(req, &p, sizeof(p));
if (error != 0)
- return (error);
+ break;
LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
s6.sin6_addr = pfr->router->rtaddr;
if (sa6_recoverscope(&s6))
@@ -2382,8 +2692,9 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
ip6_sprintf(ip6buf, &pfr->router->rtaddr));
error = SYSCTL_OUT(req, &s6, sizeof(s6));
if (error != 0)
- return (error);
+ break;
}
}
- return (0);
+ ND6_RUNLOCK();
+ return (error);
}
diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h
index 94202e10..33ac4386 100644
--- a/freebsd/sys/netinet6/nd6.h
+++ b/freebsd/sys/netinet6/nd6.h
@@ -87,9 +87,7 @@ struct nd_ifinfo {
#define ND6_IFF_AUTO_LINKLOCAL 0x20
#define ND6_IFF_NO_RADR 0x40
#define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */
-
-#define ND6_CREATE LLE_CREATE
-#define ND6_EXCLUSIVE LLE_EXCLUSIVE
+#define ND6_IFF_NO_DAD 0x100
#ifdef _KERNEL
#define ND_IFINFO(ifp) \
@@ -234,14 +232,15 @@ struct in6_ndifreq {
((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000)
TAILQ_HEAD(nd_drhead, nd_defrouter);
-struct nd_defrouter {
+struct nd_defrouter {
TAILQ_ENTRY(nd_defrouter) dr_entry;
- struct in6_addr rtaddr;
- u_char flags; /* flags on RA message */
+ struct in6_addr rtaddr;
+ u_char raflags; /* flags on RA message */
u_short rtlifetime;
u_long expire;
- struct ifnet *ifp;
+ struct ifnet *ifp;
int installed; /* is installed into kernel routing table */
+ u_int refcnt;
};
struct nd_prefixctl {
@@ -317,6 +316,10 @@ struct nd_pfxrouter {
LIST_HEAD(nd_prhead, nd_prefix);
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_IP6NDP);
+#endif
+
/* nd6.c */
VNET_DECLARE(int, nd6_prune);
VNET_DECLARE(int, nd6_delay);
@@ -341,10 +344,20 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
#define V_nd6_debug VNET(nd6_debug)
#define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861)
-#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
+/* Lock for the prefix and default router lists. */
+VNET_DECLARE(struct rwlock, nd6_lock);
+#define V_nd6_lock VNET(nd6_lock)
+
+#define ND6_RLOCK() rw_rlock(&V_nd6_lock)
+#define ND6_RUNLOCK() rw_runlock(&V_nd6_lock)
+#define ND6_WLOCK() rw_wlock(&V_nd6_lock)
+#define ND6_WUNLOCK() rw_wunlock(&V_nd6_lock)
+#define ND6_WLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_WLOCKED)
+#define ND6_RLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_RLOCKED)
+#define ND6_LOCK_ASSERT() rw_assert(&V_nd6_lock, RA_LOCKED)
+#define ND6_UNLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_UNLOCKED)
-VNET_DECLARE(struct callout, nd6_timer_ch);
-#define V_nd6_timer_ch VNET(nd6_timer_ch)
+#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
/* nd6_rtr.c */
VNET_DECLARE(int, nd6_defifindex);
@@ -359,7 +372,7 @@ VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */
#define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance)
union nd_opts {
- struct nd_opt_hdr *nd_opt_array[8]; /* max = target address list */
+ struct nd_opt_hdr *nd_opt_array[16]; /* max = ND_OPT_NONCE */
struct {
struct nd_opt_hdr *zero;
struct nd_opt_hdr *src_lladdr;
@@ -367,6 +380,16 @@ union nd_opts {
struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
struct nd_opt_rd_hdr *rh;
struct nd_opt_mtu *mtu;
+ struct nd_opt_hdr *__res6;
+ struct nd_opt_hdr *__res7;
+ struct nd_opt_hdr *__res8;
+ struct nd_opt_hdr *__res9;
+ struct nd_opt_hdr *__res10;
+ struct nd_opt_hdr *__res11;
+ struct nd_opt_hdr *__res12;
+ struct nd_opt_hdr *__res13;
+ struct nd_opt_nonce *nonce;
+ struct nd_opt_hdr *__res15;
struct nd_opt_hdr *search; /* multiple opts */
struct nd_opt_hdr *last; /* multiple opts */
int done;
@@ -379,6 +402,7 @@ union nd_opts {
#define nd_opts_pi_end nd_opt_each.pi_end
#define nd_opts_rh nd_opt_each.rh
#define nd_opts_mtu nd_opt_each.mtu
+#define nd_opts_nonce nd_opt_each.nonce
#define nd_opts_search nd_opt_each.search
#define nd_opts_last nd_opt_each.last
#define nd_opts_done nd_opt_each.done
@@ -390,34 +414,32 @@ void nd6_init(void);
void nd6_destroy(void);
#endif
struct nd_ifinfo *nd6_ifattach(struct ifnet *);
-void nd6_ifdetach(struct nd_ifinfo *);
-int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
+void nd6_ifdetach(struct ifnet *, struct nd_ifinfo *);
+int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *);
void nd6_option_init(void *, int, union nd_opts *);
struct nd_opt_hdr *nd6_option(union nd_opts *);
int nd6_options(union nd_opts *);
-struct llentry *nd6_lookup(struct in6_addr *, int, struct ifnet *);
+struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *);
+struct llentry *nd6_alloc(const struct in6_addr *, int, struct ifnet *);
void nd6_setmtu(struct ifnet *);
-void nd6_llinfo_settimer(struct llentry *, long);
-void nd6_llinfo_settimer_locked(struct llentry *, long);
+void nd6_llinfo_setstate(struct llentry *lle, int newstate);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
-void nd6_nud_hint(struct rtentry *, struct in6_addr *, int);
-int nd6_resolve(struct ifnet *, struct rtentry *, struct mbuf *,
- struct sockaddr *, u_char *);
-void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
+int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
+ char *desten, uint32_t *pflags);
+int nd6_resolve(struct ifnet *, int, struct mbuf *,
+ const struct sockaddr *, u_char *, uint32_t *, struct llentry **);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);
-struct llentry *nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
+void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
char *, int, int, int);
-int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct rtentry *);
-int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct rtentry *, struct llentry *,
- struct mbuf **);
-int nd6_output_flush(struct ifnet *, struct ifnet *, struct mbuf *,
- struct sockaddr_in6 *, struct route *);
-int nd6_need_cache(struct ifnet *);
-int nd6_storelladdr(struct ifnet *, struct mbuf *,
- struct sockaddr *, u_char *, struct llentry **);
+void nd6_grab_holdchain(struct llentry *, struct mbuf **,
+ struct sockaddr_in6 *);
+int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *);
+int nd6_add_ifa_lle(struct in6_ifaddr *);
+void nd6_rem_ifa_lle(struct in6_ifaddr *, int);
+int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
+ struct sockaddr_in6 *, struct route *);
/* nd6_nbr.c */
void nd6_na_input(struct mbuf *, int, int);
@@ -425,24 +447,28 @@ void nd6_na_output(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, u_long, int, struct sockaddr *);
void nd6_ns_input(struct mbuf *, int, int);
void nd6_ns_output(struct ifnet *, const struct in6_addr *,
- const struct in6_addr *, struct llentry *, int);
+ const struct in6_addr *, const struct in6_addr *, uint8_t *);
caddr_t nd6_ifptomac(struct ifnet *);
+void nd6_dad_init(void);
void nd6_dad_start(struct ifaddr *, int);
void nd6_dad_stop(struct ifaddr *);
-void nd6_dad_duplicated(struct ifaddr *);
/* nd6_rtr.c */
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
-void prelist_del(struct nd_prefix *);
void defrouter_reset(void);
void defrouter_select(void);
-void defrtrlist_del(struct nd_defrouter *);
+void defrouter_ref(struct nd_defrouter *);
+void defrouter_rele(struct nd_defrouter *);
+bool defrouter_remove(struct in6_addr *, struct ifnet *);
+void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
+void defrouter_del(struct nd_defrouter *);
void prelist_remove(struct nd_prefix *);
int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
struct nd_prefix **);
void pfxlist_onlink_check(void);
struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
+struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
void rt6_flush(struct in6_addr *, struct ifnet *);
int nd6_setdefaultiface(int);
diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c
index cb765549..df50fa93 100644
--- a/freebsd/sys/netinet6/nd6_nbr.c
+++ b/freebsd/sys/netinet6/nd6_nbr.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
+#include <sys/libkern.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/rwlock.h>
#include <sys/mbuf.h>
@@ -50,9 +51,11 @@ __FBSDID("$FreeBSD$");
#include <sys/time.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/errno.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/callout.h>
+#include <sys/refcount.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -62,11 +65,11 @@ __FBSDID("$FreeBSD$");
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
+#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <net/if_llatbl.h>
-#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#include <netinet/ip6.h>
@@ -80,19 +83,32 @@ __FBSDID("$FreeBSD$");
#define SDL(s) ((struct sockaddr_dl *)s)
struct dadq;
-static struct dadq *nd6_dad_find(struct ifaddr *);
-static void nd6_dad_starttimer(struct dadq *, int);
+static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
+static void nd6_dad_add(struct dadq *dp);
+static void nd6_dad_del(struct dadq *dp);
+static void nd6_dad_rele(struct dadq *);
+static void nd6_dad_starttimer(struct dadq *, int, int);
static void nd6_dad_stoptimer(struct dadq *);
static void nd6_dad_timer(struct dadq *);
-static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
-static void nd6_dad_ns_input(struct ifaddr *);
+static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
+static void nd6_dad_ns_output(struct dadq *);
+static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
static void nd6_dad_na_input(struct ifaddr *);
static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, u_long, int, struct sockaddr *, u_int);
+static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
+ const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int);
-VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/
-VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */
-#define V_dad_ignore_ns VNET(dad_ignore_ns)
+static VNET_DEFINE(int, dad_enhanced) = 1;
+#define V_dad_enhanced VNET(dad_enhanced)
+
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(dad_enhanced), 0,
+ "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
+
+static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to
+ transmit DAD packet */
#define V_dad_maxtry VNET(dad_maxtry)
/*
@@ -229,42 +245,40 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
/* (1) and (3) check. */
if (ifp->if_carp)
ifa = (*carp_iamatch6_p)(ifp, &taddr6);
- if (ifa == NULL)
+ else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/* (2) check. */
if (ifa == NULL) {
- struct route_in6 ro;
- int need_proxy;
+ struct sockaddr_dl rt_gateway;
+ struct rt_addrinfo info;
+ struct sockaddr_in6 dst6;
- bzero(&ro, sizeof(ro));
- ro.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
- ro.ro_dst.sin6_family = AF_INET6;
- ro.ro_dst.sin6_addr = taddr6;
+ bzero(&dst6, sizeof(dst6));
+ dst6.sin6_len = sizeof(struct sockaddr_in6);
+ dst6.sin6_family = AF_INET6;
+ dst6.sin6_addr = taddr6;
+
+ bzero(&rt_gateway, sizeof(rt_gateway));
+ rt_gateway.sdl_len = sizeof(rt_gateway);
+ bzero(&info, sizeof(info));
+ info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
/* Always use the default FIB. */
-#ifdef RADIX_MPATH
- rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE,
- RT_DEFAULT_FIB);
-#else
- in6_rtalloc(&ro, RT_DEFAULT_FIB);
-#endif
- need_proxy = (ro.ro_rt &&
- (ro.ro_rt->rt_flags & RTF_ANNOUNCE) != 0 &&
- ro.ro_rt->rt_gateway->sa_family == AF_LINK);
- if (ro.ro_rt != NULL) {
- if (need_proxy)
- proxydl = *SDL(ro.ro_rt->rt_gateway);
- RTFREE(ro.ro_rt);
- }
- if (need_proxy) {
- /*
- * proxy NDP for single entry
- */
- ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
- IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
- if (ifa)
- proxy = 1;
+ if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6,
+ 0, 0, &info) == 0) {
+ if ((info.rti_flags & RTF_ANNOUNCE) != 0 &&
+ rt_gateway.sdl_family == AF_LINK) {
+
+ /*
+ * proxy NDP for single entry
+ */
+ proxydl = *SDL(&rt_gateway);
+ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(
+ ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
+ if (ifa)
+ proxy = 1;
+ }
}
}
if (ifa == NULL) {
@@ -316,7 +330,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
* silently ignore it.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
- nd6_dad_ns_input(ifa);
+ nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);
goto freeit;
}
@@ -377,12 +391,14 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
* Based on RFC 2461
* Based on RFC 2462 (duplicate address detection)
*
- * ln - for source address determination
- * dad - duplicate address detection
+ * ln - for source address determination
+ * nonce - If non-NULL, NS is used for duplicate address detection and
+ * the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
*/
-void
-nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
- const struct in6_addr *taddr6, struct llentry *ln, int dad)
+static void
+nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
+ const struct in6_addr *daddr6, const struct in6_addr *taddr6,
+ uint8_t *nonce, u_int fibnum)
{
struct mbuf *m;
struct m_tag *mtag;
@@ -392,7 +408,6 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
int icmp6len;
int maxlen;
caddr_t mac;
- struct route_in6 ro;
if (IN6_IS_ADDR_MULTICAST(taddr6))
return;
@@ -400,27 +415,17 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
/* estimate the size of message */
maxlen = sizeof(*ip6) + sizeof(*nd_ns);
maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
- if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
- printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES "
- "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
- return;
- }
+ KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+ "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+ __func__, max_linkhdr, maxlen, MCLBYTES));
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m && max_linkhdr + maxlen >= MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+ if (max_linkhdr + maxlen > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- m->m_pkthdr.rcvif = NULL;
-
- bzero(&ro, sizeof(ro));
+ M_SETFIB(m, fibnum);
if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) {
m->m_flags |= M_MCAST;
@@ -431,7 +436,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
icmp6len = sizeof(*nd_ns);
m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
- m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */
+ m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */
/* fill neighbor solicitation packet */
ip6 = mtod(m, struct ip6_hdr *);
@@ -453,8 +458,8 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
goto bad;
}
- if (!dad) {
- struct ifaddr *ifa;
+ if (nonce == NULL) {
+ struct ifaddr *ifa = NULL;
/*
* RFC2461 7.2.2:
@@ -466,60 +471,33 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
* interface should be used."
*
* We use the source address for the prompting packet
- * (saddr6), if:
- * - saddr6 is given from the caller (by giving "ln"), and
- * - saddr6 belongs to the outgoing interface.
+ * (saddr6), if saddr6 belongs to the outgoing interface.
* Otherwise, we perform the source address selection as usual.
*/
- struct in6_addr *hsrc;
- hsrc = NULL;
- if (ln != NULL) {
- LLE_RLOCK(ln);
- if (ln->la_hold != NULL) {
- struct ip6_hdr *hip6; /* hold ip6 */
-
- /*
- * assuming every packet in la_hold has the same IP
- * header
- */
- hip6 = mtod(ln->la_hold, struct ip6_hdr *);
- /* XXX pullup? */
- if (sizeof(*hip6) < ln->la_hold->m_len) {
- ip6->ip6_src = hip6->ip6_src;
- hsrc = &hip6->ip6_src;
- }
- }
- LLE_RUNLOCK(ln);
- }
- if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
- hsrc)) != NULL) {
+ if (saddr6 != NULL)
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6);
+ if (ifa != NULL) {
/* ip6_src set already. */
+ ip6->ip6_src = *saddr6;
ifa_free(ifa);
} else {
int error;
- struct sockaddr_in6 dst_sa;
- struct in6_addr src_in;
- struct ifnet *oifp;
-
- bzero(&dst_sa, sizeof(dst_sa));
- dst_sa.sin6_family = AF_INET6;
- dst_sa.sin6_len = sizeof(dst_sa);
- dst_sa.sin6_addr = ip6->ip6_dst;
-
- oifp = ifp;
- error = in6_selectsrc(&dst_sa, NULL,
- NULL, &ro, NULL, &oifp, &src_in);
+ struct in6_addr dst6, src6;
+ uint32_t scopeid;
+
+ in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
+ error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+ scopeid, ifp, &src6, NULL);
if (error) {
char ip6buf[INET6_ADDRSTRLEN];
- nd6log((LOG_DEBUG,
- "nd6_ns_output: source can't be "
- "determined: dst=%s, error=%d\n",
- ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
+ nd6log((LOG_DEBUG, "%s: source can't be "
+ "determined: dst=%s, error=%d\n", __func__,
+ ip6_sprintf(ip6buf, &dst6),
error));
goto bad;
}
- ip6->ip6_src = src_in;
+ ip6->ip6_src = src6;
}
} else {
/*
@@ -550,7 +528,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
* Multicast NS MUST add one add the option
* Unicast NS SHOULD add one add the option
*/
- if (!dad && (mac = nd6_ifptomac(ifp))) {
+ if (nonce == NULL && (mac = nd6_ifptomac(ifp))) {
int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
/* 8 byte alignments... */
@@ -564,7 +542,26 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
nd_opt->nd_opt_len = optlen >> 3;
bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
}
+ /*
+ * Add a Nonce option (RFC 3971) to detect looped back NS messages.
+ * This behavior is documented as Enhanced Duplicate Address
+ * Detection in RFC 7527.
+ * net.inet6.ip6.dad_enhanced=0 disables this.
+ */
+ if (V_dad_enhanced != 0 && nonce != NULL) {
+ int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN;
+ struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
+ /* 8-byte alignment is required. */
+ optlen = (optlen + 7) & ~7;
+ m->m_pkthdr.len += optlen;
+ m->m_len += optlen;
+ icmp6len += optlen;
+ bzero((caddr_t)nd_opt, optlen);
+ nd_opt->nd_opt_type = ND_OPT_NONCE;
+ nd_opt->nd_opt_len = optlen >> 3;
+ bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN);
+ }
ip6->ip6_plen = htons((u_short)icmp6len);
nd_ns->nd_ns_cksum = 0;
nd_ns->nd_ns_cksum =
@@ -579,24 +576,27 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
m_tag_prepend(m, mtag);
}
- ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL);
+ ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0,
+ &im6o, NULL, NULL);
icmp6_ifstat_inc(ifp, ifs6_out_msg);
icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
- /* We don't cache this route. */
- RO_RTFREE(&ro);
-
return;
bad:
- if (ro.ro_rt) {
- RTFREE(ro.ro_rt);
- }
m_freem(m);
- return;
}
+#ifndef BURN_BRIDGES
+void
+nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6,
+ const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce)
+{
+
+ nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB);
+}
+#endif
/*
* Neighbor advertisement input handling.
*
@@ -626,8 +626,10 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
struct llentry *ln = NULL;
union nd_opts ndopts;
struct mbuf *chain = NULL;
- struct m_tag *mtag;
struct sockaddr_in6 sin6;
+ u_char linkhdr[LLE_MAX_LINKHDR];
+ size_t linkhdrsize;
+ int lladdr_off;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
if (ip6->ip6_hlim != 255) {
@@ -653,6 +655,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
+ memset(&sin6, 0, sizeof(sin6));
taddr6 = nd_na->nd_na_target;
if (in6_setscope(&taddr6, ifp, NULL))
@@ -685,7 +688,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
}
- ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+ /*
+ * This effectively disables the DAD check on a non-master CARP
+ * address.
+ */
+ if (ifp->if_carp)
+ ifa = (*carp_iamatch6_p)(ifp, &taddr6);
+ else
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/*
* Target address matches one of my interface address.
@@ -742,20 +752,21 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
/*
* Record link-layer address, and update the state.
*/
- bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
- ln->la_flags |= LLE_VALID;
- EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
- if (is_solicited) {
- ln->ln_state = ND6_LLINFO_REACHABLE;
- ln->ln_byhint = 0;
- if (!ND6_LLINFO_PERMANENT(ln)) {
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz);
- }
- } else {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ return;
+
+ if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
+ lladdr_off) == 0) {
+ ln = NULL;
+ goto freeit;
}
+ EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
+ if (is_solicited)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
+ else
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
if ((ln->ln_router = is_router) != 0) {
/*
* This means a router's state has changed from
@@ -774,7 +785,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
llchange = 0;
else {
if (ln->la_flags & LLE_VALID) {
- if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
+ if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
llchange = 1;
else
llchange = 0;
@@ -806,10 +817,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* If state is REACHABLE, make it STALE.
* no other updates should be done.
*/
- if (ln->ln_state == ND6_LLINFO_REACHABLE) {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
- }
+ if (ln->ln_state == ND6_LLINFO_REACHABLE)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
goto freeit;
} else if (is_override /* (2a) */
|| (!is_override && (lladdr != NULL && !llchange)) /* (2b) */
@@ -818,8 +827,15 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* Update link-local address, if any.
*/
if (lladdr != NULL) {
- bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
- ln->la_flags |= LLE_VALID;
+ linkhdrsize = sizeof(linkhdr);
+ if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
+ linkhdr, &linkhdrsize, &lladdr_off) != 0)
+ goto freeit;
+ if (lltable_try_set_entry_addr(ifp, ln, linkhdr,
+ linkhdrsize, lladdr_off) == 0) {
+ ln = NULL;
+ goto freeit;
+ }
EVENTHANDLER_INVOKE(lle_event, ln,
LLENTRY_RESOLVED);
}
@@ -829,19 +845,11 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* If not solicited and the link-layer address was
* changed, make it STALE.
*/
- if (is_solicited) {
- ln->ln_state = ND6_LLINFO_REACHABLE;
- ln->ln_byhint = 0;
- if (!ND6_LLINFO_PERMANENT(ln)) {
- nd6_llinfo_settimer_locked(ln,
- (long)ND_IFINFO(ifp)->reachable * hz);
- }
- } else {
- if (lladdr != NULL && llchange) {
- ln->ln_state = ND6_LLINFO_STALE;
- nd6_llinfo_settimer_locked(ln,
- (long)V_nd6_gctimer * hz);
- }
+ if (is_solicited)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
+ else {
+ if (lladdr != NULL && llchange)
+ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
}
}
@@ -851,31 +859,19 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* Remove the sender from the Default Router List and
* update the Destination Cache entries.
*/
- struct nd_defrouter *dr;
- struct in6_addr *in6;
-
- in6 = &L3_ADDR_SIN6(ln)->sin6_addr;
+ struct ifnet *nd6_ifp;
- /*
- * Lock to protect the default router list.
- * XXX: this might be unnecessary, since this function
- * is only called under the network software interrupt
- * context. However, we keep it just for safety.
- */
- dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
- if (dr)
- defrtrlist_del(dr);
- else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
- ND6_IFF_ACCEPT_RTADV) {
+ nd6_ifp = lltable_get_ifp(ln->lle_tbl);
+ if (!defrouter_remove(&ln->r_l3addr.addr6, nd6_ifp) &&
+ (ND_IFINFO(nd6_ifp)->flags &
+ ND6_IFF_ACCEPT_RTADV) != 0)
/*
* Even if the neighbor is not in the default
- * router list, the neighbor may be used
- * as a next hop for some destinations
- * (e.g. redirect case). So we must
- * call rt6_flush explicitly.
+ * router list, the neighbor may be used as a
+ * next hop for some destinations (e.g. redirect
+ * case). So we must call rt6_flush explicitly.
*/
rt6_flush(&ip6->ip6_src, ifp);
- }
}
ln->ln_router = is_router;
}
@@ -884,43 +880,15 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* rt->rt_flags &= ~RTF_REJECT;
*/
ln->la_asked = 0;
- if (ln->la_hold) {
- struct mbuf *m_hold, *m_hold_next;
-
- /*
- * reset the la_hold in advance, to explicitly
- * prevent a la_hold lookup in nd6_output()
- * (wouldn't happen, though...)
- */
- for (m_hold = ln->la_hold, ln->la_hold = NULL;
- m_hold; m_hold = m_hold_next) {
- m_hold_next = m_hold->m_nextpkt;
- m_hold->m_nextpkt = NULL;
- /*
- * we assume ifp is not a loopback here, so just set
- * the 2nd argument as the 1st one.
- */
-
- if (send_sendso_input_hook != NULL) {
- mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
- sizeof(unsigned short), M_NOWAIT);
- if (mtag == NULL)
- goto bad;
- m_tag_prepend(m, mtag);
- }
-
- nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain);
- }
- }
+ if (ln->la_hold != NULL)
+ nd6_grab_holdchain(ln, &chain, &sin6);
freeit:
- if (ln != NULL) {
- if (chain)
- memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6));
+ if (ln != NULL)
LLE_WUNLOCK(ln);
- if (chain)
- nd6_output_flush(ifp, ifp, chain, &sin6, NULL);
- }
+ if (chain != NULL)
+ nd6_flush_holdchain(ifp, ifp, chain, &sin6);
+
if (checklink)
pfxlist_onlink_check();
@@ -954,42 +922,30 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
{
struct mbuf *m;
struct m_tag *mtag;
- struct ifnet *oifp;
struct ip6_hdr *ip6;
struct nd_neighbor_advert *nd_na;
struct ip6_moptions im6o;
- struct in6_addr src, daddr6;
- struct sockaddr_in6 dst_sa;
+ struct in6_addr daddr6, dst6, src6;
+ uint32_t scopeid;
+
int icmp6len, maxlen, error;
caddr_t mac = NULL;
- struct route_in6 ro;
-
- bzero(&ro, sizeof(ro));
daddr6 = *daddr6_0; /* make a local copy for modification */
/* estimate the size of message */
maxlen = sizeof(*ip6) + sizeof(*nd_na);
maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
- if (max_linkhdr + maxlen >= MCLBYTES) {
-#ifdef DIAGNOSTIC
- printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES "
- "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
-#endif
- return;
- }
+ KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
+ "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
+ __func__, max_linkhdr, maxlen, MCLBYTES));
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m && max_linkhdr + maxlen >= MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_free(m);
- m = NULL;
- }
- }
+ if (max_linkhdr + maxlen > MHLEN)
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ else
+ m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL)
return;
- m->m_pkthdr.rcvif = NULL;
M_SETFIB(m, fibnum);
if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
@@ -1001,7 +957,7 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
icmp6len = sizeof(*nd_na);
m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
- m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */
+ m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */
/* fill neighbor advertisement packet */
ip6 = mtod(m, struct ip6_hdr *);
@@ -1023,25 +979,21 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
flags &= ~ND_NA_FLAG_SOLICITED;
}
ip6->ip6_dst = daddr6;
- bzero(&dst_sa, sizeof(struct sockaddr_in6));
- dst_sa.sin6_family = AF_INET6;
- dst_sa.sin6_len = sizeof(struct sockaddr_in6);
- dst_sa.sin6_addr = daddr6;
/*
* Select a source whose scope is the same as that of the dest.
*/
- bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa));
- oifp = ifp;
- error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src);
+ in6_splitscope(&daddr6, &dst6, &scopeid);
+ error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+ scopeid, ifp, &src6, NULL);
if (error) {
char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
"determined: dst=%s, error=%d\n",
- ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error));
+ ip6_sprintf(ip6buf, &daddr6), error));
goto bad;
}
- ip6->ip6_src = src;
+ ip6->ip6_src = src6;
nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
nd_na->nd_na_code = 0;
@@ -1104,22 +1056,15 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
m_tag_prepend(m, mtag);
}
- ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL);
+ ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL);
icmp6_ifstat_inc(ifp, ifs6_out_msg);
icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]);
- /* We don't cache this route. */
- RO_RTFREE(&ro);
-
return;
bad:
- if (ro.ro_rt) {
- RTFREE(ro.ro_rt);
- }
m_freem(m);
- return;
}
#ifndef BURN_BRIDGES
@@ -1142,15 +1087,8 @@ nd6_ifptomac(struct ifnet *ifp)
case IFT_ETHER:
case IFT_FDDI:
case IFT_IEEE1394:
-#ifdef IFT_L2VLAN
case IFT_L2VLAN:
-#endif
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
-#ifdef IFT_CARP
- case IFT_CARP:
-#endif
case IFT_INFINIBAND:
case IFT_BRIDGE:
case IFT_ISO88025:
@@ -1168,31 +1106,80 @@ struct dadq {
int dad_ns_ocount; /* NS sent so far */
int dad_ns_icount;
int dad_na_icount;
+ int dad_ns_lcount; /* looped back NS */
+ int dad_loopbackprobe; /* probing state for loopback detection */
struct callout dad_timer_ch;
struct vnet *dad_vnet;
+ u_int dad_refcnt;
+#define ND_OPT_NONCE_LEN32 \
+ ((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t))
+ uint32_t dad_nonce[ND_OPT_NONCE_LEN32];
};
static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
-VNET_DEFINE(int, dad_init) = 0;
-#define V_dadq VNET(dadq)
-#define V_dad_init VNET(dad_init)
+static VNET_DEFINE(struct rwlock, dad_rwlock);
+#define V_dadq VNET(dadq)
+#define V_dad_rwlock VNET(dad_rwlock)
+
+#define DADQ_RLOCK() rw_rlock(&V_dad_rwlock)
+#define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock)
+#define DADQ_WLOCK() rw_wlock(&V_dad_rwlock)
+#define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock)
+
+static void
+nd6_dad_add(struct dadq *dp)
+{
+
+ DADQ_WLOCK();
+ TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
+ DADQ_WUNLOCK();
+}
+
+static void
+nd6_dad_del(struct dadq *dp)
+{
+
+ DADQ_WLOCK();
+ TAILQ_REMOVE(&V_dadq, dp, dad_list);
+ DADQ_WUNLOCK();
+ nd6_dad_rele(dp);
+}
static struct dadq *
-nd6_dad_find(struct ifaddr *ifa)
+nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
{
struct dadq *dp;
- TAILQ_FOREACH(dp, &V_dadq, dad_list)
- if (dp->dad_ifa == ifa)
- return (dp);
+ DADQ_RLOCK();
+ TAILQ_FOREACH(dp, &V_dadq, dad_list) {
+ if (dp->dad_ifa != ifa)
+ continue;
+ /*
+ * Skip if the nonce matches the received one.
+ * +2 in the length is required because of type and
+ * length fields are included in a header.
+ */
+ if (n != NULL &&
+ n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
+ memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
+ ND_OPT_NONCE_LEN) == 0) {
+ dp->dad_ns_lcount++;
+ continue;
+ }
+ refcount_acquire(&dp->dad_refcnt);
+ break;
+ }
+ DADQ_RUNLOCK();
- return (NULL);
+ return (dp);
}
static void
-nd6_dad_starttimer(struct dadq *dp, int ticks)
+nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns)
{
+ if (send_ns != 0)
+ nd6_dad_ns_output(dp);
callout_reset(&dp->dad_timer_ch, ticks,
(void (*)(void *))nd6_dad_timer, (void *)dp);
}
@@ -1201,7 +1188,25 @@ static void
nd6_dad_stoptimer(struct dadq *dp)
{
- callout_stop(&dp->dad_timer_ch);
+ callout_drain(&dp->dad_timer_ch);
+}
+
+static void
+nd6_dad_rele(struct dadq *dp)
+{
+
+ if (refcount_release(&dp->dad_refcnt)) {
+ ifa_free(dp->dad_ifa);
+ free(dp, M_IP6NDP);
+ }
+}
+
+void
+nd6_dad_init(void)
+{
+
+ rw_init(&V_dad_rwlock, "nd6 DAD queue");
+ TAILQ_INIT(&V_dadq);
}
/*
@@ -1214,11 +1219,6 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
- if (!V_dad_init) {
- TAILQ_INIT(&V_dadq);
- V_dad_init++;
- }
-
/*
* If we don't need DAD, don't do it.
* There are several cases:
@@ -1243,17 +1243,26 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
}
if (ifa->ifa_ifp == NULL)
panic("nd6_dad_start: ifa->ifa_ifp == NULL");
- if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
+ if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) {
+ ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
return;
}
- if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)
+ if (!(ifa->ifa_ifp->if_flags & IFF_UP) ||
+ !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
+ (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) {
+ ia->ia6_flags |= IN6_IFF_TENTATIVE;
return;
- if (nd6_dad_find(ifa) != NULL) {
- /* DAD already in progress */
+ }
+ if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
+ /*
+ * DAD is already in progress. Let the existing entry
+ * finish it.
+ */
+ nd6_dad_rele(dp);
return;
}
- dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT);
+ dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO);
if (dp == NULL) {
log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
"%s(%s)\n",
@@ -1261,13 +1270,10 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
return;
}
- bzero(dp, sizeof(*dp));
callout_init(&dp->dad_timer_ch, 0);
#ifdef VIMAGE
dp->dad_vnet = curvnet;
#endif
- TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list);
-
nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
@@ -1278,17 +1284,14 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
* (re)initialization.
*/
dp->dad_ifa = ifa;
- ifa_ref(ifa); /* just for safety */
+ ifa_ref(dp->dad_ifa);
dp->dad_count = V_ip6_dad_count;
dp->dad_ns_icount = dp->dad_na_icount = 0;
dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
- if (delay == 0) {
- nd6_dad_ns_output(dp, ifa);
- nd6_dad_starttimer(dp,
- (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
- } else {
- nd6_dad_starttimer(dp, delay);
- }
+ dp->dad_ns_lcount = dp->dad_loopbackprobe = 0;
+ refcount_init(&dp->dad_refcnt, 1);
+ nd6_dad_add(dp);
+ nd6_dad_starttimer(dp, delay, 0);
}
/*
@@ -1299,9 +1302,7 @@ nd6_dad_stop(struct ifaddr *ifa)
{
struct dadq *dp;
- if (!V_dad_init)
- return;
- dp = nd6_dad_find(ifa);
+ dp = nd6_dad_find(ifa, NULL);
if (!dp) {
/* DAD wasn't started yet */
return;
@@ -1309,53 +1310,61 @@ nd6_dad_stop(struct ifaddr *ifa)
nd6_dad_stoptimer(dp);
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
+ /*
+ * The DAD queue entry may have been removed by nd6_dad_timer() while
+ * we were waiting for it to stop, so re-do the lookup.
+ */
+ nd6_dad_rele(dp);
+ if (nd6_dad_find(ifa, NULL) == NULL)
+ return;
+
+ nd6_dad_del(dp);
+ nd6_dad_rele(dp);
}
static void
nd6_dad_timer(struct dadq *dp)
{
CURVNET_SET(dp->dad_vnet);
- int s;
struct ifaddr *ifa = dp->dad_ifa;
+ struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
char ip6buf[INET6_ADDRSTRLEN];
- s = splnet(); /* XXX */
-
/* Sanity check */
if (ia == NULL) {
log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
- goto done;
+ goto err;
+ }
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ /* Do not need DAD for ifdisabled interface. */
+ log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
+ "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
+ goto err;
}
if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
- goto done;
+ goto err;
}
if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
- goto done;
+ goto err;
}
- /* timeouted with IFF_{RUNNING,UP} check */
- if (dp->dad_ns_tcount > V_dad_maxtry) {
- nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
+ /* Stop DAD if the interface is down even after dad_maxtry attempts. */
+ if ((dp->dad_ns_tcount > V_dad_maxtry) &&
+ (((ifp->if_flags & IFF_UP) == 0) ||
+ ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) {
+ nd6log((LOG_INFO, "%s: could not run DAD "
+ "because the interface was down or not running.\n",
if_name(ifa->ifa_ifp)));
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
- goto done;
+ goto err;
}
/* Need more checks? */
@@ -1363,84 +1372,85 @@ nd6_dad_timer(struct dadq *dp)
/*
* We have more NS to go. Send NS packet for DAD.
*/
- nd6_dad_ns_output(dp, ifa);
nd6_dad_starttimer(dp,
- (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
+ (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1);
+ goto done;
} else {
/*
* We have transmitted sufficient number of DAD packets.
* See what we've got.
*/
- int duplicate;
-
- duplicate = 0;
-
- if (dp->dad_na_icount) {
+ if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
+ /* We've seen NS or NA, means DAD has failed. */
+ nd6_dad_duplicated(ifa, dp);
+ else if (V_dad_enhanced != 0 &&
+ dp->dad_ns_lcount > 0 &&
+ dp->dad_ns_lcount > dp->dad_loopbackprobe) {
/*
- * the check is in nd6_dad_na_input(),
- * but just in case
+ * Sec. 4.1 in RFC 7527 requires transmission of
+ * additional probes until the loopback condition
+ * becomes clear when a looped back probe is detected.
*/
- duplicate++;
- }
-
- if (dp->dad_ns_icount) {
- /* We've seen NS, means DAD has failed. */
- duplicate++;
- }
-
- if (duplicate) {
- /* (*dp) will be freed in nd6_dad_duplicated() */
- dp = NULL;
- nd6_dad_duplicated(ifa);
+ log(LOG_ERR, "%s: a looped back NS message is "
+ "detected during DAD for %s. "
+ "Another DAD probes are being sent.\n",
+ if_name(ifa->ifa_ifp),
+ ip6_sprintf(ip6buf, IFA_IN6(ifa)));
+ dp->dad_loopbackprobe = dp->dad_ns_lcount;
+ /*
+ * Send an NS immediately and increase dad_count by
+ * V_nd6_mmaxtries - 1.
+ */
+ dp->dad_count =
+ dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
+ nd6_dad_starttimer(dp,
+ (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000,
+ 1);
+ goto done;
} else {
/*
* We are done with DAD. No NA came, no NS came.
- * No duplicate address found.
+ * No duplicate address found. Check IFDISABLED flag
+ * again in case that it is changed between the
+ * beginning of this function and here.
*/
- ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0)
+ ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
nd6log((LOG_DEBUG,
"%s: DAD complete for %s - no duplicates found\n",
if_name(ifa->ifa_ifp),
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
+ if (dp->dad_ns_lcount > 0)
+ log(LOG_ERR, "%s: DAD completed while "
+ "a looped back NS message is detected "
+ "during DAD for %s.\n",
+ if_name(ifa->ifa_ifp),
+ ip6_sprintf(ip6buf, IFA_IN6(ifa)));
}
}
-
+err:
+ nd6_dad_del(dp);
done:
- splx(s);
CURVNET_RESTORE();
}
-void
-nd6_dad_duplicated(struct ifaddr *ifa)
+static void
+nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp;
- struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
- dp = nd6_dad_find(ifa);
- if (dp == NULL) {
- log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n");
- return;
- }
-
log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
- "NS in/out=%d/%d, NA in=%d\n",
+ "NS in/out/loopback=%d/%d/%d, NA in=%d\n",
if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
- dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
+ dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount,
+ dp->dad_na_icount);
ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
ia->ia6_flags |= IN6_IFF_DUPLICATED;
- /* We are done with DAD, with duplicate address found. (failure) */
- nd6_dad_stoptimer(dp);
-
ifp = ifa->ifa_ifp;
log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
@@ -1466,9 +1476,7 @@ nd6_dad_duplicated(struct ifaddr *ifa)
case IFT_FDDI:
case IFT_ATM:
case IFT_IEEE1394:
-#ifdef IFT_IEEE80211
case IFT_IEEE80211:
-#endif
case IFT_INFINIBAND:
in6 = ia->ia_addr.sin6_addr;
if (in6_get_hw_ifid(ifp, &in6) == 0 &&
@@ -1481,18 +1489,14 @@ nd6_dad_duplicated(struct ifaddr *ifa)
break;
}
}
-
- TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
- free(dp, M_IP6NDP);
- dp = NULL;
- ifa_free(ifa);
}
static void
-nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
+nd6_dad_ns_output(struct dadq *dp)
{
- struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
- struct ifnet *ifp = ifa->ifa_ifp;
+ struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa;
+ struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
+ int i;
dp->dad_ns_tcount++;
if ((ifp->if_flags & IFF_UP) == 0) {
@@ -1503,17 +1507,29 @@ nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
}
dp->dad_ns_ocount++;
- nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1);
+ if (V_dad_enhanced != 0) {
+ for (i = 0; i < ND_OPT_NONCE_LEN32; i++)
+ dp->dad_nonce[i] = arc4random();
+ /*
+ * XXXHRS: Note that in the case that
+ * DupAddrDetectTransmits > 1, multiple NS messages with
+ * different nonces can be looped back in an unexpected
+ * order. The current implementation recognizes only
+ * the latest nonce on the sender side. Practically it
+ * should work well in almost all cases.
+ */
+ }
+ nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr,
+ (uint8_t *)&dp->dad_nonce[0]);
}
static void
-nd6_dad_ns_input(struct ifaddr *ifa)
+nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
{
struct in6_ifaddr *ia;
struct ifnet *ifp;
const struct in6_addr *taddr6;
struct dadq *dp;
- int duplicate;
if (ifa == NULL)
panic("ifa == NULL in nd6_dad_ns_input");
@@ -1521,39 +1537,15 @@ nd6_dad_ns_input(struct ifaddr *ifa)
ia = (struct in6_ifaddr *)ifa;
ifp = ifa->ifa_ifp;
taddr6 = &ia->ia_addr.sin6_addr;
- duplicate = 0;
- dp = nd6_dad_find(ifa);
-
- /* Quickhack - completely ignore DAD NS packets */
- if (V_dad_ignore_ns) {
- char ip6buf[INET6_ADDRSTRLEN];
- nd6log((LOG_INFO,
- "nd6_dad_ns_input: ignoring DAD NS packet for "
- "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6),
- if_name(ifa->ifa_ifp)));
+ /* Ignore Nonce option when Enhanced DAD is disabled. */
+ if (V_dad_enhanced == 0)
+ ndopt_nonce = NULL;
+ dp = nd6_dad_find(ifa, ndopt_nonce);
+ if (dp == NULL)
return;
- }
-
- /*
- * if I'm yet to start DAD, someone else started using this address
- * first. I have a duplicate and you win.
- */
- if (dp == NULL || dp->dad_ns_ocount == 0)
- duplicate++;
-
- /* XXX more checks for loopback situation - see nd6_dad_timer too */
- if (duplicate) {
- dp = NULL; /* will be freed in nd6_dad_duplicated() */
- nd6_dad_duplicated(ifa);
- } else {
- /*
- * not sure if I got a duplicate.
- * increment ns count and see what happens.
- */
- if (dp)
- dp->dad_ns_icount++;
- }
+ dp->dad_ns_icount++;
+ nd6_dad_rele(dp);
}
static void
@@ -1564,10 +1556,9 @@ nd6_dad_na_input(struct ifaddr *ifa)
if (ifa == NULL)
panic("ifa == NULL in nd6_dad_na_input");
- dp = nd6_dad_find(ifa);
- if (dp)
+ dp = nd6_dad_find(ifa, NULL);
+ if (dp != NULL) {
dp->dad_na_icount++;
-
- /* remove the address. */
- nd6_dad_duplicated(ifa);
+ nd6_dad_rele(dp);
+ }
}
diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c
index 8d150ae4..c8d7c0ef 100644
--- a/freebsd/sys/netinet6/nd6_rtr.c
+++ b/freebsd/sys/netinet6/nd6_rtr.c
@@ -41,20 +41,24 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <rtems/bsd/sys/errno.h>
+#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
+#include <net/route_var.h>
#include <net/radix.h>
#include <net/vnet.h>
@@ -89,7 +93,7 @@ static void in6_init_address_ltimes(struct nd_prefix *,
static int nd6_prefix_onlink(struct nd_prefix *);
static int nd6_prefix_offlink(struct nd_prefix *);
-static int rt6_deleteroute(struct radix_node *, void *);
+static int rt6_deleteroute(const struct rtentry *, void *);
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
@@ -220,6 +224,8 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
struct nd_defrouter *dr;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ dr = NULL;
+
/*
* We only accept RAs only when the per-interface flag
* ND6_IFF_ACCEPT_RTADV is on the receiving interface.
@@ -272,7 +278,7 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
bzero(&dr0, sizeof(dr0));
dr0.rtaddr = saddr6;
- dr0.flags = nd_ra->nd_ra_flags_reserved;
+ dr0.raflags = nd_ra->nd_ra_flags_reserved;
/*
* Effectively-disable routes from RA messages when
* ND6_IFF_NO_RADR enabled on the receiving interface or
@@ -284,7 +290,7 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
dr0.rtlifetime = 0;
else
dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
- dr0.expire = time_second + dr0.rtlifetime;
+ dr0.expire = time_uptime + dr0.rtlifetime;
dr0.ifp = ifp;
/* unspecified or not? (RFC 2461 6.3.4) */
if (advreachable) {
@@ -369,6 +375,10 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
(void)prelist_update(&pr, dr, m, mcast);
}
}
+ if (dr != NULL) {
+ defrouter_rele(dr);
+ dr = NULL;
+ }
/*
* MTU
@@ -446,10 +456,6 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
m_freem(m);
}
-/*
- * default router list proccessing sub routines
- */
-
/* tell the change to user processes watching the routing socket. */
static void
nd6_rtmsg(int cmd, struct rtentry *rt)
@@ -478,12 +484,15 @@ nd6_rtmsg(int cmd, struct rtentry *rt)
ifa_free(ifa);
}
+/*
+ * default router list processing sub routines
+ */
+
static void
defrouter_addreq(struct nd_defrouter *new)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *newrt = NULL;
- int s;
int error;
bzero(&def, sizeof(def));
@@ -495,7 +504,6 @@ defrouter_addreq(struct nd_defrouter *new)
def.sin6_family = gate.sin6_family = AF_INET6;
gate.sin6_addr = new->rtaddr;
- s = splnet();
error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
(struct sockaddr *)&gate, (struct sockaddr *)&mask,
RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
@@ -505,21 +513,46 @@ defrouter_addreq(struct nd_defrouter *new)
}
if (error == 0)
new->installed = 1;
- splx(s);
- return;
}
struct nd_defrouter *
-defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
{
struct nd_defrouter *dr;
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
+ ND6_LOCK_ASSERT();
+ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
+ if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
+ defrouter_ref(dr);
return (dr);
- }
+ }
+ return (NULL);
+}
+
+struct nd_defrouter *
+defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_RLOCK();
+ dr = defrouter_lookup_locked(addr, ifp);
+ ND6_RUNLOCK();
+ return (dr);
+}
+
+void
+defrouter_ref(struct nd_defrouter *dr)
+{
+
+ refcount_acquire(&dr->refcnt);
+}
+
+void
+defrouter_rele(struct nd_defrouter *dr)
+{
- return (NULL); /* search failed */
+ if (refcount_release(&dr->refcnt))
+ free(dr, M_IP6NDP);
}
/*
@@ -554,15 +587,41 @@ defrouter_delreq(struct nd_defrouter *dr)
}
/*
- * remove all default routes from default router list
+ * Remove all default routes from default router list.
*/
void
defrouter_reset(void)
{
- struct nd_defrouter *dr;
+ struct nd_defrouter *dr, **dra;
+ int count, i;
+
+ count = i = 0;
+ /*
+ * We can't delete routes with the ND lock held, so make a copy of the
+ * current default router list and use that when deleting routes.
+ */
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
- defrouter_delreq(dr);
+ count++;
+ ND6_RUNLOCK();
+
+ dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO);
+
+ ND6_RLOCK();
+ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+ if (i == count)
+ break;
+ defrouter_ref(dr);
+ dra[i++] = dr;
+ }
+ ND6_RUNLOCK();
+
+ for (i = 0; i < count && dra[i] != NULL; i++) {
+ defrouter_delreq(dra[i]);
+ defrouter_rele(dra[i]);
+ }
+ free(dra, M_TEMP);
/*
* XXX should we also nuke any default routers in the kernel, by
@@ -570,12 +629,53 @@ defrouter_reset(void)
*/
}
+/*
+ * Look up a matching default router list entry and remove it. Returns true if a
+ * matching entry was found, false otherwise.
+ */
+bool
+defrouter_remove(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_WLOCK();
+ dr = defrouter_lookup_locked(addr, ifp);
+ if (dr == NULL) {
+ ND6_WUNLOCK();
+ return (false);
+ }
+
+ defrouter_unlink(dr, NULL);
+ ND6_WUNLOCK();
+ defrouter_del(dr);
+ defrouter_rele(dr);
+ return (true);
+}
+
+/*
+ * Remove a router from the global list and optionally stash it in a
+ * caller-supplied queue.
+ *
+ * The ND lock must be held.
+ */
+void
+defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq)
+{
+
+ ND6_WLOCK_ASSERT();
+ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+ if (drq != NULL)
+ TAILQ_INSERT_TAIL(drq, dr, dr_entry);
+}
+
void
-defrtrlist_del(struct nd_defrouter *dr)
+defrouter_del(struct nd_defrouter *dr)
{
struct nd_defrouter *deldr = NULL;
struct nd_prefix *pr;
+ ND6_UNLOCK_ASSERT();
+
/*
* Flush all the routing table entries that use the router
* as a next hop.
@@ -587,7 +687,6 @@ defrtrlist_del(struct nd_defrouter *dr)
deldr = dr;
defrouter_delreq(dr);
}
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
/*
* Also delete all the pointers to the router in each prefix lists.
@@ -607,7 +706,10 @@ defrtrlist_del(struct nd_defrouter *dr)
if (deldr)
defrouter_select();
- free(dr, M_IP6NDP);
+ /*
+ * Release the list reference.
+ */
+ defrouter_rele(dr);
}
/*
@@ -634,16 +736,16 @@ defrtrlist_del(struct nd_defrouter *dr)
void
defrouter_select(void)
{
- int s = splnet();
- struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
+ struct nd_defrouter *dr, *selected_dr, *installed_dr;
struct llentry *ln = NULL;
+ ND6_RLOCK();
/*
* Let's handle easy case (3) first:
* If default router list is empty, there's nothing to be done.
*/
if (TAILQ_EMPTY(&V_nd_defrouter)) {
- splx(s);
+ ND6_RUNLOCK();
return;
}
@@ -652,12 +754,14 @@ defrouter_select(void)
* We just pick up the first reachable one (if any), assuming that
* the ordering rule of the list described in defrtrlist_update().
*/
+ selected_dr = installed_dr = NULL;
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
IF_AFDATA_RLOCK(dr->ifp);
if (selected_dr == NULL &&
(ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln)) {
selected_dr = dr;
+ defrouter_ref(selected_dr);
}
IF_AFDATA_RUNLOCK(dr->ifp);
if (ln != NULL) {
@@ -665,12 +769,15 @@ defrouter_select(void)
ln = NULL;
}
- if (dr->installed && installed_dr == NULL)
- installed_dr = dr;
- else if (dr->installed && installed_dr) {
- /* this should not happen. warn for diagnosis. */
- log(LOG_ERR, "defrouter_select: more than one router"
- " is installed\n");
+ if (dr->installed) {
+ if (installed_dr == NULL) {
+ installed_dr = dr;
+ defrouter_ref(installed_dr);
+ } else {
+ /* this should not happen. warn for diagnosis. */
+ log(LOG_ERR,
+ "defrouter_select: more than one router is installed\n");
+ }
}
}
/*
@@ -682,21 +789,25 @@ defrouter_select(void)
* or when the new one has a really higher preference value.
*/
if (selected_dr == NULL) {
- if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
+ if (installed_dr == NULL ||
+ TAILQ_NEXT(installed_dr, dr_entry) == NULL)
selected_dr = TAILQ_FIRST(&V_nd_defrouter);
else
selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
- } else if (installed_dr) {
+ defrouter_ref(selected_dr);
+ } else if (installed_dr != NULL) {
IF_AFDATA_RLOCK(installed_dr->ifp);
if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln) &&
rtpref(selected_dr) <= rtpref(installed_dr)) {
+ defrouter_rele(selected_dr);
selected_dr = installed_dr;
}
IF_AFDATA_RUNLOCK(installed_dr->ifp);
if (ln != NULL)
LLE_RUNLOCK(ln);
}
+ ND6_RUNLOCK();
/*
* If the selected router is different than the installed one,
@@ -704,13 +815,13 @@ defrouter_select(void)
* Note that the selected router is never NULL here.
*/
if (installed_dr != selected_dr) {
- if (installed_dr)
+ if (installed_dr != NULL) {
defrouter_delreq(installed_dr);
+ defrouter_rele(installed_dr);
+ }
defrouter_addreq(selected_dr);
}
-
- splx(s);
- return;
+ defrouter_rele(selected_dr);
}
/*
@@ -720,7 +831,7 @@ defrouter_select(void)
static int
rtpref(struct nd_defrouter *dr)
{
- switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
+ switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
case ND_RA_FLAG_RTPREF_HIGH:
return (RTPREF_HIGH);
case ND_RA_FLAG_RTPREF_MEDIUM:
@@ -734,7 +845,7 @@ rtpref(struct nd_defrouter *dr)
* serious bug of kernel internal. We thus always bark here.
* Or, can we even panic?
*/
- log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
+ log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
return (RTPREF_INVALID);
}
/* NOTREACHED */
@@ -744,63 +855,50 @@ static struct nd_defrouter *
defrtrlist_update(struct nd_defrouter *new)
{
struct nd_defrouter *dr, *n;
- int s = splnet();
+ int oldpref;
- if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
- /* entry exists */
- if (new->rtlifetime == 0) {
- defrtrlist_del(dr);
- dr = NULL;
- } else {
- int oldpref = rtpref(dr);
+ if (new->rtlifetime == 0) {
+ defrouter_remove(&new->rtaddr, new->ifp);
+ return (NULL);
+ }
- /* override */
- dr->flags = new->flags; /* xxx flag check */
- dr->rtlifetime = new->rtlifetime;
- dr->expire = new->expire;
+ ND6_WLOCK();
+ dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
+ if (dr != NULL) {
+ oldpref = rtpref(dr);
- /*
- * If the preference does not change, there's no need
- * to sort the entries. Also make sure the selected
- * router is still installed in the kernel.
- */
- if (dr->installed && rtpref(new) == oldpref) {
- splx(s);
- return (dr);
- }
+ /* override */
+ dr->raflags = new->raflags; /* XXX flag check */
+ dr->rtlifetime = new->rtlifetime;
+ dr->expire = new->expire;
- /*
- * preferred router may be changed, so relocate
- * this router.
- * XXX: calling TAILQ_REMOVE directly is a bad manner.
- * However, since defrtrlist_del() has many side
- * effects, we intentionally do so here.
- * defrouter_select() below will handle routing
- * changes later.
- */
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
- n = dr;
- goto insert;
+ /*
+ * If the preference does not change, there's no need
+ * to sort the entries. Also make sure the selected
+ * router is still installed in the kernel.
+ */
+ if (dr->installed && rtpref(new) == oldpref) {
+ ND6_WUNLOCK();
+ return (dr);
}
- splx(s);
- return (dr);
- }
-
- /* entry does not exist */
- if (new->rtlifetime == 0) {
- splx(s);
- return (NULL);
- }
- n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
- if (n == NULL) {
- splx(s);
- return (NULL);
+ /*
+ * The preferred router may have changed, so relocate this
+ * router.
+ */
+ TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+ n = dr;
+ } else {
+ n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
+ if (n == NULL) {
+ ND6_WUNLOCK();
+ return (NULL);
+ }
+ memcpy(n, new, sizeof(*n));
+ /* Initialize with an extra reference for the caller. */
+ refcount_init(&n->refcnt, 2);
}
- bzero(n, sizeof(*n));
- *n = *new;
-insert:
/*
* Insert the new router in the Default Router List;
* The Default Router List should be in the descending order
@@ -813,15 +911,14 @@ insert:
if (rtpref(n) > rtpref(dr))
break;
}
- if (dr)
+ if (dr != NULL)
TAILQ_INSERT_BEFORE(dr, n, dr_entry);
else
TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+ ND6_WUNLOCK();
defrouter_select();
- splx(s);
-
return (n);
}
@@ -843,11 +940,11 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
{
struct nd_pfxrouter *new;
- new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+ new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
if (new == NULL)
return;
- bzero(new, sizeof(*new));
new->router = dr;
+ defrouter_ref(dr);
LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
@@ -857,7 +954,9 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
static void
pfxrtr_del(struct nd_pfxrouter *pfr)
{
+
LIST_REMOVE(pfr, pfr_entry);
+ defrouter_rele(pfr->router);
free(pfr, M_IP6NDP);
}
@@ -884,13 +983,11 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
{
struct nd_prefix *new = NULL;
int error = 0;
- int i, s;
char ip6buf[INET6_ADDRSTRLEN];
- new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
+ new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
if (new == NULL)
- return(ENOMEM);
- bzero(new, sizeof(*new));
+ return (ENOMEM);
new->ndpr_ifp = pr->ndpr_ifp;
new->ndpr_prefix = pr->ndpr_prefix;
new->ndpr_plen = pr->ndpr_plen;
@@ -899,24 +996,18 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
new->ndpr_flags = pr->ndpr_flags;
if ((error = in6_init_prefix_ltimes(new)) != 0) {
free(new, M_IP6NDP);
- return(error);
+ return (error);
}
- new->ndpr_lastupdate = time_second;
- if (newp != NULL)
- *newp = new;
+ new->ndpr_lastupdate = time_uptime;
/* initialization */
LIST_INIT(&new->ndpr_advrtrs);
in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
/* make prefix in the canonical form */
- for (i = 0; i < 4; i++)
- new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
- new->ndpr_mask.s6_addr32[i];
+ IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask);
- s = splnet();
/* link ndpr_entry to nd_prefix list */
LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
- splx(s);
/* ND_OPT_PI_FLAG_ONLINK processing */
if (new->ndpr_raf_onlink) {
@@ -931,17 +1022,18 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
}
}
- if (dr)
+ if (dr != NULL)
pfxrtr_add(new, dr);
-
- return 0;
+ if (newp != NULL)
+ *newp = new;
+ return (0);
}
void
prelist_remove(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfr, *next;
- int e, s;
+ int e;
char ip6buf[INET6_ADDRSTRLEN];
/* make sure to invalidate the prefix until it is really freed. */
@@ -966,17 +1058,13 @@ prelist_remove(struct nd_prefix *pr)
if (pr->ndpr_refcnt > 0)
return; /* notice here? */
- s = splnet();
-
/* unlink ndpr_entry from nd_prefix list */
LIST_REMOVE(pr, ndpr_entry);
- /* free list of routers that adversed the prefix */
+ /* free list of routers that advertised the prefix */
LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
- free(pfr, M_IP6NDP);
+ pfxrtr_del(pfr);
}
- splx(s);
-
free(pr, M_IP6NDP);
pfxlist_onlink_check();
@@ -994,9 +1082,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
struct ifaddr *ifa;
struct ifnet *ifp = new->ndpr_ifp;
struct nd_prefix *pr;
- int s = splnet();
int error = 0;
- int newprefix = 0;
int auth;
struct in6_addrlifetime lt6_tmp;
char ip6buf[INET6_ADDRSTRLEN];
@@ -1032,7 +1118,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
pr->ndpr_vltime = new->ndpr_vltime;
pr->ndpr_pltime = new->ndpr_pltime;
(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
- pr->ndpr_lastupdate = time_second;
+ pr->ndpr_lastupdate = time_uptime;
}
if (new->ndpr_raf_onlink &&
@@ -1054,23 +1140,17 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
if (dr && pfxrtr_lookup(pr, dr) == NULL)
pfxrtr_add(pr, dr);
} else {
- struct nd_prefix *newpr = NULL;
-
- newprefix = 1;
-
if (new->ndpr_vltime == 0)
goto end;
if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
goto end;
- error = nd6_prelist_add(new, dr, &newpr);
- if (error != 0 || newpr == NULL) {
+ error = nd6_prelist_add(new, dr, &pr);
+ if (error != 0) {
nd6log((LOG_NOTICE, "prelist_update: "
- "nd6_prelist_add failed for %s/%d on %s "
- "errno=%d, returnpr=%p\n",
+ "nd6_prelist_add failed for %s/%d on %s errno=%d\n",
ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
- new->ndpr_plen, if_name(new->ndpr_ifp),
- error, newpr));
+ new->ndpr_plen, if_name(new->ndpr_ifp), error));
goto end; /* we should just give up in this case. */
}
@@ -1081,13 +1161,11 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
* addresses. Thus, we explicitly make sure that the prefix
* itself expires now.
*/
- if (newpr->ndpr_raf_onlink == 0) {
- newpr->ndpr_vltime = 0;
- newpr->ndpr_pltime = 0;
- in6_init_prefix_ltimes(newpr);
+ if (pr->ndpr_raf_onlink == 0) {
+ pr->ndpr_vltime = 0;
+ pr->ndpr_pltime = 0;
+ in6_init_prefix_ltimes(pr);
}
-
- pr = newpr;
}
/*
@@ -1170,7 +1248,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
remaininglifetime = ND6_INFINITE_LIFETIME;
- else if (time_second - ifa6->ia6_updatetime >
+ else if (time_uptime - ifa6->ia6_updatetime >
lt6_tmp.ia6t_vltime) {
/*
* The case of "invalid" address. We should usually
@@ -1179,7 +1257,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
remaininglifetime = 0;
} else
remaininglifetime = lt6_tmp.ia6t_vltime -
- (time_second - ifa6->ia6_updatetime);
+ (time_uptime - ifa6->ia6_updatetime);
/* when not updating, keep the current stored lifetime. */
lt6_tmp.ia6t_vltime = remaininglifetime;
@@ -1215,18 +1293,18 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
u_int32_t maxvltime, maxpltime;
if (V_ip6_temp_valid_lifetime >
- (u_int32_t)((time_second - ifa6->ia6_createtime) +
+ (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxvltime = V_ip6_temp_valid_lifetime -
- (time_second - ifa6->ia6_createtime) -
+ (time_uptime - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxvltime = 0;
if (V_ip6_temp_preferred_lifetime >
- (u_int32_t)((time_second - ifa6->ia6_createtime) +
+ (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxpltime = V_ip6_temp_preferred_lifetime -
- (time_second - ifa6->ia6_createtime) -
+ (time_uptime - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxpltime = 0;
@@ -1241,7 +1319,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
}
}
ifa6->ia6_lifetime = lt6_tmp;
- ifa6->ia6_updatetime = time_second;
+ ifa6->ia6_updatetime = time_uptime;
}
IF_ADDR_RUNLOCK(ifp);
if (ia6_match == NULL && new->ndpr_vltime) {
@@ -1319,7 +1397,6 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
}
end:
- splx(s);
return error;
}
@@ -1363,12 +1440,13 @@ find_pfxlist_reachable_router(struct nd_prefix *pr)
* is no router around us.
*/
void
-pfxlist_onlink_check()
+pfxlist_onlink_check(void)
{
struct nd_prefix *pr;
struct in6_ifaddr *ifa;
struct nd_defrouter *dr;
struct nd_pfxrouter *pfxrtr = NULL;
+ struct rm_priotracker in6_ifa_tracker;
/*
* Check if there is a prefix that has a reachable advertising
@@ -1384,6 +1462,7 @@ pfxlist_onlink_check()
* that does not advertise any prefixes.
*/
if (pr == NULL) {
+ ND6_RLOCK();
TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
struct nd_prefix *pr0;
@@ -1394,6 +1473,7 @@ pfxlist_onlink_check()
if (pfxrtr != NULL)
break;
}
+ ND6_RUNLOCK();
}
if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
/*
@@ -1424,7 +1504,7 @@ pfxlist_onlink_check()
find_pfxlist_reachable_router(pr) == NULL)
pr->ndpr_stateflags |= NDPRF_DETACHED;
if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
- find_pfxlist_reachable_router(pr) != 0)
+ find_pfxlist_reachable_router(pr) != NULL)
pr->ndpr_stateflags &= ~NDPRF_DETACHED;
}
} else {
@@ -1497,9 +1577,8 @@ pfxlist_onlink_check()
* detached. Note, however, that a manually configured address should
* always be attached.
* The precise detection logic is same as the one for prefixes.
- *
- * XXXRW: in6_ifaddrhead locking.
*/
+ IN6_IFADDR_RLOCK(&in6_ifa_tracker);
TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
continue;
@@ -1534,8 +1613,7 @@ pfxlist_onlink_check()
ifa->ia6_flags |= IN6_IFF_DETACHED;
}
}
- }
- else {
+ } else {
TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
continue;
@@ -1548,13 +1626,14 @@ pfxlist_onlink_check()
}
}
}
+ IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
}
static int
nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
{
static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
- struct radix_node_head *rnh;
+ struct rib_head *rnh;
struct rtentry *rt;
struct sockaddr_in6 mask6;
u_long rtflags;
@@ -1583,7 +1662,7 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
/* XXX what if rhn == NULL? */
- RADIX_NODE_HEAD_LOCK(rnh);
+ RIB_WLOCK(rnh);
RT_LOCK(rt);
if (rt_setgate(rt, rt_key(rt),
(struct sockaddr *)&null_sdl) == 0) {
@@ -1593,7 +1672,7 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
dl->sdl_type = rt->rt_ifp->if_type;
dl->sdl_index = rt->rt_ifp->if_index;
}
- RADIX_NODE_HEAD_UNLOCK(rnh);
+ RIB_WUNLOCK(rnh);
nd6_rtmsg(RTM_ADD, rt);
RT_UNLOCK(rt);
pr->ndpr_stateflags |= NDPRF_ONLINK;
@@ -1755,6 +1834,7 @@ nd6_prefix_offlink(struct nd_prefix *pr)
}
}
error = a_failure;
+ a_failure = 1;
if (error == 0) {
pr->ndpr_stateflags &= ~NDPRF_ONLINK;
@@ -1793,7 +1873,8 @@ nd6_prefix_offlink(struct nd_prefix *pr)
&opr->ndpr_prefix.sin6_addr),
opr->ndpr_plen, if_name(ifp),
if_name(opr->ndpr_ifp), e));
- }
+ } else
+ a_failure = 0;
}
}
} else {
@@ -1805,6 +1886,10 @@ nd6_prefix_offlink(struct nd_prefix *pr)
if_name(ifp), error));
}
+ if (a_failure)
+ lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
+ (struct sockaddr *)&mask6, LLE_STATIC);
+
return (error);
}
@@ -1860,22 +1945,9 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
}
/* make ifaddr */
+ in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask);
- bzero(&ifra, sizeof(ifra));
- /*
- * in6_update_ifa() does not use ifra_name, but we accurately set it
- * for safety.
- */
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
- ifra.ifra_addr.sin6_family = AF_INET6;
- ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
- /* prefix */
- ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
- ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
- ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
- ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
- ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
-
+ IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
/* interface ID */
ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
(ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
@@ -1887,12 +1959,6 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast)
(ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
ifa_free(ifa);
- /* new prefix mask. */
- ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
- ifra.ifra_prefixmask.sin6_family = AF_INET6;
- bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
- sizeof(ifra.ifra_prefixmask.sin6_addr));
-
/* lifetimes. */
ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
@@ -1949,24 +2015,21 @@ int
in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
- struct in6_ifaddr *newia, *ia;
+ struct in6_ifaddr *newia;
struct in6_aliasreq ifra;
- int i, error;
+ int error;
int trylimit = 3; /* XXX: adhoc value */
int updateflags;
u_int32_t randid[2];
time_t vltime0, pltime0;
- bzero(&ifra, sizeof(ifra));
- strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
- ifra.ifra_addr = ia0->ia_addr;
- /* copy prefix mask */
- ifra.ifra_prefixmask = ia0->ia_prefixmask;
+ in6_prepare_ifra(&ifra, &ia0->ia_addr.sin6_addr,
+ &ia0->ia_prefixmask.sin6_addr);
+
+ ifra.ifra_addr = ia0->ia_addr; /* XXX: do we need this ? */
/* clear the old IFID */
- for (i = 0; i < 4; i++) {
- ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
- ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
- }
+ IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr,
+ &ifra.ifra_prefixmask.sin6_addr);
again:
if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
@@ -1986,26 +2049,18 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
* there may be a time lag between generation of the ID and generation
* of the address. So, we'll do one more sanity check.
*/
- IN6_IFADDR_RLOCK();
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
- &ifra.ifra_addr.sin6_addr)) {
- if (trylimit-- == 0) {
- IN6_IFADDR_RUNLOCK();
- /*
- * Give up. Something strange should have
- * happened.
- */
- nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
- "find a unique random IFID\n"));
- return (EEXIST);
- }
- IN6_IFADDR_RUNLOCK();
+
+ if (in6_localip(&ifra.ifra_addr.sin6_addr) != 0) {
+ if (trylimit-- > 0) {
forcegen = 1;
goto again;
}
+
+ /* Give up. Something strange should have happened. */
+ nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
+ "find a unique random IFID\n"));
+ return (EEXIST);
}
- IN6_IFADDR_RUNLOCK();
/*
* The Valid Lifetime is the lower of the Valid Lifetime of the
@@ -2017,7 +2072,7 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_vltime -
- (time_second - ia0->ia6_updatetime));
+ (time_uptime - ia0->ia6_updatetime));
if (vltime0 > V_ip6_temp_valid_lifetime)
vltime0 = V_ip6_temp_valid_lifetime;
} else
@@ -2025,7 +2080,7 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_pltime -
- (time_second - ia0->ia6_updatetime));
+ (time_uptime - ia0->ia6_updatetime));
if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
pltime0 = V_ip6_temp_preferred_lifetime -
V_ip6_desync_factor;
@@ -2083,11 +2138,11 @@ in6_init_prefix_ltimes(struct nd_prefix *ndpr)
if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
ndpr->ndpr_preferred = 0;
else
- ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime;
+ ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
ndpr->ndpr_expire = 0;
else
- ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime;
+ ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
return 0;
}
@@ -2099,7 +2154,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
lt6->ia6t_expire = 0;
else {
- lt6->ia6t_expire = time_second;
+ lt6->ia6t_expire = time_uptime;
lt6->ia6t_expire += lt6->ia6t_vltime;
}
@@ -2107,7 +2162,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
lt6->ia6t_preferred = 0;
else {
- lt6->ia6t_preferred = time_second;
+ lt6->ia6t_preferred = time_uptime;
lt6->ia6t_preferred += lt6->ia6t_pltime;
}
}
@@ -2120,34 +2175,19 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
void
rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
- struct radix_node_head *rnh;
- u_int fibnum;
- int s = splnet();
/* We'll care only link-local addresses */
- if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
- splx(s);
+ if (!IN6_IS_ADDR_LINKLOCAL(gateway))
return;
- }
/* XXX Do we really need to walk any but the default FIB? */
- for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
- rnh = rt_tables_get_rnh(fibnum, AF_INET6);
- if (rnh == NULL)
- continue;
-
- RADIX_NODE_HEAD_LOCK(rnh);
- rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
- RADIX_NODE_HEAD_UNLOCK(rnh);
- }
- splx(s);
+ rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
}
static int
-rt6_deleteroute(struct radix_node *rn, void *arg)
+rt6_deleteroute(const struct rtentry *rt, void *arg)
{
#define SIN6(s) ((struct sockaddr_in6 *)s)
- struct rtentry *rt = (struct rtentry *)rn;
struct in6_addr *gate = (struct in6_addr *)arg;
if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
@@ -2172,8 +2212,7 @@ rt6_deleteroute(struct radix_node *rn, void *arg)
if ((rt->rt_flags & RTF_HOST) == 0)
return (0);
- return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
- rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum));
+ return (1);
#undef SIN6
}
diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h
index 060836ba..7f9262bb 100644
--- a/freebsd/sys/netinet6/pim6_var.h
+++ b/freebsd/sys/netinet6/pim6_var.h
@@ -42,13 +42,13 @@
#define _NETINET6_PIM6_VAR_H_
struct pim6stat {
- u_quad_t pim6s_rcv_total; /* total PIM messages received */
- u_quad_t pim6s_rcv_tooshort; /* received with too few bytes */
- u_quad_t pim6s_rcv_badsum; /* received with bad checksum */
- u_quad_t pim6s_rcv_badversion; /* received bad PIM version */
- u_quad_t pim6s_rcv_registers; /* received registers */
- u_quad_t pim6s_rcv_badregisters; /* received invalid registers */
- u_quad_t pim6s_snd_registers; /* sent registers */
+ uint64_t pim6s_rcv_total; /* total PIM messages received */
+ uint64_t pim6s_rcv_tooshort; /* received with too few bytes */
+ uint64_t pim6s_rcv_badsum; /* received with bad checksum */
+ uint64_t pim6s_rcv_badversion; /* received bad PIM version */
+ uint64_t pim6s_rcv_registers; /* received registers */
+ uint64_t pim6s_rcv_badregisters; /* received invalid registers */
+ uint64_t pim6s_snd_registers; /* sent registers */
};
#if (defined(KERNEL)) || (defined(_KERNEL))
@@ -56,13 +56,8 @@ int pim6_input(struct mbuf **, int*, int);
#endif /* KERNEL */
/*
- * Names for PIM sysctl objects
+ * Identifiers for PIM sysctl nodes
*/
#define PIM6CTL_STATS 1 /* statistics (read-only) */
-#define PIM6CTL_MAXID 2
-#define PIM6CTL_NAMES { \
- { 0, 0 }, \
- { 0, 0 }, \
-}
#endif /* _NETINET6_PIM6_VAR_H_ */
diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c
index e2d6693a..dfd7c45b 100644
--- a/freebsd/sys/netinet6/raw_ip6.c
+++ b/freebsd/sys/netinet6/raw_ip6.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/sys/param.h>
#include <rtems/bsd/sys/errno.h>
#include <sys/jail.h>
+#include <sys/kernel.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -83,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>
@@ -126,7 +128,12 @@ VNET_DECLARE(struct inpcbinfo, ripcbinfo);
extern u_long rip_sendspace;
extern u_long rip_recvspace;
-VNET_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat);
+VNET_PCPUSTAT_SYSINIT(rip6stat);
+
+#ifdef VIMAGE
+VNET_PCPUSTAT_SYSUNINIT(rip6stat);
+#endif /* VIMAGE */
/*
* Hooks for multicast routing. They all default to NULL, so leave them not
@@ -158,18 +165,12 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct mbuf *m = *mp;
register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
register struct inpcb *in6p;
- struct inpcb *last = 0;
+ struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
RIP6STAT_INC(rip6s_ipackets);
- if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
- /* XXX Send icmp6 host/port unreach? */
- m_freem(m);
- return (IPPROTO_DONE);
- }
-
init_sin6(&fromsa, m); /* general init */
ifp = m->m_pkthdr.rcvif;
@@ -265,7 +266,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
*/
if (n && ipsec6_in_reject(n, last)) {
m_freem(n);
- IPSEC6STAT_INC(in_polvio);
/* Do not inject data into pcb. */
} else
#endif /* IPSEC */
@@ -297,7 +297,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
*/
if ((last != NULL) && ipsec6_in_reject(m, last)) {
m_freem(m);
- IPSEC6STAT_INC(in_polvio);
IP6STAT_DEC(ip6s_delivered);
/* Do not inject data into pcb. */
INP_RUNLOCK(last);
@@ -385,17 +384,10 @@ rip6_ctlinput(int cmd, struct sockaddr *sa, void *d)
* may have setup with control call.
*/
int
-#if __STDC__
-rip6_output(struct mbuf *m, ...)
-#else
-rip6_output(m, va_alist)
- struct mbuf *m;
- va_dcl
-#endif
+rip6_output(struct mbuf *m, struct socket *so, ...)
{
struct mbuf *control;
struct m_tag *mtag;
- struct socket *so;
struct sockaddr_in6 *dstsock;
struct in6_addr *dst;
struct ip6_hdr *ip6;
@@ -407,11 +399,11 @@ rip6_output(m, va_alist)
int type = 0, code = 0; /* for ICMPv6 output statistics only */
int scope_ambiguous = 0;
int use_defzone = 0;
+ int hlim = 0;
struct in6_addr in6a;
va_list ap;
- va_start(ap, m);
- so = va_arg(ap, struct socket *);
+ va_start(ap, so);
dstsock = va_arg(ap, struct sockaddr_in6 *);
control = va_arg(ap, struct mbuf *);
va_end(ap);
@@ -461,7 +453,7 @@ rip6_output(m, va_alist)
code = icmp6->icmp6_code;
}
- M_PREPEND(m, sizeof(*ip6), M_DONTWAIT);
+ M_PREPEND(m, sizeof(*ip6), M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
goto bad;
@@ -471,8 +463,9 @@ rip6_output(m, va_alist)
/*
* Source address selection.
*/
- error = in6_selectsrc(dstsock, optp, in6p, NULL, so->so_cred,
- &oifp, &in6a);
+ error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred,
+ scope_ambiguous, &in6a, &hlim);
+
if (error)
goto bad;
error = prison_check_ip6(in6p->inp_cred, &in6a);
@@ -480,19 +473,6 @@ rip6_output(m, va_alist)
goto bad;
ip6->ip6_src = in6a;
- if (oifp && scope_ambiguous) {
- /*
- * Application should provide a proper zone ID or the use of
- * default zone IDs should be enabled. Unfortunately, some
- * applications do not behave as it should, so we need a
- * workaround. Even if an appropriate ID is not determined
- * (when it's required), if we can determine the outgoing
- * interface. determine the zone ID based on the interface.
- */
- error = in6_setscope(&dstsock->sin6_addr, oifp, NULL);
- if (error != 0)
- goto bad;
- }
ip6->ip6_dst = dstsock->sin6_addr;
/*
@@ -507,7 +487,7 @@ rip6_output(m, va_alist)
* ip6_plen will be filled in ip6_output, so not fill it here.
*/
ip6->ip6_nxt = in6p->inp_ip_p;
- ip6->ip6_hlim = in6_selecthlim(in6p, oifp);
+ ip6->ip6_hlim = hlim;
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
in6p->in6p_cksum != -1) {
@@ -795,7 +775,6 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
struct inpcb *inp;
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
struct in6_addr in6a;
- struct ifnet *ifp = NULL;
int error = 0, scope_ambiguous = 0;
inp = sotoinpcb(so);
@@ -824,21 +803,14 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
INP_INFO_WLOCK(&V_ripcbinfo);
INP_WLOCK(inp);
/* Source address selection. XXX: need pcblookup? */
- error = in6_selectsrc(addr, inp->in6p_outputopts,
- inp, NULL, so->so_cred, &ifp, &in6a);
+ error = in6_selectsrc_socket(addr, inp->in6p_outputopts,
+ inp, so->so_cred, scope_ambiguous, &in6a, NULL);
if (error) {
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_ripcbinfo);
return (error);
}
- /* XXX: see above */
- if (ifp && scope_ambiguous &&
- (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) {
- INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_ripcbinfo);
- return (error);
- }
inp->in6p_faddr = addr->sin6_addr;
inp->in6p_laddr = in6a;
soisconnected(so);
diff --git a/freebsd/sys/netinet6/raw_ip6.h b/freebsd/sys/netinet6/raw_ip6.h
index cc4bcdd0..5eec5fff 100644
--- a/freebsd/sys/netinet6/raw_ip6.h
+++ b/freebsd/sys/netinet6/raw_ip6.h
@@ -37,21 +37,23 @@
* ICMPv6 stat is counted separately. see netinet/icmp6.h
*/
struct rip6stat {
- u_quad_t rip6s_ipackets; /* total input packets */
- u_quad_t rip6s_isum; /* input checksum computations */
- u_quad_t rip6s_badsum; /* of above, checksum error */
- u_quad_t rip6s_nosock; /* no matching socket */
- u_quad_t rip6s_nosockmcast; /* of above, arrived as multicast */
- u_quad_t rip6s_fullsock; /* not delivered, input socket full */
+ uint64_t rip6s_ipackets; /* total input packets */
+ uint64_t rip6s_isum; /* input checksum computations */
+ uint64_t rip6s_badsum; /* of above, checksum error */
+ uint64_t rip6s_nosock; /* no matching socket */
+ uint64_t rip6s_nosockmcast; /* of above, arrived as multicast */
+ uint64_t rip6s_fullsock; /* not delivered, input socket full */
- u_quad_t rip6s_opackets; /* total output packets */
+ uint64_t rip6s_opackets; /* total output packets */
};
#ifdef _KERNEL
-#define RIP6STAT_ADD(name, val) V_rip6stat.name += (val)
+#include <sys/counter.h>
+
+VNET_PCPUSTAT_DECLARE(struct rip6stat, rip6stat);
+#define RIP6STAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct rip6stat, rip6stat, name, (val))
#define RIP6STAT_INC(name) RIP6STAT_ADD(name, 1)
-VNET_DECLARE(struct rip6stat, rip6stat);
-#define V_rip6stat VNET(rip6stat)
-#endif
+#endif /* _KERNEL */
#endif
diff --git a/freebsd/sys/netinet6/route6.c b/freebsd/sys/netinet6/route6.c
index 90738461..d698d328 100644
--- a/freebsd/sys/netinet6/route6.c
+++ b/freebsd/sys/netinet6/route6.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/queue.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet6/in6_var.h>
diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c
index 2ccd2f7a..0f8ead2d 100644
--- a/freebsd/sys/netinet6/scope6.c
+++ b/freebsd/sys/netinet6/scope6.c
@@ -41,9 +41,11 @@ __FBSDID("$FreeBSD$");
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/queue.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -58,6 +60,11 @@ VNET_DEFINE(int, ip6_use_defzone) = 1;
#else
VNET_DEFINE(int, ip6_use_defzone) = 0;
#endif
+VNET_DEFINE(int, deembed_scopeid) = 1;
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, deembed_scopeid, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(deembed_scopeid), 0,
+ "Extract embedded zone ID and set it to sin6_scope_id in sockaddr_in6.");
/*
* The scope6_lock protects the global sid default stored in
@@ -95,22 +102,14 @@ scope6_ifattach(struct ifnet *ifp)
{
struct scope6_id *sid;
- sid = (struct scope6_id *)malloc(sizeof(*sid), M_IFADDR, M_WAITOK);
- bzero(sid, sizeof(*sid));
-
+ sid = malloc(sizeof(*sid), M_IFADDR, M_WAITOK | M_ZERO);
/*
* XXX: IPV6_ADDR_SCOPE_xxx macros are not standard.
* Should we rather hardcode here?
*/
sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index;
sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index;
-#ifdef MULTI_SCOPE
- /* by default, we don't care about scope boundary for these scopes. */
- sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL] = 1;
- sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1;
-#endif
-
- return sid;
+ return (sid);
}
void
@@ -230,62 +229,24 @@ scope6_get(struct ifnet *ifp, struct scope6_id *idlist)
* Get a scope of the address. Node-local, link-local, site-local or global.
*/
int
-in6_addrscope(struct in6_addr *addr)
+in6_addrscope(const struct in6_addr *addr)
{
- int scope;
-
- if (addr->s6_addr[0] == 0xfe) {
- scope = addr->s6_addr[1] & 0xc0;
-
- switch (scope) {
- case 0x80:
- return IPV6_ADDR_SCOPE_LINKLOCAL;
- break;
- case 0xc0:
- return IPV6_ADDR_SCOPE_SITELOCAL;
- break;
- default:
- return IPV6_ADDR_SCOPE_GLOBAL; /* just in case */
- break;
- }
- }
-
-
- if (addr->s6_addr[0] == 0xff) {
- scope = addr->s6_addr[1] & 0x0f;
+ if (IN6_IS_ADDR_MULTICAST(addr)) {
/*
- * due to other scope such as reserved,
- * return scope doesn't work.
+ * Addresses with reserved value F must be treated as
+ * global multicast addresses.
*/
- switch (scope) {
- case IPV6_ADDR_SCOPE_INTFACELOCAL:
- return IPV6_ADDR_SCOPE_INTFACELOCAL;
- break;
- case IPV6_ADDR_SCOPE_LINKLOCAL:
- return IPV6_ADDR_SCOPE_LINKLOCAL;
- break;
- case IPV6_ADDR_SCOPE_SITELOCAL:
- return IPV6_ADDR_SCOPE_SITELOCAL;
- break;
- default:
- return IPV6_ADDR_SCOPE_GLOBAL;
- break;
- }
+ if (IPV6_ADDR_MC_SCOPE(addr) == 0x0f)
+ return (IPV6_ADDR_SCOPE_GLOBAL);
+ return (IPV6_ADDR_MC_SCOPE(addr));
}
-
- /*
- * Regard loopback and unspecified addresses as global, since
- * they have no ambiguity.
- */
- if (bcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) {
- if (addr->s6_addr[15] == 1) /* loopback */
- return IPV6_ADDR_SCOPE_LINKLOCAL;
- if (addr->s6_addr[15] == 0) /* unspecified */
- return IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */
- }
-
- return IPV6_ADDR_SCOPE_GLOBAL;
+ if (IN6_IS_ADDR_LINKLOCAL(addr) ||
+ IN6_IS_ADDR_LOOPBACK(addr))
+ return (IPV6_ADDR_SCOPE_LINKLOCAL);
+ if (IN6_IS_ADDR_SITELOCAL(addr))
+ return (IPV6_ADDR_SCOPE_SITELOCAL);
+ return (IPV6_ADDR_SCOPE_GLOBAL);
}
/*
@@ -359,7 +320,6 @@ scope6_addr2default(struct in6_addr *addr)
int
sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
{
- struct ifnet *ifp;
u_int32_t zoneid;
if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok)
@@ -374,15 +334,11 @@ sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
* zone IDs assuming a one-to-one mapping between interfaces
* and links.
*/
- if (V_if_index < zoneid)
- return (ENXIO);
- ifp = ifnet_byindex(zoneid);
- if (ifp == NULL) /* XXX: this can happen for some OS */
+ if (V_if_index < zoneid || ifnet_byindex(zoneid) == NULL)
return (ENXIO);
/* XXX assignment to 16bit from 32bit variable */
sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff);
-
sin6->sin6_scope_id = 0;
}
@@ -398,12 +354,6 @@ sa6_recoverscope(struct sockaddr_in6 *sin6)
char ip6buf[INET6_ADDRSTRLEN];
u_int32_t zoneid;
- if (sin6->sin6_scope_id != 0) {
- log(LOG_NOTICE,
- "sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n",
- ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id);
- /* XXX: proceed anyway... */
- }
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) {
/*
@@ -414,8 +364,19 @@ sa6_recoverscope(struct sockaddr_in6 *sin6)
/* sanity check */
if (V_if_index < zoneid)
return (ENXIO);
+#if 0
+ /* XXX: Disabled due to possible deadlock. */
if (!ifnet_byindex(zoneid))
return (ENXIO);
+#endif
+ if (sin6->sin6_scope_id != 0 &&
+ zoneid != sin6->sin6_scope_id) {
+ log(LOG_NOTICE,
+ "%s: embedded scope mismatch: %s%%%d. "
+ "sin6_scope_id was overridden\n", __func__,
+ ip6_sprintf(ip6buf, &sin6->sin6_addr),
+ sin6->sin6_scope_id);
+ }
sin6->sin6_addr.s6_addr16[1] = 0;
sin6->sin6_scope_id = zoneid;
}
@@ -438,63 +399,35 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
u_int32_t zoneid = 0;
struct scope6_id *sid;
- IF_AFDATA_RLOCK(ifp);
-
- sid = SID(ifp);
-
-#ifdef DIAGNOSTIC
- if (sid == NULL) { /* should not happen */
- panic("in6_setscope: scope array is NULL");
- /* NOTREACHED */
- }
-#endif
-
/*
* special case: the loopback address can only belong to a loopback
* interface.
*/
if (IN6_IS_ADDR_LOOPBACK(in6)) {
- if (!(ifp->if_flags & IFF_LOOPBACK)) {
- IF_AFDATA_RUNLOCK(ifp);
+ if (!(ifp->if_flags & IFF_LOOPBACK))
return (EINVAL);
- } else {
- if (ret_id != NULL)
- *ret_id = 0; /* there's no ambiguity */
+ } else {
+ scope = in6_addrscope(in6);
+ if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL ||
+ scope == IPV6_ADDR_SCOPE_LINKLOCAL) {
+ /*
+ * Currently we use interface indeces as the
+ * zone IDs for interface-local and link-local
+ * scopes.
+ */
+ zoneid = ifp->if_index;
+ in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
+ } else if (scope != IPV6_ADDR_SCOPE_GLOBAL) {
+ IF_AFDATA_RLOCK(ifp);
+ sid = SID(ifp);
+ zoneid = sid->s6id_list[scope];
IF_AFDATA_RUNLOCK(ifp);
- return (0);
}
}
- scope = in6_addrscope(in6);
- switch (scope) {
- case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_LINKLOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_SITELOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL];
- break;
-
- case IPV6_ADDR_SCOPE_ORGLOCAL:
- zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL];
- break;
-
- default:
- zoneid = 0; /* XXX: treat as global. */
- break;
- }
- IF_AFDATA_RUNLOCK(ifp);
-
if (ret_id != NULL)
*ret_id = zoneid;
- if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6))
- in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
-
return (0);
}
@@ -528,3 +461,114 @@ in6_getscope(struct in6_addr *in6)
return (0);
}
+
+/*
+ * Return pointer to ifnet structure, corresponding to the zone id of
+ * link-local scope.
+ */
+struct ifnet*
+in6_getlinkifnet(uint32_t zoneid)
+{
+
+ return (ifnet_byindex((u_short)zoneid));
+}
+
+/*
+ * Return zone id for the specified scope.
+ */
+uint32_t
+in6_getscopezone(const struct ifnet *ifp, int scope)
+{
+
+ if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL ||
+ scope == IPV6_ADDR_SCOPE_LINKLOCAL)
+ return (ifp->if_index);
+ if (scope >= 0 && scope < IPV6_ADDR_SCOPES_COUNT)
+ return (SID(ifp)->s6id_list[scope]);
+ return (0);
+}
+
+/*
+ * Extracts scope from adddress @dst, stores cleared address
+ * inside @dst and zone inside @scopeid
+ */
+void
+in6_splitscope(const struct in6_addr *src, struct in6_addr *dst,
+ uint32_t *scopeid)
+{
+ uint32_t zoneid;
+
+ *dst = *src;
+ zoneid = ntohs(in6_getscope(dst));
+ in6_clearscope(dst);
+ *scopeid = zoneid;
+}
+
+/*
+ * This function is for checking sockaddr_in6 structure passed
+ * from the application level (usually).
+ *
+ * sin6_scope_id should be set for link-local unicast, link-local and
+ * interface-local multicast addresses.
+ *
+ * If it is zero, then look into default zone ids. If default zone id is
+ * not set or disabled, then return error.
+ */
+int
+sa6_checkzone(struct sockaddr_in6 *sa6)
+{
+ int scope;
+
+ scope = in6_addrscope(&sa6->sin6_addr);
+ if (scope == IPV6_ADDR_SCOPE_GLOBAL)
+ return (sa6->sin6_scope_id ? EINVAL: 0);
+ if (IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr) &&
+ scope != IPV6_ADDR_SCOPE_LINKLOCAL &&
+ scope != IPV6_ADDR_SCOPE_INTFACELOCAL) {
+ if (sa6->sin6_scope_id == 0 && V_ip6_use_defzone != 0)
+ sa6->sin6_scope_id = V_sid_default.s6id_list[scope];
+ return (0);
+ }
+ /*
+ * Since ::1 address always configured on the lo0, we can
+ * automatically set its zone id, when it is not specified.
+ * Return error, when specified zone id doesn't match with
+ * actual value.
+ */
+ if (IN6_IS_ADDR_LOOPBACK(&sa6->sin6_addr)) {
+ if (sa6->sin6_scope_id == 0)
+ sa6->sin6_scope_id = in6_getscopezone(V_loif, scope);
+ else if (sa6->sin6_scope_id != in6_getscopezone(V_loif, scope))
+ return (EADDRNOTAVAIL);
+ }
+ /* XXX: we can validate sin6_scope_id here */
+ if (sa6->sin6_scope_id != 0)
+ return (0);
+ if (V_ip6_use_defzone != 0)
+ sa6->sin6_scope_id = V_sid_default.s6id_list[scope];
+ /* Return error if we can't determine zone id */
+ return (sa6->sin6_scope_id ? 0: EADDRNOTAVAIL);
+}
+
+/*
+ * This function is similar to sa6_checkzone, but it uses given ifp
+ * to initialize sin6_scope_id.
+ */
+int
+sa6_checkzone_ifp(struct ifnet *ifp, struct sockaddr_in6 *sa6)
+{
+ int scope;
+
+ scope = in6_addrscope(&sa6->sin6_addr);
+ if (scope == IPV6_ADDR_SCOPE_LINKLOCAL ||
+ scope == IPV6_ADDR_SCOPE_INTFACELOCAL) {
+ if (sa6->sin6_scope_id == 0) {
+ sa6->sin6_scope_id = in6_getscopezone(ifp, scope);
+ return (0);
+ } else if (sa6->sin6_scope_id != in6_getscopezone(ifp, scope))
+ return (EADDRNOTAVAIL);
+ }
+ return (sa6_checkzone(sa6));
+}
+
+
diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h
index 990325e9..e38d77a9 100644
--- a/freebsd/sys/netinet6/scope6_var.h
+++ b/freebsd/sys/netinet6/scope6_var.h
@@ -34,14 +34,20 @@
#define _NETINET6_SCOPE6_VAR_H_
#ifdef _KERNEL
+#include <net/vnet.h>
+
+#define IPV6_ADDR_SCOPES_COUNT 16
struct scope6_id {
/*
* 16 is correspondent to 4bit multicast scope field.
* i.e. from node-local to global with some reserved/unassigned types.
*/
- u_int32_t s6id_list[16];
+ uint32_t s6id_list[IPV6_ADDR_SCOPES_COUNT];
};
+VNET_DECLARE(int, deembed_scopeid);
+#define V_deembed_scopeid VNET(deembed_scopeid)
+
void scope6_init(void);
struct scope6_id *scope6_ifattach(struct ifnet *);
void scope6_ifdetach(struct scope6_id *);
@@ -51,9 +57,14 @@ int scope6_get_default(struct scope6_id *);
u_int32_t scope6_addr2default(struct in6_addr *);
int sa6_embedscope(struct sockaddr_in6 *, int);
int sa6_recoverscope(struct sockaddr_in6 *);
+int sa6_checkzone(struct sockaddr_in6 *);
+int sa6_checkzone_ifp(struct ifnet *, struct sockaddr_in6 *);
int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *);
int in6_clearscope(struct in6_addr *);
uint16_t in6_getscope(struct in6_addr *);
+uint32_t in6_getscopezone(const struct ifnet *, int);
+void in6_splitscope(const struct in6_addr *, struct in6_addr *, uint32_t *);
+struct ifnet* in6_getlinkifnet(uint32_t);
#endif /* _KERNEL */
#endif /* _NETINET6_SCOPE6_VAR_H_ */
diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c
index c8bc6620..962a622e 100644
--- a/freebsd/sys/netinet6/sctp6_usrreq.c
+++ b/freebsd/sys/netinet6/sctp6_usrreq.c
@@ -41,9 +41,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctp_var.h>
-#ifdef INET6
#include <netinet6/sctp6_var.h>
-#endif
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_uio.h>
@@ -56,13 +54,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_output.h>
#include <netinet/sctp_bsd_addr.h>
#include <netinet/sctp_crc32.h>
+#include <netinet/icmp6.h>
#include <netinet/udp.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
-#ifdef INET6
#include <netipsec/ipsec6.h>
-#endif /* INET6 */
#endif /* IPSEC */
extern struct protosw inetsw[];
@@ -85,7 +82,8 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
#endif
uint32_t mflowid;
- uint8_t use_mflowid;
+ uint8_t mflowtype;
+ uint16_t fibnum;
iphlen = *offp;
if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) {
@@ -96,13 +94,7 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
#ifdef SCTP_MBUF_LOGGING
/* Log in any input mbufs */
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
- struct mbuf *mat;
-
- for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) {
- if (SCTP_BUF_IS_EXTENDED(mat)) {
- sctp_log_mb(mat, SCTP_MBUF_INPUT);
- }
- }
+ sctp_log_mbc(m, SCTP_MBUF_INPUT);
}
#endif
#ifdef SCTP_PACKET_LOGGING
@@ -111,17 +103,13 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
}
#endif
SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
- "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%x.\n",
+ "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
m->m_pkthdr.len,
if_name(m->m_pkthdr.rcvif),
- m->m_pkthdr.csum_flags);
- if (m->m_flags & M_FLOWID) {
- mflowid = m->m_pkthdr.flowid;
- use_mflowid = 1;
- } else {
- mflowid = 0;
- use_mflowid = 0;
- }
+ (int)m->m_pkthdr.csum_flags, CSUM_BITS);
+ mflowid = m->m_pkthdr.flowid;
+ mflowtype = M_HASHTYPE_GET(m);
+ fibnum = M_GETFIB(m);
SCTP_STAT_INCR(sctps_recvpackets);
SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
/* Get IP, SCTP, and first chunk header together in the first mbuf. */
@@ -151,10 +139,6 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
if (in6_setscope(&dst.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) {
goto out;
}
- if (faithprefix_p != NULL && (*faithprefix_p) (&dst.sin6_addr)) {
- /* XXX send icmp6 host/port unreach? */
- goto out;
- }
length = ntohs(ip6->ip6_plen) + iphlen;
/* Validate mbuf chain length with IP payload length. */
if (SCTP_HEADER_LEN(m) != length) {
@@ -186,7 +170,7 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
compute_crc,
#endif
ecn_bits,
- use_mflowid, mflowid,
+ mflowtype, mflowid, fibnum,
vrf_id, port);
out:
if (m) {
@@ -202,250 +186,224 @@ sctp6_input(struct mbuf **i_pak, int *offp, int proto SCTP_UNUSED)
return (sctp6_input_with_port(i_pak, offp, 0));
}
-static void
-sctp6_notify_mbuf(struct sctp_inpcb *inp, struct icmp6_hdr *icmp6,
- struct sctphdr *sh, struct sctp_tcb *stcb, struct sctp_nets *net)
-{
- uint32_t nxtsz;
-
- if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
- (icmp6 == NULL) || (sh == NULL)) {
- goto out;
- }
- /* First do we even look at it? */
- if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag))
- goto out;
-
- if (icmp6->icmp6_type != ICMP6_PACKET_TOO_BIG) {
- /* not PACKET TO BIG */
- goto out;
- }
- /*
- * ok we need to look closely. We could even get smarter and look at
- * anyone that we sent to in case we get a different ICMP that tells
- * us there is no way to reach a host, but for this impl, all we
- * care about is MTU discovery.
- */
- nxtsz = ntohl(icmp6->icmp6_mtu);
- /* Stop any PMTU timer */
- sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL, SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1);
-
- /* Adjust destination size limit */
- if (net->mtu > nxtsz) {
- net->mtu = nxtsz;
- if (net->port) {
- net->mtu -= sizeof(struct udphdr);
- }
- }
- /* now what about the ep? */
- if (stcb->asoc.smallest_mtu > nxtsz) {
- struct sctp_tmit_chunk *chk;
-
- /* Adjust that too */
- stcb->asoc.smallest_mtu = nxtsz;
- /* now off to subtract IP_DF flag if needed */
-
- TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
- if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
- chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
- }
- }
- TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
- if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) {
- /*
- * For this guy we also mark for immediate
- * resend since we sent to big of chunk
- */
- chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
- if (chk->sent != SCTP_DATAGRAM_RESEND)
- stcb->asoc.sent_queue_retran_cnt++;
- chk->sent = SCTP_DATAGRAM_RESEND;
- chk->rec.data.doing_fast_retransmit = 0;
-
- chk->sent = SCTP_DATAGRAM_RESEND;
- /* Clear any time so NO RTT is being done */
- chk->sent_rcv_time.tv_sec = 0;
- chk->sent_rcv_time.tv_usec = 0;
- stcb->asoc.total_flight -= chk->send_size;
- net->flight_size -= chk->send_size;
- }
- }
- }
- sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL);
-out:
- if (stcb) {
- SCTP_TCB_UNLOCK(stcb);
- }
-}
-
-
void
sctp6_notify(struct sctp_inpcb *inp,
- struct icmp6_hdr *icmph,
- struct sctphdr *sh,
- struct sockaddr *to,
struct sctp_tcb *stcb,
- struct sctp_nets *net)
+ struct sctp_nets *net,
+ uint8_t icmp6_type,
+ uint8_t icmp6_code,
+ uint16_t next_mtu)
{
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
-
- /* protection */
- if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
- (sh == NULL) || (to == NULL)) {
- if (stcb)
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- /* First job is to verify the vtag matches what I would send */
- if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- if (icmph->icmp6_type != ICMP_UNREACH) {
- /* We only care about unreachable */
- SCTP_TCB_UNLOCK(stcb);
- return;
- }
- if ((icmph->icmp6_code == ICMP_UNREACH_NET) ||
- (icmph->icmp6_code == ICMP_UNREACH_HOST) ||
- (icmph->icmp6_code == ICMP_UNREACH_NET_UNKNOWN) ||
- (icmph->icmp6_code == ICMP_UNREACH_HOST_UNKNOWN) ||
- (icmph->icmp6_code == ICMP_UNREACH_ISOLATED) ||
- (icmph->icmp6_code == ICMP_UNREACH_NET_PROHIB) ||
- (icmph->icmp6_code == ICMP_UNREACH_HOST_PROHIB) ||
- (icmph->icmp6_code == ICMP_UNREACH_FILTER_PROHIB)) {
-
- /*
- * Hmm reachablity problems we must examine closely. If its
- * not reachable, we may have lost a network. Or if there is
- * NO protocol at the other end named SCTP. well we consider
- * it a OOTB abort.
- */
- if (net->dest_state & SCTP_ADDR_REACHABLE) {
- /* Ok that destination is NOT reachable */
- net->dest_state &= ~SCTP_ADDR_REACHABLE;
- net->dest_state &= ~SCTP_ADDR_PF;
- sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
- stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED);
+ int timer_stopped;
+
+ switch (icmp6_type) {
+ case ICMP6_DST_UNREACH:
+ if ((icmp6_code == ICMP6_DST_UNREACH_NOROUTE) ||
+ (icmp6_code == ICMP6_DST_UNREACH_ADMIN) ||
+ (icmp6_code == ICMP6_DST_UNREACH_BEYONDSCOPE) ||
+ (icmp6_code == ICMP6_DST_UNREACH_ADDR)) {
+ /* Mark the net unreachable. */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ /* Ok that destination is not reachable */
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state &= ~SCTP_ADDR_PF;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED);
+ }
}
SCTP_TCB_UNLOCK(stcb);
- } else if ((icmph->icmp6_code == ICMP_UNREACH_PROTOCOL) ||
- (icmph->icmp6_code == ICMP_UNREACH_PORT)) {
- /*
- * Here the peer is either playing tricks on us, including
- * an address that belongs to someone who does not support
- * SCTP OR was a userland implementation that shutdown and
- * now is dead. In either case treat it like a OOTB abort
- * with no TCB
- */
- sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
+ break;
+ case ICMP6_PARAM_PROB:
+ /* Treat it like an ABORT. */
+ if (icmp6_code == ICMP6_PARAMPROB_NEXTHEADER) {
+ sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- so = SCTP_INP_SO(inp);
- atomic_add_int(&stcb->asoc.refcnt, 1);
- SCTP_TCB_UNLOCK(stcb);
- SCTP_SOCKET_LOCK(so, 1);
- SCTP_TCB_LOCK(stcb);
- atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
#endif
- (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
- SCTP_SOCKET_UNLOCK(so, 1);
- /* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
+ SCTP_SOCKET_UNLOCK(so, 1);
#endif
- /* no need to unlock here, since the TCB is gone */
- } else {
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+ case ICMP6_PACKET_TOO_BIG:
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ timer_stopped = 1;
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
+ } else {
+ timer_stopped = 0;
+ }
+ /* Update the path MTU. */
+ if (net->mtu > next_mtu) {
+ net->mtu = next_mtu;
+ if (net->port) {
+ net->mtu -= sizeof(struct udphdr);
+ }
+ }
+ /* Update the association MTU */
+ if (stcb->asoc.smallest_mtu > next_mtu) {
+ sctp_pathmtu_adjustment(stcb, next_mtu);
+ }
+ /* Finally, start the PMTU timer if it was running before. */
+ if (timer_stopped) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
SCTP_TCB_UNLOCK(stcb);
+ break;
+ default:
+ SCTP_TCB_UNLOCK(stcb);
+ break;
}
}
-
-
void
sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d)
{
+ struct ip6ctlparam *ip6cp;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
struct sctphdr sh;
- struct ip6ctlparam *ip6cp = NULL;
- uint32_t vrf_id;
-
- vrf_id = SCTP_DEFAULT_VRFID;
+ struct sockaddr_in6 src, dst;
if (pktdst->sa_family != AF_INET6 ||
- pktdst->sa_len != sizeof(struct sockaddr_in6))
+ pktdst->sa_len != sizeof(struct sockaddr_in6)) {
return;
-
- if ((unsigned)cmd >= PRC_NCMDS)
+ }
+ if ((unsigned)cmd >= PRC_NCMDS) {
return;
+ }
if (PRC_IS_REDIRECT(cmd)) {
d = NULL;
} else if (inet6ctlerrmap[cmd] == 0) {
return;
}
- /* if the parameter is from icmp6, decode it. */
+ /* If the parameter is from icmp6, decode it. */
if (d != NULL) {
ip6cp = (struct ip6ctlparam *)d;
} else {
ip6cp = (struct ip6ctlparam *)NULL;
}
- if (ip6cp) {
+ if (ip6cp != NULL) {
/*
* XXX: We assume that when IPV6 is non NULL, M and OFF are
* valid.
*/
- /* check if we can safely examine src and dst ports */
- struct sctp_inpcb *inp = NULL;
- struct sctp_tcb *stcb = NULL;
- struct sctp_nets *net = NULL;
- struct sockaddr_in6 final;
-
- if (ip6cp->ip6c_m == NULL)
+ if (ip6cp->ip6c_m == NULL) {
return;
-
+ }
+ /*
+ * Check if we can safely examine the ports and the
+ * verification tag of the SCTP common header.
+ */
+ if (ip6cp->ip6c_m->m_pkthdr.len <
+ (int32_t) (ip6cp->ip6c_off + offsetof(struct sctphdr, checksum))) {
+ return;
+ }
+ /* Copy out the port numbers and the verification tag. */
bzero(&sh, sizeof(sh));
- bzero(&final, sizeof(final));
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off,
+ sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t),
+ (caddr_t)&sh);
+ memset(&src, 0, sizeof(struct sockaddr_in6));
+ src.sin6_family = AF_INET6;
+ src.sin6_len = sizeof(struct sockaddr_in6);
+ src.sin6_port = sh.src_port;
+ src.sin6_addr = ip6cp->ip6c_ip6->ip6_src;
+ if (in6_setscope(&src.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
+ return;
+ }
+ memset(&dst, 0, sizeof(struct sockaddr_in6));
+ dst.sin6_family = AF_INET6;
+ dst.sin6_len = sizeof(struct sockaddr_in6);
+ dst.sin6_port = sh.dest_port;
+ dst.sin6_addr = ip6cp->ip6c_ip6->ip6_dst;
+ if (in6_setscope(&dst.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) {
+ return;
+ }
inp = NULL;
net = NULL;
- m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(sh),
- (caddr_t)&sh);
- ip6cp->ip6c_src->sin6_port = sh.src_port;
- final.sin6_len = sizeof(final);
- final.sin6_family = AF_INET6;
- final.sin6_addr = ((struct sockaddr_in6 *)pktdst)->sin6_addr;
- final.sin6_port = sh.dest_port;
- stcb = sctp_findassociation_addr_sa((struct sockaddr *)&final,
- (struct sockaddr *)ip6cp->ip6c_src,
- &inp, &net, 1, vrf_id);
- /* inp's ref-count increased && stcb locked */
- if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
- if (cmd == PRC_MSGSIZE) {
- sctp6_notify_mbuf(inp,
- ip6cp->ip6c_icmp6,
- &sh,
- stcb,
- net);
- /* inp's ref-count reduced && stcb unlocked */
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
+ (struct sockaddr *)&src,
+ &inp, &net, 1, SCTP_DEFAULT_VRFID);
+ if ((stcb != NULL) &&
+ (net != NULL) &&
+ (inp != NULL)) {
+ /* Check the verification tag */
+ if (ntohl(sh.v_tag) != 0) {
+ /*
+ * This must be the verification tag used
+ * for sending out packets. We don't
+ * consider packets reflecting the
+ * verification tag.
+ */
+ if (ntohl(sh.v_tag) != stcb->asoc.peer_vtag) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
} else {
- sctp6_notify(inp, ip6cp->ip6c_icmp6, &sh,
- (struct sockaddr *)&final,
- stcb, net);
- /* inp's ref-count reduced && stcb unlocked */
+ if (ip6cp->ip6c_m->m_pkthdr.len >=
+ ip6cp->ip6c_off + sizeof(struct sctphdr) +
+ sizeof(struct sctp_chunkhdr) +
+ offsetof(struct sctp_init, a_rwnd)) {
+ /*
+ * In this case we can check if we
+ * got an INIT chunk and if the
+ * initiate tag matches.
+ */
+ uint32_t initiate_tag;
+ uint8_t chunk_type;
+
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off +
+ sizeof(struct sctphdr),
+ sizeof(uint8_t),
+ (caddr_t)&chunk_type);
+ m_copydata(ip6cp->ip6c_m,
+ ip6cp->ip6c_off +
+ sizeof(struct sctphdr) +
+ sizeof(struct sctp_chunkhdr),
+ sizeof(uint32_t),
+ (caddr_t)&initiate_tag);
+ if ((chunk_type != SCTP_INITIATION) ||
+ (ntohl(initiate_tag) != stcb->asoc.my_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
}
+ sctp6_notify(inp, stcb, net,
+ ip6cp->ip6c_icmp6->icmp6_type,
+ ip6cp->ip6c_icmp6->icmp6_code,
+ (uint16_t) ntohl(ip6cp->ip6c_icmp6->icmp6_mtu));
} else {
- if (PRC_IS_REDIRECT(cmd) && inp) {
- in6_rtchange((struct in6pcb *)inp,
- inet6ctlerrmap[cmd]);
- }
- if (inp) {
+ if ((stcb == NULL) && (inp != NULL)) {
/* reduce inp's ref-count */
SCTP_INP_WLOCK(inp);
SCTP_INP_DECR_REF(inp);
SCTP_INP_WUNLOCK(inp);
}
- if (stcb)
+ if (stcb) {
SCTP_TCB_UNLOCK(stcb);
+ }
}
}
}
@@ -848,7 +806,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
#ifdef INET
struct in6pcb *inp6;
struct sockaddr_in6 *sin6;
- struct sockaddr_storage ss;
+ union sctp_sockstore store;
#endif
@@ -932,8 +890,8 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
}
if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
/* convert v4-mapped into v4 addr */
- in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6);
- addr = (struct sockaddr *)&ss;
+ in6_sin6_2_sin(&store.sin, sin6);
+ addr = &store.sa;
}
#endif /* INET */
/* Now do we connect? */
@@ -964,7 +922,9 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
return (EALREADY);
}
/* We are GOOD to go */
- stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p);
+ stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
+ inp->sctp_ep.pre_open_stream_count,
+ inp->sctp_ep.port, p);
SCTP_ASOC_CREATE_UNLOCK(inp);
if (stcb == NULL) {
/* Gak! no memory */
@@ -1023,7 +983,10 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
stcb = LIST_FIRST(&inp->sctp_asoc_list);
if (stcb == NULL) {
- goto notConn6;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+ return (ENOENT);
}
fnd = 0;
sin_a6 = NULL;
@@ -1040,7 +1003,10 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
}
if ((!fnd) || (sin_a6 == NULL)) {
/* punt */
- goto notConn6;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
+ return (ENOENT);
}
vrf_id = inp->def_vrf_id;
sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, net, 0, vrf_id);
@@ -1049,7 +1015,6 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
}
} else {
/* For the bound all case you get back 0 */
- notConn6:
memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr));
}
} else {
@@ -1061,7 +1026,7 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr)
if (laddr->ifa->address.sa.sa_family == AF_INET6) {
struct sockaddr_in6 *sin_a;
- sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6;
+ sin_a = &laddr->ifa->address.sin6;
sin6->sin6_addr = sin_a->sin6_addr;
fnd = 1;
break;
@@ -1138,8 +1103,11 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr)
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT);
return (ENOENT);
}
- if ((error = sa6_recoverscope(sin6)) != 0)
+ if ((error = sa6_recoverscope(sin6)) != 0) {
+ SCTP_FREE_SONAME(sin6);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, error);
return (error);
+ }
*addr = (struct sockaddr *)sin6;
return (0);
}
@@ -1147,10 +1115,6 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr)
static int
sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
{
-#ifdef INET
- struct sockaddr *addr;
-
-#endif
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
@@ -1162,19 +1126,21 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
error = sctp6_getaddr(so, nam);
#ifdef INET
if (error) {
+ struct sockaddr_in6 *sin6;
+
/* try v4 next if v6 failed */
error = sctp_ingetaddr(so, nam);
if (error) {
return (error);
}
- addr = *nam;
- /* if I'm V6ONLY, convert it to v4-mapped */
- if (SCTP_IPV6_V6ONLY(inp6)) {
- struct sockaddr_in6 sin6;
-
- in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
- memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ if (sin6 == NULL) {
+ SCTP_FREE_SONAME(*nam);
+ return (ENOMEM);
}
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+ SCTP_FREE_SONAME(*nam);
+ *nam = (struct sockaddr *)sin6;
}
#endif
return (error);
@@ -1184,10 +1150,6 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
static int
sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
-#ifdef INET
- struct sockaddr *addr;
-
-#endif
struct in6pcb *inp6 = sotoin6pcb(so);
int error;
@@ -1199,19 +1161,21 @@ sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
error = sctp6_peeraddr(so, nam);
#ifdef INET
if (error) {
+ struct sockaddr_in6 *sin6;
+
/* try v4 next if v6 failed */
error = sctp_peeraddr(so, nam);
if (error) {
return (error);
}
- addr = *nam;
- /* if I'm V6ONLY, convert it to v4-mapped */
- if (SCTP_IPV6_V6ONLY(inp6)) {
- struct sockaddr_in6 sin6;
-
- in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6);
- memcpy(addr, &sin6, sizeof(struct sockaddr_in6));
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ if (sin6 == NULL) {
+ SCTP_FREE_SONAME(*nam);
+ return (ENOMEM);
}
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6);
+ SCTP_FREE_SONAME(*nam);
+ *nam = (struct sockaddr *)sin6;
}
#endif
return (error);
diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h
index 79d4c52b..782567c5 100644
--- a/freebsd/sys/netinet6/sctp6_var.h
+++ b/freebsd/sys/netinet6/sctp6_var.h
@@ -47,10 +47,9 @@ int
sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
struct mbuf *, struct proc *);
void sctp6_ctlinput(int, struct sockaddr *, void *);
-extern void
-sctp6_notify(struct sctp_inpcb *, struct icmp6_hdr *,
- struct sctphdr *, struct sockaddr *,
- struct sctp_tcb *, struct sctp_nets *);
+void
+sctp6_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *,
+ uint8_t, uint8_t, uint16_t);
#endif
#endif
diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c
index 8342cf7c..790bed2b 100644
--- a/freebsd/sys/netinet6/udp6_usrreq.c
+++ b/freebsd/sys/netinet6/udp6_usrreq.c
@@ -3,6 +3,7 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * Copyright (c) 2014 Kevin Lo
* All rights reserved.
*
* Portions of this software were developed by Robert N. M. Watson under
@@ -73,8 +74,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
-#include <rtems/bsd/local/opt_ipfw.h>
#include <rtems/bsd/local/opt_ipsec.h>
+#include <rtems/bsd/local/opt_rss.h>
#include <rtems/bsd/sys/param.h>
#include <sys/jail.h>
@@ -84,6 +85,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -93,10 +95,13 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
+#include <net/rss_config.h>
#include <netinet/in.h>
+#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -108,10 +113,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
+#include <netinet/udplite.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
+#include <netinet6/in6_rss.h>
#include <netinet6/udp6_var.h>
#include <netinet6/scope6_var.h>
@@ -130,27 +137,39 @@ __FBSDID("$FreeBSD$");
extern struct protosw inetsw[];
static void udp6_detach(struct socket *so);
-static void
+static int
udp6_append(struct inpcb *inp, struct mbuf *n, int off,
struct sockaddr_in6 *fromsa)
{
struct socket *so;
struct mbuf *opts;
+ struct udpcb *up;
INP_LOCK_ASSERT(inp);
+ /*
+ * Engage the tunneling protocol.
+ */
+ up = intoudpcb(inp);
+ if (up->u_tun_func != NULL) {
+ in_pcbref(inp);
+ INP_RUNLOCK(inp);
+ (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa,
+ up->u_tun_ctx);
+ INP_RLOCK(inp);
+ return (in_pcbrele_rlocked(inp));
+ }
#ifdef IPSEC
/* Check AH/ESP integrity. */
if (ipsec6_in_reject(n, inp)) {
m_freem(n);
- IPSEC6STAT_INC(in_polvio);
- return;
+ return (0);
}
#endif /* IPSEC */
#ifdef MAC
if (mac_inpcb_check_deliver(inp, n) != 0) {
m_freem(n);
- return;
+ return (0);
}
#endif
opts = NULL;
@@ -170,6 +189,7 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off,
UDPSTAT_INC(udps_fullsock);
} else
sorwakeup_locked(so);
+ return (0);
}
int
@@ -180,22 +200,19 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
struct ip6_hdr *ip6;
struct udphdr *uh;
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
int off = *offp;
+ int cscov_partial;
int plen, ulen;
struct sockaddr_in6 fromsa;
struct m_tag *fwd_tag;
uint16_t uh_sum;
+ uint8_t nxt;
ifp = m->m_pkthdr.rcvif;
ip6 = mtod(m, struct ip6_hdr *);
- if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
- /* XXX send icmp6 host/port unreach? */
- m_freem(m);
- return (IPPROTO_DONE);
- }
-
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);
ip6 = mtod(m, struct ip6_hdr *);
@@ -217,28 +234,43 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
ulen = ntohs((u_short)uh->uh_ulen);
- if (plen != ulen) {
- UDPSTAT_INC(udps_badlen);
- goto badunlocked;
- }
-
- /*
- * Checksum extended UDP header and data.
- */
- if (uh->uh_sum == 0) {
- UDPSTAT_INC(udps_nosum);
- goto badunlocked;
+ nxt = proto;
+ cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0;
+ if (nxt == IPPROTO_UDPLITE) {
+ /* Zero means checksum over the complete packet. */
+ if (ulen == 0)
+ ulen = plen;
+ if (ulen == plen)
+ cscov_partial = 0;
+ if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) {
+ /* XXX: What is the right UDPLite MIB counter? */
+ goto badunlocked;
+ }
+ if (uh->uh_sum == 0) {
+ /* XXX: What is the right UDPLite MIB counter? */
+ goto badunlocked;
+ }
+ } else {
+ if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) {
+ UDPSTAT_INC(udps_badlen);
+ goto badunlocked;
+ }
+ if (uh->uh_sum == 0) {
+ UDPSTAT_INC(udps_nosum);
+ goto badunlocked;
+ }
}
- if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+ if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) &&
+ !cscov_partial) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
else
- uh_sum = in6_cksum_pseudo(ip6, ulen,
- IPPROTO_UDP, m->m_pkthdr.csum_data);
+ uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
+ m->m_pkthdr.csum_data);
uh_sum ^= 0xffff;
} else
- uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen);
+ uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
if (uh_sum != 0) {
UDPSTAT_INC(udps_badsum);
@@ -251,11 +283,13 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
init_sin6(&fromsa, m);
fromsa.sin6_port = uh->uh_sport;
+ pcbinfo = udp_get_inpcbinfo(nxt);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
struct inpcb *last;
+ struct inpcbhead *pcblist;
struct ip6_moptions *imo;
- INP_INFO_RLOCK(&V_udbinfo);
+ INP_INFO_RLOCK(pcbinfo);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -271,8 +305,9 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* here. We need udphdr for IPsec processing so we do that
* later.
*/
+ pcblist = udp_get_pcblist(nxt);
last = NULL;
- LIST_FOREACH(inp, &V_udb, inp_list) {
+ LIST_FOREACH(inp, pcblist, inp_list) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
if (inp->inp_lport != uh->uh_dport)
@@ -335,20 +370,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
INP_RLOCK(last);
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- udp6_append(last, n, off, &fromsa);
- } else {
- /*
- * Engage the tunneling
- * protocol we will have to
- * leave the info_lock up,
- * since we are hunting
- * through multiple UDP's.
- *
- */
- (*up->u_tun_func)(n, off, last);
- }
+ UDP_PROBE(receive, NULL, last, ip6,
+ last, uh);
+ if (udp6_append(last, n, off, &fromsa))
+ goto inp_lost;
INP_RUNLOCK(last);
}
}
@@ -377,17 +402,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
goto badheadlocked;
}
INP_RLOCK(last);
- INP_INFO_RUNLOCK(&V_udbinfo);
- up = intoudpcb(last);
- if (up->u_tun_func == NULL) {
- udp6_append(last, m, off, &fromsa);
- } else {
- /*
- * Engage the tunneling protocol.
- */
- (*up->u_tun_func)(m, off, last);
- }
- INP_RUNLOCK(last);
+ INP_INFO_RUNLOCK(pcbinfo);
+ UDP_PROBE(receive, NULL, last, ip6, last, uh);
+ if (udp6_append(last, m, off, &fromsa) == 0)
+ INP_RUNLOCK(last);
+ inp_lost:
return (IPPROTO_DONE);
}
/*
@@ -407,8 +426,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* Transparently forwarded. Pretend to be the destination.
* Already got one like this?
*/
- inp = in6_pcblookup_mbuf(&V_udbinfo,
- &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
+ uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
if (!inp) {
/*
@@ -416,7 +435,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
* Because we've rewritten the destination address,
* any hardware-generated hash is ignored.
*/
- inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src,
+ inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
uh->uh_sport, &next_hop6->sin6_addr,
next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
uh->uh_dport, INPLOOKUP_WILDCARD |
@@ -426,7 +445,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
m_tag_delete(m, fwd_tag);
m->m_flags &= ~M_IP6_NEXTHOP;
} else
- inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src,
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif, m);
@@ -457,28 +476,29 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
}
INP_RLOCK_ASSERT(inp);
up = intoudpcb(inp);
- if (up->u_tun_func == NULL) {
- udp6_append(inp, m, off, &fromsa);
- } else {
- /*
- * Engage the tunneling protocol.
- */
-
- (*up->u_tun_func)(m, off, inp);
+ if (cscov_partial) {
+ if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
+ INP_RUNLOCK(inp);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
}
- INP_RUNLOCK(inp);
+ UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
+ if (udp6_append(inp, m, off, &fromsa) == 0)
+ INP_RUNLOCK(inp);
return (IPPROTO_DONE);
badheadlocked:
- INP_INFO_RUNLOCK(&V_udbinfo);
+ INP_INFO_RUNLOCK(pcbinfo);
badunlocked:
if (m)
m_freem(m);
return (IPPROTO_DONE);
}
-void
-udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+static void
+udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d,
+ struct inpcbinfo *pcbinfo)
{
struct udphdr uh;
struct ip6_hdr *ip6;
@@ -534,14 +554,51 @@ udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
bzero(&uh, sizeof(uh));
m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
- (void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport,
+ if (!PRC_IS_REDIRECT(cmd)) {
+ /* Check to see if its tunneled */
+ struct inpcb *inp;
+ inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst,
+ uh.uh_dport, &ip6->ip6_src, uh.uh_sport,
+ INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
+ m->m_pkthdr.rcvif, m);
+ if (inp != NULL) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ if (up->u_icmp_func) {
+ /* Yes it is. */
+ INP_RUNLOCK(inp);
+ (*up->u_icmp_func)(cmd, (struct sockaddr *)ip6cp->ip6c_src,
+ d, up->u_tun_ctx);
+ return;
+ } else {
+ /* Can't find it. */
+ INP_RUNLOCK(inp);
+ }
+ }
+ }
+ (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport,
(struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
cmdarg, notify);
} else
- (void) in6_pcbnotify(&V_udbinfo, sa, 0,
+ (void)in6_pcbnotify(pcbinfo, sa, 0,
(const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
}
+void
+udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+ return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo));
+}
+
+void
+udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d)
+{
+
+ return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo));
+}
+
static int
udp6_getcred(SYSCTL_HANDLER_ARGS)
{
@@ -598,10 +655,12 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
struct udphdr *udp6;
struct in6_addr *laddr, *faddr, in6a;
struct sockaddr_in6 *sin6 = NULL;
- struct ifnet *oifp = NULL;
+ int cscov_partial = 0;
int scope_ambiguous = 0;
u_short fport;
int error = 0;
+ uint8_t nxt;
+ uint16_t cscov = 0;
struct ip6_pktopts *optp, opt;
int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
int flags;
@@ -632,9 +691,11 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
return (error);
}
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
if (control) {
if ((error = ip6_setpktopts(control, &opt,
- inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0)
+ inp->in6p_outputopts, td->td_ucred, nxt)) != 0)
goto release;
optp = &opt;
} else
@@ -644,8 +705,6 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
faddr = &sin6->sin6_addr;
/*
- * IPv4 version of udp_output calls in_pcbconnect in this case,
- * which needs splnet and affects performance.
* Since we saw no essential reason for calling in_pcbconnect,
* we get rid of such kind of logic, and call in6_selectsrc
* and in6_pcbsetport in order to fill in the local address
@@ -695,15 +754,10 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
}
if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
- error = in6_selectsrc(sin6, optp, inp, NULL,
- td->td_ucred, &oifp, &in6a);
+ error = in6_selectsrc_socket(sin6, optp, inp,
+ td->td_ucred, scope_ambiguous, &in6a, NULL);
if (error)
goto release;
- if (oifp && scope_ambiguous &&
- (error = in6_setscope(&sin6->sin6_addr,
- oifp, NULL))) {
- goto release;
- }
laddr = &in6a;
} else
laddr = &inp->in6p_laddr; /* XXX */
@@ -751,8 +805,8 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
* Calculate data length and get a mbuf
* for UDP and IP6 headers.
*/
- M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
- if (m == 0) {
+ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
+ if (m == NULL) {
error = ENOBUFS;
goto release;
}
@@ -763,7 +817,20 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
udp6->uh_dport = fport;
- if (plen <= 0xffff)
+ if (nxt == IPPROTO_UDPLITE) {
+ struct udpcb *up;
+
+ up = intoudpcb(inp);
+ cscov = up->u_txcslen;
+ if (cscov >= plen)
+ cscov = 0;
+ udp6->uh_ulen = htons(cscov);
+ /*
+ * For UDP-Lite, checksum coverage length of zero means
+ * the entire UDPLite packet is covered by the checksum.
+ */
+ cscov_partial = (cscov == 0) ? 0 : 1;
+ } else if (plen <= 0xffff)
udp6->uh_ulen = htons((u_short)plen);
else
udp6->uh_ulen = 0;
@@ -775,23 +842,66 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
ip6->ip6_vfc |= IPV6_VERSION;
-#if 0 /* ip6_plen will be filled in ip6_output. */
ip6->ip6_plen = htons((u_short)plen);
-#endif
- ip6->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_nxt = nxt;
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
ip6->ip6_src = *laddr;
ip6->ip6_dst = *faddr;
- udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0);
- m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
- m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ if (cscov_partial) {
+ if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
+ sizeof(struct ip6_hdr), plen, cscov)) == 0)
+ udp6->uh_sum = 0xffff;
+ } else {
+ udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ }
+
+#ifdef RSS
+ {
+ uint32_t hash_val, hash_type;
+ uint8_t pr;
+ pr = inp->inp_socket->so_proto->pr_protocol;
+ /*
+ * Calculate an appropriate RSS hash for UDP and
+ * UDP Lite.
+ *
+ * The called function will take care of figuring out
+ * whether a 2-tuple or 4-tuple hash is required based
+ * on the currently configured scheme.
+ *
+ * Later later on connected socket values should be
+ * cached in the inpcb and reused, rather than constantly
+ * re-calculating it.
+ *
+ * UDP Lite is a different protocol number and will
+ * likely end up being hashed as a 2-tuple until
+ * RSS / NICs grow UDP Lite protocol awareness.
+ */
+ if (rss_proto_software_hash_v6(faddr, laddr, fport,
+ inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
+ }
+ }
+#endif
flags = 0;
+#ifdef RSS
+ /*
+ * Don't override with the inp cached flowid.
+ *
+ * Until the whole UDP path is vetted, it may actually
+ * be incorrect.
+ */
+ flags |= IP_NODEFAULTFLOWID;
+#endif
+ UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
UDPSTAT_INC(udps_opackets);
- error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
- NULL, inp);
+ error = ip6_output(m, optp, NULL, flags,
+ inp->in6p_moptions, NULL, inp);
break;
case AF_INET:
error = EAFNOSUPPORT;
@@ -814,26 +924,32 @@ static void
udp6_abort(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
+ INP_WLOCK(inp);
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
+ INP_WUNLOCK(inp);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
(*pru->pru_abort)(so);
return;
}
#endif
- INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -843,8 +959,10 @@ static int
udp6_attach(struct socket *so, int proto, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
@@ -853,10 +971,10 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
if (error)
return (error);
}
- INP_INFO_WLOCK(&V_udbinfo);
- error = in_pcballoc(so, &V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
+ error = in_pcballoc(so, pcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
inp = (struct inpcb *)so->so_pcb;
@@ -877,11 +995,11 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
if (error) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (error);
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
return (0);
}
@@ -889,13 +1007,15 @@ static int
udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
INP_WLOCK(inp);
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -923,7 +1043,7 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
#ifdef INET
out:
#endif
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
}
@@ -932,25 +1052,31 @@ static void
udp6_close(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
+ INP_WLOCK(inp);
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
+ INP_WUNLOCK(inp);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
(*pru->pru_disconnect)(so);
return;
}
#endif
- INP_WLOCK(inp);
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
soisdisconnected(so);
}
INP_WUNLOCK(inp);
@@ -960,9 +1086,11 @@ static int
udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct sockaddr_in6 *sin6;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
sin6 = (struct sockaddr_in6 *)nam;
KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
@@ -989,10 +1117,10 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
if (error != 0)
goto out;
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
goto out;
@@ -1007,9 +1135,9 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
if (error != 0)
goto out;
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = in6_pcbconnect(inp, nam, td->td_ucred);
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error == 0)
soisconnected(so);
out:
@@ -1021,18 +1149,20 @@ static void
udp6_detach(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
struct udpcb *up;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
- INP_INFO_WLOCK(&V_udbinfo);
+ INP_INFO_WLOCK(pcbinfo);
INP_WLOCK(inp);
up = intoudpcb(inp);
KASSERT(up != NULL, ("%s: up == NULL", __func__));
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_udbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
udp_discardcb(up);
}
@@ -1040,32 +1170,37 @@ static int
udp6_disconnect(struct socket *so)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
+ INP_WLOCK(inp);
#ifdef INET
if (inp->inp_vflag & INP_IPV4) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
+ IPPROTO_UDP : IPPROTO_UDPLITE;
+ INP_WUNLOCK(inp);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
(void)(*pru->pru_disconnect)(so);
return (0);
}
#endif
- INP_WLOCK(inp);
-
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = ENOTCONN;
goto out;
}
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- INP_HASH_WUNLOCK(&V_udbinfo);
+ INP_HASH_WUNLOCK(pcbinfo);
SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTED; /* XXX */
SOCK_UNLOCK(so);
@@ -1079,8 +1214,10 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
+ struct inpcbinfo *pcbinfo;
int error = 0;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
@@ -1099,9 +1236,9 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
int hasv4addr;
- struct sockaddr_in6 *sin6 = 0;
+ struct sockaddr_in6 *sin6 = NULL;
- if (addr == 0)
+ if (addr == NULL)
hasv4addr = (inp->inp_vflag & INP_IPV4);
else {
sin6 = (struct sockaddr_in6 *)addr;
@@ -1110,7 +1247,10 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
}
if (hasv4addr) {
struct pr_usrreqs *pru;
+ uint8_t nxt;
+ nxt = (inp->inp_socket->so_proto->pr_protocol ==
+ IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE;
/*
* XXXRW: We release UDP-layer locks before calling
* udp_send() in order to avoid recursion. However,
@@ -1122,7 +1262,7 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
INP_WUNLOCK(inp);
if (sin6)
in6_sin6_2_sin_in_sock(addr);
- pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
/* addr will just be freed in sendit(). */
return ((*pru->pru_send)(so, flags, m, addr, control,
td));
@@ -1132,11 +1272,9 @@ udp6_send(struct socket *so, int flags, struct mbuf *m,
#ifdef MAC
mac_inpcb_create_mbuf(inp, m);
#endif
- INP_HASH_WLOCK(&V_udbinfo);
+ INP_HASH_WLOCK(pcbinfo);
error = udp6_output(inp, m, addr, control, td);
- INP_HASH_WUNLOCK(&V_udbinfo);
-#ifdef INET
-#endif
+ INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
diff --git a/freebsd/sys/netinet6/udp6_var.h b/freebsd/sys/netinet6/udp6_var.h
index ae53c5a8..cdab98b0 100644
--- a/freebsd/sys/netinet6/udp6_var.h
+++ b/freebsd/sys/netinet6/udp6_var.h
@@ -69,6 +69,7 @@ SYSCTL_DECL(_net_inet6_udp6);
extern struct pr_usrreqs udp6_usrreqs;
void udp6_ctlinput(int, struct sockaddr *, void *);
+void udplite6_ctlinput(int, struct sockaddr *, void *);
int udp6_input(struct mbuf **, int *, int);
#endif