diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2016-10-07 15:10:20 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-01-10 09:53:31 +0100 |
commit | c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f (patch) | |
tree | ad4f2519067709f00ab98b3c591186c26dc3a21f /freebsd/sys/netinet6 | |
parent | userspace-header-gen.py: Simplify program ports (diff) | |
download | rtems-libbsd-c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f.tar.bz2 |
Update to FreeBSD head 2016-08-23
Git mirror commit 9fe7c416e6abb28b1398fd3e5687099846800cfd.
Diffstat (limited to 'freebsd/sys/netinet6')
46 files changed, 6126 insertions, 5707 deletions
diff --git a/freebsd/sys/netinet6/dest6.c b/freebsd/sys/netinet6/dest6.c index c8c6f547..94386ddd 100644 --- a/freebsd/sys/netinet6/dest6.c +++ b/freebsd/sys/netinet6/dest6.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> #include <netinet/in.h> diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c index 511c8601..4cbd3000 100644 --- a/freebsd/sys/netinet6/frag6.c +++ b/freebsd/sys/netinet6/frag6.c @@ -34,11 +34,14 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <rtems/bsd/local/opt_rss.h> + #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/domain.h> +#include <sys/eventhandler.h> #include <sys/protosw.h> #include <sys/socket.h> #include <rtems/bsd/sys/errno.h> @@ -47,6 +50,8 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <net/if.h> +#include <net/if_var.h> +#include <net/netisr.h> #include <net/route.h> #include <net/vnet.h> @@ -60,13 +65,6 @@ __FBSDID("$FreeBSD$"); #include <security/mac/mac_framework.h> -/* - * Define it to get a correct behavior on per-interface statistics. - * You will need to perform an extra routing table lookup, per fragment, - * to do it. This may, or may not be, a performance hit. - */ -#define IN6_IFSTAT_STRICT - static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); static void frag6_deq(struct ip6asfrag *); static void frag6_insque(struct ip6q *, struct ip6q *); @@ -139,7 +137,7 @@ frag6_init(void) * fragment's Fragment header. * -> should grab it from the first fragment only * - * The following note also contradicts with fragment rule - noone is going to + * The following note also contradicts with fragment rule - no one is going to * send different fragment with different next header field. 
* * additional note (p22): @@ -161,14 +159,17 @@ frag6_input(struct mbuf **mp, int *offp, int proto) struct ip6_frag *ip6f; struct ip6q *q6; struct ip6asfrag *af6, *ip6af, *af6dwn; -#ifdef IN6_IFSTAT_STRICT struct in6_ifaddr *ia; -#endif int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ struct ifnet *dstifp; u_int8_t ecn, ecn0; +#ifdef RSS + struct m_tag *mtag; + struct ip6_direct_ctx *ip6dc; +#endif + #if 0 char ip6buf[INET6_ADDRSTRLEN]; #endif @@ -184,18 +185,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto) #endif dstifp = NULL; -#ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. */ - if ((ia = ip6_getdstifaddr(m)) != NULL) { + ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia != NULL) { dstifp = ia->ia_ifp; ifa_free(&ia->ia_ifa); } -#else - /* we are violating the spec, this is not the destination interface */ - if ((m->m_flags & M_PKTHDR) != 0) - dstifp = m->m_pkthdr.rcvif; -#endif - /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); @@ -539,8 +534,8 @@ insert: frag6_deq(af6); while (t->m_next) t = t->m_next; - t->m_next = IP6_REASS_MBUF(af6); - m_adj(t->m_next, af6->ip6af_offset); + m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset); + m_cat(t, IP6_REASS_MBUF(af6)); free(af6, M_FTABLE); af6 = af6dwn; } @@ -557,27 +552,16 @@ insert: *q6->ip6q_nxtp = (u_char)(nxt & 0xff); #endif - /* Delete frag6 header */ - if (m->m_len >= offset + sizeof(struct ip6_frag)) { - /* This is the only possible case with !PULLDOWN_TEST */ - ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag), - offset); - m->m_data += sizeof(struct ip6_frag); - m->m_len -= sizeof(struct ip6_frag); - } else { - /* this comes with no copy if the boundary is on cluster */ - if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) { - frag6_remque(q6); - V_frag6_nfrags -= q6->ip6q_nfrag; + if 
(ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) { + frag6_remque(q6); + V_frag6_nfrags -= q6->ip6q_nfrag; #ifdef MAC - mac_ip6q_destroy(q6); + mac_ip6q_destroy(q6); #endif - free(q6, M_FTABLE); - V_frag6_nfragpackets--; - goto dropfrag; - } - m_adj(t, sizeof(struct ip6_frag)); - m_cat(m, t); + free(q6, M_FTABLE); + V_frag6_nfragpackets--; + + goto dropfrag; } /* @@ -604,9 +588,31 @@ insert: m->m_pkthdr.len = plen; } +#ifdef RSS + mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), + M_NOWAIT); + if (mtag == NULL) + goto dropfrag; + + ip6dc = (struct ip6_direct_ctx *)(mtag + 1); + ip6dc->ip6dc_nxt = nxt; + ip6dc->ip6dc_off = offset; + + m_tag_prepend(m, mtag); +#endif + + IP6Q_UNLOCK(); IP6STAT_INC(ip6s_reassembled); in6_ifstat_inc(dstifp, ifs6_reass_ok); +#ifdef RSS + /* + * Queue/dispatch for reprocessing. + */ + netisr_dispatch(NETISR_IPV6_DIRECT, m); + return IPPROTO_DONE; +#endif + /* * Tell launch routine the next header */ @@ -614,7 +620,6 @@ insert: *mp = m; *offp = offset; - IP6Q_UNLOCK(); return nxt; dropfrag: @@ -791,3 +796,27 @@ frag6_drain(void) IP6Q_UNLOCK(); VNET_LIST_RUNLOCK_NOSLEEP(); } + +int +ip6_deletefraghdr(struct mbuf *m, int offset, int wait) +{ + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct mbuf *t; + + /* Delete frag6 header. */ + if (m->m_len >= offset + sizeof(struct ip6_frag)) { + /* This is the only possible case with !PULLDOWN_TEST. */ + bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), + offset); + m->m_data += sizeof(struct ip6_frag); + m->m_len -= sizeof(struct ip6_frag); + } else { + /* This comes with no copy if the boundary is on cluster. 
*/ + if ((t = m_split(m, offset, wait)) == NULL) + return (ENOMEM); + m_adj(t, sizeof(struct ip6_frag)); + m_cat(m, t); + } + + return (0); +} diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c index 569b5dfa..6e3a4873 100644 --- a/freebsd/sys/netinet6/icmp6.c +++ b/freebsd/sys/netinet6/icmp6.c @@ -65,9 +65,10 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */ + #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipsec.h> #include <rtems/bsd/sys/param.h> #include <sys/domain.h> @@ -87,6 +88,7 @@ __FBSDID("$FreeBSD$"); #include <sys/time.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/if_llatbl.h> #include <net/if_types.h> @@ -100,6 +102,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/icmp6.h> #include <netinet/tcp_var.h> +#include <netinet6/in6_fib.h> #include <netinet6/in6_ifattach.h> #include <netinet6/in6_pcb.h> #include <netinet6/ip6protosw.h> @@ -109,14 +112,14 @@ __FBSDID("$FreeBSD$"); #include <netinet6/nd6.h> #include <netinet6/send.h> -#ifdef IPSEC -#include <netipsec/ipsec.h> -#include <netipsec/key.h> -#endif - extern struct domain inet6domain; -VNET_DEFINE(struct icmp6stat, icmp6stat); +VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat); +VNET_PCPUSTAT_SYSINIT(icmp6stat); + +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(icmp6stat); +#endif /* VIMAGE */ VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); @@ -157,7 +160,7 @@ void kmod_icmp6stat_inc(int statnum) { - (*((u_quad_t *)&V_icmp6stat + statnum))++; + counter_u64_add(VNET(icmp6stat)[statnum], 1); } static void @@ -362,7 +365,7 @@ icmp6_error(struct mbuf *m, int type, int code, int param) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); - M_PREPEND(m, preplen, M_DONTWAIT); /* FIB is also copied over. 
*/ + M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */ if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; @@ -474,22 +477,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto freeit; } - if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { - /* - * Deliver very specific ICMP6 type only. - * This is important to deliver TOOBIG. Otherwise PMTUD - * will not work. - */ - switch (icmp6->icmp6_type) { - case ICMP6_DST_UNREACH: - case ICMP6_PACKET_TOO_BIG: - case ICMP6_TIME_EXCEEDED: - break; - default: - goto freeit; - } - } - ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) @@ -500,15 +487,13 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: + case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */ code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; - case ICMP6_DST_UNREACH_ADDR: - code = PRC_HOSTDEAD; - break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; @@ -575,28 +560,21 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) /* Give up remote */ break; } - if ((n->m_flags & M_EXT) != 0 + if (!M_WRITABLE(n) || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; - const int maxlen = sizeof(*nip6) + sizeof(*nicmp6); int n0len; - MGETHDR(n, M_DONTWAIT, n0->m_type); - n0len = n0->m_pkthdr.len; /* save for use below */ - if (n) - M_MOVE_PKTHDR(n, n0); /* FIB copied. 
*/ - if (n && maxlen >= MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - n = NULL; - } - } + CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN); + n = m_gethdr(M_NOWAIT, n0->m_type); if (n == NULL) { /* Give up remote */ m_freem(n0); break; } + + m_move_pkthdr(n, n0); /* FIB copied. */ + n0len = n0->m_pkthdr.len; /* save for use below */ /* * Copy IPv6 and ICMPv6 only. */ @@ -683,31 +661,27 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) } else { struct prison *pr; u_char *p; - int maxlen, maxhlen, hlen; + int maxhlen, hlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ - if ((V_icmp6_nodeinfo & 5) != 5) + if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK | + ICMP6_NODEINFO_TMPADDROK)) != + (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) break; if (code != 0) goto badcode; - maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4; - if (maxlen >= MCLBYTES) { + + CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN); + n = m_gethdr(M_NOWAIT, m->m_type); + if (n == NULL) { /* Give up remote */ break; } - MGETHDR(n, M_DONTWAIT, m->m_type); - if (n && maxlen > MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - n = NULL; - } - } - if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) { + if (!m_dup_pkthdr(n, m, M_NOWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". 
If true, then @@ -718,13 +692,8 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) m_free(n); n = NULL; } - if (n == NULL) { - /* Give up remote */ - break; - } - n->m_pkthdr.rcvif = NULL; - n->m_len = 0; - maxhlen = M_TRAILINGSPACE(n) - maxlen; + maxhlen = M_TRAILINGSPACE(n) - + (sizeof(*nip6) + sizeof(*nicmp6) + 4); #ifndef __rtems__ pr = curthread->td_ucred->cr_prison; #else /* __rtems__ */ @@ -771,7 +740,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* give up local */ /* Send incoming SeND packet to user space. */ @@ -809,7 +778,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { @@ -840,7 +809,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); @@ -869,7 +838,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. 
*/ if (send_sendso_input_hook != NULL) { @@ -900,7 +869,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { + if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); @@ -1181,8 +1150,6 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; - m_addr_changed(m); - if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)&notifymtu; @@ -1329,7 +1296,8 @@ ni6_input(struct mbuf *m, int off) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ - if ((ia6 = ip6_getdstifaddr(m)) == NULL) + ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && @@ -1505,26 +1473,23 @@ ni6_input(struct mbuf *m, int off) break; } - /* allocate an mbuf to reply. */ - MGETHDR(n, M_DONTWAIT, m->m_type); + /* Allocate an mbuf to reply. */ + if (replylen > MCLBYTES) { + /* + * XXX: should we try to allocate more? But MCLBYTES + * is probably much larger than IPV6_MMTU... + */ + goto bad; + } + if (replylen > MHLEN) + n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR); + else + n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } - M_MOVE_PKTHDR(n, m); /* just for recvif and FIB */ - if (replylen > MHLEN) { - if (replylen > MCLBYTES) { - /* - * XXX: should we try to allocate more? But MCLBYTES - * is probably much larger than IPV6_MMTU... 
- */ - goto bad; - } - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - goto bad; - } - } + m_move_pkthdr(n, m); /* just for recvif and FIB */ n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ @@ -1623,16 +1588,13 @@ ni6_nametodns(const char *name, int namelen, int old) else len = MCLBYTES; - /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */ - MGET(m, M_DONTWAIT, MT_DATA); - if (m && len > MLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) - goto fail; - } - if (!m) + /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */ + if (len > MLEN) + m = m_getcl(M_NOWAIT, MT_DATA, 0); + else + m = m_get(M_NOWAIT, MT_DATA); + if (m == NULL) goto fail; - m->m_next = NULL; if (old) { m->m_len = len; @@ -1793,7 +1755,7 @@ ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, } IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { @@ -1880,7 +1842,7 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, ifp = ifp0 ? 
ifp0 : TAILQ_FIRST(&V_ifnet); again: - for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) { + for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) @@ -1965,8 +1927,8 @@ ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > - time_second) - ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second); + time_uptime) + ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime); else ltime = 0; } @@ -2078,7 +2040,7 @@ icmp6_rip6_input(struct mbuf **mp, int off) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { - MGET(n, M_DONTWAIT, m->m_type); + n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, @@ -2128,7 +2090,7 @@ icmp6_rip6_input(struct mbuf **mp, int off) m->m_len <= MHLEN) { struct mbuf *n; - MGET(n, M_DONTWAIT, m->m_type); + n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); @@ -2166,13 +2128,13 @@ icmp6_rip6_input(struct mbuf **mp, int off) void icmp6_reflect(struct mbuf *m, size_t off) { + struct in6_addr src6, *srcp; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; - int plen; - int type, code; struct ifnet *outif = NULL; - struct in6_addr origdst, src, *srcp = NULL; + int plen; + int type, code, hlim; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { @@ -2218,13 +2180,8 @@ icmp6_reflect(struct mbuf *m, size_t off) icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ - - origdst = ip6->ip6_dst; - /* - * ip6_input() drops a packet if its src is multicast. - * So, the src is never multicast. 
- */ - ip6->ip6_dst = ip6->ip6_src; + hlim = 0; + srcp = NULL; /* * If the incoming packet was addressed directly to us (i.e. unicast), @@ -2232,74 +2189,59 @@ icmp6_reflect(struct mbuf *m, size_t off) * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. - * Note that ip6_getdstifaddr() may fail if we are in an error handling - * procedure of an outgoing packet of our own, in which case we need - * to search in the ifaddr list. */ - if (!IN6_IS_ADDR_MULTICAST(&origdst)) { - if ((ia = ip6_getdstifaddr(m))) { - if (!(ia->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) - srcp = &ia->ia_addr.sin6_addr; - } else { - struct sockaddr_in6 d; - - bzero(&d, sizeof(d)); - d.sin6_family = AF_INET6; - d.sin6_len = sizeof(d); - d.sin6_addr = origdst; - ia = (struct in6_ifaddr *) - ifa_ifwithaddr((struct sockaddr *)&d); - if (ia && - !(ia->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { - srcp = &ia->ia_addr.sin6_addr; - } + if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia != NULL && !(ia->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { + src6 = ia->ia_addr.sin6_addr; + srcp = &src6; + + if (m->m_pkthdr.rcvif != NULL) { + /* XXX: This may not be the outgoing interface */ + hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; + } else + hlim = V_ip6_defhlim; } + if (ia != NULL) + ifa_free(&ia->ia_ifa); } if (srcp == NULL) { - int e; - struct sockaddr_in6 sin6; - struct route_in6 ro; + int error; + struct in6_addr dst6; + uint32_t scopeid; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. 
*/ - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ - - bzero(&ro, sizeof(ro)); - e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &src); - if (ro.ro_rt) - RTFREE(ro.ro_rt); /* XXX: we could use this */ - if (e) { + in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, NULL, &src6, &hlim); + + if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &sin6.sin6_addr), e)); + ip6_sprintf(ip6buf, &ip6->ip6_dst), error)); goto bad; } - srcp = &src; + srcp = &src6; } - + /* + * ip6_input() drops a packet if its src is multicast. + * So, the src is never multicast. + */ + ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; - if (outif) - ip6->ip6_hlim = ND_IFINFO(outif)->chlim; - else if (m->m_pkthdr.rcvif) { - /* XXX: This may not be the outgoing interface */ - ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; - } else - ip6->ip6_hlim = V_ip6_defhlim; + ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, @@ -2311,19 +2253,13 @@ icmp6_reflect(struct mbuf *m, size_t off) m->m_flags &= ~(M_BCAST|M_MCAST); - m_addr_changed(m); - ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); - if (ia != NULL) - ifa_free(&ia->ia_ifa); return; bad: - if (ia != NULL) - ifa_free(&ia->ia_ifa); m_freem(m); return; } @@ -2365,7 +2301,6 @@ icmp6_redirect_input(struct mbuf *m, int off) int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; - struct rtentry *rt = NULL; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; @@ -2420,18 +2355,13 @@ icmp6_redirect_input(struct mbuf *m, int 
off) } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ - struct sockaddr_in6 sin6; - struct in6_addr *gw6; - - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(struct sockaddr_in6); - bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); - rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, RT_DEFAULT_FIB); - if (rt) { - if (rt->rt_gateway == NULL || - rt->rt_gateway->sa_family != AF_INET6) { - RTFREE_LOCKED(rt); + struct nhop6_basic nh6; + struct in6_addr kdst; + uint32_t scopeid; + + in6_splitscope(&reddst6, &kdst, &scopeid); + if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){ + if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", @@ -2439,14 +2369,12 @@ icmp6_redirect_input(struct mbuf *m, int off) goto bad; } - gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr); - if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) { - RTFREE_LOCKED(rt); + if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", - ip6_sprintf(ip6buf, gw6), + ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } @@ -2457,8 +2385,6 @@ icmp6_redirect_input(struct mbuf *m, int off) icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } - RTFREE_LOCKED(rt); - rt = NULL; } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, @@ -2480,7 +2406,6 @@ icmp6_redirect_input(struct mbuf *m, int off) icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } - /* validation passed */ icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); @@ -2505,31 +2430,45 @@ icmp6_redirect_input(struct mbuf *m, int off) goto bad; } + /* Validation passed. */ + /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? 
ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); - if (!is_onlink) { /* better router case. perform rtredirect. */ - /* perform rtredirect */ + /* + * Install a gateway route in the better-router case or an interface + * route in the on-link-destination case. + */ + { struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; + struct sockaddr *gw; + int rt_flags; u_int fibnum; bzero(&sdst, sizeof(sdst)); - bzero(&sgw, sizeof(sgw)); bzero(&ssrc, sizeof(ssrc)); - sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6; - sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len = - sizeof(struct sockaddr_in6); - bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); + sdst.sin6_family = ssrc.sin6_family = AF_INET6; + sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); + rt_flags = RTF_HOST; + if (is_router) { + bzero(&sgw, sizeof(sgw)); + sgw.sin6_family = AF_INET6; + sgw.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&redtgt6, &sgw.sin6_addr, + sizeof(struct in6_addr)); + gw = (struct sockaddr *)&sgw; + rt_flags |= RTF_GATEWAY; + } else + gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) - in6_rtredirect((struct sockaddr *)&sdst, - (struct sockaddr *)&sgw, (struct sockaddr *)NULL, - RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&ssrc, - fibnum); + in6_rtredirect((struct sockaddr *)&sdst, gw, + (struct sockaddr *)NULL, rt_flags, + (struct sockaddr *)&ssrc, fibnum); } /* finally update cached route in each socket via pfctlinput */ { @@ -2540,9 +2479,6 @@ icmp6_redirect_input(struct mbuf *m, int off) sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); -#ifdef IPSEC - key_sa_routechange((struct sockaddr *)&sdst); -#endif /* IPSEC */ } freeit: @@ -2609,14 +2545,10 @@ icmp6_redirect_output(struct mbuf *m0, 
struct rtentry *rt) #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif - MGETHDR(m, M_DONTWAIT, MT_HEADER); - if (m && IPV6_MMTU >= MHLEN) - MCLGET(m, M_DONTWAIT); - if (!m) + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + if (m == NULL) goto fail; M_SETFIB(m, rt->rt_fibnum); - m->m_pkthdr.rcvif = NULL; - m->m_len = 0; maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ @@ -2711,7 +2643,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); - bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen); + bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c index 66888fa8..f5d82524 100644 --- a/freebsd/sys/netinet6/in6.c +++ b/freebsd/sys/netinet6/in6.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/sys/param.h> +#include <sys/eventhandler.h> #include <rtems/bsd/sys/errno.h> #include <sys/jail.h> #include <sys/malloc.h> @@ -81,6 +82,8 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/time.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rmlock.h> #include <sys/syslog.h> #include <net/if.h> @@ -97,6 +100,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_systm.h> #include <netinet/ip.h> #include <netinet/in_pcb.h> +#include <netinet/ip_carp.h> #include <netinet/ip6.h> #include <netinet6/ip6_var.h> @@ -105,6 +109,7 @@ __FBSDID("$FreeBSD$"); #include <netinet6/ip6_mroute.h> #include <netinet6/in6_ifattach.h> #include <netinet6/scope6_var.h> +#include <netinet6/in6_fib.h> #include <netinet6/in6_pcb.h> VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix); @@ -135,49 +140,36 @@ const struct in6_addr in6mask128 = IN6MASK128; const struct sockaddr_in6 sa6_any = { sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 }; -static int in6_lifaddr_ioctl(struct 
socket *, u_long, caddr_t, - struct ifnet *, struct thread *); -static int in6_ifinit(struct ifnet *, struct in6_ifaddr *, - struct sockaddr_in6 *, int); +static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *, + struct in6_aliasreq *, int); static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); -int (*faithprefix_p)(struct in6_addr *); +static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *, + struct in6_ifaddr *, int); +static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *, + struct in6_aliasreq *, int flags); +static int in6_update_ifa_internal(struct ifnet *, struct in6_aliasreq *, + struct in6_ifaddr *, int, int); +static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *, + struct in6_ifaddr *, int); #define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) #define ia62ifa(ia6) (&((ia6)->ia_ifa)) + void -in6_ifaddloop(struct ifaddr *ifa) +in6_newaddrmsg(struct in6_ifaddr *ia, int cmd) { struct sockaddr_dl gateway; struct sockaddr_in6 mask, addr; struct rtentry rt; - struct in6_ifaddr *ia; - struct ifnet *ifp; - struct llentry *ln; - - ia = ifa2ia6(ifa); - ifp = ifa->ifa_ifp; - IF_AFDATA_LOCK(ifp); - ifa->ifa_rtrequest = nd6_rtrequest; - ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | - LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr); - IF_AFDATA_UNLOCK(ifp); - if (ln != NULL) { - ln->la_expire = 0; /* for IPv6 this means permanent */ - ln->ln_state = ND6_LLINFO_REACHABLE; - /* - * initialize for rtmsg generation - */ - bzero(&gateway, sizeof(gateway)); - gateway.sdl_len = sizeof(gateway); - gateway.sdl_family = AF_LINK; - gateway.sdl_nlen = 0; - gateway.sdl_alen = 6; - memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned, - sizeof(ln->ll_addr)); - LLE_WUNLOCK(ln); - } + + /* + * initialize for rtmsg generation + */ + bzero(&gateway, sizeof(gateway)); + gateway.sdl_len = sizeof(gateway); + gateway.sdl_family = AF_LINK; bzero(&rt, sizeof(rt)); rt.rt_gateway = (struct sockaddr *)&gateway; @@ -185,42 +177,11 @@ 
in6_ifaddloop(struct ifaddr *ifa) memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); rt_mask(&rt) = (struct sockaddr *)&mask; rt_key(&rt) = (struct sockaddr *)&addr; - rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC; + rt.rt_flags = RTF_HOST | RTF_STATIC; + if (cmd == RTM_ADD) + rt.rt_flags |= RTF_UP; /* Announce arrival of local address to all FIBs. */ - rt_newaddrmsg(RTM_ADD, ifa, 0, &rt); -} - -void -in6_ifremloop(struct ifaddr *ifa) -{ - struct sockaddr_dl gateway; - struct sockaddr_in6 mask, addr; - struct rtentry rt0; - struct in6_ifaddr *ia; - struct ifnet *ifp; - - ia = ifa2ia6(ifa); - ifp = ifa->ifa_ifp; - memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); - memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); - lltable_prefix_free(AF_INET6, (struct sockaddr *)&addr, - (struct sockaddr *)&mask, LLE_STATIC); - - /* - * initialize for rtmsg generation - */ - bzero(&gateway, sizeof(gateway)); - gateway.sdl_len = sizeof(gateway); - gateway.sdl_family = AF_LINK; - gateway.sdl_nlen = 0; - gateway.sdl_alen = ifp->if_addrlen; - bzero(&rt0, sizeof(rt0)); - rt0.rt_gateway = (struct sockaddr *)&gateway; - rt_mask(&rt0) = (struct sockaddr *)&mask; - rt_key(&rt0) = (struct sockaddr *)&addr; - rt0.rt_flags = RTF_HOST | RTF_STATIC; - /* Announce removal of local address to all FIBs. */ - rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0); + rt_newaddrmsg(cmd, &ia->ia_ifa, 0, &rt); } int @@ -275,7 +236,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct in6_ifaddr *ia = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; struct sockaddr_in6 *sa6; + int carp_attached = 0; int error; + u_long ocmd = cmd; + + /* + * Compat to make pre-10.x ifconfig(8) operable. 
+ */ + if (cmd == OSIOCAIFADDR_IN6) + cmd = SIOCAIFADDR_IN6; switch (cmd) { case SIOCGETSGCNT_IN6: @@ -317,8 +286,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, /* FALLTHROUGH */ case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: - case SIOCGDRLST_IN6: - case SIOCGPRLST_IN6: case SIOCGNBRINFO_IN6: case SIOCGDEFIFACE_IN6: return (nd6_ioctl(cmd, data, ifp)); @@ -366,26 +333,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, return (scope6_ioctl(cmd, data, ifp)); } - switch (cmd) { - case SIOCALIFADDR: - if (td != NULL) { - error = priv_check(td, PRIV_NET_ADDIFADDR); - if (error) - return (error); - } - return in6_lifaddr_ioctl(so, cmd, data, ifp, td); - - case SIOCDLIFADDR: - if (td != NULL) { - error = priv_check(td, PRIV_NET_DELIFADDR); - if (error) - return (error); - } - /* FALLTHROUGH */ - case SIOCGLIFADDR: - return in6_lifaddr_ioctl(so, cmd, data, ifp, td); - } - /* * Find address for this interface, if it exists. * @@ -417,7 +364,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCGIFALIFETIME_IN6: - case SIOCSIFALIFETIME_IN6: case SIOCGIFSTAT_IN6: case SIOCGIFSTAT_ICMP6: sa6 = &ifr->ifr_addr; @@ -516,34 +462,6 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, goto out; } break; - - case SIOCSIFALIFETIME_IN6: - { - struct in6_addrlifetime *lt; - - if (td != NULL) { - error = priv_check(td, PRIV_NETINET_ALIFETIME6); - if (error) - goto out; - } - if (ia == NULL) { - error = EADDRNOTAVAIL; - goto out; - } - /* sanity for overflow - beware unsigned */ - lt = &ifr->ifr_ifru.ifru_lifetime; - if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && - lt->ia6t_vltime + time_second < time_second) { - error = EINVAL; - goto out; - } - if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && - lt->ia6t_pltime + time_second < time_second) { - error = EINVAL; - goto out; - } - break; - } } switch (cmd) { @@ -576,17 +494,17 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, break; case 
SIOCGIFSTAT_IN6: - bzero(&ifr->ifr_ifru.ifru_stat, - sizeof(ifr->ifr_ifru.ifru_stat)); - ifr->ifr_ifru.ifru_stat = - *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat; + COUNTER_ARRAY_COPY(((struct in6_ifextra *) + ifp->if_afdata[AF_INET6])->in6_ifstat, + &ifr->ifr_ifru.ifru_stat, + sizeof(struct in6_ifstat) / sizeof(uint64_t)); break; case SIOCGIFSTAT_ICMP6: - bzero(&ifr->ifr_ifru.ifru_icmp6stat, - sizeof(ifr->ifr_ifru.ifru_icmp6stat)); - ifr->ifr_ifru.ifru_icmp6stat = - *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat; + COUNTER_ARRAY_COPY(((struct in6_ifextra *) + ifp->if_afdata[AF_INET6])->icmp6_ifstat, + &ifr->ifr_ifru.ifru_icmp6stat, + sizeof(struct icmp6_ifstat) / sizeof(uint64_t)); break; case SIOCGIFALIFETIME_IN6: @@ -629,24 +547,8 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, } break; - case SIOCSIFALIFETIME_IN6: - ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; - /* for sanity */ - if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { - ia->ia6_lifetime.ia6t_expire = - time_second + ia->ia6_lifetime.ia6t_vltime; - } else - ia->ia6_lifetime.ia6t_expire = 0; - if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { - ia->ia6_lifetime.ia6t_preferred = - time_second + ia->ia6_lifetime.ia6t_pltime; - } else - ia->ia6_lifetime.ia6t_preferred = 0; - break; - case SIOCAIFADDR_IN6: { - int i; struct nd_prefixctl pr0; struct nd_prefix *pr; @@ -667,6 +569,18 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, break; } + if (cmd == ocmd && ifra->ifra_vhid > 0) { + if (carp_attach_p != NULL) + error = (*carp_attach_p)(&ia->ia_ifa, + ifra->ifra_vhid); + else + error = EPROTONOSUPPORT; + if (error) + goto out; + else + carp_attached = 1; + } + /* * then, make the prefix on-link on the interface. 
* XXX: we'd rather create the prefix before the address, but @@ -683,14 +597,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); if (pr0.ndpr_plen == 128) { - break; /* we don't need to install a host route. */ + /* we don't need to install a host route. */ + goto aifaddr_out; } pr0.ndpr_prefix = ifra->ifra_addr; /* apply the mask for safety. */ - for (i = 0; i < 4; i++) { - pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= - ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; - } + IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr, + &ifra->ifra_prefixmask.sin6_addr); + /* * XXX: since we don't have an API to set prefix (not address) * lifetimes, we just use the same lifetimes as addresses. @@ -710,12 +624,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, * nd6_prelist_add will install the corresponding * interface route. */ - if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) - goto out; - if (pr == NULL) { - log(LOG_ERR, "nd6_prelist_add succeeded but " - "no prefix\n"); - error = EINVAL; + if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) { + if (carp_attached) + (*carp_detach_p)(&ia->ia_ifa); goto out; } } @@ -746,32 +657,28 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, * that is, this address might make other addresses detached. */ pfxlist_onlink_check(); - if (error == 0 && ia) { - if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { - /* - * Try to clear the flag when a new - * IPv6 address is added onto an - * IFDISABLED interface and it - * succeeds. - */ - struct in6_ndireq nd; - - memset(&nd, 0, sizeof(nd)); - nd.ndi.flags = ND_IFINFO(ifp)->flags; - nd.ndi.flags &= ~ND6_IFF_IFDISABLED; - if (nd6_ioctl(SIOCSIFINFO_FLAGS, - (caddr_t)&nd, ifp) < 0) - log(LOG_NOTICE, "SIOCAIFADDR_IN6: " - "SIOCSIFINFO_FLAGS for -ifdisabled " - "failed."); - /* - * Ignore failure of clearing the flag - * intentionally. The failure means - * address duplication was detected. 
- */ - } - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + +aifaddr_out: + /* + * Try to clear the flag when a new IPv6 address is added + * onto an IFDISABLED interface and it succeeds. + */ + if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { + struct in6_ndireq nd; + + memset(&nd, 0, sizeof(nd)); + nd.ndi.flags = ND_IFINFO(ifp)->flags; + nd.ndi.flags &= ~ND6_IFF_IFDISABLED; + if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0) + log(LOG_NOTICE, "SIOCAIFADDR_IN6: " + "SIOCSIFINFO_FLAGS for -ifdisabled " + "failed."); + /* + * Ignore failure of clearing the flag intentionally. + * The failure means address duplication was detected. + */ } + EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } @@ -823,27 +730,24 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol) { char ip6buf[INET6_ADDRSTRLEN]; - struct sockaddr_in6 mltaddr, mltmask; - struct in6_addr llsol; + struct in6_addr mltaddr; struct in6_multi_mship *imm; - struct rtentry *rt; int delay, error; KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__)); /* Join solicited multicast addr for new host id. 
*/ - bzero(&llsol, sizeof(struct in6_addr)); - llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; - llsol.s6_addr32[1] = 0; - llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; - llsol.s6_addr8[12] = 0xff; - if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { + bzero(&mltaddr, sizeof(struct in6_addr)); + mltaddr.s6_addr32[0] = IPV6_ADDR_INT32_MLL; + mltaddr.s6_addr32[2] = htonl(1); + mltaddr.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; + mltaddr.s6_addr8[12] = 0xff; + if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) { /* XXX: should not happen */ log(LOG_ERR, "%s: in6_setscope failed\n", __func__); goto cleanup; } - delay = 0; + delay = error = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * We need a random delay for DAD on the address being @@ -853,62 +757,28 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } - imm = in6_joingroup(ifp, &llsol, &error, delay); + imm = in6_joingroup(ifp, &mltaddr, &error, delay); if (imm == NULL) { - nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " - "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &llsol), + nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); *in6m_sol = imm->i6mm_maddr; - bzero(&mltmask, sizeof(mltmask)); - mltmask.sin6_len = sizeof(struct sockaddr_in6); - mltmask.sin6_family = AF_INET6; - mltmask.sin6_addr = in6mask32; -#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ - /* * Join link-local all-nodes address. 
*/ - bzero(&mltaddr, sizeof(mltaddr)); - mltaddr.sin6_len = sizeof(struct sockaddr_in6); - mltaddr.sin6_family = AF_INET6; - mltaddr.sin6_addr = in6addr_linklocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + mltaddr = in6addr_linklocal_allnodes; + if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ - /* - * XXX: do we really need this automatic routes? We should probably - * reconsider this stuff. Most applications actually do not need the - * routes, since they usually specify the outgoing interface. - */ - rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); - if (rt != NULL) { - /* XXX: only works in !SCOPEDROUTING case. */ - if (memcmp(&mltaddr.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - MLTMASK_LEN)) { - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (rt == NULL) { - error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0, RT_DEFAULT_FIB); - if (error) - goto cleanup; - } else - RTFREE_LOCKED(rt); - - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + imm = in6_joingroup(ifp, &mltaddr, &error, 0); if (imm == NULL) { - nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " - "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, - &mltaddr.sin6_addr), if_name(ifp), error)); + nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " + "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr), + if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); @@ -924,24 +794,26 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } - if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { + if (in6_nigroup(ifp, NULL, -1, &mltaddr) == 0) { /* XXX jinmei */ - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); + imm = 
in6_joingroup(ifp, &mltaddr, &error, delay); if (imm == NULL) - nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + nd6log((LOG_WARNING, + "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, - &mltaddr.sin6_addr), if_name(ifp), error)); + &mltaddr), if_name(ifp), error)); /* XXX not very fatal, go on... */ else LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); } - if (V_icmp6_nodeinfo_oldmcprefix && - in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); + if (V_icmp6_nodeinfo_oldmcprefix && + in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr) == 0) { + imm = in6_joingroup(ifp, &mltaddr, &error, delay); if (imm == NULL) - nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + nd6log((LOG_WARNING, + "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, - &mltaddr.sin6_addr), if_name(ifp), error)); + &mltaddr), if_name(ifp), error)); /* XXX not very fatal, go on... */ else LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); @@ -951,38 +823,18 @@ in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, * Join interface-local all-nodes address. * (ff01::1%ifN, and ff01::%ifN/32) */ - mltaddr.sin6_addr = in6addr_nodelocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + mltaddr = in6addr_nodelocal_allnodes; + if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ - /* XXX: again, do we really need the route? 
*/ - rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); - if (rt != NULL) { - if (memcmp(&mltaddr.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - MLTMASK_LEN)) { - RTFREE_LOCKED(rt); - rt = NULL; - } - } - if (rt == NULL) { - error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0, RT_DEFAULT_FIB); - if (error) - goto cleanup; - } else - RTFREE_LOCKED(rt); - imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); + imm = in6_joingroup(ifp, &mltaddr, &error, 0); if (imm == NULL) { - nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " + nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, - &mltaddr.sin6_addr), if_name(ifp), error)); + &mltaddr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); -#undef MLTMASK_LEN cleanup: return (error); @@ -992,17 +844,65 @@ cleanup: * Update parameters of an IPv6 interface address. * If necessary, a new entry is created and linked into address chains. * This function is separated from in6_control(). - * XXX: should this be performed under splnet()? */ int in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags) { - int error = 0, hostIsNew = 0, plen = -1; + int error, hostIsNew = 0; + + if ((error = in6_validate_ifra(ifp, ifra, ia, flags)) != 0) + return (error); + + if (ia == NULL) { + hostIsNew = 1; + if ((ia = in6_alloc_ifa(ifp, ifra, flags)) == NULL) + return (ENOBUFS); + } + + error = in6_update_ifa_internal(ifp, ifra, ia, hostIsNew, flags); + if (error != 0) { + if (hostIsNew != 0) { + in6_unlink_ifa(ia, ifp); + ifa_free(&ia->ia_ifa); + } + return (error); + } + + if (hostIsNew) + error = in6_broadcast_ifa(ifp, ifra, ia, flags); + + return (error); +} + +/* + * Fill in basic IPv6 address request info. 
+ */ +void +in6_prepare_ifra(struct in6_aliasreq *ifra, const struct in6_addr *addr, + const struct in6_addr *mask) +{ + + memset(ifra, 0, sizeof(struct in6_aliasreq)); + + ifra->ifra_addr.sin6_family = AF_INET6; + ifra->ifra_addr.sin6_len = sizeof(struct sockaddr_in6); + if (addr != NULL) + ifra->ifra_addr.sin6_addr = *addr; + + ifra->ifra_prefixmask.sin6_family = AF_INET6; + ifra->ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + if (mask != NULL) + ifra->ifra_prefixmask.sin6_addr = *mask; +} + +static int +in6_validate_ifra(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int flags) +{ + int plen = -1; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; - struct in6_multi *in6m_sol; - int delay; char ip6buf[INET6_ADDRSTRLEN]; /* Validate parameters */ @@ -1017,6 +917,14 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, ifra->ifra_dstaddr.sin6_family != AF_INET6 && ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) return (EAFNOSUPPORT); + + /* + * Validate address + */ + if (ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6) || + ifra->ifra_addr.sin6_family != AF_INET6) + return (EINVAL); + /* * validate ifra_prefixmask. don't check sin6_family, netmask * does not carry fields other than sin6_len. @@ -1069,6 +977,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, if (sa6_embedscope(&dst6, 0)) return (EINVAL); /* XXX: should be impossible */ } + /* Modify original ifra_dstaddr to reflect changes */ + ifra->ifra_dstaddr = dst6; + /* * The destination address can be specified only for a p2p or a * loopback interface. If specified, the corresponding prefix length @@ -1104,94 +1015,102 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, return (0); /* there's nothing to do */ } - /* - * If this is a new address, allocate a new ifaddr and link it - * into chains. 
- */ - if (ia == NULL) { - hostIsNew = 1; - /* - * When in6_update_ifa() is called in a process of a received - * RA, it is called under an interrupt context. So, we should - * call malloc with M_NOWAIT. - */ - ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR, - M_NOWAIT); - if (ia == NULL) - return (ENOBUFS); - bzero((caddr_t)ia, sizeof(*ia)); - ifa_init(&ia->ia_ifa); - LIST_INIT(&ia->ia6_memberships); - /* Initialize the address and masks, and put time stamp */ - ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; - ia->ia_addr.sin6_family = AF_INET6; - ia->ia_addr.sin6_len = sizeof(ia->ia_addr); - ia->ia6_createtime = time_second; - if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { - /* - * XXX: some functions expect that ifa_dstaddr is not - * NULL for p2p interfaces. - */ - ia->ia_ifa.ifa_dstaddr = - (struct sockaddr *)&ia->ia_dstaddr; - } else { - ia->ia_ifa.ifa_dstaddr = NULL; - } - ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; - ia->ia_ifp = ifp; - ifa_ref(&ia->ia_ifa); /* if_addrhead */ - IF_ADDR_WLOCK(ifp); - TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); - IF_ADDR_WUNLOCK(ifp); - - ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */ - IN6_IFADDR_WLOCK(); - TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link); - IN6_IFADDR_WUNLOCK(); - } - - /* update timestamp */ - ia->ia6_updatetime = time_second; - - /* set prefix mask */ - if (ifra->ifra_prefixmask.sin6_len) { + /* Check prefix mask */ + if (ia != NULL && ifra->ifra_prefixmask.sin6_len != 0) { /* * We prohibit changing the prefix length of an existing * address, because * + such an operation should be rare in IPv6, and * + the operation would confuse prefix management. 
*/ - if (ia->ia_prefixmask.sin6_len && + if (ia->ia_prefixmask.sin6_len != 0 && in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { - nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an" - " existing (%s) address should not be changed\n", + nd6log((LOG_INFO, "in6_validate_ifa: the prefix length " + "of an existing %s address should not be changed\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); - error = EINVAL; - goto unlink; + + return (EINVAL); } - ia->ia_prefixmask = ifra->ifra_prefixmask; } + return (0); +} + + +/* + * Allocate a new ifaddr and link it into chains. + */ +static struct in6_ifaddr * +in6_alloc_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int flags) +{ + struct in6_ifaddr *ia; + /* - * If a new destination address is specified, scrub the old one and - * install the new destination. Note that the interface must be - * p2p or loopback (see the check above.) + * When in6_alloc_ifa() is called in a process of a received + * RA, it is called under an interrupt context. So, we should + * call malloc with M_NOWAIT. */ - if (dst6.sin6_family == AF_INET6 && - !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) { - int e; + ia = (struct in6_ifaddr *)ifa_alloc(sizeof(*ia), M_NOWAIT); + if (ia == NULL) + return (NULL); + LIST_INIT(&ia->ia6_memberships); + /* Initialize the address and masks, and put time stamp */ + ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; + ia->ia_addr.sin6_family = AF_INET6; + ia->ia_addr.sin6_len = sizeof(ia->ia_addr); + /* XXX: Can we assign ,sin6_addr and skip the rest? */ + ia->ia_addr = ifra->ifra_addr; + ia->ia6_createtime = time_uptime; + if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { + /* + * Some functions expect that ifa_dstaddr is not + * NULL for p2p interfaces. 
+ */ + ia->ia_ifa.ifa_dstaddr = + (struct sockaddr *)&ia->ia_dstaddr; + } else { + ia->ia_ifa.ifa_dstaddr = NULL; + } - if ((ia->ia_flags & IFA_ROUTE) != 0 && - (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { - nd6log((LOG_ERR, "in6_update_ifa: failed to remove " - "a route to the old destination: %s\n", - ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); - /* proceed anyway... */ - } else - ia->ia_flags &= ~IFA_ROUTE; - ia->ia_dstaddr = dst6; + /* set prefix mask if any */ + ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; + if (ifra->ifra_prefixmask.sin6_len != 0) { + ia->ia_prefixmask.sin6_family = AF_INET6; + ia->ia_prefixmask.sin6_len = ifra->ifra_prefixmask.sin6_len; + ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr; } + ia->ia_ifp = ifp; + ifa_ref(&ia->ia_ifa); /* if_addrhead */ + IF_ADDR_WLOCK(ifp); + TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); + + ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */ + IN6_IFADDR_WLOCK(); + TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link); + LIST_INSERT_HEAD(IN6ADDR_HASH(&ia->ia_addr.sin6_addr), ia, ia6_hash); + IN6_IFADDR_WUNLOCK(); + + return (ia); +} + +/* + * Update/configure interface address parameters: + * + * 1) Update lifetime + * 2) Update interface metric ad flags + * 3) Notify other subsystems + */ +static int +in6_update_ifa_internal(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int hostIsNew, int flags) +{ + int error; + + /* update timestamp */ + ia->ia6_updatetime = time_uptime; + /* * Set lifetimes. 
We do not refer to ia6t_expire and ia6t_preferred * to see if the address is deprecated or invalidated, but initialize @@ -1200,71 +1119,85 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, ia->ia6_lifetime = ifra->ifra_lifetime; if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = - time_second + ia->ia6_lifetime.ia6t_vltime; + time_uptime + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = - time_second + ia->ia6_lifetime.ia6t_pltime; + time_uptime + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; - /* reset the interface and routing table appropriately. */ - if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0) - goto unlink; - - /* - * configure address flags. - */ - ia->ia6_flags = ifra->ifra_flags; /* * backward compatibility - if IN6_IFF_DEPRECATED is set from the * userland, make it deprecated. */ if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) { ia->ia6_lifetime.ia6t_pltime = 0; - ia->ia6_lifetime.ia6t_preferred = time_second; + ia->ia6_lifetime.ia6t_preferred = time_uptime; } + + /* + * configure address flags. + */ + ia->ia6_flags = ifra->ifra_flags; + /* * Make the address tentative before joining multicast addresses, * so that corresponding MLD responses would not have a tentative * source address. */ ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ - if (hostIsNew && in6if_do_dad(ifp)) - ia->ia6_flags |= IN6_IFF_TENTATIVE; - - /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */ - if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) - ia->ia6_flags |= IN6_IFF_TENTATIVE; /* - * We are done if we have simply modified an existing address. + * DAD should be performed for an new address or addresses on + * an interface with ND6_IFF_IFDISABLED. 
*/ - if (!hostIsNew) - return (error); + if (in6if_do_dad(ifp) && + (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED))) + ia->ia6_flags |= IN6_IFF_TENTATIVE; - /* - * Beyond this point, we should call in6_purgeaddr upon an error, - * not just go to unlink. - */ + /* notify other subsystems */ + error = in6_notify_ifa(ifp, ia, ifra, hostIsNew); + + return (error); +} + +/* + * Do link-level ifa job: + * 1) Add lle entry for added address + * 2) Notifies routing socket users about new address + * 3) join appropriate multicast group + * 4) start DAD if enabled + */ +static int +in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, + struct in6_ifaddr *ia, int flags) +{ + struct in6_multi *in6m_sol; + int error = 0; + + /* Add local address to lltable, if necessary (ex. on p2p link). */ + if ((error = nd6_add_ifa_lle(ia)) != 0) { + in6_purgeaddr(&ia->ia_ifa); + ifa_free(&ia->ia_ifa); + return (error); + } /* Join necessary multicast groups. */ in6m_sol = NULL; if ((ifp->if_flags & IFF_MULTICAST) != 0) { error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol); - if (error) - goto cleanup; + if (error != 0) { + in6_purgeaddr(&ia->ia_ifa); + ifa_free(&ia->ia_ifa); + return (error); + } } - /* - * Perform DAD, if needed. - * XXX It may be of use, if we can administratively disable DAD. - */ - if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && - (ia->ia6_flags & IN6_IFF_TENTATIVE)) - { - int mindelay, maxdelay; + /* Perform DAD, if the address is TENTATIVE. 
*/ + if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) { + int delay, mindelay, maxdelay; delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { @@ -1295,159 +1228,9 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, nd6_dad_start((struct ifaddr *)ia, delay); } - KASSERT(hostIsNew, ("in6_update_ifa: !hostIsNew")); + in6_newaddrmsg(ia, RTM_ADD); ifa_free(&ia->ia_ifa); return (error); - - unlink: - /* - * XXX: if a change of an existing address failed, keep the entry - * anyway. - */ - if (hostIsNew) { - in6_unlink_ifa(ia, ifp); - ifa_free(&ia->ia_ifa); - } - return (error); - - cleanup: - KASSERT(hostIsNew, ("in6_update_ifa: cleanup: !hostIsNew")); - ifa_free(&ia->ia_ifa); - in6_purgeaddr(&ia->ia_ifa); - return error; -} - -/* - * Leave multicast groups. Factored out from in6_purgeaddr(). - * This entire work should only be done once, for the default FIB. - */ -static int -in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) -{ - struct sockaddr_in6 mltaddr, mltmask; - struct in6_multi_mship *imm; - struct rtentry *rt; - struct sockaddr_in6 sin6; - int error; - - /* - * Leave from multicast groups we have joined for the interface. - */ - while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { - LIST_REMOVE(imm, i6mm_chain); - in6_leavegroup(imm); - } - - /* - * Remove the link-local all-nodes address. - */ - bzero(&mltmask, sizeof(mltmask)); - mltmask.sin6_len = sizeof(struct sockaddr_in6); - mltmask.sin6_family = AF_INET6; - mltmask.sin6_addr = in6mask32; - - bzero(&mltaddr, sizeof(mltaddr)); - mltaddr.sin6_len = sizeof(struct sockaddr_in6); - mltaddr.sin6_family = AF_INET6; - mltaddr.sin6_addr = in6addr_linklocal_allnodes; - - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) - return (error); - - /* - * As for the mltaddr above, proactively prepare the sin6 to avoid - * rtentry un- and re-locking. 
- */ - if (ifa0 != NULL) { - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(sin6); - sin6.sin6_family = AF_INET6; - memcpy(&sin6.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, - sizeof(sin6.sin6_addr)); - error = in6_setscope(&sin6.sin6_addr, ifa0->ifa_ifp, NULL); - if (error != 0) - return (error); - } - - rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); - if (rt != NULL && rt->rt_gateway != NULL && - (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, - &ia->ia_addr.sin6_addr, - sizeof(ia->ia_addr.sin6_addr)) == 0)) { - /* - * If no more IPv6 address exists on this interface then - * remove the multicast address route. - */ - if (ifa0 == NULL) { - memcpy(&mltaddr.sin6_addr, - &satosin6(rt_key(rt))->sin6_addr, - sizeof(mltaddr.sin6_addr)); - RTFREE_LOCKED(rt); - error = in6_rtrequest(RTM_DELETE, - (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0, RT_DEFAULT_FIB); - if (error) - log(LOG_INFO, "%s: link-local all-nodes " - "multicast address deletion error\n", - __func__); - } else { - /* - * Replace the gateway of the route. - */ - memcpy(rt->rt_gateway, &sin6, sizeof(sin6)); - RTFREE_LOCKED(rt); - } - } else { - if (rt != NULL) - RTFREE_LOCKED(rt); - } - - /* - * Remove the node-local all-nodes address. - */ - mltaddr.sin6_addr = in6addr_nodelocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) - return (error); - - rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); - if (rt != NULL && rt->rt_gateway != NULL && - (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, - &ia->ia_addr.sin6_addr, - sizeof(ia->ia_addr.sin6_addr)) == 0)) { - /* - * If no more IPv6 address exists on this interface then - * remove the multicast address route. 
- */ - if (ifa0 == NULL) { - memcpy(&mltaddr.sin6_addr, - &satosin6(rt_key(rt))->sin6_addr, - sizeof(mltaddr.sin6_addr)); - - RTFREE_LOCKED(rt); - error = in6_rtrequest(RTM_DELETE, - (struct sockaddr *)&mltaddr, - (struct sockaddr *)&ia->ia_addr, - (struct sockaddr *)&mltmask, RTF_UP, - (struct rtentry **)0, RT_DEFAULT_FIB); - if (error) - log(LOG_INFO, "%s: node-local all-nodes" - "multicast address deletion error\n", - __func__); - } else { - /* - * Replace the gateway of the route. - */ - memcpy(rt->rt_gateway, &sin6, sizeof(sin6)); - RTFREE_LOCKED(rt); - } - } else { - if (rt != NULL) - RTFREE_LOCKED(rt); - } - - return (0); } void @@ -1455,26 +1238,11 @@ in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; + struct in6_multi_mship *imm; int plen, error; - struct ifaddr *ifa0; - /* - * find another IPv6 address as the gateway for the - * link-local and node-local all-nodes multicast - * address routes - */ - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) { - if ((ifa0->ifa_addr->sa_family != AF_INET6) || - memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr, - &ia->ia_addr.sin6_addr, sizeof(struct in6_addr)) == 0) - continue; - else - break; - } - if (ifa0 != NULL) - ifa_ref(ifa0); - IF_ADDR_RUNLOCK(ifp); + if (ifa->ifa_carp) + (*carp_detach_p)(ifa); /* * Remove the loopback route to the interface address. @@ -1491,32 +1259,30 @@ in6_purgeaddr(struct ifaddr *ifa) /* stop DAD processing */ nd6_dad_stop(ifa); - /* Remove local address entry from lltable. */ - in6_ifremloop(ifa); - /* Leave multicast groups. 
*/ - error = in6_purgeaddr_mc(ifp, ia, ifa0); - - if (ifa0 != NULL) - ifa_free(ifa0); - + while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { + LIST_REMOVE(imm, i6mm_chain); + in6_leavegroup(imm); + } plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if ((ia->ia_flags & IFA_ROUTE) && plen == 128) { error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags | - (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0); + (ia->ia_dstaddr.sin6_family == AF_INET6 ? RTF_HOST : 0)); if (error != 0) log(LOG_INFO, "%s: err=%d, destination address delete " "failed\n", __func__, error); ia->ia_flags &= ~IFA_ROUTE; } + in6_newaddrmsg(ia, RTM_DELETE); in6_unlink_ifa(ia, ifp); } static void in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) { - int s = splnet(); + char ip6buf[INET6_ADDRSTRLEN]; + int remove_lle; IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); @@ -1530,21 +1296,28 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) */ IN6_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link); + LIST_REMOVE(ia, ia6_hash); IN6_IFADDR_WUNLOCK(); /* * Release the reference to the base prefix. There should be a * positive reference. 
*/ + remove_lle = 0; if (ia->ia6_ndpr == NULL) { nd6log((LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address " - "%p has no prefix\n", ia)); + "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia)))); } else { ia->ia6_ndpr->ndpr_refcnt--; + /* Do not delete lles within prefix if refcont != 0 */ + if (ia->ia6_ndpr->ndpr_refcnt == 0) + remove_lle = 1; ia->ia6_ndpr = NULL; } + nd6_rem_ifa_lle(ia, remove_lle); + /* * Also, if the address being removed is autoconf'ed, call * pfxlist_onlink_check() since the release might affect the status of @@ -1554,335 +1327,63 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) pfxlist_onlink_check(); } ifa_free(&ia->ia_ifa); /* in6_ifaddrhead */ - splx(s); -} - -void -in6_purgeif(struct ifnet *ifp) -{ - struct ifaddr *ifa, *nifa; - - TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { - if (ifa->ifa_addr->sa_family != AF_INET6) - continue; - in6_purgeaddr(ifa); - } - - in6_ifdetach(ifp); } /* - * SIOC[GAD]LIFADDR. - * SIOCGLIFADDR: get first address. (?) - * SIOCGLIFADDR with IFLR_PREFIX: - * get first address that matches the specified prefix. - * SIOCALIFADDR: add the specified address. - * SIOCALIFADDR with IFLR_PREFIX: - * add the specified prefix, filling hostid part from - * the first link-local address. prefixlen must be <= 64. - * SIOCDLIFADDR: delete the specified address. - * SIOCDLIFADDR with IFLR_PREFIX: - * delete the first address that matches the specified prefix. - * return values: - * EINVAL on invalid parameters - * EADDRNOTAVAIL on prefix match failed/specified address not found - * other values may be returned from in6_ioctl() - * - * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64. - * this is to accomodate address naming scheme other than RFC2374, - * in the future. - * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 - * address encoding scheme. 
(see figure on page 8) + * Notifies other subsystems about address change/arrival: + * 1) Notifies device handler on the first IPv6 address assignment + * 2) Handle routing table changes for P2P links and route + * 3) Handle routing table changes for address host route */ static int -in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct thread *td) +in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia, + struct in6_aliasreq *ifra, int hostIsNew) { - struct if_laddrreq *iflr = (struct if_laddrreq *)data; + int error = 0, plen, ifacount = 0; struct ifaddr *ifa; - struct sockaddr *sa; - - /* sanity checks */ - if (!data || !ifp) { - panic("invalid argument to in6_lifaddr_ioctl"); - /* NOTREACHED */ - } - - switch (cmd) { - case SIOCGLIFADDR: - /* address must be specified on GET with IFLR_PREFIX */ - if ((iflr->flags & IFLR_PREFIX) == 0) - break; - /* FALLTHROUGH */ - case SIOCALIFADDR: - case SIOCDLIFADDR: - /* address must be specified on ADD and DELETE */ - sa = (struct sockaddr *)&iflr->addr; - if (sa->sa_family != AF_INET6) - return EINVAL; - if (sa->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; - /* XXX need improvement */ - sa = (struct sockaddr *)&iflr->dstaddr; - if (sa->sa_family && sa->sa_family != AF_INET6) - return EINVAL; - if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; - break; - default: /* shouldn't happen */ -#if 0 - panic("invalid cmd to in6_lifaddr_ioctl"); - /* NOTREACHED */ -#else - return EOPNOTSUPP; -#endif - } - if (sizeof(struct in6_addr) * 8 < iflr->prefixlen) - return EINVAL; - - switch (cmd) { - case SIOCALIFADDR: - { - struct in6_aliasreq ifra; - struct in6_addr *hostid = NULL; - int prefixlen; - - ifa = NULL; - if ((iflr->flags & IFLR_PREFIX) != 0) { - struct sockaddr_in6 *sin6; - - /* - * hostid is to fill in the hostid part of the - * address. hostid points to the first link-local - * address attached to the interface. 
- */ - ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); - if (!ifa) - return EADDRNOTAVAIL; - hostid = IFA_IN6(ifa); - - /* prefixlen must be <= 64. */ - if (64 < iflr->prefixlen) { - if (ifa != NULL) - ifa_free(ifa); - return EINVAL; - } - prefixlen = iflr->prefixlen; - - /* hostid part must be zero. */ - sin6 = (struct sockaddr_in6 *)&iflr->addr; - if (sin6->sin6_addr.s6_addr32[2] != 0 || - sin6->sin6_addr.s6_addr32[3] != 0) { - if (ifa != NULL) - ifa_free(ifa); - return EINVAL; - } - } else - prefixlen = iflr->prefixlen; - - /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ - bzero(&ifra, sizeof(ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); - - bcopy(&iflr->addr, &ifra.ifra_addr, - ((struct sockaddr *)&iflr->addr)->sa_len); - if (hostid) { - /* fill in hostid part */ - ifra.ifra_addr.sin6_addr.s6_addr32[2] = - hostid->s6_addr32[2]; - ifra.ifra_addr.sin6_addr.s6_addr32[3] = - hostid->s6_addr32[3]; - } - - if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */ - bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, - ((struct sockaddr *)&iflr->dstaddr)->sa_len); - if (hostid) { - ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = - hostid->s6_addr32[2]; - ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = - hostid->s6_addr32[3]; - } - } - if (ifa != NULL) - ifa_free(ifa); - - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); - in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); - - ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; - return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td); - } - case SIOCGLIFADDR: - case SIOCDLIFADDR: - { - struct in6_ifaddr *ia; - struct in6_addr mask, candidate, match; - struct sockaddr_in6 *sin6; - int cmp; - - bzero(&mask, sizeof(mask)); - if (iflr->flags & IFLR_PREFIX) { - /* lookup a prefix rather than address. 
*/ - in6_prefixlen2mask(&mask, iflr->prefixlen); - - sin6 = (struct sockaddr_in6 *)&iflr->addr; - bcopy(&sin6->sin6_addr, &match, sizeof(match)); - match.s6_addr32[0] &= mask.s6_addr32[0]; - match.s6_addr32[1] &= mask.s6_addr32[1]; - match.s6_addr32[2] &= mask.s6_addr32[2]; - match.s6_addr32[3] &= mask.s6_addr32[3]; - - /* if you set extra bits, that's wrong */ - if (bcmp(&match, &sin6->sin6_addr, sizeof(match))) - return EINVAL; - - cmp = 1; - } else { - if (cmd == SIOCGLIFADDR) { - /* on getting an address, take the 1st match */ - cmp = 0; /* XXX */ - } else { - /* on deleting an address, do exact match */ - in6_prefixlen2mask(&mask, 128); - sin6 = (struct sockaddr_in6 *)&iflr->addr; - bcopy(&sin6->sin6_addr, &match, sizeof(match)); - - cmp = 1; - } - } + struct sockaddr_in6 *pdst; + char ip6buf[INET6_ADDRSTRLEN]; + /* + * Give the interface a chance to initialize + * if this is its first address, + */ + if (hostIsNew != 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; - if (!cmp) - break; - - /* - * XXX: this is adhoc, but is necessary to allow - * a user to specify fe80::/64 (not /10) for a - * link-local address. 
- */ - bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate)); - in6_clearscope(&candidate); - candidate.s6_addr32[0] &= mask.s6_addr32[0]; - candidate.s6_addr32[1] &= mask.s6_addr32[1]; - candidate.s6_addr32[2] &= mask.s6_addr32[2]; - candidate.s6_addr32[3] &= mask.s6_addr32[3]; - if (IN6_ARE_ADDR_EQUAL(&candidate, &match)) - break; + ifacount++; } - if (ifa != NULL) - ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); - if (!ifa) - return EADDRNOTAVAIL; - ia = ifa2ia6(ifa); - - if (cmd == SIOCGLIFADDR) { - int error; - - /* fill in the if_laddrreq structure */ - bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len); - error = sa6_recoverscope( - (struct sockaddr_in6 *)&iflr->addr); - if (error != 0) { - ifa_free(ifa); - return (error); - } - - if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { - bcopy(&ia->ia_dstaddr, &iflr->dstaddr, - ia->ia_dstaddr.sin6_len); - error = sa6_recoverscope( - (struct sockaddr_in6 *)&iflr->dstaddr); - if (error != 0) { - ifa_free(ifa); - return (error); - } - } else - bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); - - iflr->prefixlen = - in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); - - iflr->flags = ia->ia6_flags; /* XXX */ - ifa_free(ifa); - - return 0; - } else { - struct in6_aliasreq ifra; - - /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ - bzero(&ifra, sizeof(ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, - sizeof(ifra.ifra_name)); - - bcopy(&ia->ia_addr, &ifra.ifra_addr, - ia->ia_addr.sin6_len); - if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { - bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, - ia->ia_dstaddr.sin6_len); - } else { - bzero(&ifra.ifra_dstaddr, - sizeof(ifra.ifra_dstaddr)); - } - bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr, - ia->ia_prefixmask.sin6_len); - - ifra.ifra_flags = ia->ia6_flags; - ifa_free(ifa); - return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra, - ifp, td); - } - } } - return EOPNOTSUPP; /* just for safety */ -} - -/* - * Initialize an interface's IPv6 address and routing table entry. 
- */ -static int -in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, - struct sockaddr_in6 *sin6, int newhost) -{ - int error = 0, plen, ifacount = 0; - int s = splimp(); - struct ifaddr *ifa; - - /* - * Give the interface a chance to initialize - * if this is its first address, - * and to validate the address if necessary. - */ - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != AF_INET6) - continue; - ifacount++; - } - IF_ADDR_RUNLOCK(ifp); - - ia->ia_addr = *sin6; - if (ifacount <= 1 && ifp->if_ioctl) { error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); - if (error) { - splx(s); + if (error) return (error); - } } - splx(s); - - ia->ia_ifa.ifa_metric = ifp->if_metric; - /* we could do in(6)_socktrim here, but just omit it at this moment. */ + /* + * If a new destination address is specified, scrub the old one and + * install the new destination. Note that the interface must be + * p2p or loopback. + */ + pdst = &ifra->ifra_dstaddr; + if (pdst->sin6_family == AF_INET6 && + !IN6_ARE_ADDR_EQUAL(&pdst->sin6_addr, &ia->ia_dstaddr.sin6_addr)) { + if ((ia->ia_flags & IFA_ROUTE) != 0 && + (rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST) != 0)) { + nd6log((LOG_ERR, "in6_update_ifa_internal: failed to " + "remove a route to the old destination: %s\n", + ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); + /* proceed anyway... */ + } else + ia->ia_flags &= ~IFA_ROUTE; + ia->ia_dstaddr = *pdst; + } /* - * Special case: * If a new destination address is specified for a point-to-point * interface, install a route to the destination as an interface * direct route. @@ -1893,19 +1394,19 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { int rtflags = RTF_UP | RTF_HOST; - error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags); - if (error) - return (error); - ia->ia_flags |= IFA_ROUTE; /* * Handle the case for ::1 . 
*/ if (ifp->if_flags & IFF_LOOPBACK) ia->ia_flags |= IFA_RTSELF; + error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags); + if (error) + return (error); + ia->ia_flags |= IFA_ROUTE; } /* - * add a loopback route to self + * add a loopback route to self if not exists */ if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) { error = ifa_add_loopback_route((struct ifaddr *)ia, @@ -1914,10 +1415,6 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, ia->ia_flags |= IFA_RTSELF; } - /* Add local address to lltable, if necessary (ex. on p2p link). */ - if (newhost) - in6_ifaddloop(&(ia->ia_ifa)); - return (error); } @@ -1949,11 +1446,35 @@ in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) /* + * find the internet address corresponding to a given address. + * ifaddr is returned referenced. + */ +struct in6_ifaddr * +in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid) +{ + struct rm_priotracker in6_ifa_tracker; + struct in6_ifaddr *ia; + + IN6_IFADDR_RLOCK(&in6_ifa_tracker); + LIST_FOREACH(ia, IN6ADDR_HASH(addr), ia6_hash) { + if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), addr)) { + if (zoneid != 0 && + zoneid != ia->ia_addr.sin6_scope_id) + continue; + ifa_ref(&ia->ia_ifa); + break; + } + } + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + return (ia); +} + +/* * find the internet address corresponding to a given interface and address. * ifaddr is returned referenced. 
*/ struct in6_ifaddr * -in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr) +in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr) { struct ifaddr *ifa; @@ -1982,7 +1503,7 @@ in6ifa_llaonifp(struct ifnet *ifp) if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) return (NULL); - if_addr_rlock(ifp); + IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -1992,7 +1513,7 @@ in6ifa_llaonifp(struct ifnet *ifp) IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr)) break; } - if_addr_runlock(ifp); + IF_ADDR_RUNLOCK(ifp); return ((struct in6_ifaddr *)ifa); } @@ -2082,20 +1603,21 @@ ip6_sprintf(char *ip6buf, const struct in6_addr *addr) int in6_localaddr(struct in6_addr *in6) { + struct rm_priotracker in6_ifa_tracker; struct in6_ifaddr *ia; if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) return 1; - IN6_IFADDR_RLOCK(); + IN6_IFADDR_RLOCK(&in6_ifa_tracker); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, &ia->ia_prefixmask.sin6_addr)) { - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return 1; } } - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); } @@ -2107,37 +1629,67 @@ in6_localaddr(struct in6_addr *in6) int in6_localip(struct in6_addr *in6) { + struct rm_priotracker in6_ifa_tracker; struct in6_ifaddr *ia; - IN6_IFADDR_RLOCK(); - TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + IN6_IFADDR_RLOCK(&in6_ifa_tracker); + LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) { if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) { - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (1); } } - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); } + +/* + * Return 1 if an internet address is configured on an interface. 
+ */ +int +in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr) +{ + struct in6_addr in6; + struct ifaddr *ifa; + struct in6_ifaddr *ia6; + in6 = *addr; + if (in6_clearscope(&in6)) + return (0); + in6_setscope(&in6, ifp, NULL); + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + ia6 = (struct in6_ifaddr *)ifa; + if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) { + IF_ADDR_RUNLOCK(ifp); + return (1); + } + } + IF_ADDR_RUNLOCK(ifp); + + return (0); +} int in6_is_addr_deprecated(struct sockaddr_in6 *sa6) { + struct rm_priotracker in6_ifa_tracker; struct in6_ifaddr *ia; - IN6_IFADDR_RLOCK(); - TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, - &sa6->sin6_addr) && - (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) { - IN6_IFADDR_RUNLOCK(); - return (1); /* true */ + IN6_IFADDR_RLOCK(&in6_ifa_tracker); + LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) { + if (IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr)) { + if (ia->ia6_flags & IN6_IFF_DEPRECATED) { + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + return (1); /* true */ + } + break; } - - /* XXX: do we still have to go thru the rest of the list? 
*/ } - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); /* false */ } @@ -2222,7 +1774,7 @@ in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; - struct in6_ifaddr *besta = 0; + struct in6_ifaddr *besta = NULL; struct in6_ifaddr *dep[2]; /* last-resort: deprecated */ dep[0] = dep[1] = NULL; @@ -2347,37 +1899,24 @@ in6if_do_dad(struct ifnet *ifp) if ((ifp->if_flags & IFF_LOOPBACK) != 0) return (0); - if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) || + (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD)) return (0); - switch (ifp->if_type) { -#ifdef IFT_DUMMY - case IFT_DUMMY: -#endif - case IFT_FAITH: - /* - * These interfaces do not have the IFF_LOOPBACK flag, - * but loop packets back. We do not have to do DAD on such - * interfaces. We should even omit it, because loop-backed - * NS would confuse the DAD procedure. - */ - return (0); - default: - /* - * Our DAD routine requires the interface up and running. - * However, some interfaces can be up before the RUNNING - * status. Additionaly, users may try to assign addresses - * before the interface becomes up (or running). - * We simply skip DAD in such a case as a work around. - * XXX: we should rather mark "tentative" on such addresses, - * and do DAD after the interface becomes ready. - */ - if (!((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING))) - return (0); + /* + * Our DAD routine requires the interface up and running. + * However, some interfaces can be up before the RUNNING + * status. Additionally, users may try to assign addresses + * before the interface becomes up (or running). + * This function returns EAGAIN in that case. + * The caller should mark "tentative" on the address instead of + * performing DAD immediately. 
+ */ + if (!((ifp->if_flags & IFF_UP) && + (ifp->if_drv_flags & IFF_DRV_RUNNING))) + return (EAGAIN); - return (1); - } + return (1); } /* @@ -2391,7 +1930,7 @@ in6_setmaxmtu(void) struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { /* this function can be called during ifnet initialization */ if (!ifp->if_afdata[AF_INET6]) continue; @@ -2417,18 +1956,10 @@ in6_if2idlen(struct ifnet *ifp) { switch (ifp->if_type) { case IFT_ETHER: /* RFC2464 */ -#ifdef IFT_PROPVIRTUAL case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */ -#endif -#ifdef IFT_L2VLAN case IFT_L2VLAN: /* ditto */ -#endif -#ifdef IFT_IEEE80211 case IFT_IEEE80211: /* ditto */ -#endif -#ifdef IFT_MIP - case IFT_MIP: /* ditto */ -#endif + case IFT_BRIDGE: /* bridge(4) only does Ethernet-like links */ case IFT_INFINIBAND: return (64); case IFT_FDDI: /* RFC2467 */ @@ -2468,25 +1999,38 @@ in6_if2idlen(struct ifnet *ifp) struct in6_llentry { struct llentry base; - struct sockaddr_in6 l3_addr6; }; +#define IN6_LLTBL_DEFAULT_HSIZE 32 +#define IN6_LLTBL_HASH(k, h) \ + (((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1)) + /* - * Deletes an address from the address table. - * This function is called by the timer functions - * such as arptimer() and nd6_llinfo_timer(), and - * the caller does the locking. + * Do actual deallocation of @lle. */ static void -in6_lltable_free(struct lltable *llt, struct llentry *lle) +in6_lltable_destroy_lle_unlocked(struct llentry *lle) { - LLE_WUNLOCK(lle); + LLE_LOCK_DESTROY(lle); + LLE_REQ_DESTROY(lle); free(lle, M_LLTABLE); } +/* + * Called by LLE_FREE_LOCKED when number of references + * drops to zero. 
+ */ +static void +in6_lltable_destroy_lle(struct llentry *lle) +{ + + LLE_WUNLOCK(lle); + in6_lltable_destroy_lle_unlocked(lle); +} + static struct llentry * -in6_lltable_new(const struct sockaddr *l3addr, u_int flags) +in6_lltable_new(const struct in6_addr *addr6, u_int flags) { struct in6_llentry *lle; @@ -2494,45 +2038,69 @@ in6_lltable_new(const struct sockaddr *l3addr, u_int flags) if (lle == NULL) /* NB: caller generates msg */ return NULL; - lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr; + lle->base.r_l3addr.addr6 = *addr6; lle->base.lle_refcnt = 1; - lle->base.lle_free = in6_lltable_free; + lle->base.lle_free = in6_lltable_destroy_lle; LLE_LOCK_INIT(&lle->base); - callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock, - CALLOUT_RETURNUNLOCKED); + LLE_REQ_INIT(&lle->base); + callout_init(&lle->base.lle_timer, 1); return (&lle->base); } +static int +in6_lltable_match_prefix(const struct sockaddr *saddr, + const struct sockaddr *smask, u_int flags, struct llentry *lle) +{ + const struct in6_addr *addr, *mask, *lle_addr; + + addr = &((const struct sockaddr_in6 *)saddr)->sin6_addr; + mask = &((const struct sockaddr_in6 *)smask)->sin6_addr; + lle_addr = &lle->r_l3addr.addr6; + + if (IN6_ARE_MASKED_ADDR_EQUAL(lle_addr, addr, mask) == 0) + return (0); + + if (lle->la_flags & LLE_IFADDR) { + + /* + * Delete LLE_IFADDR records IFF address & flag matches. + * Note that addr is the interface address within prefix + * being matched. 
+ */ + if (IN6_ARE_ADDR_EQUAL(addr, lle_addr) && + (flags & LLE_STATIC) != 0) + return (1); + return (0); + } + + /* flags & LLE_STATIC means deleting both dynamic and static entries */ + if ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)) + return (1); + + return (0); +} + static void -in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, - const struct sockaddr *mask, u_int flags) +in6_lltable_free_entry(struct lltable *llt, struct llentry *lle) { - const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix; - const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask; - struct llentry *lle, *next; - int i; + struct ifnet *ifp; - /* - * (flags & LLE_STATIC) means deleting all entries - * including static ND6 entries. - */ - IF_AFDATA_WLOCK(llt->llt_ifp); - for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { - LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { - if (IN6_ARE_MASKED_ADDR_EQUAL( - &satosin6(L3_ADDR(lle))->sin6_addr, - &pfx->sin6_addr, &msk->sin6_addr) && - ((flags & LLE_STATIC) || - !(lle->la_flags & LLE_STATIC))) { - LLE_WLOCK(lle); - if (callout_stop(&lle->la_timer)) - LLE_REMREF(lle); - llentry_free(lle); - } - } + LLE_WLOCK_ASSERT(lle); + KASSERT(llt != NULL, ("lltable is NULL")); + + /* Unlink entry from table */ + if ((lle->la_flags & LLE_LINKED) != 0) { + + ifp = llt->llt_ifp; + IF_AFDATA_WLOCK_ASSERT(ifp); + lltable_unlink_entry(llt, lle); } - IF_AFDATA_WUNLOCK(llt->llt_ifp); + + if (callout_stop(&lle->lle_timer) > 0) + LLE_REMREF(lle); + + llentry_free(lle); } static int @@ -2540,122 +2108,178 @@ in6_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { - struct rtentry *rt; + const struct sockaddr_in6 *sin6; + struct nhop6_basic nh6; + struct in6_addr dst; + uint32_t scopeid; + int error; char ip6buf[INET6_ADDRSTRLEN]; KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); /* Our local addresses are always only installed on the default FIB. 
*/ - /* XXX rtalloc1 should take a const param */ - rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0, - RT_DEFAULT_FIB); - if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { + + sin6 = (const struct sockaddr_in6 *)l3addr; + in6_splitscope(&sin6->sin6_addr, &dst, &scopeid); + error = fib6_lookup_nh_basic(RT_DEFAULT_FIB, &dst, scopeid, 0, 0, &nh6); + if (error != 0 || (nh6.nh_flags & NHF_GATEWAY) || nh6.nh_ifp != ifp) { struct ifaddr *ifa; /* * Create an ND6 cache for an IPv6 neighbor * that is not covered by our own prefix. */ - /* XXX ifaof_ifpforaddr should take a const param */ - ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp); + ifa = ifaof_ifpforaddr(l3addr, ifp); if (ifa != NULL) { ifa_free(ifa); - if (rt != NULL) - RTFREE_LOCKED(rt); return 0; } log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", - ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr)); - if (rt != NULL) - RTFREE_LOCKED(rt); + ip6_sprintf(ip6buf, &sin6->sin6_addr)); return EINVAL; } - RTFREE_LOCKED(rt); return 0; } -static struct llentry * -in6_lltable_lookup(struct lltable *llt, u_int flags, - const struct sockaddr *l3addr) +static inline uint32_t +in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize) +{ + + return (IN6_LLTBL_HASH(dst->s6_addr32[3], hsize)); +} + +static uint32_t +in6_lltable_hash(const struct llentry *lle, uint32_t hsize) +{ + + return (in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize)); +} + +static void +in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa) +{ + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)sa; + bzero(sin6, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_addr = lle->r_l3addr.addr6; +} + +static inline struct llentry * +in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst) { - const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; - struct ifnet *ifp = 
llt->llt_ifp; struct llentry *lle; struct llentries *lleh; - u_int hashkey; - - IF_AFDATA_LOCK_ASSERT(ifp); - KASSERT(l3addr->sa_family == AF_INET6, - ("sin_family %d", l3addr->sa_family)); + u_int hashidx; - hashkey = sin6->sin6_addr.s6_addr32[3]; - lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; + hashidx = in6_lltable_hash_dst(dst, llt->llt_hsize); + lleh = &llt->lle_head[hashidx]; LIST_FOREACH(lle, lleh, lle_next) { - struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle); if (lle->la_flags & LLE_DELETED) continue; - if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr, - sizeof(struct in6_addr)) == 0) + if (IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst)) break; } - if (lle == NULL) { - if (!(flags & LLE_CREATE)) - return (NULL); - IF_AFDATA_WLOCK_ASSERT(ifp); - /* - * A route that covers the given address must have - * been installed 1st because we are doing a resolution, - * verify this. - */ - if (!(flags & LLE_IFADDR) && - in6_lltable_rtcheck(ifp, flags, l3addr) != 0) - return NULL; - - lle = in6_lltable_new(l3addr, flags); - if (lle == NULL) { - log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); - return NULL; - } - lle->la_flags = flags & ~LLE_CREATE; - if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { - bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); - lle->la_flags |= (LLE_VALID | LLE_STATIC); - } + return (lle); +} - lle->lle_tbl = llt; - lle->lle_head = lleh; - lle->la_flags |= LLE_LINKED; - LIST_INSERT_HEAD(lleh, lle, lle_next); - } else if (flags & LLE_DELETE) { - if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { - LLE_WLOCK(lle); - lle->la_flags |= LLE_DELETED; +static void +in6_lltable_delete_entry(struct lltable *llt, struct llentry *lle) +{ + + lle->la_flags |= LLE_DELETED; + EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); #ifdef DIAGNOSTIC - log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); + log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif - if ((lle->la_flags & - 
(LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) - llentry_free(lle); - else - LLE_WUNLOCK(lle); - } - lle = (void *)-1; + llentry_free(lle); +} + +static struct llentry * +in6_lltable_alloc(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct ifnet *ifp = llt->llt_ifp; + struct llentry *lle; + char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; + + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + /* + * A route that covers the given address must have + * been installed 1st because we are doing a resolution, + * verify this. + */ + if (!(flags & LLE_IFADDR) && + in6_lltable_rtcheck(ifp, flags, l3addr) != 0) + return (NULL); + + lle = in6_lltable_new(&sin6->sin6_addr, flags); + if (lle == NULL) { + log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); + return (NULL); } - if (LLE_IS_VALID(lle)) { - if (flags & LLE_EXCLUSIVE) - LLE_WLOCK(lle); - else - LLE_RLOCK(lle); + lle->la_flags = flags; + if ((flags & LLE_IFADDR) == LLE_IFADDR) { + linkhdrsize = LLE_MAX_LINKHDR; + if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp), + linkhdr, &linkhdrsize, &lladdr_off) != 0) { + in6_lltable_destroy_lle_unlocked(lle); + return (NULL); + } + lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, + lladdr_off); + lle->la_flags |= LLE_STATIC; } + + if ((lle->la_flags & LLE_STATIC) != 0) + lle->ln_state = ND6_LLINFO_REACHABLE; + + return (lle); +} + +static struct llentry * +in6_lltable_lookup(struct lltable *llt, u_int flags, + const struct sockaddr *l3addr) +{ + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; + struct llentry *lle; + + IF_AFDATA_LOCK_ASSERT(llt->llt_ifp); + KASSERT(l3addr->sa_family == AF_INET6, + ("sin_family %d", l3addr->sa_family)); + + lle = in6_lltable_find_dst(llt, &sin6->sin6_addr); + + if (lle == NULL) + return (NULL); + + KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) != + 
(LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X", + flags)); + + if (flags & LLE_UNLOCKED) + return (lle); + + if (flags & LLE_EXCLUSIVE) + LLE_WLOCK(lle); + else + LLE_RLOCK(lle); return (lle); } static int -in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) +in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle, + struct sysctl_req *wr) { struct ifnet *ifp = llt->llt_ifp; - struct llentry *lle; /* XXX stack use */ struct { struct rt_msghdr rtm; @@ -2668,39 +2292,32 @@ in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) #endif struct sockaddr_dl sdl; } ndpc; - int i, error; - - if (ifp->if_flags & IFF_LOOPBACK) - return 0; - - LLTABLE_LOCK_ASSERT(); - - error = 0; - for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { - LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { - struct sockaddr_dl *sdl; + struct sockaddr_dl *sdl; + int error; - /* skip deleted or invalid entries */ - if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) - continue; + bzero(&ndpc, sizeof(ndpc)); + /* skip deleted entries */ + if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) + return (0); /* Skip if jailed and not a valid IP of the prison. 
*/ - if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) - continue; + lltable_fill_sa_entry(lle, + (struct sockaddr *)&ndpc.sin6); + if (prison_if(wr->td->td_ucred, + (struct sockaddr *)&ndpc.sin6) != 0) + return (0); /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_in6 (IPv6) * struct sockaddr_dl; */ - bzero(&ndpc, sizeof(ndpc)); ndpc.rtm.rtm_msglen = sizeof(ndpc); ndpc.rtm.rtm_version = RTM_VERSION; ndpc.rtm.rtm_type = RTM_GET; ndpc.rtm.rtm_flags = RTF_UP; ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; - ndpc.sin6.sin6_family = AF_INET6; - ndpc.sin6.sin6_len = sizeof(ndpc.sin6); - bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle)); + if (V_deembed_scopeid) + sa6_recoverscope(&ndpc.sin6); /* publish */ if (lle->la_flags & LLE_PUB) @@ -2709,22 +2326,56 @@ in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) sdl = &ndpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); - sdl->sdl_alen = ifp->if_addrlen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; - bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); - ndpc.rtm.rtm_rmx.rmx_expire = - lle->la_flags & LLE_STATIC ? 
0 : lle->la_expire; + if ((lle->la_flags & LLE_VALID) == LLE_VALID) { + sdl->sdl_alen = ifp->if_addrlen; + bcopy(lle->ll_addr, LLADDR(sdl), + ifp->if_addrlen); + } else { + sdl->sdl_alen = 0; + bzero(LLADDR(sdl), ifp->if_addrlen); + } + if (lle->la_expire != 0) + ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire + + lle->lle_remtime / hz + + time_second - time_uptime; ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) ndpc.rtm.rtm_flags |= RTF_STATIC; + if (lle->la_flags & LLE_IFADDR) + ndpc.rtm.rtm_flags |= RTF_PINNED; + if (lle->ln_router != 0) + ndpc.rtm.rtm_flags |= RTF_GATEWAY; + ndpc.rtm.rtm_rmx.rmx_pksent = lle->la_asked; + /* Store state in rmx_weight value */ + ndpc.rtm.rtm_rmx.rmx_state = lle->ln_state; ndpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); - if (error) - break; - } - } - return error; + + return (error); +} + +static struct lltable * +in6_lltattach(struct ifnet *ifp) +{ + struct lltable *llt; + + llt = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE); + llt->llt_af = AF_INET6; + llt->llt_ifp = ifp; + + llt->llt_lookup = in6_lltable_lookup; + llt->llt_alloc_entry = in6_lltable_alloc; + llt->llt_delete_entry = in6_lltable_delete_entry; + llt->llt_dump_entry = in6_lltable_dump_entry; + llt->llt_hash = in6_lltable_hash; + llt->llt_fill_sa_entry = in6_lltable_fill_sa_entry; + llt->llt_free_entry = in6_lltable_free_entry; + llt->llt_match_prefix = in6_lltable_match_prefix; + lltable_link(llt); + + return (llt); } void * @@ -2732,32 +2383,45 @@ in6_domifattach(struct ifnet *ifp) { struct in6_ifextra *ext; + /* There are not IPv6-capable interfaces. 
*/ + switch (ifp->if_type) { + case IFT_PFLOG: + case IFT_PFSYNC: + case IFT_USB: + return (NULL); + } ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK); bzero(ext, sizeof(*ext)); - ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat), - M_IFADDR, M_WAITOK); - bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat)); + ext->in6_ifstat = malloc(sizeof(counter_u64_t) * + sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK); + COUNTER_ARRAY_ALLOC(ext->in6_ifstat, + sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK); - ext->icmp6_ifstat = - (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat), - M_IFADDR, M_WAITOK); - bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat)); + ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) * + sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR, + M_WAITOK); + COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat, + sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK); ext->nd_ifinfo = nd6_ifattach(ifp); ext->scope6_id = scope6_ifattach(ifp); - ext->lltable = lltable_init(ifp, AF_INET6); - if (ext->lltable != NULL) { - ext->lltable->llt_prefix_free = in6_lltable_prefix_free; - ext->lltable->llt_lookup = in6_lltable_lookup; - ext->lltable->llt_dump = in6_lltable_dump; - } + ext->lltable = in6_lltattach(ifp); ext->mld_ifinfo = mld_domifattach(ifp); return ext; } +int +in6_domifmtu(struct ifnet *ifp) +{ + if (ifp->if_afdata[AF_INET6] == NULL) + return ifp->if_mtu; + + return (IN6_LINKMTU(ifp)); +} + void in6_domifdetach(struct ifnet *ifp, void *aux) { @@ -2765,9 +2429,13 @@ in6_domifdetach(struct ifnet *ifp, void *aux) mld_domifdetach(ifp); scope6_ifdetach(ext->scope6_id); - nd6_ifdetach(ext->nd_ifinfo); + nd6_ifdetach(ifp, ext->nd_ifinfo); lltable_free(ext->lltable); + COUNTER_ARRAY_FREE(ext->in6_ifstat, + sizeof(struct in6_ifstat) / sizeof(uint64_t)); free(ext->in6_ifstat, M_IFADDR); + COUNTER_ARRAY_FREE(ext->icmp6_ifstat, + sizeof(struct icmp6_ifstat) / sizeof(uint64_t)); 
free(ext->icmp6_ifstat, M_IFADDR); free(ext, M_IFADDR); } diff --git a/freebsd/sys/netinet6/in6.h b/freebsd/sys/netinet6/in6.h index 616f1009..62c5e0b0 100644 --- a/freebsd/sys/netinet6/in6.h +++ b/freebsd/sys/netinet6/in6.h @@ -361,11 +361,11 @@ extern const struct in6_addr in6addr_linklocal_allv2routers; #define IFA6_IS_DEPRECATED(a) \ ((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \ - (u_int32_t)((time_second - (a)->ia6_updatetime)) > \ + (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_pltime) #define IFA6_IS_INVALID(a) \ ((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \ - (u_int32_t)((time_second - (a)->ia6_updatetime)) > \ + (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \ (a)->ia6_lifetime.ia6t_vltime) #endif /* _KERNEL */ @@ -376,12 +376,24 @@ extern const struct in6_addr in6addr_linklocal_allv2routers; struct route_in6 { struct rtentry *ro_rt; struct llentry *ro_lle; - struct in6_addr *ro_ia6; - int ro_flags; + /* + * ro_prepend and ro_plen are only used for bpf to pass in a + * preformed header. They are not cacheable. + */ + char *ro_prepend; + uint16_t ro_plen; + uint16_t ro_flags; + uint16_t ro_mtu; /* saved ro_rt mtu */ + uint16_t spare; struct sockaddr_in6 ro_dst; }; #endif +#ifdef _KERNEL +#define MTAG_ABI_IPV6 1444287380 /* IPv6 ABI */ +#define IPV6_TAG_DIRECT 0 /* direct-dispatch IPv6 */ +#endif /* _KERNEL */ + /* * Options for use with [gs]etsockopt at the IPV6 level. * First word of comment is data type; bool is stored in int. 
@@ -424,8 +436,7 @@ struct route_in6 { #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ #endif /* IPSEC */ -#define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */ - + /* 29; unused; was IPV6_FAITH */ #if 1 /* IPV6FIREWALL */ #define IPV6_FW_ADD 30 /* add a firewall rule to chain */ #define IPV6_FW_DEL 31 /* delete a firewall rule from chain */ @@ -481,6 +492,14 @@ struct route_in6 { #define IPV6_BINDANY 64 /* bool: allow bind to any address */ +#define IPV6_BINDMULTI 65 /* bool; allow multibind to same addr/port */ +#define IPV6_RSS_LISTEN_BUCKET 66 /* int; set RSS listen bucket */ +#define IPV6_FLOWID 67 /* int; flowid of given socket */ +#define IPV6_FLOWTYPE 68 /* int; flowtype of given socket */ +#define IPV6_RSSBUCKETID 69 /* int; RSS bucket ID of given socket */ +#define IPV6_RECVFLOWID 70 /* bool; receive IP6 flowid/flowtype w/ datagram */ +#define IPV6_RECVRSSBUCKETID 71 /* bool; receive IP6 RSS bucket id w/ datagram */ + /* * The following option is private; do not use it from user applications. * It is deliberately defined to the same value as IP_MSFILTER. 
@@ -574,7 +593,7 @@ struct ip6_mtuinfo { #define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ #define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */ #define IPV6CTL_ACCEPT_RTADV 12 -#define IPV6CTL_KEEPFAITH 13 + /* 13; unused; was: IPV6CTL_KEEPFAITH */ #define IPV6CTL_LOG_INTERVAL 14 #define IPV6CTL_HDRNESTLIMIT 15 #define IPV6CTL_DAD_COUNT 16 @@ -588,9 +607,9 @@ struct ip6_mtuinfo { #define IPV6CTL_MAPPED_ADDR 23 #endif #define IPV6CTL_V6ONLY 24 -#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */ -#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */ -#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */ +/* IPV6CTL_RTEXPIRE 25 deprecated */ +/* IPV6CTL_RTMINEXPIRE 26 deprecated */ +/* IPV6CTL_RTMAXCACHE 27 deprecated */ #define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ #define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ @@ -618,17 +637,25 @@ struct ip6_mtuinfo { * receiving IF. */ #define IPV6CTL_RFC6204W3 50 /* Accept defroute even when forwarding enabled */ -#define IPV6CTL_MAXID 51 +#define IPV6CTL_INTRQMAXLEN 51 /* max length of IPv6 netisr queue */ +#define IPV6CTL_INTRDQMAXLEN 52 /* max length of direct IPv6 netisr + * queue */ +#define IPV6CTL_MAXID 53 #endif /* __BSD_VISIBLE */ /* - * Redefinition of mbuf flags - */ -#define M_AUTHIPHDR M_PROTO2 -#define M_DECRYPTED M_PROTO3 -#define M_LOOP M_PROTO4 -#define M_AUTHIPDGM M_PROTO5 -#define M_RTALERT_MLD M_PROTO6 + * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/, + * the protocol specific mbuf flags are shared between them. 
+ */ +#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */ +#define M_IP6_NEXTHOP M_PROTO2 /* explicit ip nexthop */ +#define M_IP_NEXTHOP M_PROTO2 /* explicit ip nexthop */ +#define M_SKIP_FIREWALL M_PROTO3 /* skip firewall processing */ +#define M_AUTHIPHDR M_PROTO4 +#define M_DECRYPTED M_PROTO5 +#define M_LOOP M_PROTO6 +#define M_AUTHIPDGM M_PROTO7 +#define M_RTALERT_MLD M_PROTO8 #ifdef _KERNEL struct cmsghdr; @@ -636,9 +663,13 @@ struct ip6_hdr; int in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t); int in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t); +int in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t, + u_int32_t); int in6_localaddr(struct in6_addr *); int in6_localip(struct in6_addr *); -int in6_addrscope(struct in6_addr *); +int in6_ifhasaddr(struct ifnet *, struct in6_addr *); +int in6_addrscope(const struct in6_addr *); +char *ip6_sprintf(char *, const struct in6_addr *); struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); extern void in6_if_up(struct ifnet *); struct sockaddr; @@ -656,7 +687,6 @@ extern void addrsel_policy_init(void); #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) -extern int (*faithprefix_p)(struct in6_addr *); #endif /* _KERNEL */ #ifndef _SIZE_T_DECLARED diff --git a/freebsd/sys/netinet6/in6_cksum.c b/freebsd/sys/netinet6/in6_cksum.c index e129ca71..6eebdadc 100644 --- a/freebsd/sys/netinet6/in6_cksum.c +++ b/freebsd/sys/netinet6/in6_cksum.c @@ -147,9 +147,11 @@ in6_cksum_pseudo(struct ip6_hdr *ip6, uint32_t len, uint8_t nxt, uint16_t csum) * off is an offset where TCP/UDP/ICMP6 header starts. * len is a total length of a transport segment. * (e.g. 
TCP header + TCP payload) + * cov is the number of bytes to be taken into account for the checksum */ int -in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) +in6_cksum_partial(struct mbuf *m, u_int8_t nxt, u_int32_t off, + u_int32_t len, u_int32_t cov) { struct ip6_hdr *ip6; u_int16_t *w, scope; @@ -217,9 +219,9 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) } w = (u_int16_t *)(mtod(m, u_char *) + off); mlen = m->m_len - off; - if (len < mlen) - mlen = len; - len -= mlen; + if (cov < mlen) + mlen = cov; + cov -= mlen; /* * Force to even boundary. */ @@ -275,7 +277,7 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) * Lastly calculate a summary of the rest of mbufs. */ - for (;m && len; m = m->m_next) { + for (;m && cov; m = m->m_next) { if (m->m_len == 0) continue; w = mtod(m, u_int16_t *); @@ -292,12 +294,12 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) sum += s_util.s; w = (u_int16_t *)((char *)w + 1); mlen = m->m_len - 1; - len--; + cov--; } else mlen = m->m_len; - if (len < mlen) - mlen = len; - len -= mlen; + if (cov < mlen) + mlen = cov; + cov -= mlen; /* * Force to even boundary. */ @@ -345,7 +347,7 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) } else if (mlen == -1) s_util.c[0] = *(char *)w; } - if (len) + if (cov) panic("in6_cksum: out of data"); if (mlen == -1) { /* The last mbuf has odd # of bytes. 
Follow the @@ -357,3 +359,9 @@ in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) REDUCE; return (~sum & 0xffff); } + +int +in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len) +{ + return (in6_cksum_partial(m, nxt, off, len, len)); +} diff --git a/freebsd/sys/netinet6/in6_fib.c b/freebsd/sys/netinet6/in6_fib.c new file mode 100644 index 00000000..824db1fc --- /dev/null +++ b/freebsd/sys/netinet6/in6_fib.c @@ -0,0 +1,278 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 2015 + * Alexander V. Chernikov <melifaro@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_inet.h> +#include <rtems/bsd/local/opt_inet6.h> +#include <rtems/bsd/local/opt_route.h> +#include <rtems/bsd/local/opt_mpath.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rwlock.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/kernel.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_dl.h> +#include <net/route.h> +#include <net/route_var.h> +#include <net/vnet.h> + +#ifdef RADIX_MPATH +#include <net/radix_mpath.h> +#endif + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/ip_mroute.h> +#include <netinet/ip6.h> +#include <netinet6/in6_fib.h> +#include <netinet6/in6_var.h> +#include <netinet6/nd6.h> +#include <netinet6/scope6_var.h> + +#include <net/if_types.h> + +#ifdef INET6 +static void fib6_rte_to_nh_extended(struct rtentry *rte, + const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6); +static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst, + uint32_t flags, struct nhop6_basic *pnh6); +static struct ifnet *fib6_get_ifaifp(struct rtentry *rte); +#define RNTORT(p) ((struct rtentry *)(p)) + +/* + * Gets real interface for the @rte. + * Returns rt_ifp for !IFF_LOOPBACK routers. 
+ * Extracts "real" address interface from interface address + * loopback routes. + */ +static struct ifnet * +fib6_get_ifaifp(struct rtentry *rte) +{ + struct ifnet *ifp; + struct sockaddr_dl *sdl; + + ifp = rte->rt_ifp; + if ((ifp->if_flags & IFF_LOOPBACK) && + rte->rt_gateway->sa_family == AF_LINK) { + sdl = (struct sockaddr_dl *)rte->rt_gateway; + return (ifnet_byindex(sdl->sdl_index)); + } + + return (ifp); +} + +static void +fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst, + uint32_t flags, struct nhop6_basic *pnh6) +{ + struct sockaddr_in6 *gw; + + /* Do explicit nexthop zero unless we're copying it */ + memset(pnh6, 0, sizeof(*pnh6)); + + if ((flags & NHR_IFAIF) != 0) + pnh6->nh_ifp = fib6_get_ifaifp(rte); + else + pnh6->nh_ifp = rte->rt_ifp; + + pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); + if (rte->rt_flags & RTF_GATEWAY) { + gw = (struct sockaddr_in6 *)rte->rt_gateway; + pnh6->nh_addr = gw->sin6_addr; + in6_clearscope(&pnh6->nh_addr); + } else + pnh6->nh_addr = *dst; + /* Set flags */ + pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); + gw = (struct sockaddr_in6 *)rt_key(rte); + if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr)) + pnh6->nh_flags |= NHF_DEFAULT; +} + +static void +fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst, + uint32_t flags, struct nhop6_extended *pnh6) +{ + struct sockaddr_in6 *gw; + + /* Do explicit nexthop zero unless we're copying it */ + memset(pnh6, 0, sizeof(*pnh6)); + + if ((flags & NHR_IFAIF) != 0) + pnh6->nh_ifp = fib6_get_ifaifp(rte); + else + pnh6->nh_ifp = rte->rt_ifp; + + pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp)); + if (rte->rt_flags & RTF_GATEWAY) { + gw = (struct sockaddr_in6 *)rte->rt_gateway; + pnh6->nh_addr = gw->sin6_addr; + in6_clearscope(&pnh6->nh_addr); + } else + pnh6->nh_addr = *dst; + /* Set flags */ + pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags); + gw = (struct sockaddr_in6 *)rt_key(rte); + if 
(IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr)) + pnh6->nh_flags |= NHF_DEFAULT; +} + +/* + * Performs IPv6 route table lookup on @dst. Returns 0 on success. + * Stores basic nexthop info into provided @pnh6 structure. + * Note that + * - nh_ifp represents logical transmit interface (rt_ifp) by default + * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed + * - mtu from logical transmit interface will be returned. + * - nh_ifp cannot be safely dereferenced + * - nh_ifp represents rt_ifp (e.g. if looking up address on + * interface "ix0" pointer to "ix0" interface will be returned instead + * of "lo0") + * - howewer mtu from "transmit" interface will be returned. + * - scope will be embedded in nh_addr + */ +int +fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, + uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6) +{ + struct rib_head *rh; + struct radix_node *rn; + struct sockaddr_in6 sin6; + struct rtentry *rte; + + KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum")); + rh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rh == NULL) + return (ENOENT); + + /* Prepare lookup key */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_addr = *dst; + sin6.sin6_len = sizeof(struct sockaddr_in6); + /* Assume scopeid is valid and embed it directly */ + if (IN6_IS_SCOPE_LINKLOCAL(dst)) + sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); + + RIB_RLOCK(rh); + rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + rte = RNTORT(rn); + /* Ensure route & ifp is UP */ + if (RT_LINK_IS_UP(rte->rt_ifp)) { + fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6); + RIB_RUNLOCK(rh); + return (0); + } + } + RIB_RUNLOCK(rh); + + return (ENOENT); +} + +/* + * Performs IPv6 route table lookup on @dst. Returns 0 on success. + * Stores extended nexthop info into provided @pnh6 structure. + * Note that + * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified. 
+ * - in that case you need to call fib6_free_nh_ext() + * - nh_ifp represents logical transmit interface (rt_ifp) by default + * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed + * - mtu from logical transmit interface will be returned. + * - scope will be embedded in nh_addr + */ +int +fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, + uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6) +{ + struct rib_head *rh; + struct radix_node *rn; + struct sockaddr_in6 sin6; + struct rtentry *rte; + + KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum")); + rh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rh == NULL) + return (ENOENT); + + /* Prepare lookup key */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = *dst; + /* Assume scopeid is valid and embed it directly */ + if (IN6_IS_SCOPE_LINKLOCAL(dst)) + sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); + + RIB_RLOCK(rh); + rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + rte = RNTORT(rn); +#ifdef RADIX_MPATH + rte = rt_mpath_select(rte, flowid); + if (rte == NULL) { + RIB_RUNLOCK(rh); + return (ENOENT); + } +#endif + /* Ensure route & ifp is UP */ + if (RT_LINK_IS_UP(rte->rt_ifp)) { + fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags, + pnh6); + if ((flags & NHR_REF) != 0) { + /* TODO: Do lwref on egress ifp's */ + } + RIB_RUNLOCK(rh); + + return (0); + } + } + RIB_RUNLOCK(rh); + + return (ENOENT); +} + +void +fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6) +{ + +} + +#endif + diff --git a/freebsd/sys/netinet6/in6_fib.h b/freebsd/sys/netinet6/in6_fib.h new file mode 100644 index 00000000..3d58cd22 --- /dev/null +++ b/freebsd/sys/netinet6/in6_fib.h @@ -0,0 +1,61 @@ +/*- + * Copyright (c) 2015 + * Alexander V. 
Chernikov <melifaro@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NETINET6_IN6_FIB_H_ +#define _NETINET6_IN6_FIB_H_ + +/* Basic nexthop info used for uRPF/mtu checks */ +struct nhop6_basic { + struct ifnet *nh_ifp; /* Logical egress interface */ + uint16_t nh_mtu; /* nexthop mtu */ + uint16_t nh_flags; /* nhop flags */ + uint8_t spare[4]; + struct in6_addr nh_addr; /* GW/DST IPv4 address */ +}; + +/* Does not differ from nhop6_basic */ +struct nhop6_extended { + struct ifnet *nh_ifp; /* Logical egress interface */ + uint16_t nh_mtu; /* nexthop mtu */ + uint16_t nh_flags; /* nhop flags */ + uint8_t spare[4]; + struct in6_addr nh_addr; /* GW/DST IPv6 address */ + uint64_t spare2[2]; +}; + +int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, + uint32_t scopeid, uint32_t flags, uint32_t flowid,struct nhop6_basic *pnh6); +int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst, + uint32_t scopeid, uint32_t flags, uint32_t flowid, + struct nhop6_extended *pnh6); +void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6); +#endif + diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c index 9e0f37f0..6e1fb8b1 100644 --- a/freebsd/sys/netinet6/in6_gif.c +++ b/freebsd/sys/netinet6/in6_gif.c @@ -38,6 +38,8 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/sys/param.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rmlock.h> #include <sys/systm.h> #include <sys/socket.h> #include <sys/sockio.h> @@ -51,7 +53,9 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> +#include <net/vnet.h> #include <netinet/in.h> #include <netinet/in_systm.h> @@ -62,29 +66,28 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include <netinet/ip6.h> #include <netinet6/ip6_var.h> -#include <netinet6/in6_gif.h> #include <netinet6/in6_var.h> #endif -#include <netinet6/ip6protosw.h> #include <netinet/ip_ecn.h> #ifdef INET6 #include <netinet6/ip6_ecn.h> +#include <netinet6/in6_fib.h> 
#endif #include <net/if_gif.h> -VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM; +#define GIF_HLIM 30 +static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM; #define V_ip6_gif_hlim VNET(ip6_gif_hlim) SYSCTL_DECL(_net_inet6_ip6); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW, +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0, ""); -static int gif_validate6(const struct ip6_hdr *, struct gif_softc *, - struct ifnet *); +static int in6_gif_input(struct mbuf **, int *, int); extern struct domain inet6domain; -struct ip6protosw in6_gif_protosw = { +static struct protosw in6_gif_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = 0, /* IPPROTO_IPV[46] */ @@ -96,112 +99,24 @@ struct ip6protosw in6_gif_protosw = { }; int -in6_gif_output(struct ifnet *ifp, - int family, /* family of the packet to be encapsulate */ - struct mbuf *m) +in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) { + GIF_RLOCK_TRACKER; struct gif_softc *sc = ifp->if_softc; - struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst; - struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)sc->gif_psrc; - struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)sc->gif_pdst; struct ip6_hdr *ip6; - struct etherip_header eiphdr; - int error, len, proto; - u_int8_t itos, otos; - - GIF_LOCK_ASSERT(sc); - - if (sin6_src == NULL || sin6_dst == NULL || - sin6_src->sin6_family != AF_INET6 || - sin6_dst->sin6_family != AF_INET6) { - m_freem(m); - return EAFNOSUPPORT; - } - - switch (family) { -#ifdef INET - case AF_INET: - { - struct ip *ip; - - proto = IPPROTO_IPV4; - if (m->m_len < sizeof(*ip)) { - m = m_pullup(m, sizeof(*ip)); - if (!m) - return ENOBUFS; - } - ip = mtod(m, struct ip *); - itos = ip->ip_tos; - break; - } -#endif -#ifdef INET6 - case AF_INET6: - { - struct ip6_hdr *ip6; - proto = IPPROTO_IPV6; - if (m->m_len < sizeof(*ip6)) { - m = m_pullup(m, sizeof(*ip6)); - if (!m) 
- return ENOBUFS; - } - ip6 = mtod(m, struct ip6_hdr *); - itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; - break; - } -#endif - case AF_LINK: - proto = IPPROTO_ETHERIP; - - /* - * GIF_SEND_REVETHIP (disabled by default) intentionally - * sends an EtherIP packet with revered version field in - * the header. This is a knob for backward compatibility - * with FreeBSD 7.2R or prior. - */ - if ((sc->gif_options & GIF_SEND_REVETHIP)) { - eiphdr.eip_ver = 0; - eiphdr.eip_resvl = ETHERIP_VERSION; - eiphdr.eip_resvh = 0; - } else { - eiphdr.eip_ver = ETHERIP_VERSION; - eiphdr.eip_resvl = 0; - eiphdr.eip_resvh = 0; - } - /* prepend Ethernet-in-IP header */ - M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT); - if (m && m->m_len < sizeof(struct etherip_header)) - m = m_pullup(m, sizeof(struct etherip_header)); - if (m == NULL) - return ENOBUFS; - bcopy(&eiphdr, mtod(m, struct etherip_header *), - sizeof(struct etherip_header)); - break; - - default: -#ifdef DEBUG - printf("in6_gif_output: warning: unknown family %d passed\n", - family); -#endif - m_freem(m); - return EAFNOSUPPORT; - } + int len; /* prepend new IP header */ len = sizeof(struct ip6_hdr); #ifndef __NO_STRICT_ALIGNMENT - if (family == AF_LINK) + if (proto == IPPROTO_ETHERIP) len += ETHERIP_ALIGN; #endif - M_PREPEND(m, len, M_DONTWAIT); - if (m != NULL && m->m_len < len) - m = m_pullup(m, len); - if (m == NULL) { - printf("ENOBUFS in in6_gif_output %d\n", __LINE__); - return ENOBUFS; - } + M_PREPEND(m, len, M_NOWAIT); + if (m == NULL) + return (ENOBUFS); #ifndef __NO_STRICT_ALIGNMENT - if (family == AF_LINK) { + if (proto == IPPROTO_ETHERIP) { len = mtod(m, vm_offset_t) & 3; KASSERT(len == 0 || len == ETHERIP_ALIGN, ("in6_gif_output: unexpected misalignment")); @@ -211,261 +126,107 @@ in6_gif_output(struct ifnet *ifp, #endif ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_flow = 0; - ip6->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6->ip6_vfc |= IPV6_VERSION; - ip6->ip6_plen = htons((u_short)m->m_pkthdr.len); - 
ip6->ip6_nxt = proto; - ip6->ip6_hlim = V_ip6_gif_hlim; - ip6->ip6_src = sin6_src->sin6_addr; - /* bidirectional configured tunnel mode */ - if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) - ip6->ip6_dst = sin6_dst->sin6_addr; - else { + GIF_RLOCK(sc); + if (sc->gif_family != AF_INET6) { m_freem(m); - return ENETUNREACH; + GIF_RUNLOCK(sc); + return (ENETDOWN); } - ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE, - &otos, &itos); - ip6->ip6_flow &= ~htonl(0xff << 20); - ip6->ip6_flow |= htonl((u_int32_t)otos << 20); + bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr)); + GIF_RUNLOCK(sc); - M_SETFIB(m, sc->gif_fibnum); - - if (dst->sin6_family != sin6_dst->sin6_family || - !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) { - /* cache route doesn't match */ - bzero(dst, sizeof(*dst)); - dst->sin6_family = sin6_dst->sin6_family; - dst->sin6_len = sizeof(struct sockaddr_in6); - dst->sin6_addr = sin6_dst->sin6_addr; - if (sc->gif_ro6.ro_rt) { - RTFREE(sc->gif_ro6.ro_rt); - sc->gif_ro6.ro_rt = NULL; - } -#if 0 - GIF2IFP(sc)->if_mtu = GIF_MTU; -#endif - } - - if (sc->gif_ro6.ro_rt == NULL) { - in6_rtalloc(&sc->gif_ro6, sc->gif_fibnum); - if (sc->gif_ro6.ro_rt == NULL) { - m_freem(m); - return ENETUNREACH; - } - - /* if it constitutes infinite encapsulation, punt. */ - if (sc->gif_ro.ro_rt->rt_ifp == ifp) { - m_freem(m); - return ENETUNREACH; /*XXX*/ - } -#if 0 - ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu - - sizeof(struct ip6_hdr); -#endif - } - - m_addr_changed(m); - -#ifdef IPV6_MINMTU + ip6->ip6_flow |= htonl((uint32_t)ecn << 20); + ip6->ip6_nxt = proto; + ip6->ip6_hlim = V_ip6_gif_hlim; /* * force fragmentation to minimum MTU, to avoid path MTU discovery. * it is too painful to ask for resend of inner packet, to achieve * path MTU discovery for encapsulated packets. 
*/ - error = ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL); -#else - error = ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL); -#endif - - if (!(GIF2IFP(sc)->if_flags & IFF_LINK0) && - sc->gif_ro6.ro_rt != NULL) { - RTFREE(sc->gif_ro6.ro_rt); - sc->gif_ro6.ro_rt = NULL; - } - - return (error); + return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL)); } -int +static int in6_gif_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; - struct ifnet *gifp = NULL; + struct ifnet *gifp; struct gif_softc *sc; struct ip6_hdr *ip6; - int af = 0; - u_int32_t otos; + uint8_t ecn; - ip6 = mtod(m, struct ip6_hdr *); - - sc = (struct gif_softc *)encap_getarg(m); + sc = encap_getarg(m); if (sc == NULL) { m_freem(m); IP6STAT_INC(ip6s_nogif); - return IPPROTO_DONE; + return (IPPROTO_DONE); } - gifp = GIF2IFP(sc); - if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) { - m_freem(m); - IP6STAT_INC(ip6s_nogif); - return IPPROTO_DONE; - } - - otos = ip6->ip6_flow; - m_adj(m, *offp); - - switch (proto) { -#ifdef INET - case IPPROTO_IPV4: - { - struct ip *ip; - u_int8_t otos8; - af = AF_INET; - otos8 = (ntohl(otos) >> 20) & 0xff; - if (m->m_len < sizeof(*ip)) { - m = m_pullup(m, sizeof(*ip)); - if (!m) - return IPPROTO_DONE; - } - ip = mtod(m, struct ip *); - if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ? - ECN_ALLOWED : ECN_NOCARE, - &otos8, &ip->ip_tos) == 0) { - m_freem(m); - return IPPROTO_DONE; - } - break; - } -#endif /* INET */ -#ifdef INET6 - case IPPROTO_IPV6: - { - struct ip6_hdr *ip6; - af = AF_INET6; - if (m->m_len < sizeof(*ip6)) { - m = m_pullup(m, sizeof(*ip6)); - if (!m) - return IPPROTO_DONE; - } + if ((gifp->if_flags & IFF_UP) != 0) { ip6 = mtod(m, struct ip6_hdr *); - if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ? 
- ECN_ALLOWED : ECN_NOCARE, - &otos, &ip6->ip6_flow) == 0) { - m_freem(m); - return IPPROTO_DONE; - } - break; - } -#endif - case IPPROTO_ETHERIP: - af = AF_LINK; - break; - - default: - IP6STAT_INC(ip6s_nogif); + ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + m_adj(m, *offp); + gif_input(m, gifp, proto, ecn); + } else { m_freem(m); - return IPPROTO_DONE; + IP6STAT_INC(ip6s_nogif); } - - gif_input(m, af, gifp); - return IPPROTO_DONE; + return (IPPROTO_DONE); } /* - * validate outer address. + * we know that we are in IFF_UP, outer address available, and outer family + * matched the physical addr family. see gif_encapcheck(). */ -static int -gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc, - struct ifnet *ifp) +int +in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { - struct sockaddr_in6 *src, *dst; + const struct ip6_hdr *ip6; + struct gif_softc *sc; + int ret; - src = (struct sockaddr_in6 *)sc->gif_psrc; - dst = (struct sockaddr_in6 *)sc->gif_pdst; + /* sanity check done in caller */ + sc = (struct gif_softc *)arg; + GIF_RLOCK_ASSERT(sc); /* * Check for address match. Note that the check is for an incoming * packet. We should compare the *source* address in our configuration * and the *destination* address of the packet, and vice versa. 
*/ - if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) || - !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src)) - return 0; - - /* martian filters on outer source - done in ip6_input */ + ip6 = mtod(m, const struct ip6_hdr *); + if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst)) + return (0); + ret = 128; + if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) { + if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0) + return (0); + } else + ret += 128; /* ingress filters on outer source */ - if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) { - struct sockaddr_in6 sin6; - struct rtentry *rt; + if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { + struct nhop6_basic nh6; - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_addr = ip6->ip6_src; - sin6.sin6_scope_id = 0; /* XXX */ + /* XXX empty scope id */ + if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0, + &nh6) != 0) + return (0); - rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, - sc->gif_fibnum); - if (!rt || rt->rt_ifp != ifp) { -#if 0 - char ip6buf[INET6_ADDRSTRLEN]; - log(LOG_WARNING, "%s: packet from %s dropped " - "due to ingress filter\n", if_name(GIF2IFP(sc)), - ip6_sprintf(ip6buf, &sin6.sin6_addr)); -#endif - if (rt) - RTFREE_LOCKED(rt); - return 0; - } - RTFREE_LOCKED(rt); + if (nh6.nh_ifp != m->m_pkthdr.rcvif) + return (0); } - - return 128 * 2; -} - -/* - * we know that we are in IFF_UP, outer address available, and outer family - * matched the physical addr family. see gif_encapcheck(). - * sanity check for arg should have been done in the caller. - */ -int -gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg) -{ - struct ip6_hdr ip6; - struct gif_softc *sc; - struct ifnet *ifp; - - /* sanity check done in caller */ - sc = (struct gif_softc *)arg; - - /* LINTED const cast */ - m_copydata(m, 0, sizeof(ip6), (caddr_t)&ip6); - ifp = ((m->m_flags & M_PKTHDR) != 0) ? 
m->m_pkthdr.rcvif : NULL; - - return gif_validate6(&ip6, sc, ifp); + return (ret); } int in6_gif_attach(struct gif_softc *sc) { - sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck, - (void *)&in6_gif_protosw, sc); - if (sc->encap_cookie6 == NULL) - return EEXIST; - return 0; -} - -int -in6_gif_detach(struct gif_softc *sc) -{ - int error; - error = encap_detach(sc->encap_cookie6); - if (error == 0) - sc->encap_cookie6 = NULL; - return error; + KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL")); + sc->gif_ecookie = encap_attach_func(AF_INET6, -1, gif_encapcheck, + (void *)&in6_gif_protosw, sc); + if (sc->gif_ecookie == NULL) + return (EEXIST); + return (0); } diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c index a8f03017..791e9e27 100644 --- a/freebsd/sys/netinet6/in6_ifattach.c +++ b/freebsd/sys/netinet6/in6_ifattach.c @@ -41,11 +41,14 @@ __FBSDID("$FreeBSD$"); #include <sys/sockio.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> #include <sys/proc.h> +#include <sys/rmlock.h> #include <sys/syslog.h> #include <sys/md5.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/if_types.h> #include <net/route.h> @@ -279,9 +282,7 @@ found: case IFT_ISO88025: case IFT_ATM: case IFT_IEEE1394: -#ifdef IFT_IEEE80211 case IFT_IEEE80211: -#endif /* IEEE802/EUI64 cases - what others? */ /* IEEE1394 uses 16byte length address starting with EUI64 */ if (addrlen > 8) @@ -343,9 +344,7 @@ found: break; case IFT_GIF: -#ifdef IFT_STF case IFT_STF: -#endif /* * RFC2893 says: "SHOULD use IPv4 address as ifid source". 
* however, IPv4 address is not very suitable as unique @@ -412,7 +411,7 @@ get_ifid(struct ifnet *ifp0, struct ifnet *altifp, /* next, try to get it from some other hardware interface */ IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp == ifp0) continue; if (in6_get_hw_ifid(ifp, in6) != 0) @@ -460,21 +459,13 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp) struct in6_ifaddr *ia; struct in6_aliasreq ifra; struct nd_prefixctl pr0; - int i, error; + int error; /* * configure link-local address. */ - bzero(&ifra, sizeof(ifra)); - - /* - * in6_update_ifa() does not use ifra_name, but we accurately set it - * for safety. - */ - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + in6_prepare_ifra(&ifra, NULL, &in6mask64); - ifra.ifra_addr.sin6_family = AF_INET6; - ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_addr.sin6_addr.s6_addr32[0] = htonl(0xfe800000); ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0; if ((ifp->if_flags & IFF_LOOPBACK) != 0) { @@ -490,9 +481,6 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp) if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL)) return (-1); - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); - ifra.ifra_prefixmask.sin6_family = AF_INET6; - ifra.ifra_prefixmask.sin6_addr = in6mask64; /* link-local addresses should NEVER expire. */ ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; @@ -537,10 +525,7 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp) pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL); pr0.ndpr_prefix = ifra.ifra_addr; /* apply the mask for safety. (nd6_prelist_add will apply it again) */ - for (i = 0; i < 4; i++) { - pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= - in6mask64.s6_addr32[i]; - } + IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr, &in6mask64); /* * Initialize parameters. 
The link-local prefix must always be * on-link, and its lifetimes never expire. @@ -573,17 +558,7 @@ in6_ifattach_loopback(struct ifnet *ifp) struct in6_aliasreq ifra; int error; - bzero(&ifra, sizeof(ifra)); - - /* - * in6_update_ifa() does not use ifra_name, but we accurately set it - * for safety. - */ - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); - - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); - ifra.ifra_prefixmask.sin6_family = AF_INET6; - ifra.ifra_prefixmask.sin6_addr = in6mask128; + in6_prepare_ifra(&ifra, &in6addr_loopback, &in6mask128); /* * Always initialize ia_dstaddr (= broadcast address) to loopback @@ -593,20 +568,10 @@ in6_ifattach_loopback(struct ifnet *ifp) ifra.ifra_dstaddr.sin6_family = AF_INET6; ifra.ifra_dstaddr.sin6_addr = in6addr_loopback; - ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); - ifra.ifra_addr.sin6_family = AF_INET6; - ifra.ifra_addr.sin6_addr = in6addr_loopback; - /* the loopback address should NEVER expire. */ ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; - /* we don't need to perform DAD on loopback interfaces. */ - ifra.ifra_flags |= IN6_IFF_NODAD; - - /* skip registration to the prefix list. XXX should be temporary. */ - ifra.ifra_flags |= IN6_IFF_NOPFX; - /* * We are sure that this is a newly assigned address, so we can set * NULL to the 3rd arg. 
@@ -734,15 +699,8 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp) struct in6_ifaddr *ia; struct in6_addr in6; - /* some of the interfaces are inherently not IPv6 capable */ - switch (ifp->if_type) { - case IFT_PFLOG: - case IFT_PFSYNC: - ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL; - ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; + if (ifp->if_afdata[AF_INET6] == NULL) return; - } - /* * quirks based on interface type */ @@ -813,64 +771,45 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp) /* * NOTE: in6_ifdetach() does not support loopback if at this moment. - * We don't need this function in bsdi, because interfaces are never removed - * from the ifnet list in bsdi. + * + * When shutting down a VNET we clean up layers top-down. In that case + * upper layer protocols (ulp) are cleaned up already and locks are destroyed + * and we must not call into these cleanup functions anymore, thus purgeulp + * is set to 0 in that case by in6_ifdetach_destroy(). + * The normal case of destroying a (cloned) interface still needs to cleanup + * everything related to the interface and will have purgeulp set to 1. */ -void -in6_ifdetach(struct ifnet *ifp) +static void +_in6_ifdetach(struct ifnet *ifp, int purgeulp) { - struct in6_ifaddr *ia; struct ifaddr *ifa, *next; - struct radix_node_head *rnh; - struct rtentry *rt; - struct sockaddr_in6 sin6; - struct in6_multi_mship *imm; - /* remove neighbor management table */ - nd6_purge(ifp); + if (ifp->if_afdata[AF_INET6] == NULL) + return; - /* nuke any of IPv6 addresses we have */ + /* + * Remove neighbor management table. + * Enabling the nd6_purge will panic on vmove for interfaces on VNET + * teardown as the IPv6 layer is cleaned up already and the locks + * are destroyed. 
+ */ + if (purgeulp) + nd6_purge(ifp); + + /* + * nuke any of IPv6 addresses we have + * XXX: all addresses should be already removed + */ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } - - /* undo everything done by in6_ifattach(), just in case */ - TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { - if (ifa->ifa_addr->sa_family != AF_INET6 - || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) { - continue; - } - - ia = (struct in6_ifaddr *)ifa; - - /* - * leave from multicast groups we have joined for the interface - */ - while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { - LIST_REMOVE(imm, i6mm_chain); - in6_leavegroup(imm); - } - - /* Remove link-local from the routing table. */ - if (ia->ia_flags & IFA_ROUTE) - (void)rtinit(&ia->ia_ifa, RTM_DELETE, ia->ia_flags); - - /* remove from the linked list */ - IF_ADDR_WLOCK(ifp); - TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); - IF_ADDR_WUNLOCK(ifp); - ifa_free(ifa); /* if_addrhead */ - - IN6_IFADDR_WLOCK(); - TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link); - IN6_IFADDR_WUNLOCK(); - ifa_free(ifa); + if (purgeulp) { + in6_pcbpurgeif0(&V_udbinfo, ifp); + in6_pcbpurgeif0(&V_ulitecbinfo, ifp); + in6_pcbpurgeif0(&V_ripcbinfo, ifp); } - - in6_pcbpurgeif0(&V_udbinfo, ifp); - in6_pcbpurgeif0(&V_ripcbinfo, ifp); /* leave from all multicast groups joined */ in6_purgemaddrs(ifp); @@ -882,32 +821,22 @@ in6_ifdetach(struct ifnet *ifp) * prefixes after removing all addresses above. * (Or can we just delay calling nd6_purge until at this point?) */ - nd6_purge(ifp); + if (purgeulp) + nd6_purge(ifp); +} - /* - * Remove route to link-local allnodes multicast (ff02::1). - * These only get automatically installed for the default FIB. 
- */ - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - sin6.sin6_family = AF_INET6; - sin6.sin6_addr = in6addr_linklocal_allnodes; - if (in6_setscope(&sin6.sin6_addr, ifp, NULL)) - /* XXX: should not fail */ - return; - /* XXX grab lock first to avoid LOR */ - rnh = rt_tables_get_rnh(RT_DEFAULT_FIB, AF_INET6); - if (rnh != NULL) { - RADIX_NODE_HEAD_LOCK(rnh); - rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED, - RT_DEFAULT_FIB); - if (rt) { - if (rt->rt_ifp == ifp) - rtexpunge(rt); - RTFREE_LOCKED(rt); - } - RADIX_NODE_HEAD_UNLOCK(rnh); - } +void +in6_ifdetach(struct ifnet *ifp) +{ + + _in6_ifdetach(ifp, 1); +} + +void +in6_ifdetach_destroy(struct ifnet *ifp) +{ + + _in6_ifdetach(ifp, 0); } int @@ -948,7 +877,9 @@ in6_tmpaddrtimer(void *arg) V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet); bzero(nullbuf, sizeof(nullbuf)); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { + if (ifp->if_afdata[AF_INET6] == NULL) + continue; ndi = ND_IFINFO(ifp); if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) { /* @@ -997,3 +928,29 @@ in6_purgemaddrs(struct ifnet *ifp) IN6_MULTI_UNLOCK(); } + +void +in6_ifattach_destroy(void) +{ + + callout_drain(&V_in6_tmpaddrtimer_ch); +} + +static void +in6_ifattach_init(void *dummy) +{ + + /* Timer for regeneranation of temporary addresses randomize ID. */ + callout_init(&V_in6_tmpaddrtimer_ch, 0); + callout_reset(&V_in6_tmpaddrtimer_ch, + (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - + V_ip6_temp_regen_advance) * hz, + in6_tmpaddrtimer, curvnet); +} + +/* + * Cheat. + * This must be after route_init(), which is now SI_ORDER_THIRD. 
+ */ +SYSINIT(in6_ifattach_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + in6_ifattach_init, NULL); diff --git a/freebsd/sys/netinet6/in6_ifattach.h b/freebsd/sys/netinet6/in6_ifattach.h index af627313..a34530db 100644 --- a/freebsd/sys/netinet6/in6_ifattach.h +++ b/freebsd/sys/netinet6/in6_ifattach.h @@ -35,7 +35,9 @@ #ifdef _KERNEL void in6_ifattach(struct ifnet *, struct ifnet *); +void in6_ifattach_destroy(void); void in6_ifdetach(struct ifnet *); +void in6_ifdetach_destroy(struct ifnet *); int in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int); void in6_tmpaddrtimer(void *); int in6_get_hw_ifid(struct ifnet *, struct in6_addr *); diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c index d32d57c6..174f1109 100644 --- a/freebsd/sys/netinet6/in6_mcast.c +++ b/freebsd/sys/netinet6/in6_mcast.c @@ -54,12 +54,14 @@ __FBSDID("$FreeBSD$"); #include <sys/tree.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/route.h> #include <net/vnet.h> #include <netinet/in.h> #include <netinet/in_var.h> +#include <netinet6/in6_fib.h> #include <netinet6/in6_var.h> #include <netinet/ip6.h> #include <netinet/icmp6.h> @@ -159,21 +161,18 @@ static SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW, 0, static u_long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER; SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc, - CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxgrpsrc, 0, + CTLFLAG_RWTUN, &in6_mcast_maxgrpsrc, 0, "Max source filters per group"); -TUNABLE_ULONG("net.inet6.ip6.mcast.maxgrpsrc", &in6_mcast_maxgrpsrc); static u_long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER; SYSCTL_ULONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc, - CTLFLAG_RW | CTLFLAG_TUN, &in6_mcast_maxsocksrc, 0, + CTLFLAG_RWTUN, &in6_mcast_maxsocksrc, 0, "Max source filters per socket"); -TUNABLE_ULONG("net.inet6.ip6.mcast.maxsocksrc", &in6_mcast_maxsocksrc); /* TODO Virtualize this switch. 
*/ int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP; -SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN, +SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, &in6_mcast_loop, 0, "Loopback multicast datagrams by default"); -TUNABLE_INT("net.inet6.ip6.mcast.loop", &in6_mcast_loop); static SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip6_mcast_filters, @@ -473,9 +472,9 @@ in6_mc_get(struct ifnet *ifp, const struct in6_addr *group, */ inm = malloc(sizeof(*inm), M_IP6MADDR, M_NOWAIT | M_ZERO); if (inm == NULL) { + IF_ADDR_WUNLOCK(ifp); if_delmulti_ifma(ifma); - error = ENOMEM; - goto out_locked; + return (ENOMEM); } inm->in6m_addr = *group; inm->in6m_ifp = ifp; @@ -483,7 +482,7 @@ in6_mc_get(struct ifnet *ifp, const struct in6_addr *group, inm->in6m_ifma = ifma; inm->in6m_refcount = 1; inm->in6m_state = MLD_NOT_MEMBER; - IFQ_SET_MAXLEN(&inm->in6m_scq, MLD_MAX_STATE_CHANGES); + mbufq_init(&inm->in6m_scq, MLD_MAX_STATE_CHANGES); inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED; inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; @@ -577,7 +576,7 @@ in6m_clear_recorded(struct in6_multi *inm) * * Return 0 if the source didn't exist or was already marked as recorded. * Return 1 if the source was marked as recorded by this function. - * Return <0 if any error occured (negated errno code). + * Return <0 if any error occurred (negated errno code). */ int in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr) @@ -1078,7 +1077,7 @@ in6m_purge(struct in6_multi *inm) inm->in6m_nsrc--; } /* Free state-change requests that might be queued. 
*/ - _IF_DRAIN(&inm->in6m_scq); + mbufq_drain(&inm->in6m_scq); } /* @@ -1187,7 +1186,7 @@ in6_mc_join_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, IN6_MULTI_LOCK_ASSERT(); CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__, - ip6_sprintf(ip6tbuf, mcaddr), ifp, ifp->if_xname); + ip6_sprintf(ip6tbuf, mcaddr), ifp, if_name(ifp)); error = 0; inm = NULL; @@ -1278,7 +1277,7 @@ in6_mc_leave_locked(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) CTR5(KTR_MLD, "%s: leave inm %p, %s/%s, imf %p", __func__, inm, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_xname), + (in6m_is_ifp_detached(inm) ? "null" : if_name(inm->in6m_ifp)), imf); /* @@ -1776,28 +1775,22 @@ static struct ifnet * in6p_lookup_mcast_ifp(const struct inpcb *in6p, const struct sockaddr_in6 *gsin6) { - struct route_in6 ro6; - struct ifnet *ifp; + struct nhop6_basic nh6; + struct in6_addr dst; + uint32_t scopeid; + uint32_t fibnum; KASSERT(in6p->inp_vflag & INP_IPV6, ("%s: not INP_IPV6 inpcb", __func__)); KASSERT(gsin6->sin6_family == AF_INET6, ("%s: not AF_INET6 group", __func__)); - KASSERT(IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr), - ("%s: not multicast", __func__)); - ifp = NULL; - memset(&ro6, 0, sizeof(struct route_in6)); - memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6)); - rtalloc_ign_fib((struct route *)&ro6, 0, - in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB); - if (ro6.ro_rt != NULL) { - ifp = ro6.ro_rt->rt_ifp; - KASSERT(ifp != NULL, ("%s: null ifp", __func__)); - RTFREE(ro6.ro_rt); - } + in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid); + fibnum = in6p ? 
in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB; + if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0) + return (NULL); - return (ifp); + return (nh6.nh_ifp); } /* @@ -1853,8 +1846,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) if (mreq.ipv6mr_interface == 0) { ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6); } else { - if (mreq.ipv6mr_interface < 0 || - V_if_index < mreq.ipv6mr_interface) + if (V_if_index < mreq.ipv6mr_interface) return (EADDRNOTAVAIL); ifp = ifnet_byindex(mreq.ipv6mr_interface); } @@ -2198,7 +2190,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) * XXX SCOPE6 lock potentially taken here. */ if (ifindex != 0) { - if (ifindex < 0 || V_if_index < ifindex) + if (V_if_index < ifindex) return (EADDRNOTAVAIL); ifp = ifnet_byindex(ifindex); if (ifp == NULL) @@ -2353,13 +2345,17 @@ in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int)); if (error) return (error); - if (ifindex < 0 || V_if_index < ifindex) + if (V_if_index < ifindex) return (EINVAL); - - ifp = ifnet_byindex(ifindex); - if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) - return (EADDRNOTAVAIL); - + if (ifindex == 0) + ifp = NULL; + else { + ifp = ifnet_byindex(ifindex); + if (ifp == NULL) + return (EINVAL); + if ((ifp->if_flags & IFF_MULTICAST) == 0) + return (EADDRNOTAVAIL); + } imo = in6p_findmoptions(inp); imo->im6o_multicast_ifp = ifp; INP_WUNLOCK(inp); @@ -2805,13 +2801,13 @@ in6m_print(const struct in6_multi *inm) printf("addr %s ifp %p(%s) ifma %p\n", ip6_sprintf(ip6tbuf, &inm->in6m_addr), inm->in6m_ifp, - inm->in6m_ifp->if_xname, + if_name(inm->in6m_ifp), inm->in6m_ifma); printf("timer %u state %s refcount %u scq.len %u\n", inm->in6m_timer, in6m_state_str(inm->in6m_state), inm->in6m_refcount, - inm->in6m_scq.ifq_len); + mbufq_len(&inm->in6m_scq)); printf("mli %p nsrc %lu sctimer %u scrv %u\n", inm->in6m_mli, inm->in6m_nsrc, diff --git a/freebsd/sys/netinet6/in6_pcb.c 
b/freebsd/sys/netinet6/in6_pcb.c index bf69996d..95e376c7 100644 --- a/freebsd/sys/netinet6/in6_pcb.c +++ b/freebsd/sys/netinet6/in6_pcb.c @@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/local/opt_ipsec.h> #include <rtems/bsd/local/opt_pcbgroup.h> +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> @@ -96,6 +97,8 @@ __FBSDID("$FreeBSD$"); #include <vm/uma.h> #include <net/if.h> +#include <net/if_var.h> +#include <net/if_llatbl.h> #include <net/if_types.h> #include <net/route.h> @@ -112,7 +115,8 @@ __FBSDID("$FreeBSD$"); #include <netinet6/in6_pcb.h> #include <netinet6/scope6_var.h> -struct in6_addr zeroin6_addr; +static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *, + struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *); int in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, @@ -208,6 +212,7 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && + ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && @@ -221,6 +226,16 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, 0) #endif /* __rtems__ */ return (EADDRINUSE); + + /* + * If the socket is a BINDMULTI socket, then + * the credentials need to match and the + * original socket also has to have been bound + * with BINDMULTI. + */ + if (t && (! 
in_pcbbind_check_bindmulti(inp, t))) + return (EADDRINUSE); + #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { @@ -231,6 +246,7 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, sin.sin_addr, lport, INPLOOKUP_WILDCARD, cred); if (t && + ((inp->inp_flags2 & INP_BINDMULTI) == 0) && ((t->inp_flags & INP_TIMEWAIT) == 0) && (so->so_type != SOCK_STREAM || @@ -243,6 +259,9 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, 0) #endif /* __rtems__ */ return (EADDRINUSE); + + if (t && (! in_pcbbind_check_bindmulti(inp, t))) + return (EADDRINUSE); } #endif } @@ -318,13 +337,12 @@ in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ -int +static int in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, struct in6_addr *plocal_addr6) { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error = 0; - struct ifnet *ifp = NULL; int scope_ambiguous = 0; struct in6_addr in6a; @@ -354,20 +372,15 @@ in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) return (error); - error = in6_selectsrc(sin6, inp->in6p_outputopts, - inp, NULL, inp->inp_cred, &ifp, &in6a); + error = in6_selectsrc_socket(sin6, inp->in6p_outputopts, + inp, inp->inp_cred, scope_ambiguous, &in6a, NULL); if (error) return (error); - if (ifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { - return(error); - } - /* * Do not update this earlier, in case we return with an error. * - * XXX: this in6_selectsrc result might replace the bound local + * XXX: this in6_selectsrc_socket result might replace the bound local * address with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? 
*/ @@ -702,8 +715,9 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ - head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, - 0, pcbinfo->ipi_hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH( + INP6_PCBHASHKEY(&in6addr_any), lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) @@ -784,7 +798,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) struct ip6_moptions *im6o; int i, gap; - INP_INFO_RLOCK(pcbinfo); + INP_INFO_WLOCK(pcbinfo); LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(in6p); im6o = in6p->in6p_moptions; @@ -815,7 +829,7 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) } INP_WUNLOCK(in6p); } - INP_INFO_RUNLOCK(pcbinfo); + INP_INFO_WUNLOCK(pcbinfo); } /* @@ -828,9 +842,12 @@ void in6_losing(struct inpcb *in6p) { - /* - * We don't store route pointers in the routing table anymore - */ + if (in6p->inp_route6.ro_rt) { + RTFREE(in6p->inp_route6.ro_rt); + in6p->inp_route6.ro_rt = (struct rtentry *)NULL; + } + if (in6p->inp_route.ro_lle) + LLE_FREE(in6p->inp_route.ro_lle); /* zeros ro_lle */ return; } @@ -841,9 +858,13 @@ in6_losing(struct inpcb *in6p) struct inpcb * in6_rtchange(struct inpcb *inp, int errno) { - /* - * We don't store route pointers in the routing table anymore - */ + + if (inp->inp_route6.ro_rt) { + RTFREE(inp->inp_route6.ro_rt); + inp->inp_route6.ro_rt = (struct rtentry *)NULL; + } + if (inp->inp_route.ro_lle) + LLE_FREE(inp->inp_route.ro_lle); /* zeros ro_lle */ return inp; } @@ -859,21 +880,14 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; - int faith; - - if (faithprefix_p != NULL) - faith = (*faithprefix_p)(laddr); - else - faith = 
0; /* * First look for an exact match. */ tmpinp = NULL; INP_GROUP_LOCK(pcbgroup); - head = &pcbgroup->ipg_hashbase[ - INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, - pcbgroup->ipg_hashmask)]; + head = &pcbgroup->ipg_hashbase[INP_PCBHASH( + INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)]; LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) @@ -899,7 +913,7 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, } /* - * Then look for a wildcard match, if requested. + * Then look for a wildcard match in the pcbgroup. */ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { struct inpcb *local_wild = NULL, *local_exact = NULL; @@ -913,9 +927,9 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, * 3. non-jailed, non-wild. * 4. non-jailed, wild. */ - head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, - 0, pcbinfo->ipi_wildmask)]; - LIST_FOREACH(inp, head, inp_pcbgroup_wild) { + head = &pcbgroup->ipg_hashbase[ + INP_PCBHASH(INADDR_ANY, lport, 0, pcbgroup->ipg_hashmask)]; + LIST_FOREACH(inp, head, inp_pcbgrouphash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) continue; @@ -925,9 +939,67 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, continue; } + injail = prison_flag(inp->inp_cred, PR_IP6); + if (injail) { + if (prison_check_ip6(inp->inp_cred, + laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } + + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { + if (injail) + goto found; + else + local_exact = inp; + } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* LIST_FOREACH */ + + inp = jail_wild; + if (inp == NULL) + inp = jail_wild; + if (inp == NULL) + inp = local_exact; + if (inp == NULL) + inp = local_wild; + if (inp != NULL) + goto found; + } + + /* + * Then look for a wildcard match, if 
requested. + */ + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { + struct inpcb *local_wild = NULL, *local_exact = NULL; + struct inpcb *jail_wild = NULL; + int injail; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. + */ + head = &pcbinfo->ipi_wildbase[INP_PCBHASH( + INP6_PCBHASHKEY(&in6addr_any), lport, 0, + pcbinfo->ipi_wildmask)]; + LIST_FOREACH(inp, head, inp_pcbgroup_wild) { /* XXX inp locking */ - if (faith && (inp->inp_flags & INP_FAITH) == 0) + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + inp->inp_lport != lport) { continue; + } injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { @@ -985,7 +1057,7 @@ found: /* * Lookup PCB in hash list. */ -struct inpcb * +static struct inpcb * in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) @@ -993,25 +1065,18 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, struct inpcbhead *head; struct inpcb *inp, *tmpinp; u_short fport = fport_arg, lport = lport_arg; - int faith; KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); INP_HASH_LOCK_ASSERT(pcbinfo); - if (faithprefix_p != NULL) - faith = (*faithprefix_p)(laddr); - else - faith = 0; - /* * First look for an exact match. */ tmpinp = NULL; - head = &pcbinfo->ipi_hashbase[ - INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, - pcbinfo->ipi_hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH( + INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) @@ -1049,8 +1114,9 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, * 3. non-jailed, non-wild. * 4. 
non-jailed, wild. */ - head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, - 0, pcbinfo->ipi_hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH( + INP6_PCBHASHKEY(&in6addr_any), lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV6) == 0) @@ -1061,10 +1127,6 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, continue; } - /* XXX inp locking */ - if (faith && (inp->inp_flags & INP_FAITH) == 0) - continue; - injail = prison_flag(inp->inp_cred, PR_IP6); if (injail) { if (prison_check_ip6(inp->inp_cred, @@ -1145,7 +1207,7 @@ struct inpcb * in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { -#if defined(PCBGROUP) +#if defined(PCBGROUP) && !defined(RSS) struct inpcbgroup *pcbgroup; #endif @@ -1154,7 +1216,17 @@ in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); -#if defined(PCBGROUP) + /* + * When not using RSS, use connection groups in preference to the + * reservation table when looking up 4-tuples. When using RSS, just + * use the reservation table, due to the cost of the Toeplitz hash + * in software. + * + * XXXRW: This policy belongs in the pcbgroup code, as in principle + * we could be doing RSS with a non-Toeplitz hash that is affordable + * in software. + */ +#if defined(PCBGROUP) && !defined(RSS) if (in_pcbgroup_enabled(pcbinfo)) { pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); @@ -1181,16 +1253,27 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP - if (in_pcbgroup_enabled(pcbinfo)) { + /* + * If we can use a hardware-generated hash to look up the connection + * group, use that connection group to find the inpcb. 
Otherwise + * fall back on a software hash -- or the reservation table if we're + * using RSS. + * + * XXXRW: As above, that policy belongs in the pcbgroup code. + */ + if (in_pcbgroup_enabled(pcbinfo) && + M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) { pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), m->m_pkthdr.flowid); if (pcbgroup != NULL) return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); +#ifndef RSS pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, fport); return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, laddr, lport, lookupflags, ifp)); +#endif } #endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h index 19d151b7..e758dace 100644 --- a/freebsd/sys/netinet6/in6_pcb.h +++ b/freebsd/sys/netinet6/in6_pcb.h @@ -86,7 +86,6 @@ int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); int in6_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *, struct mbuf *); void in6_pcbdisconnect(struct inpcb *); -int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *); struct inpcb * in6_pcblookup_local(struct inpcbinfo *, struct in6_addr *, u_short, int, @@ -96,10 +95,6 @@ struct inpcb * u_int, struct in6_addr *, u_int, int, struct ifnet *); struct inpcb * - in6_pcblookup_hash_locked(struct inpcbinfo *, struct in6_addr *, - u_int, struct in6_addr *, u_int, int, - struct ifnet *); -struct inpcb * in6_pcblookup_mbuf(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *ifp, struct mbuf *); diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c index a6c3b4e8..8a9c1cd9 100644 --- a/freebsd/sys/netinet6/in6_proto.c +++ b/freebsd/sys/netinet6/in6_proto.c @@ -80,12 +80,14 @@ __FBSDID("$FreeBSD$"); #include <sys/protosw.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <sys/malloc.h> #include 
<sys/domain.h> #include <sys/mbuf.h> #include <sys/systm.h> #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/radix.h> #include <net/route.h> #ifdef RADIX_MPATH @@ -128,10 +130,6 @@ __FBSDID("$FreeBSD$"); #include <netinet6/ip6protosw.h> -#ifdef FLOWTABLE -#include <net/flowtable.h> -#endif - /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. */ @@ -151,15 +149,12 @@ static struct pr_usrreqs nousrreqs; .pr_usrreqs = &nousrreqs \ } -struct ip6protosw inet6sw[] = { +struct protosw inet6sw[] = { { .pr_type = 0, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_IPV6, .pr_init = ip6_init, -#ifdef VIMAGE - .pr_destroy = ip6_destroy, -#endif .pr_slowtimo = frag6_slowtimo, .pr_drain = frag6_drain, .pr_usrreqs = &nousrreqs, @@ -211,15 +206,28 @@ struct ip6protosw inet6sw[] = { .pr_type = SOCK_STREAM, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_SCTP, - .pr_flags = PR_WANTRCVD, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_input = sctp6_input, - .pr_ctlinput = sctp6_ctlinput, + .pr_ctlinput = sctp6_ctlinput, .pr_ctloutput = sctp_ctloutput, .pr_drain = sctp_drain, .pr_usrreqs = &sctp6_usrreqs }, #endif /* SCTP */ { + .pr_type = SOCK_DGRAM, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_UDPLITE, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = udp6_input, + .pr_ctlinput = udplite6_ctlinput, + .pr_ctloutput = udp_ctloutput, +#ifndef INET /* Do not call initialization twice. 
*/ + .pr_init = udplite_init, +#endif + .pr_usrreqs = &udp6_usrreqs, +}, +{ .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_RAW, @@ -324,6 +332,17 @@ struct ip6protosw inet6sw[] = { { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_GRE, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap6_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_init = encap_init, + .pr_usrreqs = &rip6_usrreqs +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, @@ -361,8 +380,7 @@ struct domain inet6domain = { .dom_family = AF_INET6, .dom_name = "internet6", .dom_protosw = (struct protosw *)inet6sw, - .dom_protoswNPROTOSW = (struct protosw *) - &inet6sw[sizeof(inet6sw)/sizeof(inet6sw[0])], + .dom_protoswNPROTOSW = (struct protosw *)&inet6sw[nitems(inet6sw)], #ifdef RADIX_MPATH .dom_rtattach = rn6_mpath_inithead, #else @@ -371,10 +389,9 @@ struct domain inet6domain = { #ifdef VIMAGE .dom_rtdetach = in6_detachhead, #endif - .dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3, - .dom_maxrtkey = sizeof(struct sockaddr_in6), .dom_ifattach = in6_domifattach, - .dom_ifdetach = in6_domifdetach + .dom_ifdetach = in6_domifdetach, + .dom_ifmtu = in6_domifmtu }; VNET_DOMAIN_SET(inet6); @@ -416,7 +433,6 @@ VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? */ VNET_DEFINE(int, ip6_v6only) = 1; -VNET_DEFINE(int, ip6_keepfaith) = 0; VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L; #ifdef IPSTEALTH VNET_DEFINE(int, ip6stealth) = 0; @@ -433,16 +449,6 @@ VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS VNET_DEFINE(int, pmtu_expire) = 60*10; VNET_DEFINE(int, pmtu_probe) = 60*2; -/* raw IP6 parameters */ -/* - * Nominal space allocated to a raw ip socket. 
- */ -#define RIPV6SNDQ 8192 -#define RIPV6RCVQ 8192 - -VNET_DEFINE(u_long, rip6_sendspace) = RIPV6SNDQ; -VNET_DEFINE(u_long, rip6_recvspace) = RIPV6RCVQ; - /* ICMPV6 parameters */ VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */ VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */ @@ -452,11 +458,6 @@ VNET_DEFINE(int, icmp6_nodeinfo) = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK); VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1; -/* UDP on IP6 parameters */ -VNET_DEFINE(int, udp6_sendspace) = 9216;/* really max datagram size */ -VNET_DEFINE(int, udp6_recvspace) = 40 * (1024 + sizeof(struct sockaddr_in6)); - /* 40 1K datagrams */ - /* * sysctl related items. */ @@ -479,158 +480,170 @@ SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6"); static int sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS) { - int error = 0; - int old; - - VNET_SYSCTL_ARG(req, arg1); + int error, val; - error = SYSCTL_OUT(req, arg1, sizeof(int)); - if (error || !req->newptr) + val = V_ip6_temp_preferred_lifetime; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || !req->newptr) return (error); - old = V_ip6_temp_preferred_lifetime; - error = SYSCTL_IN(req, arg1, sizeof(int)); - if (V_ip6_temp_preferred_lifetime < - V_ip6_desync_factor + V_ip6_temp_regen_advance) { - V_ip6_temp_preferred_lifetime = old; + if (val < V_ip6_desync_factor + V_ip6_temp_regen_advance) return (EINVAL); - } - return (error); + V_ip6_temp_preferred_lifetime = val; + return (0); } static int sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS) { - int error = 0; - int old; + int error, val; - VNET_SYSCTL_ARG(req, arg1); - - error = SYSCTL_OUT(req, arg1, sizeof(int)); - if (error || !req->newptr) + val = V_ip6_temp_valid_lifetime; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || !req->newptr) return (error); - old = V_ip6_temp_valid_lifetime; - error = SYSCTL_IN(req, arg1, sizeof(int)); - if (V_ip6_temp_valid_lifetime < 
V_ip6_temp_preferred_lifetime) { - V_ip6_temp_preferred_lifetime = old; + if (val < V_ip6_temp_preferred_lifetime) return (EINVAL); - } - return (error); + V_ip6_temp_valid_lifetime = val; + return (0); } -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, CTLFLAG_RW, - &VNET_NAME(ip6_forwarding), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW, - &VNET_NAME(ip6_sendredirects), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW, - &VNET_NAME(ip6_defhlim), 0, ""); -SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RW, - &VNET_NAME(ip6stat), ip6stat, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, - CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, - CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, - "Default value of per-interface flag for accepting ICMPv6 Router" - "Advertisement messages"); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr, - CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0, +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0, + "Enable IPv6 forwarding between interfaces"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0, + "Send a redirect message when forwarding back to a source link"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defhlim), 0, + "Default hop limit"); +SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat, + ip6stat, + "IP6 statistics (struct ip6stat, netinet6/ip6_var.h)"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0, + "Maximum allowed number of outstanding fragmented IPv6 packets"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, + 
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0, + "Default value of per-interface flag for accepting ICMPv6 RA messages"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0, "Default value of per-interface flag to control whether routers " "sending ICMPv6 RA messages on that interface are added into the " - "default router list."); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_RW, - &VNET_NAME(ip6_norbit_raif), 0, - "Always set 0 to R flag in ICMPv6 NA messages when accepting RA" - " on the interface."); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3, - CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0, + "default router list"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_norbit_raif), 0, + "Always set clear the R flag in ICMPv6 NA messages when accepting RA " + "on the interface"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0, "Accept the default router list from ICMPv6 RA messages even " - "when packet forwarding enabled."); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW, - &VNET_NAME(ip6_keepfaith), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, - CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, - CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, CTLFLAG_RW, - &VNET_NAME(ip6_dad_count), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel, - CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim, - CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, ""); + "when packet forwarding is enabled"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval, 
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0, + "Frequency in seconds at which to log IPv6 forwarding errors"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0, + "Maximum allowed number of nested protocol headers"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0, + "Number of ICMPv6 NS messages sent during duplicate address detection"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0, + "Provide an IPv6 flowlabel in outbound packets"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0, + "Default hop limit for multicast packets"); SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version, - CTLFLAG_RD, __KAME_VERSION, 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated, - CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, CTLFLAG_RW, - &VNET_NAME(ip6_rr_prune), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr, - CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, ""); -SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime, - CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_preferred_lifetime), 0, - sysctl_ip6_temppltime, "I", ""); -SYSCTL_VNET_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime, - CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ip6_temp_valid_lifetime), 0, - sysctl_ip6_tempvltime, "I", ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, CTLFLAG_RW, - &VNET_NAME(ip6_v6only), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal, - CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0, - "Default value of per-interface flag for automatically adding an IPv6" - " link-local address to interfaces when 
attached"); -SYSCTL_VNET_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RW, - &VNET_NAME(rip6stat), rip6stat, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr, - CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, - CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, CTLFLAG_RW, - &VNET_NAME(ip6_maxfrags), 0, ""); -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, CTLFLAG_RW, - &VNET_NAME(ip6_mcast_pmtu), 0, ""); + CTLFLAG_RD, __KAME_VERSION, 0, + "KAME version string"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0, + "Allow the use of addresses whose preferred lifetimes have expired"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rr_prune), 0, + ""); /* XXX unused */ +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0, + "Create RFC3041 temporary addresses for autoconfigured addresses"); +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, sysctl_ip6_temppltime, "I", + "Maximum preferred lifetime for temporary addresses"); +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, sysctl_ip6_tempvltime, "I", + "Maximum valid lifetime for temporary addresses"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_v6only), 0, + "Restrict AF_INET6 sockets to IPv6 addresses only"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0, + "Default value of per-interface flag for automatically adding an IPv6 " + "link-local address to 
interfaces when attached"); +SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, + struct rip6stat, rip6stat, + "Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0, + "Prefer RFC3041 temporary addresses in source address selection"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0, + "Use the default scope zone when none is specified"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0, + "Maximum allowed number of outstanding IPv6 packet fragments"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0, + "Enable path MTU discovery for multicast packets"); #ifdef IPSTEALTH -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW, - &VNET_NAME(ip6stealth), 0, ""); -#endif - -#ifdef FLOWTABLE -VNET_DEFINE(int, ip6_output_flowtable_size) = 2048; -VNET_DEFINE(struct flowtable *, ip6_ft); -#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size) - -SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN, - &VNET_NAME(ip6_output_flowtable_size), 2048, - "number of entries in the per-cpu output flow caches"); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(ip6stealth), 0, + "Forward IPv6 packets without decrementing their TTL"); #endif /* net.inet6.icmp6 */ -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept, - CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout, - CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, ""); -SYSCTL_VNET_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RW, - &VNET_NAME(icmp6stat), icmp6stat, ""); 
-SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW, - &VNET_NAME(nd6_prune), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW, - &VNET_NAME(nd6_delay), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries, - CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries, - CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback, - CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW, - &VNET_NAME(icmp6_nodeinfo), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX, - nodeinfo_oldmcprefix, CTLFLAG_RW, - &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0, - "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup" - " for compatibility with KAME implememtation."); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit, - CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint, - CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_RW, - &VNET_NAME(nd6_debug), 0, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, - nd6_onlink_ns_rfc4861, CTLFLAG_RW, &VNET_NAME(nd6_onlink_ns_rfc4861), - 0, "Accept 'on-link' nd6 NS in compliance with RFC 4861."); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0, + "Accept ICMPv6 redirect messages"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, + ""); /* XXX unused */ +SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, + struct icmp6stat, icmp6stat, + "ICMPv6 statistics (struct 
icmp6stat, netinet/icmp6.h)"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0, + "Frequency in seconds of checks for expired prefixes and routers"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_delay), 0, + "Delay in seconds before probing for reachability"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0, + "Number of ICMPv6 NS messages sent during reachability detection"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0, + "Number of ICMPv6 NS messages sent during address resolution"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0, + "Create a loopback route when configuring an IPv6 address"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo), 0, + "Mask of enabled RF4620 node information query types"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX, + nodeinfo_oldmcprefix, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0, + "Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup " + "for compatibility with KAME implementation"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0, + "Maximum number of ICMPv6 error messages per second"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0, + ""); /* XXX unused */ +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0, + "Log NDP debug messages"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, + 
nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(nd6_onlink_ns_rfc4861), 0, + "Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861"); diff --git a/freebsd/sys/netinet6/in6_rmx.c b/freebsd/sys/netinet6/in6_rmx.c index 4c59a1ad..f04e0058 100644 --- a/freebsd/sys/netinet6/in6_rmx.c +++ b/freebsd/sys/netinet6/in6_rmx.c @@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> -#include <sys/sysctl.h> #include <sys/queue.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -78,7 +77,9 @@ __FBSDID("$FreeBSD$"); #include <sys/callout.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> +#include <net/route_var.h> #include <netinet/in.h> #include <netinet/ip_var.h> @@ -104,14 +105,12 @@ extern int in6_detachhead(void **head, int off); * Do what we need to do when inserting a route. */ static struct radix_node * -in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, +in6_addroute(void *v_arg, void *n_arg, struct radix_head *head, struct radix_node *treenodes) { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); - struct radix_node *ret; - RADIX_NODE_HEAD_WLOCK_ASSERT(head); if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) rt->rt_flags |= RTF_MULTICAST; @@ -137,116 +136,69 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, } } - if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp) - rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp); + if (rt->rt_ifp != NULL) { - ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL) { - struct rtentry *rt2; /* - * We are trying to add a net route, but can't. - * The following case should be allowed, so we'll make a - * special check for this: - * Two IPv6 addresses with the same prefix is assigned - * to a single interrface. 
- * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1) - * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2) - * In this case, (*1) and (*2) want to add the same - * net route entry, 3ffe:0501:: -> if0. - * This case should not raise an error. + * Check route MTU: + * inherit interface MTU if not set or + * check if MTU is too large. */ - rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED, - rt->rt_fibnum); - if (rt2) { - if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0) - && rt2->rt_gateway - && rt2->rt_gateway->sa_family == AF_LINK - && rt2->rt_ifp == rt->rt_ifp) { - ret = rt2->rt_nodes; - } - RTFREE_LOCKED(rt2); - } + if (rt->rt_mtu == 0) { + rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp); + } else if (rt->rt_mtu > IN6_LINKMTU(rt->rt_ifp)) + rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp); } - return (ret); -} -SYSCTL_DECL(_net_inet6_ip6); - -static VNET_DEFINE(int, rtq_toomany6) = 128; - /* 128 cached routes is ``too many'' */ -#define V_rtq_toomany6 VNET(rtq_toomany6) -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, - &VNET_NAME(rtq_toomany6) , 0, ""); - -struct rtqk_arg { - struct radix_node_head *rnh; - int mode; - int updating; - int draining; - int killed; - int found; - time_t nextstop; -}; + return (rn_addroute(v_arg, n_arg, head, treenodes)); +} /* * Age old PMTUs. 
*/ struct mtuex_arg { - struct radix_node_head *rnh; + struct rib_head *rnh; time_t nextstop; }; static VNET_DEFINE(struct callout, rtq_mtutimer); #define V_rtq_mtutimer VNET(rtq_mtutimer) static int -in6_mtuexpire(struct radix_node *rn, void *rock) +in6_mtuexpire(struct rtentry *rt, void *rock) { - struct rtentry *rt = (struct rtentry *)rn; struct mtuex_arg *ap = rock; - /* sanity */ - if (!rt) - panic("rt == NULL in in6_mtuexpire"); - - if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) { - if (rt->rt_rmx.rmx_expire <= time_uptime) { + if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) { + if (rt->rt_expire <= time_uptime) { rt->rt_flags |= RTF_PROBEMTU; } else { - ap->nextstop = lmin(ap->nextstop, - rt->rt_rmx.rmx_expire); + ap->nextstop = lmin(ap->nextstop, rt->rt_expire); } } - return 0; + return (0); } #define MTUTIMO_DEFAULT (60*1) static void -in6_mtutimo_one(struct radix_node_head *rnh) +in6_mtutimo_setwa(struct rib_head *rnh, uint32_t fibum, int af, + void *_arg) { - struct mtuex_arg arg; + struct mtuex_arg *arg; - arg.rnh = rnh; - arg.nextstop = time_uptime + MTUTIMO_DEFAULT; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); + arg = (struct mtuex_arg *)_arg; + + arg->rnh = rnh; } static void in6_mtutimo(void *rock) { CURVNET_SET_QUIET((struct vnet *) rock); - struct radix_node_head *rnh; struct timeval atv; - u_int fibnum; + struct mtuex_arg arg; - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rnh = rt_tables_get_rnh(fibnum, AF_INET6); - if (rnh != NULL) - in6_mtutimo_one(rnh); - } + rt_foreach_fib_walk(AF_INET6, in6_mtutimo_setwa, in6_mtuexpire, &arg); atv.tv_sec = MTUTIMO_DEFAULT; atv.tv_usec = 0; @@ -256,10 +208,6 @@ in6_mtutimo(void *rock) /* * Initialize our routing tree. - * XXX MRT When off == 0, we are being called from vfs_export.c - * so just set up their table and leave. (we know what the correct - * value should be so just use that).. 
FIX AFTER RELENG_7 is MFC'd - * see also comments in in_inithead() vfs_export.c and domain.h */ static VNET_DEFINE(int, _in6_rt_was_here); #define V__in6_rt_was_here VNET(_in6_rt_was_here) @@ -267,24 +215,22 @@ static VNET_DEFINE(int, _in6_rt_was_here); int in6_inithead(void **head, int off) { - struct radix_node_head *rnh; + struct rib_head *rh; - if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3)) - return 0; /* See above */ + rh = rt_table_init(offsetof(struct sockaddr_in6, sin6_addr) << 3); + if (rh == NULL) + return (0); - if (off == 0) /* See above */ - return 1; /* only do the rest for the real thing */ - - rnh = *head; - rnh->rnh_addaddr = in6_addroute; + rh->rnh_addaddr = in6_addroute; + *head = (void *)rh; if (V__in6_rt_was_here == 0) { - callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); + callout_init(&V_rtq_mtutimer, 1); in6_mtutimo(curvnet); /* kick off timeout first time */ V__in6_rt_was_here = 1; } - return 1; + return (1); } #ifdef VIMAGE @@ -293,6 +239,8 @@ in6_detachhead(void **head, int off) { callout_drain(&V_rtq_mtutimer); + rt_table_destroy((struct rib_head *)(*head)); + return (1); } #endif diff --git a/freebsd/sys/netinet6/in6_rss.h b/freebsd/sys/netinet6/in6_rss.h new file mode 100644 index 00000000..f5b48c71 --- /dev/null +++ b/freebsd/sys/netinet6/in6_rss.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * This software was developed by Robert N. M. Watson under contract + * to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET6_IN6_RSS_H_ +#define _NETINET6_IN6_RSS_H_ + +#include <netinet/in.h> /* in_addr_t */ + +/* + * Network stack interface to generate a hash for a protocol tuple. + */ +uint32_t rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport, + const struct in6_addr *dst, u_short dstport); +uint32_t rss_hash_ip6_2tuple(const struct in6_addr *src, + const struct in6_addr *dst); + +/* + * Functions to calculate a software RSS hash for a given mbuf or + * packet detail. 
+ */ +int rss_mbuf_software_hash_v6(const struct mbuf *m, int dir, + uint32_t *hashval, uint32_t *hashtype); +int rss_proto_software_hash_v6(const struct in6_addr *src, + const struct in6_addr *dst, u_short src_port, + u_short dst_port, int proto, uint32_t *hashval, + uint32_t *hashtype); +struct mbuf * rss_soft_m2cpuid_v6(struct mbuf *m, uintptr_t source, + u_int *cpuid); + +#endif /* !_NETINET6_IN6_RSS_H_ */ diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c index a69ecf24..2a50a975 100644 --- a/freebsd/sys/netinet6/in6_src.c +++ b/freebsd/sys/netinet6/in6_src.c @@ -84,9 +84,11 @@ __FBSDID("$FreeBSD$"); #include <sys/time.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <sys/rmlock.h> #include <sys/sx.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_dl.h> #include <net/route.h> #include <net/if_llatbl.h> @@ -105,6 +107,7 @@ __FBSDID("$FreeBSD$"); #include <netinet6/in6_var.h> #include <netinet/ip6.h> +#include <netinet6/in6_fib.h> #include <netinet6/in6_pcb.h> #include <netinet6/ip6_var.h> #include <netinet6/scope6_var.h> @@ -133,8 +136,11 @@ static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **, int, u_int); static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, - struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, + struct ip6_moptions *, struct ifnet **, struct ifnet *, u_int); +static int in6_selectsrc(uint32_t, struct sockaddr_in6 *, + struct ip6_pktopts *, struct inpcb *, struct ucred *, + struct ifnet **, struct in6_addr *); static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); @@ -142,7 +148,7 @@ static void init_policy_queue(void); static int add_addrsel_policyent(struct in6_addrpolicy *); static int delete_addrsel_policyent(struct in6_addrpolicy *); static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *), - void *); + void *); static int 
dump_addrsel_policyent(struct in6_addrpolicy *, void *); static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); @@ -174,11 +180,12 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); goto out; /* XXX: we can't use 'break' here */ \ } while(0) -int -in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct inpcb *inp, struct route_in6 *ro, struct ucred *cred, +static int +in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp) { + struct rm_priotracker in6_ifa_tracker; struct in6_addr dst, tmp; struct ifnet *ifp = NULL, *oifp = NULL; struct in6_ifaddr *ia = NULL, *ia_best = NULL; @@ -221,12 +228,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { - struct sockaddr_in6 srcsock; - struct in6_ifaddr *ia6; - /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, - (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) + if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, + fibnum)) != 0) return (error); @@ -237,33 +241,36 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * the interface must be specified; otherwise, ifa_ifwithaddr() * will fail matching the address. 
*/ - bzero(&srcsock, sizeof(srcsock)); - srcsock.sin6_family = AF_INET6; - srcsock.sin6_len = sizeof(srcsock); - srcsock.sin6_addr = pi->ipi6_addr; + tmp = pi->ipi6_addr; if (ifp) { - error = in6_setscope(&srcsock.sin6_addr, ifp, NULL); + error = in6_setscope(&tmp, ifp, &odstzone); if (error) return (error); } if (cred != NULL && (error = prison_local_ip6(cred, - &srcsock.sin6_addr, (inp != NULL && - (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) + &tmp, (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) return (error); - ia6 = (struct in6_ifaddr *)ifa_ifwithaddr( - (struct sockaddr *)&srcsock); - if (ia6 == NULL || - (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { - if (ia6 != NULL) - ifa_free(&ia6->ia_ifa); - return (EADDRNOTAVAIL); - } - pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ + /* + * If IPV6_BINDANY socket option is set, we allow to specify + * non local addresses as source address in IPV6_PKTINFO + * ancillary data. + */ + if ((inp->inp_flags & INP_BINDANY) == 0) { + ia = in6ifa_ifwithaddr(&tmp, 0 /* XXX */); + if (ia == NULL || (ia->ia6_flags & (IN6_IFF_ANYCAST | + IN6_IFF_NOTREADY))) { + if (ia != NULL) + ifa_free(&ia->ia_ifa); + return (EADDRNOTAVAIL); + } + bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp)); + ifa_free(&ia->ia_ifa); + } else + bcopy(&tmp, srcp, sizeof(*srcp)); + pi->ipi6_addr = tmp; /* XXX: this overrides pi */ if (ifpp) *ifpp = ifp; - bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp)); - ifa_free(&ia6->ia_ifa); return (0); } @@ -291,7 +298,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * the outgoing interface and the destination address. */ /* get the outgoing interface */ - if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, + if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, (inp != NULL) ? 
inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) return (error); @@ -304,7 +311,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, return (error); rule = 0; - IN6_IFADDR_RLOCK(); + IN6_IFADDR_RLOCK(&in6_ifa_tracker); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { int new_scope = -1, new_matchlen = -1; struct in6_addrpolicy *new_policy = NULL; @@ -445,6 +452,14 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, REPLACE(8); /* + * Rule 9: prefer address with better virtual status. + */ + if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa)) + REPLACE(9); + if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa)) + NEXT(9); + + /* * Rule 10: prefer address with `prefer_source' flag. */ if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 && @@ -494,7 +509,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } if ((ia = ia_best) == NULL) { - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); IP6STAT_INC(ip6s_sources_none); return (EADDRNOTAVAIL); } @@ -511,7 +526,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, tmp = ia->ia_addr.sin6_addr; if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); IP6STAT_INC(ip6s_sources_none); return (EADDRNOTAVAIL); } @@ -530,11 +545,84 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, IP6STAT_INC(ip6s_sources_otherscope[best_scope]); if (IFA6_IS_DEPRECATED(ia)) IP6STAT_INC(ip6s_sources_deprecated[best_scope]); - IN6_IFADDR_RUNLOCK(); + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); return (0); } /* + * Select source address based on @inp, @dstsock and @opts. + * Stores selected address to @srcp. If @scope_ambiguous is set, + * embed scope from selected outgoing interface. If @hlim pointer + * is provided, stores calculated hop limit there. + * Returns 0 on success. 
+ */ +int +in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, + struct inpcb *inp, struct ucred *cred, int scope_ambiguous, + struct in6_addr *srcp, int *hlim) +{ + struct ifnet *retifp; + uint32_t fibnum; + int error; + + fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB; + retifp = NULL; + + error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp); + if (error != 0) + return (error); + + if (hlim != NULL) + *hlim = in6_selecthlim(inp, retifp); + + if (retifp == NULL || scope_ambiguous == 0) + return (0); + + /* + * Application should provide a proper zone ID or the use of + * default zone IDs should be enabled. Unfortunately, some + * applications do not behave as it should, so we need a + * workaround. Even if an appropriate ID is not determined + * (when it's required), if we can determine the outgoing + * interface. determine the zone ID based on the interface. + */ + error = in6_setscope(&dstsock->sin6_addr, retifp, NULL); + + return (error); +} + +/* + * Select source address based on @fibnum, @dst and @scopeid. + * Stores selected address to @srcp. + * Returns 0 on success. 
+ * + * Used by non-socket based consumers (ND code mostly) + */ +int +in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst, + uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp, + int *hlim) +{ + struct ifnet *retifp; + struct sockaddr_in6 dst_sa; + int error; + + retifp = ifp; + bzero(&dst_sa, sizeof(dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_addr = *dst; + dst_sa.sin6_scope_id = scopeid; + sa6_embedscope(&dst_sa, 0); + + error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp); + if (hlim != NULL) + *hlim = in6_selecthlim(NULL, retifp); + + return (error); +} + +/* * clone - meaningful only for bsdi and freebsd */ static int @@ -548,6 +636,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct sockaddr_in6 *sin6_next; struct in6_pktinfo *pi = NULL; struct in6_addr *dst = &dstsock->sin6_addr; + uint32_t zoneid; #if 0 char ip6buf[INET6_ADDRSTRLEN]; @@ -578,7 +667,6 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } else goto getroute; } - /* * If the destination address is a multicast address and the outgoing * interface for the address is specified by the caller, use it. @@ -587,6 +675,18 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { goto done; /* we do not need a route for multicast. */ } + /* + * If destination address is LLA or link- or node-local multicast, + * use it's embedded scope zone id to determine outgoing interface. 
+ */ + if (IN6_IS_ADDR_MC_LINKLOCAL(dst) || + IN6_IS_ADDR_MC_NODELOCAL(dst)) { + zoneid = ntohs(in6_getscope(dst)); + if (zoneid > 0) { + ifp = in6_getlinkifnet(zoneid); + goto done; + } + } getroute: /* @@ -595,81 +695,38 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if (opts && opts->ip6po_nexthop) { struct route_in6 *ron; - struct llentry *la; - - sin6_next = satosin6(opts->ip6po_nexthop); - - /* at this moment, we only support AF_INET6 next hops */ - if (sin6_next->sin6_family != AF_INET6) { - error = EAFNOSUPPORT; /* or should we proceed? */ - goto done; - } - - /* - * If the next hop is an IPv6 address, then the node identified - * by that address must be a neighbor of the sending host. - */ - ron = &opts->ip6po_nextroute; - /* - * XXX what do we do here? - * PLZ to be fixing - */ - - if (ron->ro_rt == NULL) { - in6_rtalloc(ron, fibnum); /* multi path case? */ - if (ron->ro_rt == NULL) { - if (ron->ro_rt) { - RTFREE(ron->ro_rt); - ron->ro_rt = NULL; - } - error = EHOSTUNREACH; + sin6_next = satosin6(opts->ip6po_nexthop); + if (IN6_IS_ADDR_LINKLOCAL(&sin6_next->sin6_addr)) { + /* + * Next hop is LLA, thus it should be neighbor. + * Determine outgoing interface by zone index. 
+ */ + zoneid = ntohs(in6_getscope(&sin6_next->sin6_addr)); + if (zoneid > 0) { + ifp = in6_getlinkifnet(zoneid); goto done; - } - } - - rt = ron->ro_rt; - ifp = rt->rt_ifp; - IF_AFDATA_RLOCK(ifp); - la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr); - IF_AFDATA_RUNLOCK(ifp); - if (la != NULL) - LLE_RUNLOCK(la); - else { - error = EHOSTUNREACH; - goto done; - } -#if 0 - if ((ron->ro_rt && - (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != - (RTF_UP | RTF_LLINFO)) || - !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, - &sin6_next->sin6_addr)) { - if (ron->ro_rt) { - RTFREE(ron->ro_rt); - ron->ro_rt = NULL; } - *satosin6(&ron->ro_dst) = *sin6_next; } + ron = &opts->ip6po_nextroute; + /* Use a cached route if it exists and is valid. */ + if (ron->ro_rt != NULL && ( + (ron->ro_rt->rt_flags & RTF_UP) == 0 || + ron->ro_dst.sin6_family != AF_INET6 || + !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr, + &sin6_next->sin6_addr))) + RO_RTFREE(ron); if (ron->ro_rt == NULL) { + ron->ro_dst = *sin6_next; in6_rtalloc(ron, fibnum); /* multi path case? */ - if (ron->ro_rt == NULL || - !(ron->ro_rt->rt_flags & RTF_LLINFO)) { - if (ron->ro_rt) { - RTFREE(ron->ro_rt); - ron->ro_rt = NULL; - } - error = EHOSTUNREACH; - goto done; - } } -#endif - /* - * When cloning is required, try to allocate a route to the - * destination so that the caller can store path MTU - * information. + * The node identified by that address must be a + * neighbor of the sending host. 
*/ + if (ron->ro_rt == NULL || + (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0) + error = EHOSTUNREACH; goto done; } @@ -782,24 +839,27 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, static int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, - struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, + struct ip6_moptions *mopts, struct ifnet **retifp, struct ifnet *oifp, u_int fibnum) { int error; struct route_in6 sro; struct rtentry *rt = NULL; + int rt_flags; KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); - if (ro == NULL) { - bzero(&sro, sizeof(sro)); - ro = &sro; - } + bzero(&sro, sizeof(sro)); + rt_flags = 0; + + error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum); - if ((error = selectroute(dstsock, opts, mopts, ro, retifp, - &rt, 1, fibnum)) != 0) { - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); + if (rt) + rt_flags = rt->rt_flags; + if (rt && rt == sro.ro_rt) + RTFREE(rt); + + if (error != 0) { /* Help ND. See oifp comment in in6_selectsrc(). */ if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { *retifp = oifp; @@ -825,16 +885,12 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * Although this may not be very harmful, it should still be confusing. * We thus reject the case here. */ - if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { - int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); - return (flags); + if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) { + error = (rt_flags & RTF_HOST ? 
EHOSTUNREACH : ENETUNREACH); + return (error); } - if (ro == &sro && rt && rt == sro.ro_rt) - RTFREE(rt); return (0); } @@ -882,19 +938,16 @@ in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) else if (ifp) return (ND_IFINFO(ifp)->chlim); else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { - struct route_in6 ro6; - struct ifnet *lifp; - - bzero(&ro6, sizeof(ro6)); - ro6.ro_dst.sin6_family = AF_INET6; - ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); - ro6.ro_dst.sin6_addr = in6p->in6p_faddr; - in6_rtalloc(&ro6, in6p->inp_inc.inc_fibnum); - if (ro6.ro_rt) { - lifp = ro6.ro_rt->rt_ifp; - RTFREE(ro6.ro_rt); - if (lifp) - return (ND_IFINFO(lifp)->chlim); + struct nhop6_basic nh6; + struct in6_addr dst; + uint32_t fibnum, scopeid; + int hlim; + + fibnum = in6p->inp_inc.inc_fibnum; + in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid); + if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){ + hlim = ND_IFINFO(nh6.nh_ifp)->chlim; + return (hlim); } } return (V_ip6_defhlim); @@ -1005,7 +1058,6 @@ in6_src_sysctl(SYSCTL_HANDLER_ARGS) int in6_src_ioctl(u_long cmd, caddr_t data) { - int i; struct in6_addrpolicy ent0; if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) @@ -1019,10 +1071,7 @@ in6_src_ioctl(u_long cmd, caddr_t data) if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) return (EINVAL); /* clear trailing garbages (if any) of the prefix address. 
*/ - for (i = 0; i < 4; i++) { - ent0.addr.sin6_addr.s6_addr32[i] &= - ent0.addrmask.sin6_addr.s6_addr32[i]; - } + IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr); ent0.use = 0; switch (cmd) { @@ -1125,8 +1174,7 @@ delete_addrsel_policyent(struct in6_addrpolicy *key) } static int -walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), - void *w) +walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w) { struct addrsel_policyent *pol; int error = 0; diff --git a/freebsd/sys/netinet6/in6_var.h b/freebsd/sys/netinet6/in6_var.h index 90530a68..77e5920b 100644 --- a/freebsd/sys/netinet6/in6_var.h +++ b/freebsd/sys/netinet6/in6_var.h @@ -65,8 +65,10 @@ #define _NETINET6_IN6_VAR_H_ #include <sys/tree.h> +#include <sys/counter.h> #ifdef _KERNEL +#include <sys/fnv_hash.h> #include <sys/libkern.h> #endif @@ -95,19 +97,20 @@ struct in6_addrlifetime { struct nd_ifinfo; struct scope6_id; struct lltable; -struct mld_ifinfo; +struct mld_ifsoftc; struct in6_ifextra { - struct in6_ifstat *in6_ifstat; - struct icmp6_ifstat *icmp6_ifstat; + counter_u64_t *in6_ifstat; + counter_u64_t *icmp6_ifstat; struct nd_ifinfo *nd_ifinfo; struct scope6_id *scope6_id; struct lltable *lltable; - struct mld_ifinfo *mld_ifinfo; + struct mld_ifsoftc *mld_ifinfo; }; #define LLTABLE6(ifp) (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->lltable) +#ifdef _KERNEL struct in6_ifaddr { struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp @@ -131,10 +134,14 @@ struct in6_ifaddr { /* multicast addresses joined from the kernel */ LIST_HEAD(, in6_multi_mship) ia6_memberships; + /* entry in bucket of inet6 addresses */ + LIST_ENTRY(in6_ifaddr) ia6_hash; }; /* List of in6_ifaddr's. 
*/ TAILQ_HEAD(in6_ifaddrhead, in6_ifaddr); +LIST_HEAD(in6_ifaddrlisthead, in6_ifaddr); +#endif /* _KERNEL */ /* control structure to manage address selection policy */ struct in6_addrpolicy { @@ -149,37 +156,37 @@ struct in6_addrpolicy { * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12). */ struct in6_ifstat { - u_quad_t ifs6_in_receive; /* # of total input datagram */ - u_quad_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */ - u_quad_t ifs6_in_toobig; /* # of datagrams exceeded MTU */ - u_quad_t ifs6_in_noroute; /* # of datagrams with no route */ - u_quad_t ifs6_in_addrerr; /* # of datagrams with invalid dst */ - u_quad_t ifs6_in_protounknown; /* # of datagrams with unknown proto */ + uint64_t ifs6_in_receive; /* # of total input datagram */ + uint64_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */ + uint64_t ifs6_in_toobig; /* # of datagrams exceeded MTU */ + uint64_t ifs6_in_noroute; /* # of datagrams with no route */ + uint64_t ifs6_in_addrerr; /* # of datagrams with invalid dst */ + uint64_t ifs6_in_protounknown; /* # of datagrams with unknown proto */ /* NOTE: increment on final dst if */ - u_quad_t ifs6_in_truncated; /* # of truncated datagrams */ - u_quad_t ifs6_in_discard; /* # of discarded datagrams */ + uint64_t ifs6_in_truncated; /* # of truncated datagrams */ + uint64_t ifs6_in_discard; /* # of discarded datagrams */ /* NOTE: fragment timeout is not here */ - u_quad_t ifs6_in_deliver; /* # of datagrams delivered to ULP */ + uint64_t ifs6_in_deliver; /* # of datagrams delivered to ULP */ /* NOTE: increment on final dst if */ - u_quad_t ifs6_out_forward; /* # of datagrams forwarded */ + uint64_t ifs6_out_forward; /* # of datagrams forwarded */ /* NOTE: increment on outgoing if */ - u_quad_t ifs6_out_request; /* # of outgoing datagrams from ULP */ + uint64_t ifs6_out_request; /* # of outgoing datagrams from ULP */ /* NOTE: does not include forwrads */ - u_quad_t ifs6_out_discard; /* # of discarded datagrams */ - 
u_quad_t ifs6_out_fragok; /* # of datagrams fragmented */ - u_quad_t ifs6_out_fragfail; /* # of datagrams failed on fragment */ - u_quad_t ifs6_out_fragcreat; /* # of fragment datagrams */ + uint64_t ifs6_out_discard; /* # of discarded datagrams */ + uint64_t ifs6_out_fragok; /* # of datagrams fragmented */ + uint64_t ifs6_out_fragfail; /* # of datagrams failed on fragment */ + uint64_t ifs6_out_fragcreat; /* # of fragment datagrams */ /* NOTE: this is # after fragment */ - u_quad_t ifs6_reass_reqd; /* # of incoming fragmented packets */ + uint64_t ifs6_reass_reqd; /* # of incoming fragmented packets */ /* NOTE: increment on final dst if */ - u_quad_t ifs6_reass_ok; /* # of reassembled packets */ + uint64_t ifs6_reass_ok; /* # of reassembled packets */ /* NOTE: this is # after reass */ /* NOTE: increment on final dst if */ - u_quad_t ifs6_reass_fail; /* # of reass failures */ + uint64_t ifs6_reass_fail; /* # of reass failures */ /* NOTE: may not be packet count */ /* NOTE: increment on final dst if */ - u_quad_t ifs6_in_mcast; /* # of inbound multicast datagrams */ - u_quad_t ifs6_out_mcast; /* # of outbound multicast datagrams */ + uint64_t ifs6_in_mcast; /* # of inbound multicast datagrams */ + uint64_t ifs6_out_mcast; /* # of outbound multicast datagrams */ }; /* @@ -191,77 +198,77 @@ struct icmp6_ifstat { * Input statistics */ /* ipv6IfIcmpInMsgs, total # of input messages */ - u_quad_t ifs6_in_msg; + uint64_t ifs6_in_msg; /* ipv6IfIcmpInErrors, # of input error messages */ - u_quad_t ifs6_in_error; + uint64_t ifs6_in_error; /* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */ - u_quad_t ifs6_in_dstunreach; + uint64_t ifs6_in_dstunreach; /* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */ - u_quad_t ifs6_in_adminprohib; + uint64_t ifs6_in_adminprohib; /* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */ - u_quad_t ifs6_in_timeexceed; + uint64_t ifs6_in_timeexceed; /* ipv6IfIcmpInParmProblems, # of input parameter 
problem errors */ - u_quad_t ifs6_in_paramprob; + uint64_t ifs6_in_paramprob; /* ipv6IfIcmpInPktTooBigs, # of input packet too big errors */ - u_quad_t ifs6_in_pkttoobig; + uint64_t ifs6_in_pkttoobig; /* ipv6IfIcmpInEchos, # of input echo requests */ - u_quad_t ifs6_in_echo; + uint64_t ifs6_in_echo; /* ipv6IfIcmpInEchoReplies, # of input echo replies */ - u_quad_t ifs6_in_echoreply; + uint64_t ifs6_in_echoreply; /* ipv6IfIcmpInRouterSolicits, # of input router solicitations */ - u_quad_t ifs6_in_routersolicit; + uint64_t ifs6_in_routersolicit; /* ipv6IfIcmpInRouterAdvertisements, # of input router advertisements */ - u_quad_t ifs6_in_routeradvert; + uint64_t ifs6_in_routeradvert; /* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */ - u_quad_t ifs6_in_neighborsolicit; + uint64_t ifs6_in_neighborsolicit; /* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */ - u_quad_t ifs6_in_neighboradvert; + uint64_t ifs6_in_neighboradvert; /* ipv6IfIcmpInRedirects, # of input redirects */ - u_quad_t ifs6_in_redirect; + uint64_t ifs6_in_redirect; /* ipv6IfIcmpInGroupMembQueries, # of input MLD queries */ - u_quad_t ifs6_in_mldquery; + uint64_t ifs6_in_mldquery; /* ipv6IfIcmpInGroupMembResponses, # of input MLD reports */ - u_quad_t ifs6_in_mldreport; + uint64_t ifs6_in_mldreport; /* ipv6IfIcmpInGroupMembReductions, # of input MLD done */ - u_quad_t ifs6_in_mlddone; + uint64_t ifs6_in_mlddone; /* * Output statistics. We should solve unresolved routing problem... 
*/ /* ipv6IfIcmpOutMsgs, total # of output messages */ - u_quad_t ifs6_out_msg; + uint64_t ifs6_out_msg; /* ipv6IfIcmpOutErrors, # of output error messages */ - u_quad_t ifs6_out_error; + uint64_t ifs6_out_error; /* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */ - u_quad_t ifs6_out_dstunreach; + uint64_t ifs6_out_dstunreach; /* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */ - u_quad_t ifs6_out_adminprohib; + uint64_t ifs6_out_adminprohib; /* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */ - u_quad_t ifs6_out_timeexceed; + uint64_t ifs6_out_timeexceed; /* ipv6IfIcmpOutParmProblems, # of output parameter problem errors */ - u_quad_t ifs6_out_paramprob; + uint64_t ifs6_out_paramprob; /* ipv6IfIcmpOutPktTooBigs, # of output packet too big errors */ - u_quad_t ifs6_out_pkttoobig; + uint64_t ifs6_out_pkttoobig; /* ipv6IfIcmpOutEchos, # of output echo requests */ - u_quad_t ifs6_out_echo; + uint64_t ifs6_out_echo; /* ipv6IfIcmpOutEchoReplies, # of output echo replies */ - u_quad_t ifs6_out_echoreply; + uint64_t ifs6_out_echoreply; /* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */ - u_quad_t ifs6_out_routersolicit; + uint64_t ifs6_out_routersolicit; /* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */ - u_quad_t ifs6_out_routeradvert; + uint64_t ifs6_out_routeradvert; /* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */ - u_quad_t ifs6_out_neighborsolicit; + uint64_t ifs6_out_neighborsolicit; /* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */ - u_quad_t ifs6_out_neighboradvert; + uint64_t ifs6_out_neighboradvert; /* ipv6IfIcmpOutRedirects, # of output redirects */ - u_quad_t ifs6_out_redirect; + uint64_t ifs6_out_redirect; /* ipv6IfIcmpOutGroupMembQueries, # of output MLD queries */ - u_quad_t ifs6_out_mldquery; + uint64_t ifs6_out_mldquery; /* ipv6IfIcmpOutGroupMembResponses, # of output MLD reports */ - u_quad_t ifs6_out_mldreport; 
+ uint64_t ifs6_out_mldreport; /* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */ - u_quad_t ifs6_out_mlddone; + uint64_t ifs6_out_mlddone; }; struct in6_ifreq { @@ -287,6 +294,17 @@ struct in6_aliasreq { struct sockaddr_in6 ifra_prefixmask; int ifra_flags; struct in6_addrlifetime ifra_lifetime; + int ifra_vhid; +}; + +/* pre-10.x compat */ +struct oin6_aliasreq { + char ifra_name[IFNAMSIZ]; + struct sockaddr_in6 ifra_addr; + struct sockaddr_in6 ifra_dstaddr; + struct sockaddr_in6 ifra_prefixmask; + int ifra_flags; + struct in6_addrlifetime ifra_lifetime; }; /* prefix type macro */ @@ -391,6 +409,12 @@ struct in6_rrenumreq { (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \ (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \ (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 ) +#define IN6_MASK_ADDR(a, m) do { \ + (a)->s6_addr32[0] &= (m)->s6_addr32[0]; \ + (a)->s6_addr32[1] &= (m)->s6_addr32[1]; \ + (a)->s6_addr32[2] &= (m)->s6_addr32[2]; \ + (a)->s6_addr32[3] &= (m)->s6_addr32[3]; \ +} while (0) #endif #define SIOCSIFADDR_IN6 _IOW('i', 12, struct in6_ifreq) @@ -409,7 +433,8 @@ struct in6_rrenumreq { #define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq) #define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq) -#define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq) +#define OSIOCAIFADDR_IN6 _IOW('i', 26, struct oin6_aliasreq) +#define SIOCAIFADDR_IN6 _IOW('i', 27, struct in6_aliasreq) #define SIOCSIFPHYADDR_IN6 _IOW('i', 70, struct in6_aliasreq) #define SIOCGIFPSRCADDR_IN6 _IOWR('i', 71, struct in6_ifreq) @@ -417,11 +442,6 @@ struct in6_rrenumreq { #define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq) -#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist) -#ifdef _KERNEL -/* XXX: SIOCGPRLST_IN6 is exposed in KAME but in6_oprlist is not. 
*/ -#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_oprlist) -#endif #ifdef _KERNEL #define OSIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq) #endif @@ -433,7 +453,6 @@ struct in6_rrenumreq { #define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq) #define SIOCGIFALIFETIME_IN6 _IOWR('i', 81, struct in6_ifreq) -#define SIOCSIFALIFETIME_IN6 _IOWR('i', 82, struct in6_ifreq) #define SIOCGIFSTAT_IN6 _IOWR('i', 83, struct in6_ifreq) #define SIOCGIFSTAT_ICMP6 _IOWR('i', 84, struct in6_ifreq) @@ -469,14 +488,11 @@ struct in6_rrenumreq { #define IN6_IFF_DETACHED 0x08 /* may be detached from the link */ #define IN6_IFF_DEPRECATED 0x10 /* deprecated address */ #define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address - * (used only at first SIOC* call) + * (obsolete) */ #define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */ #define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */ #define IN6_IFF_PREFER_SOURCE 0x0100 /* preferred address for SAS */ -#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management. - * XXX: this should be temporary. 
- */ /* do not input/output */ #define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED) @@ -488,26 +504,45 @@ struct in6_rrenumreq { #ifdef _KERNEL VNET_DECLARE(struct in6_ifaddrhead, in6_ifaddrhead); +VNET_DECLARE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl); +VNET_DECLARE(u_long, in6_ifaddrhmask); #define V_in6_ifaddrhead VNET(in6_ifaddrhead) +#define V_in6_ifaddrhashtbl VNET(in6_ifaddrhashtbl) +#define V_in6_ifaddrhmask VNET(in6_ifaddrhmask) + +#define IN6ADDR_NHASH_LOG2 8 +#define IN6ADDR_NHASH (1 << IN6ADDR_NHASH_LOG2) +#define IN6ADDR_HASHVAL(x) (in6_addrhash(x)) +#define IN6ADDR_HASH(x) \ + (&V_in6_ifaddrhashtbl[IN6ADDR_HASHVAL(x) & V_in6_ifaddrhmask]) + +static __inline uint32_t +in6_addrhash(const struct in6_addr *in6) +{ + uint32_t x; + + x = in6->s6_addr32[0] ^ in6->s6_addr32[1] ^ in6->s6_addr32[2] ^ + in6->s6_addr32[3]; + return (fnv_32_buf(&x, sizeof(x), FNV1_32_INIT)); +} + +extern struct rmlock in6_ifaddr_lock; +#define IN6_IFADDR_LOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_LOCKED) +#define IN6_IFADDR_RLOCK(t) rm_rlock(&in6_ifaddr_lock, (t)) +#define IN6_IFADDR_RLOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_RLOCKED) +#define IN6_IFADDR_RUNLOCK(t) rm_runlock(&in6_ifaddr_lock, (t)) +#define IN6_IFADDR_WLOCK() rm_wlock(&in6_ifaddr_lock) +#define IN6_IFADDR_WLOCK_ASSERT() rm_assert(&in6_ifaddr_lock, RA_WLOCKED) +#define IN6_IFADDR_WUNLOCK() rm_wunlock(&in6_ifaddr_lock) -extern struct rwlock in6_ifaddr_lock; -#define IN6_IFADDR_LOCK_ASSERT( ) rw_assert(&in6_ifaddr_lock, RA_LOCKED) -#define IN6_IFADDR_RLOCK() rw_rlock(&in6_ifaddr_lock) -#define IN6_IFADDR_RLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_RLOCKED) -#define IN6_IFADDR_RUNLOCK() rw_runlock(&in6_ifaddr_lock) -#define IN6_IFADDR_WLOCK() rw_wlock(&in6_ifaddr_lock) -#define IN6_IFADDR_WLOCK_ASSERT() rw_assert(&in6_ifaddr_lock, RA_WLOCKED) -#define IN6_IFADDR_WUNLOCK() rw_wunlock(&in6_ifaddr_lock) - -VNET_DECLARE(struct icmp6stat, icmp6stat); -#define V_icmp6stat VNET(icmp6stat) #define 
in6_ifstat_inc(ifp, tag) \ do { \ if (ifp) \ - ((struct in6_ifextra *)((ifp)->if_afdata[AF_INET6]))->in6_ifstat->tag++; \ + counter_u64_add(((struct in6_ifextra *) \ + ((ifp)->if_afdata[AF_INET6]))->in6_ifstat[ \ + offsetof(struct in6_ifstat, tag) / sizeof(uint64_t)], 1);\ } while (/*CONSTCOND*/ 0) -extern struct in6_addr zeroin6_addr; extern u_char inet6ctlerrmap[]; VNET_DECLARE(unsigned long, in6_maxmtu); #define V_in6_maxmtu VNET(in6_maxmtu) @@ -552,7 +587,6 @@ ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b) return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr))); } RB_PROTOTYPE(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp); -#endif /* _KERNEL */ /* * IPv6 multicast PCB-layer group filter descriptor. @@ -603,12 +637,12 @@ struct in6_multi { u_int in6m_timer; /* MLD6 listener report timer */ /* New fields for MLDv2 follow. */ - struct mld_ifinfo *in6m_mli; /* MLD info */ + struct mld_ifsoftc *in6m_mli; /* MLD info */ SLIST_ENTRY(in6_multi) in6m_nrele; /* to-be-released by MLD */ struct ip6_msource_tree in6m_srcs; /* tree of sources */ u_long in6m_nsrc; /* # of tree entries */ - struct ifqueue in6m_scq; /* queue of pending + struct mbufq in6m_scq; /* queue of pending * state-change packets */ struct timeval in6m_lastgsrtv; /* last G-S-R query */ uint16_t in6m_sctimer; /* state-change timer */ @@ -652,8 +686,6 @@ im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims, return (MCAST_UNDEFINED); } -#ifdef _KERNEL - /* * Lock macros for IPv6 layer multicast address lists. IPv6 lock goes * before link layer multicast locks in the lock order. 
In most cases, @@ -756,18 +788,20 @@ int in6_control(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); int in6_update_ifa(struct ifnet *, struct in6_aliasreq *, struct in6_ifaddr *, int); +void in6_prepare_ifra(struct in6_aliasreq *, const struct in6_addr *, + const struct in6_addr *); void in6_purgeaddr(struct ifaddr *); int in6if_do_dad(struct ifnet *); -void in6_purgeif(struct ifnet *); void in6_savemkludge(struct in6_ifaddr *); void *in6_domifattach(struct ifnet *); void in6_domifdetach(struct ifnet *, void *); +int in6_domifmtu(struct ifnet *); void in6_setmaxmtu(void); int in6_if2idlen(struct ifnet *); struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int); -struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *); +struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, const struct in6_addr *); +struct in6_ifaddr *in6ifa_ifwithaddr(const struct in6_addr *, uint32_t); struct in6_ifaddr *in6ifa_llaonifp(struct ifnet *); -char *ip6_sprintf(char *, const struct in6_addr *); int in6_addr2zoneid(struct ifnet *, struct in6_addr *, u_int32_t *); int in6_matchlen(struct in6_addr *, struct in6_addr *); int in6_are_prefix_equal(struct in6_addr *, struct in6_addr *, int); @@ -777,12 +811,11 @@ int in6_prefix_ioctl(struct socket *, u_long, caddr_t, int in6_prefix_add_ifid(int, struct in6_ifaddr *); void in6_prefix_remove_ifid(int, struct in6_ifaddr *); void in6_purgeprefix(struct ifnet *); -void in6_ifremloop(struct ifaddr *); -void in6_ifaddloop(struct ifaddr *); int in6_is_addr_deprecated(struct sockaddr_in6 *); int in6_src_ioctl(u_long, caddr_t); +void in6_newaddrmsg(struct in6_ifaddr *, int); /* * Extended API for IPv6 FIB support. 
*/ diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c index 6efae91a..50583537 100644 --- a/freebsd/sys/netinet6/ip6_forward.c +++ b/freebsd/sys/netinet6/ip6_forward.c @@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipfw.h> #include <rtems/bsd/local/opt_ipsec.h> #include <rtems/bsd/local/opt_ipstealth.h> @@ -53,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <net/if.h> +#include <net/if_var.h> #include <net/netisr.h> #include <net/route.h> #include <net/pfil.h> @@ -72,13 +72,12 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_pcb.h> #ifdef IPSEC +#include <netinet6/ip6_ipsec.h> #include <netipsec/ipsec.h> #include <netipsec/ipsec6.h> #include <netipsec/key.h> #endif /* IPSEC */ -#include <netinet6/ip6protosw.h> - /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful @@ -105,29 +104,10 @@ ip6_forward(struct mbuf *m, int srcrt) struct in6_addr src_in6, dst_in6, odst; #ifdef IPSEC struct secpolicy *sp = NULL; - int ipsecrt = 0; -#endif -#ifdef SCTP - int sw_csum; #endif struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; -#ifdef IPSEC - /* - * Check AH/ESP integrity. - */ - /* - * Don't increment ip6s_cantforward because this is the check - * before forwarding packet actually. - */ - if (ipsec6_in_reject(m, NULL)) { - IPSEC6STAT_INC(in_polvio); - m_freem(m); - return; - } -#endif /* IPSEC */ - /* * Do not forward packets to multicast destination (should be handled * by ip6_mforward(). 
@@ -139,8 +119,8 @@ ip6_forward(struct mbuf *m, int srcrt) IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { IP6STAT_INC(ip6s_cantforward); /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ - if (V_ip6_log_time + V_ip6_log_interval < time_second) { - V_ip6_log_time = time_second; + if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { + V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", @@ -152,6 +132,17 @@ ip6_forward(struct mbuf *m, int srcrt) m_freem(m); return; } +#ifdef IPSEC + /* + * Check if this packet has an active SA and needs to be dropped + * instead of forwarded. + */ + if (ip6_ipsec_fwd(m) != 0) { + IP6STAT_INC(ip6s_cantforward); + m_freem(m); + return; + } +#endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ip6stealth) { @@ -181,10 +172,9 @@ ip6_forward(struct mbuf *m, int srcrt) #ifdef IPSEC /* get a security policy for this packet */ - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, - IP_FORWARDING, &error); + sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); if (sp == NULL) { - IPSEC6STAT_INC(out_inval); + IPSEC6STAT_INC(ips_out_inval); IP6STAT_INC(ip6s_cantforward); if (mcopy) { #if 0 @@ -205,7 +195,7 @@ ip6_forward(struct mbuf *m, int srcrt) /* * This packet is just discarded. */ - IPSEC6STAT_INC(out_polvio); + IPSEC6STAT_INC(ips_out_polvio); IP6STAT_INC(ip6s_cantforward); KEY_FREESP(&sp); if (mcopy) { @@ -253,12 +243,10 @@ ip6_forward(struct mbuf *m, int srcrt) { struct ipsecrequest *isr = NULL; - struct ipsec_output_state state; /* * when the kernel forwards a packet, it is not proper to apply - * IPsec transport mode to the packet is not proper. this check - * avoid from this. + * IPsec transport mode to the packet. This check avoid from this. * at present, if there is even a transport mode SA request in the * security policy, the kernel does not apply IPsec to the packet. * this check is not enough because the following case is valid. 
@@ -286,18 +274,27 @@ ip6_forward(struct mbuf *m, int srcrt) * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ - bzero(&state, sizeof(state)); - state.m = m; - state.ro = NULL; /* update at ipsec6_output_tunnel() */ - state.dst = NULL; /* update at ipsec6_output_tunnel() */ - - error = ipsec6_output_tunnel(&state, sp, 0); - m = state.m; - KEY_FREESP(&sp); + /* + * If we need to encapsulate the packet, do it here + * ipsec6_proces_packet will send the packet using ip6_output + */ + error = ipsec6_process_packet(m, sp->req); + /* Release SP if an error occurred */ + if (error != 0) + KEY_FREESP(&sp); + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing. + */ + error = 0; + goto skip_ipsec; + } if (error) { - /* mbuf is already reclaimed in ipsec6_output_tunnel. */ + /* mbuf is already reclaimed in ipsec6_process_packet. */ switch (error) { case EHOSTUNREACH: case ENETUNREACH: @@ -320,7 +317,6 @@ ip6_forward(struct mbuf *m, int srcrt) m_freem(mcopy); #endif } - m_freem(m); return; } else { /* @@ -332,25 +328,7 @@ ip6_forward(struct mbuf *m, int srcrt) m = NULL; goto freecopy; } - - if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){ - /* - * now tunnel mode headers are added. we are originating - * packet instead of forwarding the packet. - */ - ip6_output(m, NULL, NULL, IPV6_FORWARDING/*XXX*/, NULL, NULL, - NULL); - goto freecopy; - } - - /* adjust pointer */ - dst = (struct sockaddr_in6 *)state.dst; - rt = state.ro ? 
state.ro->ro_rt : NULL; - if (dst != NULL && rt != NULL) - ipsecrt = 1; } - if (ipsecrt) - goto skip_routing; skip_ipsec: #endif again: @@ -361,6 +339,7 @@ again: dst->sin6_addr = ip6->ip6_dst; again2: rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); + rt = rin6.ro_rt; if (rin6.ro_rt != NULL) RT_UNLOCK(rin6.ro_rt); else { @@ -372,10 +351,6 @@ again2: } goto bad; } - rt = rin6.ro_rt; -#ifdef IPSEC -skip_routing: -#endif /* * Source scope check: if a packet can't be delivered to its @@ -398,17 +373,13 @@ skip_routing: IP6STAT_INC(ip6s_badscope); goto bad; } - if (inzone != outzone -#ifdef IPSEC - && !ipsecrt -#endif - ) { + if (inzone != outzone) { IP6STAT_INC(ip6s_cantforward); IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); - if (V_ip6_log_time + V_ip6_log_interval < time_second) { - V_ip6_log_time = time_second; + if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { + V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", @@ -439,46 +410,6 @@ skip_routing: goto bad; } - if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { - in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); - if (mcopy) { - u_long mtu; -#ifdef IPSEC - struct secpolicy *sp; - int ipsecerror; - size_t ipsechdrsiz; -#endif /* IPSEC */ - - mtu = IN6_LINKMTU(rt->rt_ifp); -#ifdef IPSEC - /* - * When we do IPsec tunnel ingress, we need to play - * with the link value (decrement IPsec header size - * from mtu value). The code is much simpler than v4 - * case, as we have the outgoing interface for - * encapsulated packet as "rt->rt_ifp". - */ - sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, - IP_FORWARDING, &ipsecerror); - if (sp) { - ipsechdrsiz = ipsec_hdrsiz(mcopy, - IPSEC_DIR_OUTBOUND, NULL); - if (ipsechdrsiz < mtu) - mtu -= ipsechdrsiz; - } - - /* - * if mtu becomes less than minimum MTU, - * tell minimum MTU (and I'll need to fragment it). 
- */ - if (mtu < IPV6_MMTU) - mtu = IPV6_MMTU; -#endif /* IPSEC */ - icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); - } - goto bad; - } - if (rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)rt->rt_gateway; @@ -492,9 +423,6 @@ skip_routing: * modified by a redirect. */ if (V_ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt && -#ifdef IPSEC - !ipsecrt && -#endif /* IPSEC */ (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) { if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) { /* @@ -573,23 +501,12 @@ skip_routing: if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ - if (in6_localip(&ip6->ip6_dst)) { + if (in6_localip(&ip6->ip6_dst)) m->m_flags |= M_FASTFWD_OURS; - if (m->m_pkthdr.rcvif == NULL) - m->m_pkthdr.rcvif = V_loif; - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { - m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; - } -#ifdef SCTP - if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) - m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; -#endif - error = netisr_queue(NETISR_IPV6, m); - goto out; - } else + else { + RTFREE(rt); goto again; /* Redo the routing table lookup. */ + } } /* See if local, if yes, send it to netisr. */ @@ -616,11 +533,46 @@ skip_routing: m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); + RTFREE(rt); goto again2; } pass: - error = nd6_output(rt->rt_ifp, origifp, m, dst, rt); + /* See if the size was changed by the packet filter. */ + if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { + in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); + if (mcopy) { + u_long mtu; +#ifdef IPSEC + size_t ipsechdrsiz; +#endif /* IPSEC */ + + mtu = IN6_LINKMTU(rt->rt_ifp); +#ifdef IPSEC + /* + * When we do IPsec tunnel ingress, we need to play + * with the link value (decrement IPsec header size + * from mtu value). 
The code is much simpler than v4 + * case, as we have the outgoing interface for + * encapsulated packet as "rt->rt_ifp". + */ + ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, + NULL); + if (ipsechdrsiz < mtu) + mtu -= ipsechdrsiz; + /* + * if mtu becomes less than minimum MTU, + * tell minimum MTU (and I'll need to fragment it). + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU; +#endif /* IPSEC */ + icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); + } + goto bad; + } + + error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst, NULL); if (error) { in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); IP6STAT_INC(ip6s_cantforward); @@ -671,10 +623,6 @@ pass: bad: m_freem(m); out: - if (rt != NULL -#ifdef IPSEC - && !ipsecrt -#endif - ) + if (rt != NULL) RTFREE(rt); } diff --git a/freebsd/sys/netinet6/ip6_id.c b/freebsd/sys/netinet6/ip6_id.c index e277def6..4e1a74e6 100644 --- a/freebsd/sys/netinet6/ip6_id.c +++ b/freebsd/sys/netinet6/ip6_id.c @@ -99,6 +99,7 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/route.h> +#include <net/vnet.h> #include <netinet/in.h> #include <netinet/ip6.h> #include <netinet6/ip6_var.h> @@ -109,7 +110,7 @@ __FBSDID("$FreeBSD$"); struct randomtab { const int ru_bits; /* resulting bits */ - const long ru_out; /* Time after wich will be reseeded */ + const long ru_out; /* Time after which will be reseeded */ const u_int32_t ru_max; /* Uniq cycle, avoid blackjack prediction */ const u_int32_t ru_gen; /* Starting generator */ const u_int32_t ru_n; /* ru_n: prime, ru_n - 1: product of pfacts[] */ @@ -129,7 +130,7 @@ struct randomtab { static struct randomtab randomtab_32 = { 32, /* resulting bits */ - 180, /* Time after wich will be reseeded */ + 180, /* Time after which will be reseeded */ 1000000000, /* Uniq cycle, avoid blackjack prediction */ 2, /* Starting generator */ 2147483629, /* RU_N-1 = 2^2*3^2*59652323 */ @@ -140,7 +141,7 @@ static struct randomtab randomtab_32 = { static struct randomtab randomtab_20 = { 20, /* resulting bits */ 
- 180, /* Time after wich will be reseeded */ + 180, /* Time after which will be reseeded */ 200000, /* Uniq cycle, avoid blackjack prediction */ 2, /* Starting generator */ 524269, /* RU_N-1 = 2^2*3^2*14563 */ @@ -223,7 +224,7 @@ initid(struct randomtab *p) p->ru_g = pmod(p->ru_gen, j, p->ru_n); p->ru_counter = 0; - p->ru_reseed = time_second + p->ru_out; + p->ru_reseed = time_uptime + p->ru_out; p->ru_msb = p->ru_msb ? 0 : (1U << (p->ru_bits - 1)); } @@ -233,7 +234,7 @@ randomid(struct randomtab *p) int i, n; u_int32_t tmp; - if (p->ru_counter >= p->ru_max || time_second > p->ru_reseed) + if (p->ru_counter >= p->ru_max || time_uptime > p->ru_reseed) initid(p); tmp = arc4random(); diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c index 10536316..c7ffe759 100644 --- a/freebsd/sys/netinet6/ip6_input.c +++ b/freebsd/sys/netinet6/ip6_input.c @@ -67,33 +67,41 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipfw.h> #include <rtems/bsd/local/opt_ipsec.h> #include <rtems/bsd/local/opt_route.h> +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> +#include <sys/hhook.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/proc.h> #include <sys/domain.h> #include <sys/protosw.h> +#include <sys/sdt.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <rtems/bsd/sys/errno.h> #include <sys/time.h> #include <sys/kernel.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/rmlock.h> #include <sys/syslog.h> +#include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_types.h> #include <net/if_dl.h> #include <net/route.h> #include <net/netisr.h> +#include <net/rss_config.h> #include <net/pfil.h> #include <net/vnet.h> #include <netinet/in.h> +#include <netinet/in_kdtrace.h> #include <netinet/ip_var.h> #include <netinet/in_systm.h> #include <net/if_llatbl.h> @@ -108,7 +116,9 @@ 
__FBSDID("$FreeBSD$"); #include <netinet/icmp6.h> #include <netinet6/scope6_var.h> #include <netinet6/in6_ifattach.h> +#include <netinet6/mld6_var.h> #include <netinet6/nd6.h> +#include <netinet6/in6_rss.h> #ifdef IPSEC #include <netipsec/ipsec.h> @@ -118,39 +128,84 @@ __FBSDID("$FreeBSD$"); #include <netinet6/ip6protosw.h> -#ifdef FLOWTABLE -#include <net/flowtable.h> -VNET_DECLARE(int, ip6_output_flowtable_size); -#define V_ip6_output_flowtable_size VNET(ip6_output_flowtable_size) -#endif - extern struct domain inet6domain; u_char ip6_protox[IPPROTO_MAX]; VNET_DEFINE(struct in6_ifaddrhead, in6_ifaddrhead); +VNET_DEFINE(struct in6_ifaddrlisthead *, in6_ifaddrhashtbl); +VNET_DEFINE(u_long, in6_ifaddrhmask); static struct netisr_handler ip6_nh = { .nh_name = "ip6", .nh_handler = ip6_input, .nh_proto = NETISR_IPV6, +#ifdef RSS + .nh_m2cpuid = rss_soft_m2cpuid_v6, + .nh_policy = NETISR_POLICY_CPU, + .nh_dispatch = NETISR_DISPATCH_HYBRID, +#else .nh_policy = NETISR_POLICY_FLOW, +#endif }; -VNET_DECLARE(struct callout, in6_tmpaddrtimer_ch); -#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch) +static int +sysctl_netinet6_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) +{ + int error, qlimit; + + netisr_getqlimit(&ip6_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&ip6_nh, qlimit)); +} +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRQMAXLEN, intr_queue_maxlen, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_queue_maxlen, "I", + "Maximum size of the IPv6 input queue"); + +#ifdef RSS +static struct netisr_handler ip6_direct_nh = { + .nh_name = "ip6_direct", + .nh_handler = ip6_direct_input, + .nh_proto = NETISR_IPV6_DIRECT, + .nh_m2cpuid = rss_soft_m2cpuid_v6, + .nh_policy = NETISR_POLICY_CPU, + .nh_dispatch = NETISR_DISPATCH_HYBRID, +}; + +static int +sysctl_netinet6_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) +{ + int 
error, qlimit; + + netisr_getqlimit(&ip6_direct_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&ip6_direct_nh, qlimit)); +} +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_INTRDQMAXLEN, intr_direct_queue_maxlen, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet6_intr_direct_queue_maxlen, + "I", "Maximum size of the IPv6 direct input queue"); + +#endif VNET_DEFINE(struct pfil_head, inet6_pfil_hook); -VNET_DEFINE(struct ip6stat, ip6stat); +VNET_PCPUSTAT_DEFINE(struct ip6stat, ip6stat); +VNET_PCPUSTAT_SYSINIT(ip6stat); +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(ip6stat); +#endif /* VIMAGE */ -struct rwlock in6_ifaddr_lock; -RW_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); +struct rmlock in6_ifaddr_lock; +RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock"); -static void ip6_init2(void *); -static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); -static struct ip6aux *ip6_addaux(struct mbuf *); -static struct ip6aux *ip6_findaux(struct mbuf *m); -static void ip6_delaux (struct mbuf *); static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #ifdef PULLDOWN_TEST static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); @@ -163,7 +218,7 @@ static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); void ip6_init(void) { - struct ip6protosw *pr; + struct protosw *pr; int i; TUNABLE_INT_FETCH("net.inet6.ip6.auto_linklocal", @@ -172,6 +227,8 @@ ip6_init(void) TUNABLE_INT_FETCH("net.inet6.ip6.no_radr", &V_ip6_no_radr); TAILQ_INIT(&V_in6_ifaddrhead); + V_in6_ifaddrhashtbl = hashinit(IN6ADDR_NHASH, M_IFADDR, + &V_in6_ifaddrhmask); /* Initialize packet filter hooks. 
*/ V_inet6_pfil_hook.ph_type = PFIL_TYPE_AF; @@ -180,40 +237,36 @@ ip6_init(void) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET6, + &V_ipsec_hhh_in[HHOOK_IPSEC_INET6], + HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register input helper hook\n", + __func__); + if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET6, + &V_ipsec_hhh_out[HHOOK_IPSEC_INET6], + HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) + printf("%s: WARNING: unable to register output helper hook\n", + __func__); + scope6_init(); addrsel_policy_init(); nd6_init(); frag6_init(); -#ifdef FLOWTABLE - if (TUNABLE_INT_FETCH("net.inet6.ip6.output_flowtable_size", - &V_ip6_output_flowtable_size)) { - if (V_ip6_output_flowtable_size < 256) - V_ip6_output_flowtable_size = 256; - if (!powerof2(V_ip6_output_flowtable_size)) { - printf("flowtable must be power of 2 size\n"); - V_ip6_output_flowtable_size = 2048; - } - } else { - /* - * round up to the next power of 2 - */ - V_ip6_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1); - } - V_ip6_ft = flowtable_alloc("ipv6", V_ip6_output_flowtable_size, FL_IPV6|FL_PCPU); -#endif - V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR; /* Skip global initialization stuff for non-default instances. */ - if (!IS_DEFAULT_VNET(curvnet)) +#ifdef VIMAGE + if (!IS_DEFAULT_VNET(curvnet)) { + netisr_register_vnet(&ip6_nh); +#ifdef RSS + netisr_register_vnet(&ip6_direct_nh); +#endif return; - -#ifdef DIAGNOSTIC - if (sizeof(struct protosw) != sizeof(struct ip6protosw)) - panic("sizeof(protosw) != sizeof(ip6protosw)"); + } #endif - pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + + pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) panic("ip6_init"); @@ -224,8 +277,8 @@ ip6_init(void) * Cycle through IP protocols and put them into the appropriate place * in ip6_protox[]. 
*/ - for (pr = (struct ip6protosw *)inet6domain.dom_protosw; - pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) + for (pr = inet6domain.dom_protosw; + pr < inet6domain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ @@ -234,6 +287,9 @@ ip6_init(void) } netisr_register(&ip6_nh); +#ifdef RSS + netisr_register(&ip6_direct_nh); +#endif } /* @@ -243,7 +299,7 @@ ip6_init(void) int ip6proto_register(short ip6proto) { - struct ip6protosw *pr; + struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) @@ -253,7 +309,7 @@ ip6proto_register(short ip6proto) * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ - pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] != pr - inet6sw) /* IPPROTO_RAW */ @@ -262,8 +318,8 @@ ip6proto_register(short ip6proto) /* * Find the protocol position in inet6sw[] and set the index. */ - for (pr = (struct ip6protosw *)inet6domain.dom_protosw; - pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++) { + for (pr = inet6domain.dom_protosw; + pr < inet6domain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol == ip6proto) { ip6_protox[pr->pr_protocol] = pr - inet6sw; @@ -276,14 +332,14 @@ ip6proto_register(short ip6proto) int ip6proto_unregister(short ip6proto) { - struct ip6protosw *pr; + struct protosw *pr; /* Sanity checks. */ if (ip6proto <= 0 || ip6proto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. 
*/ - pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip6_protox[ip6proto] == pr - inet6sw) /* IPPROTO_RAW */ @@ -295,43 +351,61 @@ ip6proto_unregister(short ip6proto) } #ifdef VIMAGE -void -ip6_destroy() +static void +ip6_destroy(void *unused __unused) { + struct ifaddr *ifa, *nifa; + struct ifnet *ifp; + int error; - nd6_destroy(); - callout_drain(&V_in6_tmpaddrtimer_ch); -} +#ifdef RSS + netisr_unregister_vnet(&ip6_direct_nh); #endif + netisr_unregister_vnet(&ip6_nh); + + if ((error = pfil_head_unregister(&V_inet6_pfil_hook)) != 0) + printf("%s: WARNING: unable to unregister pfil hook, " + "error %d\n", __func__, error); + error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET6]); + if (error != 0) { + printf("%s: WARNING: unable to deregister input helper hook " + "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET6: " + "error %d returned\n", __func__, error); + } + error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET6]); + if (error != 0) { + printf("%s: WARNING: unable to deregister output helper hook " + "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: " + "error %d returned\n", __func__, error); + } -static int -ip6_init2_vnet(const void *unused __unused) -{ - - /* nd6_timer_init */ - callout_init(&V_nd6_timer_ch, 0); - callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); - - /* timer for regeneranation of temporary addresses randomize ID */ - callout_init(&V_in6_tmpaddrtimer_ch, 0); - callout_reset(&V_in6_tmpaddrtimer_ch, - (V_ip6_temp_preferred_lifetime - V_ip6_desync_factor - - V_ip6_temp_regen_advance) * hz, - in6_tmpaddrtimer, curvnet); + /* Cleanup addresses. */ + IFNET_RLOCK(); + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { + /* Cannot lock here - lock recursion. 
*/ + /* IF_ADDR_LOCK(ifp); */ + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { - return (0); -} + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + in6_purgeaddr(ifa); + } + /* IF_ADDR_UNLOCK(ifp); */ + in6_ifdetach_destroy(ifp); + mld_domifdetach(ifp); + /* Make sure any routes are gone as well. */ + rt_flushifroutes_af(ifp, AF_INET6); + } + IFNET_RUNLOCK(); -static void -ip6_init2(void *dummy) -{ + nd6_destroy(); + in6_ifattach_destroy(); - ip6_init2_vnet(NULL); + hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask); } -/* cheat */ -/* This must be after route_init(), which is now SI_ORDER_THIRD */ -SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL); +VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); +#endif static int ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off, @@ -410,22 +484,78 @@ out: return (1); } +#ifdef RSS +/* + * IPv6 direct input routine. + * + * This is called when reinjecting completed fragments where + * all of the previous checking and book-keeping has been done. + */ +void +ip6_direct_input(struct mbuf *m) +{ + int off, nxt; + int nest; + struct m_tag *mtag; + struct ip6_direct_ctx *ip6dc; + + mtag = m_tag_locate(m, MTAG_ABI_IPV6, IPV6_TAG_DIRECT, NULL); + KASSERT(mtag != NULL, ("Reinjected packet w/o direct ctx tag!")); + + ip6dc = (struct ip6_direct_ctx *)(mtag + 1); + nxt = ip6dc->ip6dc_nxt; + off = ip6dc->ip6dc_off; + + nest = 0; + + m_tag_delete(m, mtag); + + while (nxt != IPPROTO_DONE) { + if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { + IP6STAT_INC(ip6s_toomanyhdr); + goto bad; + } + + /* + * protection against faulty packet - there should be + * more sanity checks in header chain processing. + */ + if (m->m_pkthdr.len < off) { + IP6STAT_INC(ip6s_tooshort); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); + goto bad; + } + +#ifdef IPSEC + /* + * enforce IPsec policy checking if we are seeing last header. 
+ * note that we do not visit this with protocols with pcb layer + * code - like udp/tcp/raw ip. + */ + if (ip6_ipsec_input(m, nxt)) + goto bad; +#endif /* IPSEC */ + + nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); + } + return; +bad: + m_freem(m); +} +#endif + void ip6_input(struct mbuf *m) { + struct in6_addr odst; struct ip6_hdr *ip6; - int off = sizeof(struct ip6_hdr), nest; + struct in6_ifaddr *ia; u_int32_t plen; u_int32_t rtalert = ~0; + int off = sizeof(struct ip6_hdr), nest; int nxt, ours = 0; - struct ifnet *deliverifp = NULL, *ifp = NULL; - struct in6_addr odst; - struct route_in6 rin6; int srcrt = 0; - struct llentry *lle = NULL; - struct sockaddr_in6 dst6, *dst; - bzero(&rin6, sizeof(struct route_in6)); #ifdef IPSEC /* * should the inner packet be considered authentic? @@ -438,18 +568,12 @@ ip6_input(struct mbuf *m) #endif /* IPSEC */ - /* - * make sure we don't have onion peering information into m_tag. - */ - ip6_delaux(m); - if (m->m_flags & M_FASTFWD_OURS) { /* * Firewall changed destination to local. 
*/ m->m_flags &= ~M_FASTFWD_OURS; ours = 1; - deliverifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); goto hbhcheck; } @@ -476,10 +600,8 @@ ip6_input(struct mbuf *m) } /* drop the packet if IPv6 operation is disabled on the IF */ - if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) { - m_freem(m); - return; - } + if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) + goto bad; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); IP6STAT_INC(ip6s_total); @@ -493,21 +615,16 @@ ip6_input(struct mbuf *m) if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; - MGETHDR(n, M_DONTWAIT, MT_HEADER); - if (n) - M_MOVE_PKTHDR(n, m); - if (n && n->m_pkthdr.len > MHLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_freem(n); - n = NULL; - } - } + if (m->m_pkthdr.len > MHLEN) + n = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else + n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; /* ENOBUFS */ } + m_move_pkthdr(n, m); m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = n->m_pkthdr.len; m_freem(m); @@ -536,6 +653,8 @@ ip6_input(struct mbuf *m) IP6STAT_INC(ip6s_nxthist[ip6->ip6_nxt]); + IP_PROBE(receive, NULL, NULL, ip6, m->m_pkthdr.rcvif, NULL, ip6); + /* * Check against address spoofing/corruption. */ @@ -643,7 +762,6 @@ ip6_input(struct mbuf *m) if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; ours = 1; - deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } if ((m->m_flags & M_IP6_NEXTHOP) && @@ -654,7 +772,7 @@ ip6_input(struct mbuf *m) * connected host. */ ip6_forward(m, 1); - goto out; + return; } passin: @@ -677,7 +795,6 @@ passin: IP6STAT_INC(ip6s_badscope); goto bad; } - /* * Multicast check. Assume packet is for us to avoid * prematurely taking locks. 
@@ -685,167 +802,16 @@ passin: if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ours = 1; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); - deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } - - /* - * Unicast check - */ - - bzero(&dst6, sizeof(dst6)); - dst6.sin6_family = AF_INET6; - dst6.sin6_len = sizeof(struct sockaddr_in6); - dst6.sin6_addr = ip6->ip6_dst; - ifp = m->m_pkthdr.rcvif; - IF_AFDATA_RLOCK(ifp); - lle = lla_lookup(LLTABLE6(ifp), 0, - (struct sockaddr *)&dst6); - IF_AFDATA_RUNLOCK(ifp); - if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) { - struct ifaddr *ifa; - struct in6_ifaddr *ia6; - int bad; - - bad = 1; -#define sa_equal(a1, a2) \ - (bcmp((a1), (a2), ((a1))->sin6_len) == 0) - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != dst6.sin6_family) - continue; - if (sa_equal(&dst6, ifa->ifa_addr)) - break; - } - KASSERT(ifa != NULL, ("%s: ifa not found for lle %p", - __func__, lle)); -#undef sa_equal - - ia6 = (struct in6_ifaddr *)ifa; - if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { - /* Count the packet in the ip address stats */ - ia6->ia_ifa.if_ipackets++; - ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; - - /* - * record address information into m_tag. - */ - (void)ip6_setdstifaddr(m, ia6); - - bad = 0; - } else { - char ip6bufs[INET6_ADDRSTRLEN]; - char ip6bufd[INET6_ADDRSTRLEN]; - /* address is not ready, so discard the packet. 
*/ - nd6log((LOG_INFO, - "ip6_input: packet to an unready address %s->%s\n", - ip6_sprintf(ip6bufs, &ip6->ip6_src), - ip6_sprintf(ip6bufd, &ip6->ip6_dst))); - } - IF_ADDR_RUNLOCK(ifp); - LLE_RUNLOCK(lle); - if (bad) - goto bad; - else { - ours = 1; - deliverifp = ifp; - goto hbhcheck; - } - } - if (lle != NULL) - LLE_RUNLOCK(lle); - - dst = &rin6.ro_dst; - dst->sin6_len = sizeof(struct sockaddr_in6); - dst->sin6_family = AF_INET6; - dst->sin6_addr = ip6->ip6_dst; - rin6.ro_rt = in6_rtalloc1((struct sockaddr *)dst, 0, 0, M_GETFIB(m)); - if (rin6.ro_rt) - RT_UNLOCK(rin6.ro_rt); - -#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) - /* - * Accept the packet if the forwarding interface to the destination - * according to the routing table is the loopback interface, - * unless the associated route has a gateway. - * Note that this approach causes to accept a packet if there is a - * route to the loopback interface for the destination of the packet. - * But we think it's even useful in some situations, e.g. when using - * a special daemon which wants to intercept the packet. - * - * XXX: some OSes automatically make a cloned route for the destination - * of an outgoing packet. If the outgoing interface of the packet - * is a loopback one, the kernel would consider the packet to be - * accepted, even if we have no such address assinged on the interface. - * We check the cloned flag of the route entry to reject such cases, - * assuming that route entries for our own addresses are not made by - * cloning (it should be true because in6_addloop explicitly installs - * the host route). However, we might have to do an explicit check - * while it would be less efficient. Or, should we rather install a - * reject route for such a case? + * Unicast check + * XXX: For now we keep link-local IPv6 addresses with embedded + * scope zone id, therefore we use zero zoneid here. 
*/ - if (rin6.ro_rt && - (rin6.ro_rt->rt_flags & - (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && -#ifdef RTF_WASCLONED - !(rin6.ro_rt->rt_flags & RTF_WASCLONED) && -#endif -#ifdef RTF_CLONED - !(rin6.ro_rt->rt_flags & RTF_CLONED) && -#endif -#if 0 - /* - * The check below is redundant since the comparison of - * the destination and the key of the rtentry has - * already done through looking up the routing table. - */ - IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, - &rt6_key(rin6.ro_rt)->sin6_addr) -#endif - rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) { - int free_ia6 = 0; - struct in6_ifaddr *ia6; - - /* - * found the loopback route to the interface address - */ - if (rin6.ro_rt->rt_gateway->sa_family == AF_LINK) { - struct sockaddr_in6 dest6; - - bzero(&dest6, sizeof(dest6)); - dest6.sin6_family = AF_INET6; - dest6.sin6_len = sizeof(dest6); - dest6.sin6_addr = ip6->ip6_dst; - ia6 = (struct in6_ifaddr *) - ifa_ifwithaddr((struct sockaddr *)&dest6); - if (ia6 == NULL) - goto bad; - free_ia6 = 1; - } - else - ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa; - - /* - * record address information into m_tag. - */ - (void)ip6_setdstifaddr(m, ia6); - - /* - * packets to a tentative, duplicated, or somehow invalid - * address must not be accepted. - */ - if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { - /* this address is ready */ - ours = 1; - deliverifp = ia6->ia_ifp; /* correct? */ - /* Count the packet in the ip address stats */ - ia6->ia_ifa.if_ipackets++; - ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; - if (ia6 != NULL && free_ia6 != 0) - ifa_free(&ia6->ia_ifa); - goto hbhcheck; - } else { + ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); + if (ia != NULL) { + if (ia->ia6_flags & IN6_IFF_NOTREADY) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; /* address is not ready, so discard the packet. 
*/ @@ -853,24 +819,15 @@ passin: "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst))); - - if (ia6 != NULL && free_ia6 != 0) - ifa_free(&ia6->ia_ifa); + ifa_free(&ia->ia_ifa); goto bad; } - } - - /* - * FAITH (Firewall Aided Internet Translator) - */ - if (V_ip6_keepfaith) { - if (rin6.ro_rt && rin6.ro_rt->rt_ifp && - rin6.ro_rt->rt_ifp->if_type == IFT_FAITH) { - /* XXX do we need more sanity checks? */ - ours = 1; - deliverifp = rin6.ro_rt->rt_ifp; /* faith */ - goto hbhcheck; - } + /* Count the packet in the ip address stats */ + counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); + counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); + ifa_free(&ia->ia_ifa); + ours = 1; + goto hbhcheck; } /* @@ -885,47 +842,25 @@ passin: hbhcheck: /* - * record address information into m_tag, if we don't have one yet. - * note that we are unable to record it, if the address is not listed - * as our interface address (e.g. multicast addresses, addresses - * within FAITH prefixes and such). - */ - if (deliverifp) { - struct in6_ifaddr *ia6; - - if ((ia6 = ip6_getdstifaddr(m)) != NULL) { - ifa_free(&ia6->ia_ifa); - } else { - ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); - if (ia6) { - if (!ip6_setdstifaddr(m, ia6)) { - /* - * XXX maybe we should drop the packet here, - * as we could not provide enough information - * to the upper layers. - */ - } - ifa_free(&ia6->ia_ifa); - } - } - } - - /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). * If a JumboPayload option is included, plen will also be modified. 
*/ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { - int error; - - error = ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours); - if (error != 0) - goto out; + if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0) + return; } else nxt = ip6->ip6_nxt; /* + * Use mbuf flags to propagate Router Alert option to + * ICMPv6 layer, as hop-by-hop options have been stripped. + */ + if (rtalert != ~0) + m->m_flags |= M_RTALERT_MLD; + + /* * Check that the amount of data in the buffers * is as at least much as the IPv6 header would have us expect. * Trim mbufs if longer than we expect. @@ -968,7 +903,7 @@ passin: } } else if (!ours) { ip6_forward(m, srcrt); - goto out; + return; } ip6 = mtod(m, struct ip6_hdr *); @@ -993,7 +928,7 @@ passin: * Tell launch routine the next header */ IP6STAT_INC(ip6s_delivered); - in6_ifstat_inc(deliverifp, ifs6_in_deliver); + in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { @@ -1022,56 +957,11 @@ passin: goto bad; #endif /* IPSEC */ - /* - * Use mbuf flags to propagate Router Alert option to - * ICMPv6 layer, as hop-by-hop options have been stripped. - */ - if (nxt == IPPROTO_ICMPV6 && rtalert != ~0) - m->m_flags |= M_RTALERT_MLD; - nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } - goto out; + return; bad: m_freem(m); -out: - if (rin6.ro_rt) - RTFREE(rin6.ro_rt); -} - -/* - * set/grab in6_ifaddr correspond to IPv6 destination address. - * XXX backward compatibility wrapper - * - * XXXRW: We should bump the refcount on ia6 before sticking it in the m_tag, - * and then bump it when the tag is copied, and release it when the tag is - * freed. Unfortunately, m_tags don't support deep copies (yet), so instead - * we just bump the ia refcount when we receive it. This should be fixed. 
- */ -static struct ip6aux * -ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6) -{ - struct ip6aux *ip6a; - - ip6a = ip6_addaux(m); - if (ip6a) - ip6a->ip6a_dstia6 = ia6; - return ip6a; /* NULL if failed to set */ -} - -struct in6_ifaddr * -ip6_getdstifaddr(struct mbuf *m) -{ - struct ip6aux *ip6a; - struct in6_ifaddr *ia; - - ip6a = ip6_findaux(m); - if (ip6a) { - ia = ip6a->ip6a_dstia6; - ifa_ref(&ia->ia_ifa); - return ia; - } else - return NULL; } /* @@ -1601,6 +1491,44 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) loopend: ; } + + if (in6p->inp_flags2 & INP_RECVFLOWID) { + uint32_t flowid, flow_type; + + flowid = m->m_pkthdr.flowid; + flow_type = M_HASHTYPE_GET(m); + + /* + * XXX should handle the failure of one or the + * other - don't populate both? + */ + *mp = sbcreatecontrol((caddr_t) &flowid, + sizeof(uint32_t), IPV6_FLOWID, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + *mp = sbcreatecontrol((caddr_t) &flow_type, + sizeof(uint32_t), IPV6_FLOWTYPE, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + +#ifdef RSS + if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) { + uint32_t flowid, flow_type; + uint32_t rss_bucketid; + + flowid = m->m_pkthdr.flowid; + flow_type = M_HASHTYPE_GET(m); + + if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { + *mp = sbcreatecontrol((caddr_t) &rss_bucketid, + sizeof(uint32_t), IPV6_RSSBUCKETID, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + } +#endif + } #undef IS2292 @@ -1674,22 +1602,12 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) else elen = (ip6e.ip6e_len + 1) << 3; - MGET(n, M_DONTWAIT, MT_DATA); - if (n && elen >= MLEN) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - n = NULL; - } - } - if (!n) - return NULL; - - n->m_len = 0; - if (elen >= M_TRAILINGSPACE(n)) { - m_free(n); + if (elen > MLEN) + n = m_getcl(M_NOWAIT, MT_DATA, 0); + else + n = m_get(M_NOWAIT, MT_DATA); + if (n == NULL) return NULL; - } m_copydata(m, off, elen, 
mtod(n, caddr_t)); n->m_len = elen; @@ -1710,7 +1628,7 @@ ip6_pullexthdr(struct mbuf *m, size_t off, int nxt) * we develop `neater' mechanism to process extension headers. */ char * -ip6_get_prevhdr(struct mbuf *m, int off) +ip6_get_prevhdr(const struct mbuf *m, int off) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); @@ -1749,7 +1667,7 @@ ip6_get_prevhdr(struct mbuf *m, int off) * get next header offset. m will be retained. */ int -ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) +ip6_nexthdr(const struct mbuf *m, int off, int proto, int *nxtp) { struct ip6_hdr ip6; struct ip6_ext ip6e; @@ -1817,14 +1735,14 @@ ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) return -1; } - return -1; + /* NOTREACHED */ } /* * get offset for the last header in the chain. m will be kept untainted. */ int -ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) +ip6_lasthdr(const struct mbuf *m, int off, int proto, int *nxtp) { int newoff; int nxt; @@ -1847,42 +1765,6 @@ ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) } } -static struct ip6aux * -ip6_addaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - if (!mtag) { - mtag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux), - M_NOWAIT); - if (mtag) { - m_tag_prepend(m, mtag); - bzero(mtag + 1, sizeof(struct ip6aux)); - } - } - return mtag ? (struct ip6aux *)(mtag + 1) : NULL; -} - -static struct ip6aux * -ip6_findaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - return mtag ? 
(struct ip6aux *)(mtag + 1) : NULL; -} - -static void -ip6_delaux(struct mbuf *m) -{ - struct m_tag *mtag; - - mtag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL); - if (mtag) - m_tag_delete(m, mtag); -} - /* * System control for IP6 */ diff --git a/freebsd/sys/netinet6/ip6_ipsec.h b/freebsd/sys/netinet6/ip6_ipsec.h index 86d1b005..e335d850 100644 --- a/freebsd/sys/netinet6/ip6_ipsec.h +++ b/freebsd/sys/netinet6/ip6_ipsec.h @@ -35,8 +35,7 @@ int ip6_ipsec_filtertunnel(struct mbuf *); int ip6_ipsec_fwd(struct mbuf *); int ip6_ipsec_input(struct mbuf *, int); -int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *, int *, - struct ifnet **, struct secpolicy **sp); +int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *); #if 0 int ip6_ipsec_mtu(struct mbuf *); #endif diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c index 02a98026..f74b71c3 100644 --- a/freebsd/sys/netinet6/ip6_mroute.c +++ b/freebsd/sys/netinet6/ip6_mroute.c @@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$"); #include <sys/module.h> #include <sys/domain.h> #include <sys/protosw.h> +#include <sys/sdt.h> #include <sys/signalvar.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -106,6 +107,7 @@ __FBSDID("$FreeBSD$"); #include <sys/time.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_types.h> #include <net/raw_cb.h> #include <net/vnet.h> @@ -116,19 +118,16 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip_encap.h> #include <netinet/ip6.h> +#include <netinet/in_kdtrace.h> #include <netinet6/ip6_var.h> #include <netinet6/scope6_var.h> #include <netinet6/nd6.h> #include <netinet6/ip6_mroute.h> -#include <netinet6/ip6protosw.h> #include <netinet6/pim6.h> #include <netinet6/pim6_var.h> static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry"); -/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? 
*/ -#define M_HASCL(m) ((m)->m_flags & M_EXT) - static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); @@ -140,7 +139,7 @@ extern int in6_mcast_loop; extern struct domain inet6domain; static const struct encaptab *pim6_encap_cookie; -static const struct ip6protosw in6_pim_protosw = { +static const struct protosw in6_pim_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_PIM, @@ -199,9 +198,34 @@ static struct mtx mfc6_mtx; static u_char n6expire[MF6CTBLSIZ]; static struct mif6 mif6table[MAXMIFS]; -SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD, - &mif6table, sizeof(mif6table), "S,mif6[MAXMIFS]", - "IPv6 Multicast Interfaces (struct mif6[MAXMIFS], netinet6/ip6_mroute.h)"); +static int +sysctl_mif6table(SYSCTL_HANDLER_ARGS) +{ + struct mif6_sctl *out; + int error; + + out = malloc(sizeof(struct mif6_sctl) * MAXMIFS, M_TEMP, M_WAITOK); + for (int i = 0; i < MAXMIFS; i++) { + out[i].m6_flags = mif6table[i].m6_flags; + out[i].m6_rate_limit = mif6table[i].m6_rate_limit; + out[i].m6_lcl_addr = mif6table[i].m6_lcl_addr; + if (mif6table[i].m6_ifp != NULL) + out[i].m6_ifp = mif6table[i].m6_ifp->if_index; + else + out[i].m6_ifp = 0; + out[i].m6_pkt_in = mif6table[i].m6_pkt_in; + out[i].m6_pkt_out = mif6table[i].m6_pkt_out; + out[i].m6_bytes_in = mif6table[i].m6_bytes_in; + out[i].m6_bytes_out = mif6table[i].m6_bytes_out; + } + error = SYSCTL_OUT(req, out, sizeof(struct mif6_sctl) * MAXMIFS); + free(out, M_TEMP); + return (error); +} +SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, mif6table, CTLTYPE_OPAQUE | CTLFLAG_RD, + NULL, 0, sysctl_mif6table, "S,mif6_sctl[MAXMIFS]", + "IPv6 Multicast Interfaces (struct mif6_sctl[MAXMIFS], " + "netinet6/ip6_mroute.h)"); static struct mtx mif6_mtx; #define MIF6_LOCK() mtx_lock(&mif6_mtx) @@ -359,7 +383,7 @@ X_ip6_mrouter_set(struct socket *so, 
struct sockopt *sopt) mifi_t mifi; if (so != V_ip6_mrouter && sopt->sopt_name != MRT6_INIT) - return (EACCES); + return (EPERM); switch (sopt->sopt_name) { case MRT6_INIT: @@ -614,7 +638,7 @@ X_ip6_mrouter_done(void) for (rte = rt->mf6c_stall; rte != NULL; ) { struct rtdetq *n = rte->next; - m_free(rte->m); + m_freem(rte->m); free(rte, M_MRTABLE6); rte = n; } @@ -1078,8 +1102,8 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { IP6STAT_INC(ip6s_cantforward); - if (V_ip6_log_time + V_ip6_log_interval < time_second) { - V_ip6_log_time = time_second; + if (V_ip6_log_time + V_ip6_log_interval < time_uptime) { + V_ip6_log_time = time_uptime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", @@ -1128,7 +1152,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) * Pullup packet header if needed before storing it, * as other references may modify it in the meantime. */ - if (mb0 && (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr))) + if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < sizeof(struct ip6_hdr))) mb0 = m_pullup(mb0, sizeof(struct ip6_hdr)); if (mb0 == NULL) { free(rte, M_MRTABLE6); @@ -1397,7 +1421,7 @@ ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt) mm = m_copy(m, 0, sizeof(struct ip6_hdr)); if (mm && - (M_HASCL(mm) || + (!M_WRITABLE(mm) || mm->m_len < sizeof(struct ip6_hdr))) mm = m_pullup(mm, sizeof(struct ip6_hdr)); if (mm == NULL) @@ -1527,7 +1551,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) */ mb_copy = m_copy(m, 0, M_COPYALL); if (mb_copy && - (M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr))) + (!M_WRITABLE(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr))) mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr)); if (mb_copy == NULL) { return; @@ -1561,15 +1585,8 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) * If configured to loop back multicasts by default, * 
loop back a copy now. */ - if (in6_mcast_loop) { - struct sockaddr_in6 dst6; - - bzero(&dst6, sizeof(dst6)); - dst6.sin6_len = sizeof(struct sockaddr_in6); - dst6.sin6_family = AF_INET6; - dst6.sin6_addr = ip6->ip6_dst; - ip6_mloopback(ifp, m, &dst6); - } + if (in6_mcast_loop) + ip6_mloopback(ifp, m); /* * Put the packet into the sending queue of the outgoing interface @@ -1583,10 +1600,13 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) dst6.sin6_len = sizeof(struct sockaddr_in6); dst6.sin6_family = AF_INET6; dst6.sin6_addr = ip6->ip6_dst; + + IP_PROBE(send, NULL, NULL, ip6, ifp, NULL, ip6); /* * We just call if_output instead of nd6_output here, since * we need no ND for a multicast forwarded packet...right? */ + m_clrprotoflags(m); /* Avoid confusing lower layers. */ error = (*ifp->if_output)(ifp, mb_copy, (struct sockaddr *)&dst6, NULL); MRT6_DLOG(DEBUG_XMIT, "mif %u err %d", @@ -1626,11 +1646,10 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m) ip6_sprintf(ip6bufd, &ip6->ip6_dst)); PIM6STAT_INC(pim6s_snd_registers); - /* Make a copy of the packet to send to the user level process */ - MGETHDR(mm, M_DONTWAIT, MT_HEADER); + /* Make a copy of the packet to send to the user level process. */ + mm = m_gethdr(M_NOWAIT, MT_DATA); if (mm == NULL) return (ENOBUFS); - mm->m_pkthdr.rcvif = NULL; mm->m_data += max_linkhdr; mm->m_len = sizeof(struct ip6_hdr); @@ -1949,4 +1968,4 @@ static moduledata_t ip6_mroutemod = { 0 }; -DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_ANY); diff --git a/freebsd/sys/netinet6/ip6_mroute.h b/freebsd/sys/netinet6/ip6_mroute.h index 33b41310..51e1d496 100644 --- a/freebsd/sys/netinet6/ip6_mroute.h +++ b/freebsd/sys/netinet6/ip6_mroute.h @@ -121,19 +121,19 @@ struct mf6cctl { * The kernel's multicast routing statistics. */ struct mrt6stat { - u_quad_t mrt6s_mfc_lookups; /* # forw. 
cache hash table hits */ - u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */ - u_quad_t mrt6s_upcalls; /* # calls to multicast routing daemon */ - u_quad_t mrt6s_no_route; /* no route for packet's origin */ - u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */ - u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */ - u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */ - u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */ - u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */ - u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */ - u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */ - u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */ - u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */ + uint64_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */ + uint64_t mrt6s_mfc_misses; /* # forw. cache hash table misses */ + uint64_t mrt6s_upcalls; /* # calls to multicast routing daemon */ + uint64_t mrt6s_no_route; /* no route for packet's origin */ + uint64_t mrt6s_bad_tunnel; /* malformed tunnel options */ + uint64_t mrt6s_cant_tunnel; /* no room for tunnel options */ + uint64_t mrt6s_wrong_if; /* arrived on wrong interface */ + uint64_t mrt6s_upq_ovflw; /* upcall Q overflow */ + uint64_t mrt6s_cache_cleanups; /* # entries with no upcalls */ + uint64_t mrt6s_drop_sel; /* pkts dropped selectively */ + uint64_t mrt6s_q_overflow; /* pkts dropped - Q overflow */ + uint64_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */ + uint64_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */ }; #ifdef MRT6_OINIT @@ -194,6 +194,20 @@ struct sioc_mif_req6 { u_quad_t obytes; /* Output byte count on mif */ }; +/* + * Structure to export 'struct mif6' to userland via sysctl. 
+ */ +struct mif6_sctl { + u_char m6_flags; /* MIFF_ flags defined above */ + u_int m6_rate_limit; /* max rate */ + struct in6_addr m6_lcl_addr; /* local interface address */ + uint32_t m6_ifp; /* interface index */ + u_quad_t m6_pkt_in; /* # pkts in on interface */ + u_quad_t m6_pkt_out; /* # pkts out on interface */ + u_quad_t m6_bytes_in; /* # bytes in on interface */ + u_quad_t m6_bytes_out; /* # bytes out on interface */ +}; + #if defined(_KERNEL) || defined(KERNEL) /* * The kernel's multicast-interface structure. diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c index 95231631..d3dc973e 100644 --- a/freebsd/sys/netinet6/ip6_output.c +++ b/freebsd/sys/netinet6/ip6_output.c @@ -67,10 +67,10 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipfw.h> #include <rtems/bsd/local/opt_ipsec.h> #include <rtems/bsd/local/opt_sctp.h> #include <rtems/bsd/local/opt_route.h> +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/kernel.h> @@ -88,14 +88,17 @@ __FBSDID("$FreeBSD$"); #include <machine/in_cksum.h> #include <net/if.h> +#include <net/if_var.h> #include <net/netisr.h> #include <net/route.h> #include <net/pfil.h> +#include <net/rss_config.h> #include <net/vnet.h> #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip_var.h> +#include <netinet6/in6_fib.h> #include <netinet6/in6_var.h> #include <netinet/ip6.h> #include <netinet/icmp6.h> @@ -103,6 +106,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_pcb.h> #include <netinet/tcp_var.h> #include <netinet6/nd6.h> +#include <netinet6/in6_rss.h> #ifdef IPSEC #include <netipsec/ipsec.h> @@ -132,6 +136,8 @@ struct ip6_exthdrs { struct mbuf *ip6e_dest2; }; +static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); + static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, @@ 
-145,8 +151,12 @@ static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); -static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, - struct ifnet *, struct in6_addr *, u_long *, int *, u_int); +static int ip6_getpmtu(struct route_in6 *, int, + struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, + u_int); +static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, + u_long *, int *, u_int); +static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); @@ -186,7 +196,7 @@ static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); }\ } while (/*CONSTCOND*/ 0) -static void +void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; @@ -198,8 +208,8 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " - "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset, - m->m_pkthdr.csum_flags); + "csum_flags=%b\n", __func__, m->m_len, plen, offset, + (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate @@ -210,6 +220,64 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) *(u_short *)(m->m_data + offset) = csum; } +int +ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, + int mtu, uint32_t id) +{ + struct mbuf *m, **mnext, *m_frgpart; + struct ip6_hdr *ip6, *mhip6; + struct ip6_frag *ip6f; + int off; + int error; + int tlen = m0->m_pkthdr.len; + + m = m0; + ip6 = mtod(m, struct ip6_hdr *); + mnext = &m->m_nextpkt; + + for (off = hlen; off < tlen; off += mtu) { + m = m_gethdr(M_NOWAIT, MT_DATA); + if (!m) { + 
IP6STAT_INC(ip6s_odropped); + return (ENOBUFS); + } + m->m_flags = m0->m_flags & M_COPYFLAGS; + *mnext = m; + mnext = &m->m_nextpkt; + m->m_data += max_linkhdr; + mhip6 = mtod(m, struct ip6_hdr *); + *mhip6 = *ip6; + m->m_len = sizeof(*mhip6); + error = ip6_insertfraghdr(m0, m, hlen, &ip6f); + if (error) { + IP6STAT_INC(ip6s_odropped); + return (error); + } + ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); + if (off + mtu >= tlen) + mtu = tlen - off; + else + ip6f->ip6f_offlg |= IP6F_MORE_FRAG; + mhip6->ip6_plen = htons((u_short)(mtu + hlen + + sizeof(*ip6f) - sizeof(struct ip6_hdr))); + if ((m_frgpart = m_copy(m0, off, mtu)) == NULL) { + IP6STAT_INC(ip6s_odropped); + return (ENOBUFS); + } + m_cat(m, m_frgpart); + m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); + m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; + m->m_pkthdr.rcvif = NULL; + ip6f->ip6f_reserved = 0; + ip6f->ip6f_ident = id; + ip6f->ip6f_nxt = nextproto; + IP6STAT_INC(ip6s_ofragments); + in6_ifstat_inc(ifp, ifs6_out_fragcreat); + } + + return (0); +} + /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). @@ -220,22 +288,25 @@ in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * - * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and + * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, - * which is rt_rmx.rmx_mtu. + * which is rt_mtu. * * ifpp - XXX: just for statistics */ +/* + * XXX TODO: no flowid is assigned for outbound flows? 
+ */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { - struct ip6_hdr *ip6, *mhip6; + struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; - int hlen, tlen, len, off; + int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; @@ -246,31 +317,25 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; - struct in6_addr finaldst, src0, dst0; + struct in6_addr src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; - int needipsec = 0; int sw_csum, tso; -#ifdef IPSEC - struct ipsec_output_state state; - struct ip6_rthdr *rh = NULL; - int needipsectun = 0; - int segleft_org = 0; - struct secpolicy *sp = NULL; -#endif /* IPSEC */ + int needfiblookup; + uint32_t fibnum; struct m_tag *fwd_tag = NULL; + uint32_t id; - ip6 = mtod(m, struct ip6_hdr *); - if (ip6 == NULL) { - printf ("ip6 is NULL"); - goto bad; - } - - if (inp != NULL) + if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); + if ((flags & IP_NODEFAULTFLOWID) == 0) { + /* unconditionally set flowid */ + m->m_pkthdr.flowid = inp->inp_flowid; + M_HASHTYPE_SET(m, inp->inp_flowtype); + } + } - finaldst = ip6->ip6_dst; bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ @@ -299,27 +364,14 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. + * XXX: need scope argument. */ - switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp)) + switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; - case -1: /* Do IPSec */ - needipsec = 1; - /* - * Do delayed checksums now, as we may send before returning. 
- */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { - plen = m->m_pkthdr.len - sizeof(*ip6); - in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; - } -#ifdef SCTP - if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { - sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); - m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; - } -#endif + case -1: /* IPSec done */ + goto done; case 0: /* No IPSec */ default: break; @@ -339,15 +391,15 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); - /* NOTE: we don't add AH/ESP length here. do that later. */ + /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* - * If we need IPsec, or there is at least one extension header, + * If there is at least one extension header, * separate IP6 header from the payload. */ - if ((needipsec || optlen) && !hdrsplit) { + if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; @@ -356,7 +408,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, hdrsplit++; } - /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ @@ -422,72 +473,6 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); -#ifdef IPSEC - if (!needipsec) - goto skip_ipsec2; - - /* - * pointers after IPsec headers are not valid any more. - * other pointers need a great care too. 
- * (IPsec routines should not mangle mbufs prior to AH/ESP) - */ - exthdrs.ip6e_dest2 = NULL; - - if (exthdrs.ip6e_rthdr) { - rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); - segleft_org = rh->ip6r_segleft; - rh->ip6r_segleft = 0; - } - - bzero(&state, sizeof(state)); - state.m = m; - error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags, - &needipsectun); - m = state.m; - if (error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing. - */ - ; - } else if (error) { - /* mbuf is already reclaimed in ipsec6_output_trans. */ - m = NULL; - switch (error) { - case EHOSTUNREACH: - case ENETUNREACH: - case EMSGSIZE: - case ENOBUFS: - case ENOMEM: - break; - default: - printf("[%s:%d] (ipsec): error code %d\n", - __func__, __LINE__, error); - /* FALLTHROUGH */ - case ENOENT: - /* don't show these error codes to the user */ - error = 0; - break; - } - goto bad; - } else if (!needipsectun) { - /* - * In the FAST IPSec case we have already - * re-injected the packet and it has been freed - * by the ipsec_done() function. So, just clean - * up after ourselves. - */ - m = NULL; - goto done; - } - if (exthdrs.ip6e_rthdr) { - /* ah6_output doesn't modify mbuf chain */ - rh->ip6r_segleft = segleft_org; - } -skip_ipsec2:; -#endif /* IPSEC */ - /* * If there is a routing header, discard the packet. */ @@ -514,29 +499,20 @@ skip_ipsec2:; /* * Route packet. */ - if (ro == 0) { + if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); - } + } else + ro->ro_flags |= RT_LLE_CACHE; ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE - if (ro->ro_rt == NULL) { - struct flentry *fle; - - /* - * The flow table returns route entries valid for up to 30 - * seconds; we rely on the remainder of ip_output() taking no - * longer than that long for the stability of ro_rt. 
The - * flow ID assignment must have happened before this point. - */ - fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6); - if (fle != NULL) - flow_to_route_in6(fle, ro); - } + if (ro->ro_rt == NULL) + (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif + fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. @@ -563,82 +539,18 @@ again: else ip6->ip6_hlim = V_ip6_defmcasthlim; } - -#ifdef IPSEC /* - * We may re-inject packets into the stack here. + * Validate route against routing table additions; + * a better/more specific route might have been added. + * Make sure address family is set in route. */ - if (needipsec && needipsectun) { - struct ipsec_output_state state; - - /* - * All the extension headers will become inaccessible - * (since they can be encrypted). - * Don't panic, we need no more updates to extension headers - * on inner IPv6 packet (since they are now encapsulated). - * - * IPv6 [ESP|AH] IPv6 [extension headers] payload - */ - bzero(&exthdrs, sizeof(exthdrs)); - exthdrs.ip6e_ip6 = m; - - bzero(&state, sizeof(state)); - state.m = m; - state.ro = (struct route *)ro; - state.dst = (struct sockaddr *)dst; - - error = ipsec6_output_tunnel(&state, sp, flags); - - m = state.m; - ro = (struct route_in6 *)state.ro; - dst = (struct sockaddr_in6 *)state.dst; - if (error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing. - */ - ; - } else if (error) { - /* mbuf is already reclaimed in ipsec6_output_tunnel. 
*/ - m0 = m = NULL; - m = NULL; - switch (error) { - case EHOSTUNREACH: - case ENETUNREACH: - case EMSGSIZE: - case ENOBUFS: - case ENOMEM: - break; - default: - printf("[%s:%d] (ipsec): error code %d\n", - __func__, __LINE__, error); - /* FALLTHROUGH */ - case ENOENT: - /* don't show these error codes to the user */ - error = 0; - break; - } - goto bad; - } else { - /* - * In the FAST IPSec case we have already - * re-injected the packet and it has been freed - * by the ipsec_done() function. So, just clean - * up after ourselves. - */ - m = NULL; - goto done; - } - - exthdrs.ip6e_ip6 = m; + if (inp) { + ro->ro_dst.sin6_family = AF_INET6; + RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } -#endif /* IPSEC */ - - /* adjust pointer */ - ip6 = mtod(m, struct ip6_hdr *); - - if (ro->ro_rt && fwd_tag == NULL) { + if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && + ro->ro_dst.sin6_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { @@ -649,7 +561,7 @@ again: dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, - &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m)); + &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); @@ -673,7 +585,7 @@ again: } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); - rt->rt_use++; + counter_u64_add(rt->rt_pksent, 1); } @@ -758,7 +670,7 @@ again: * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ - ip6_mloopback(ifp, m, dst); + ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform @@ -776,9 +688,7 @@ again: /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. - * However, it is not always the case, since - * some versions of MGETHDR() does not - * initialize the field. + * However, it may not always be the case. 
*/ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { @@ -810,8 +720,8 @@ again: *ifpp = ifp; /* Determine path MTU. */ - if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, - &alwaysfrag, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) + if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, + &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* @@ -887,8 +797,10 @@ again: error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; + /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); + needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; @@ -908,9 +820,20 @@ again: #endif error = netisr_queue(NETISR_IPV6, m); goto done; - } else - goto again; /* Redo the routing table lookup. */ + } else { + RO_RTFREE(ro); + needfiblookup = 1; /* Redo the routing table lookup. */ + } } + /* See if fib was changed by packet filter. */ + if (fibnum != M_GETFIB(m)) { + m->m_flags |= M_SKIP_FIREWALL; + fibnum = M_GETFIB(m); + RO_RTFREE(ro); + needfiblookup = 1; + } + if (needfiblookup) + goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { @@ -1018,11 +941,13 @@ passout: ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. 
*/ - ia6->ia_ifa.if_opackets++; - ia6->ia_ifa.if_obytes += m->m_pkthdr.len; + counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); + counter_u64_add(ia6->ia_ifa.ifa_obytes, + m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + error = nd6_output_ifp(ifp, origifp, m, dst, + (struct route *)ro); goto done; } @@ -1040,13 +965,8 @@ passout: in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { - struct mbuf **mnext, *m_frgpart; - struct ip6_frag *ip6f; - u_int32_t id = htonl(ip6_randomid()); u_char nextproto; - int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len; - /* * Too large for the destination or interface; * fragment if possible. @@ -1064,18 +984,6 @@ passout: } /* - * Verify that we have any chance at all of being able to queue - * the packet or packet fragments - */ - if (qslots <= 0 || ((u_int)qslots * (mtu - hlen) - < tlen /* - hlen */)) { - error = ENOBUFS; - IP6STAT_INC(ip6s_odropped); - goto bad; - } - - - /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. @@ -1090,8 +998,6 @@ passout: m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif - mnext = &m->m_nextpkt; - /* * Change the next header field of the last header in the * unfragmentable part. @@ -1116,47 +1022,9 @@ passout: * chain. */ m0 = m; - for (off = hlen; off < tlen; off += len) { - MGETHDR(m, M_DONTWAIT, MT_HEADER); - if (!m) { - error = ENOBUFS; - IP6STAT_INC(ip6s_odropped); - goto sendorfree; - } - m->m_pkthdr.rcvif = NULL; - m->m_flags = m0->m_flags & M_COPYFLAGS; /* incl. 
FIB */ - *mnext = m; - mnext = &m->m_nextpkt; - m->m_data += max_linkhdr; - mhip6 = mtod(m, struct ip6_hdr *); - *mhip6 = *ip6; - m->m_len = sizeof(*mhip6); - error = ip6_insertfraghdr(m0, m, hlen, &ip6f); - if (error) { - IP6STAT_INC(ip6s_odropped); - goto sendorfree; - } - ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); - if (off + len >= tlen) - len = tlen - off; - else - ip6f->ip6f_offlg |= IP6F_MORE_FRAG; - mhip6->ip6_plen = htons((u_short)(len + hlen + - sizeof(*ip6f) - sizeof(struct ip6_hdr))); - if ((m_frgpart = m_copy(m0, off, len)) == 0) { - error = ENOBUFS; - IP6STAT_INC(ip6s_odropped); - goto sendorfree; - } - m_cat(m, m_frgpart); - m->m_pkthdr.len = len + hlen + sizeof(*ip6f); - m->m_pkthdr.rcvif = NULL; - ip6f->ip6f_reserved = 0; - ip6f->ip6f_ident = id; - ip6f->ip6f_nxt = nextproto; - IP6STAT_INC(ip6s_ofragments); - in6_ifstat_inc(ifp, ifs6_out_fragcreat); - } + id = htonl(ip6_randomid()); + if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) + goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } @@ -1174,10 +1042,12 @@ sendorfree: if (error == 0) { /* Record statistics for this interface address. */ if (ia) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; + counter_u64_add(ia->ia_ifa.ifa_opackets, 1); + counter_u64_add(ia->ia_ifa.ifa_obytes, + m->m_pkthdr.len); } - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + error = nd6_output_ifp(ifp, origifp, m, dst, + (struct route *)ro); } else m_freem(m); } @@ -1186,15 +1056,13 @@ sendorfree: IP6STAT_INC(ip6s_fragmented); done: - if (ro == &ip6route) + /* + * Release the route if using our private route, or if + * (with flowtable) we don't have our own reference. 
+ */ + if (ro == &ip6route || + (ro != NULL && ro->ro_flags & RT_NORTREF)) RO_RTFREE(ro); - if (ro_pmtu == &ip6route) - RO_RTFREE(ro_pmtu); -#ifdef IPSEC - if (sp != NULL) - KEY_FREESP(&sp); -#endif - return (error); freehdrs: @@ -1217,17 +1085,12 @@ ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ - MGET(m, M_DONTWAIT, MT_DATA); - if (!m) + if (hlen > MLEN) + m = m_getcl(M_NOWAIT, MT_DATA, 0); + else + m = m_get(M_NOWAIT, MT_DATA); + if (m == NULL) return (ENOBUFS); - - if (hlen > MLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - return (ENOBUFS); - } - } m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); @@ -1254,9 +1117,9 @@ ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ - if (exthdrs->ip6e_hbh == 0) { - MGET(mopt, M_DONTWAIT, MT_DATA); - if (mopt == 0) + if (exthdrs->ip6e_hbh == NULL) { + mopt = m_get(M_NOWAIT, MT_DATA); + if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); @@ -1287,15 +1150,8 @@ ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) * As a consequence, we must always prepare a cluster * at this point. 
*/ - MGET(n, M_DONTWAIT, MT_DATA); - if (n) { - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_freem(n); - n = NULL; - } - } - if (!n) + n = m_getcl(M_NOWAIT, MT_DATA, 0); + if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), @@ -1342,8 +1198,8 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), - hlen - sizeof(struct ip6_hdr), M_DONTWAIT); - if (n == 0) + hlen - sizeof(struct ip6_hdr), M_NOWAIT); + if (n == NULL) return (ENOBUFS); m->m_next = n; } else @@ -1353,7 +1209,7 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; - if ((mlast->m_flags & M_EXT) == 0 && + if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + @@ -1364,8 +1220,8 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; - MGET(mfrg, M_DONTWAIT, MT_DATA); - if (mfrg == 0) + mfrg = m_get(M_NOWAIT, MT_DATA); + if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); @@ -1375,35 +1231,105 @@ ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, return (0); } +/* + * Calculates IPv6 path mtu for destination @dst. + * Resulting MTU is stored in @mtup. + * + * Returns 0 on success. 
+ */ static int -ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, - struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, - int *alwaysfragp, u_int fibnum) +ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) { - u_int32_t mtu = 0; - int alwaysfrag = 0; - int error = 0; + struct nhop6_extended nh6; + struct in6_addr kdst; + uint32_t scopeid; + struct ifnet *ifp; + u_long mtu; + int error; - if (ro_pmtu != ro) { - /* The first hop and the final destination may differ. */ - struct sockaddr_in6 *sa6_dst = - (struct sockaddr_in6 *)&ro_pmtu->ro_dst; - if (ro_pmtu->ro_rt && - ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || - !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { - RTFREE(ro_pmtu->ro_rt); - ro_pmtu->ro_rt = (struct rtentry *)NULL; - } - if (ro_pmtu->ro_rt == NULL) { + in6_splitscope(dst, &kdst, &scopeid); + if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) + return (EHOSTUNREACH); + + ifp = nh6.nh_ifp; + mtu = nh6.nh_mtu; + + error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); + fib6_free_nh_ext(fibnum, &nh6); + + return (error); +} + +/* + * Calculates IPv6 path MTU for @dst based on transmit @ifp, + * and cached data in @ro_pmtu. + * MTU from (successful) route lookup is saved (along with dst) + * inside @ro_pmtu to avoid subsequent route lookups after packet + * filter processing. + * + * Stores mtu and always-frag value into @mtup and @alwaysfragp. + * Returns 0 on success. + */ +static int +ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, + struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, + int *alwaysfragp, u_int fibnum, u_int proto) +{ + struct nhop6_basic nh6; + struct in6_addr kdst; + uint32_t scopeid; + struct sockaddr_in6 *sa6_dst; + u_long mtu; + + mtu = 0; + if (do_lookup) { + + /* + * Here ro_pmtu has final destination address, while + * ro might represent immediate destination. + * Use ro_pmtu destination since mtu might differ. 
+ */ + sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; + if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) + ro_pmtu->ro_mtu = 0; + + if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; - in6_rtalloc(ro_pmtu, fibnum); + in6_splitscope(dst, &kdst, &scopeid); + if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, + &nh6) == 0) + ro_pmtu->ro_mtu = nh6.nh_mtu; } + + mtu = ro_pmtu->ro_mtu; } - if (ro_pmtu->ro_rt) { + + if (ro_pmtu->ro_rt) + mtu = ro_pmtu->ro_rt->rt_mtu; + + return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); +} + +/* + * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and + * hostcache data for @dst. + * Stores mtu and always-frag value into @mtup and @alwaysfragp. + * + * Returns 0 on success. + */ +static int +ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, + u_long *mtup, int *alwaysfragp, u_int proto) +{ + u_long mtu = 0; + int alwaysfrag = 0; + int error = 0; + + if (rt_mtu > 0) { u_int32_t ifmtu; struct in_conninfo inc; @@ -1411,14 +1337,16 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; - if (ifp == NULL) - ifp = ro_pmtu->ro_rt->rt_ifp; ifmtu = IN6_LINKMTU(ifp); - mtu = tcp_hc_getmtu(&inc); + + /* TCP is known to react to pmtu changes so skip hc */ + if (proto != IPPROTO_TCP) + mtu = tcp_hc_getmtu(&inc); + if (mtu) - mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu); + mtu = min(mtu, rt_mtu); else - mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; + mtu = rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { @@ -1432,17 +1360,6 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, */ alwaysfrag = 1; mtu = IPV6_MMTU; - } else if (mtu > ifmtu) { - /* - * The MTU on the route is larger than the MTU on - * the interface! 
This shouldn't happen, unless the - * MTU of the interface has been changed after the - * interface was brought up. Change the MTU in the - * route to match the interface MTU (as long as the - * field isn't locked). - */ - mtu = ifmtu; - ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); @@ -1468,6 +1385,10 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) int level, op, optname; int optlen; struct thread *td; +#ifdef RSS + uint32_t rss_bucket; + int retval; +#endif level = sopt->sopt_level; op = sopt->sopt_dir; @@ -1561,16 +1482,23 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt) /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: - case IPV6_FAITH: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: + case IPV6_RECVFLOWID: +#ifdef RSS + case IPV6_RECVRSSBUCKETID: +#endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: + case IPV6_BINDMULTI: +#ifdef RSS + case IPV6_RSS_LISTEN_BUCKET: +#endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); @@ -1620,6 +1548,16 @@ do { \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) +#define OPTSET2(bit, val) do { \ + INP_WLOCK(in6p); \ + if (val) \ + in6p->inp_flags2 |= bit; \ + else \ + in6p->inp_flags2 &= ~bit; \ + INP_WUNLOCK(in6p); \ +} while (0) +#define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 
1 : 0) + case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { @@ -1691,10 +1629,6 @@ do { \ OPTSET(IN6P_RTHDR); break; - case IPV6_FAITH: - OPTSET(INP_FAITH); - break; - case IPV6_RECVPATHMTU: /* * We ignore this option for TCP @@ -1706,6 +1640,16 @@ do { \ OPTSET(IN6P_MTU); break; + case IPV6_RECVFLOWID: + OPTSET2(INP_RECVFLOWID, optval); + break; + +#ifdef RSS + case IPV6_RECVRSSBUCKETID: + OPTSET2(INP_RECVRSSBUCKETID, optval); + break; +#endif + case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) @@ -1738,6 +1682,21 @@ do { \ case IPV6_BINDANY: OPTSET(INP_BINDANY); break; + + case IPV6_BINDMULTI: + OPTSET2(INP_BINDMULTI, optval); + break; +#ifdef RSS + case IPV6_RSS_LISTEN_BUCKET: + if ((optval >= 0) && + (optval < rss_getnumbuckets())) { + in6p->inp_rss_listen_bucket = optval; + OPTSET2(INP_RSS_BUCKET_SET, 1); + } else { + error = EINVAL; + } + break; +#endif } break; @@ -1947,12 +1906,19 @@ do { \ case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: - case IPV6_FAITH: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: + case IPV6_FLOWID: + case IPV6_FLOWTYPE: + case IPV6_RECVFLOWID: +#ifdef RSS + case IPV6_RSSBUCKETID: + case IPV6_RECVRSSBUCKETID: +#endif + case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: @@ -1987,10 +1953,6 @@ do { \ optval = OPTBIT(IN6P_MTU); break; - case IPV6_FAITH: - optval = OPTBIT(INP_FAITH); - break; - case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; @@ -2018,6 +1980,39 @@ do { \ case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; + + case IPV6_FLOWID: + optval = in6p->inp_flowid; + break; + + case IPV6_FLOWTYPE: + optval = in6p->inp_flowtype; + break; + + case IPV6_RECVFLOWID: + optval = OPTBIT2(INP_RECVFLOWID); + break; +#ifdef RSS + case IPV6_RSSBUCKETID: + retval = + rss_hash2bucket(in6p->inp_flowid, + in6p->inp_flowtype, + &rss_bucket); + if (retval == 0) + optval = rss_bucket; + else + error = EINVAL; + break; + + case 
IPV6_RECVRSSBUCKETID: + optval = OPTBIT2(INP_RECVRSSBUCKETID); + break; +#endif + + case IPV6_BINDMULTI: + optval = OPTBIT2(INP_BINDMULTI); + break; + } if (error) break; @@ -2029,9 +2024,6 @@ do { \ { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; - struct route_in6 sro; - - bzero(&sro, sizeof(sro)); if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); @@ -2040,11 +2032,8 @@ do { \ * routing, or optional information to specify * the outgoing interface. */ - error = ip6_getpmtu(&sro, NULL, NULL, - &in6p->in6p_faddr, &pmtu, NULL, - so->so_fibnum); - if (sro.ro_rt) - RTFREE(sro.ro_rt); + error = ip6_getpmtu_ctl(so->so_fibnum, + &in6p->in6p_faddr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) @@ -2307,12 +2296,14 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) switch (optname) { case IPV6_PKTINFO: - if (pktopt && pktopt->ip6po_pktinfo) - optdata = (void *)pktopt->ip6po_pktinfo; - else { + optdata = (void *)&null_pktinfo; + if (pktopt && pktopt->ip6po_pktinfo) { + bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, + sizeof(null_pktinfo)); + in6_clearscope(&null_pktinfo.ipi6_addr); + } else { /* XXX: we don't have to do this every time... */ bzero(&null_pktinfo, sizeof(null_pktinfo)); - optdata = (void *)&null_pktinfo; } optdatalen = sizeof(struct in6_pktinfo); break; @@ -2529,7 +2520,7 @@ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { - struct cmsghdr *cm = 0; + struct cmsghdr *cm = NULL; if (control == NULL || opt == NULL) return (EINVAL); @@ -2666,18 +2657,30 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } - + if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) + return (EINVAL); /* validate the interface index if specified. 
*/ - if (pktinfo->ipi6_ifindex > V_if_index || - pktinfo->ipi6_ifindex < 0) { + if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); - } if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } - + if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL || + (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)) + return (ENETDOWN); + + if (ifp != NULL && + !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { + struct in6_ifaddr *ia; + + in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); + ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); + if (ia == NULL) + return (EADDRNOTAVAIL); + ifa_free(&ia->ia_ifa); + } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr @@ -2987,7 +2990,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, * pointer that might NOT be &loif -- easier than replicating that code here. */ void -ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) +ip6_mloopback(struct ifnet *ifp, struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; @@ -3001,20 +3004,12 @@ ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ - if ((copym->m_flags & M_EXT) != 0 || + if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } - -#ifdef DIAGNOSTIC - if (copym->m_len < sizeof(*ip6)) { - m_freem(copym); - return; - } -#endif - ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. 
@@ -3022,8 +3017,12 @@ ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); - - (void)if_simloop(ifp, copym, dst->sin6_family, 0); + if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { + copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | + CSUM_PSEUDO_HDR; + copym->m_pkthdr.csum_data = 0xffff; + } + if_simloop(ifp, copym, AF_INET6, 0); } /* @@ -3037,13 +3036,13 @@ ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { - MGETHDR(mh, M_DONTWAIT, MT_HEADER); - if (mh == 0) { + mh = m_gethdr(M_NOWAIT, MT_DATA); + if (mh == NULL) { m_freem(m); return ENOBUFS; } - M_MOVE_PKTHDR(mh, m); - MH_ALIGN(mh, sizeof(*ip6)); + m_move_pkthdr(mh, m); + M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h index 4e8c42bd..e52a3206 100644 --- a/freebsd/sys/netinet6/ip6_var.h +++ b/freebsd/sys/netinet6/ip6_var.h @@ -99,6 +99,14 @@ struct ip6asfrag { #define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) /* + * IP6 reinjecting structure. + */ +struct ip6_direct_ctx { + uint32_t ip6dc_nxt; /* next header to process */ + uint32_t ip6dc_off; /* offset to next header */ +}; + +/* * Structure attached to inpcb.in6p_moptions and * passed to ip6_output when IPv6 multicast options are in use. * This structure is lazy-allocated. 
@@ -181,39 +189,39 @@ struct ip6_pktopts { */ struct ip6stat { - u_quad_t ip6s_total; /* total packets received */ - u_quad_t ip6s_tooshort; /* packet too short */ - u_quad_t ip6s_toosmall; /* not enough data */ - u_quad_t ip6s_fragments; /* fragments received */ - u_quad_t ip6s_fragdropped; /* frags dropped(dups, out of space) */ - u_quad_t ip6s_fragtimeout; /* fragments timed out */ - u_quad_t ip6s_fragoverflow; /* fragments that exceeded limit */ - u_quad_t ip6s_forward; /* packets forwarded */ - u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */ - u_quad_t ip6s_redirectsent; /* packets forwarded on same net */ - u_quad_t ip6s_delivered; /* datagrams delivered to upper level*/ - u_quad_t ip6s_localout; /* total ip packets generated here */ - u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. */ - u_quad_t ip6s_reassembled; /* total packets reassembled ok */ - u_quad_t ip6s_fragmented; /* datagrams successfully fragmented */ - u_quad_t ip6s_ofragments; /* output fragments created */ - u_quad_t ip6s_cantfrag; /* don't fragment flag was set, etc. 
*/ - u_quad_t ip6s_badoptions; /* error in option processing */ - u_quad_t ip6s_noroute; /* packets discarded due to no route */ - u_quad_t ip6s_badvers; /* ip6 version != 6 */ - u_quad_t ip6s_rawout; /* total raw ip packets generated */ - u_quad_t ip6s_badscope; /* scope error */ - u_quad_t ip6s_notmember; /* don't join this multicast group */ + uint64_t ip6s_total; /* total packets received */ + uint64_t ip6s_tooshort; /* packet too short */ + uint64_t ip6s_toosmall; /* not enough data */ + uint64_t ip6s_fragments; /* fragments received */ + uint64_t ip6s_fragdropped; /* frags dropped(dups, out of space) */ + uint64_t ip6s_fragtimeout; /* fragments timed out */ + uint64_t ip6s_fragoverflow; /* fragments that exceeded limit */ + uint64_t ip6s_forward; /* packets forwarded */ + uint64_t ip6s_cantforward; /* packets rcvd for unreachable dest */ + uint64_t ip6s_redirectsent; /* packets forwarded on same net */ + uint64_t ip6s_delivered; /* datagrams delivered to upper level*/ + uint64_t ip6s_localout; /* total ip packets generated here */ + uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */ + uint64_t ip6s_reassembled; /* total packets reassembled ok */ + uint64_t ip6s_fragmented; /* datagrams successfully fragmented */ + uint64_t ip6s_ofragments; /* output fragments created */ + uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. 
*/ + uint64_t ip6s_badoptions; /* error in option processing */ + uint64_t ip6s_noroute; /* packets discarded due to no route */ + uint64_t ip6s_badvers; /* ip6 version != 6 */ + uint64_t ip6s_rawout; /* total raw ip packets generated */ + uint64_t ip6s_badscope; /* scope error */ + uint64_t ip6s_notmember; /* don't join this multicast group */ #define IP6S_HDRCNT 256 /* headers count */ - u_quad_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */ - u_quad_t ip6s_m1; /* one mbuf */ + uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */ + uint64_t ip6s_m1; /* one mbuf */ #define IP6S_M2MMAX 32 - u_quad_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */ - u_quad_t ip6s_mext1; /* one ext mbuf */ - u_quad_t ip6s_mext2m; /* two or more ext mbuf */ - u_quad_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */ - u_quad_t ip6s_nogif; /* no match gif found */ - u_quad_t ip6s_toomanyhdr; /* discarded due to too many headers */ + uint64_t ip6s_m2m[IP6S_M2MMAX]; /* two or more mbuf */ + uint64_t ip6s_mext1; /* one ext mbuf */ + uint64_t ip6s_mext2m; /* two or more ext mbuf */ + uint64_t ip6s_exthdrtoolong; /* ext hdr are not contiguous */ + uint64_t ip6s_nogif; /* no match gif found */ + uint64_t ip6s_toomanyhdr; /* discarded due to too many headers */ /* * statistics for improvement of the source address selection @@ -223,81 +231,51 @@ struct ip6stat { #define IP6S_RULESMAX 16 #define IP6S_SCOPECNT 16 /* number of times that address selection fails */ - u_quad_t ip6s_sources_none; + uint64_t ip6s_sources_none; /* number of times that an address on the outgoing I/F is chosen */ - u_quad_t ip6s_sources_sameif[IP6S_SCOPECNT]; + uint64_t ip6s_sources_sameif[IP6S_SCOPECNT]; /* number of times that an address on a non-outgoing I/F is chosen */ - u_quad_t ip6s_sources_otherif[IP6S_SCOPECNT]; + uint64_t ip6s_sources_otherif[IP6S_SCOPECNT]; /* * number of times that an address that has the same scope * from the destination is chosen. 
*/ - u_quad_t ip6s_sources_samescope[IP6S_SCOPECNT]; + uint64_t ip6s_sources_samescope[IP6S_SCOPECNT]; /* * number of times that an address that has a different scope * from the destination is chosen. */ - u_quad_t ip6s_sources_otherscope[IP6S_SCOPECNT]; + uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT]; /* number of times that a deprecated address is chosen */ - u_quad_t ip6s_sources_deprecated[IP6S_SCOPECNT]; + uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT]; /* number of times that each rule of source selection is applied. */ - u_quad_t ip6s_sources_rule[IP6S_RULESMAX]; + uint64_t ip6s_sources_rule[IP6S_RULESMAX]; }; #ifdef _KERNEL -#define IP6STAT_ADD(name, val) V_ip6stat.name += (val) -#define IP6STAT_SUB(name, val) V_ip6stat.name -= (val) +#include <sys/counter.h> + +VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat); +#define IP6STAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val)) +#define IP6STAT_SUB(name, val) IP6STAT_ADD(name, -(val)) #define IP6STAT_INC(name) IP6STAT_ADD(name, 1) #define IP6STAT_DEC(name) IP6STAT_SUB(name, 1) #endif #ifdef _KERNEL -/* - * IPv6 onion peeling state. - * it will be initialized when we come into ip6_input(). - * XXX do not make it a kitchen sink! - */ -struct ip6aux { - u_int32_t ip6a_flags; -#define IP6A_SWAP 0x01 /* swapped home/care-of on packet */ -#define IP6A_HASEEN 0x02 /* HA was present */ -#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */ -#define IP6A_RTALERTSEEN 0x08 /* rtalert present */ - - /* ip6.ip6_src */ - struct in6_addr ip6a_careof; /* care-of address of the peer */ - struct in6_addr ip6a_home; /* home address of the peer */ - u_int16_t ip6a_bruid; /* BR unique identifier */ - - /* ip6.ip6_dst */ - struct in6_ifaddr *ip6a_dstia6; /* my ifaddr that matches ip6_dst */ - - /* rtalert */ - u_int16_t ip6a_rtalert; /* rtalert option value */ - - /* - * decapsulation history will be here. - * with IPsec it may not be accurate. 
- */ -}; -#endif - -#ifdef _KERNEL /* flags passed to ip6_output as last parameter */ #define IPV6_UNSPECSRC 0x01 /* allow :: as the source address */ #define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */ #define IPV6_MINMTU 0x04 /* use minimum MTU (IPV6_USE_MIN_MTU) */ -#define M_IP6_NEXTHOP M_PROTO7 /* explicit ip nexthop */ - #ifdef __NO_STRICT_ALIGNMENT #define IP6_HDR_ALIGNED_P(ip) 1 #else #define IP6_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0) #endif -VNET_DECLARE(struct ip6stat, ip6stat); /* statistics */ VNET_DECLARE(int, ip6_defhlim); /* default hop limit */ VNET_DECLARE(int, ip6_defmcasthlim); /* default multicast hop limit */ VNET_DECLARE(int, ip6_forwarding); /* act as router? */ @@ -306,7 +284,6 @@ VNET_DECLARE(int, ip6_rr_prune); /* router renumbering prefix * walk list every 5 sec. */ VNET_DECLARE(int, ip6_mcast_pmtu); /* enable pMTU discovery for multicast? */ VNET_DECLARE(int, ip6_v6only); -#define V_ip6stat VNET(ip6stat) #define V_ip6_defhlim VNET(ip6_defhlim) #define V_ip6_defmcasthlim VNET(ip6_defmcasthlim) #define V_ip6_forwarding VNET(ip6_forwarding) @@ -327,7 +304,6 @@ VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA * receiving IF. 
*/ VNET_DECLARE(int, ip6_rfc6204w3); /* Accept defroute from RA even when forwarding enabled */ -VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */ VNET_DECLARE(int, ip6_log_interval); VNET_DECLARE(time_t, ip6_log_time); VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension @@ -341,7 +317,6 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */ #define V_ip6_no_radr VNET(ip6_no_radr) #define V_ip6_norbit_raif VNET(ip6_norbit_raif) #define V_ip6_rfc6204w3 VNET(ip6_rfc6204w3) -#define V_ip6_keepfaith VNET(ip6_keepfaith) #define V_ip6_log_interval VNET(ip6_log_interval) #define V_ip6_log_time VNET(ip6_log_time) #define V_ip6_hdrnestlimit VNET(ip6_hdrnestlimit) @@ -379,24 +354,17 @@ int icmp6_ctloutput(struct socket *, struct sockopt *sopt); struct in6_ifaddr; void ip6_init(void); -#ifdef VIMAGE -void ip6_destroy(void); -#endif int ip6proto_register(short); int ip6proto_unregister(short); void ip6_input(struct mbuf *); -struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *); +void ip6_direct_input(struct mbuf *); void ip6_freepcbopts(struct ip6_pktopts *); int ip6_unknown_opt(u_int8_t *, struct mbuf *, int); -char * ip6_get_prevhdr(struct mbuf *, int); -int ip6_nexthdr(struct mbuf *, int, int, int *); -int ip6_lasthdr(struct mbuf *, int, int, int *); - -#ifdef __notyet__ -struct ip6aux *ip6_findaux(struct mbuf *); -#endif +char * ip6_get_prevhdr(const struct mbuf *, int); +int ip6_nexthdr(const struct mbuf *, int, int, int *); +int ip6_lasthdr(const struct mbuf *, int, int, int *); extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *); @@ -411,7 +379,7 @@ int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip6_forward(struct mbuf *, int); -void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); +void ip6_mloopback(struct ifnet *, struct mbuf *); int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int, @@ -425,6 +393,9 @@ int 
ip6_setpktopts(struct mbuf *, struct ip6_pktopts *, void ip6_clearpktopts(struct ip6_pktopts *, int); struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); int ip6_optlen(struct inpcb *); +int ip6_deletefraghdr(struct mbuf *, int, int); +int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int, + uint32_t); int route6_input(struct mbuf **, int *, int); @@ -437,16 +408,17 @@ void rip6_init(void); int rip6_input(struct mbuf **, int *, int); void rip6_ctlinput(int, struct sockaddr *, void *); int rip6_ctloutput(struct socket *, struct sockopt *); -int rip6_output(struct mbuf *, ...); +int rip6_output(struct mbuf *, struct socket *, ...); int rip6_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *); int dest6_input(struct mbuf **, int *, int); int none_input(struct mbuf **, int *, int); -int in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, - struct inpcb *inp, struct route_in6 *, struct ucred *cred, - struct ifnet **, struct in6_addr *); +int in6_selectsrc_socket(struct sockaddr_in6 *, struct ip6_pktopts *, + struct inpcb *, struct ucred *, int, struct in6_addr *, int *); +int in6_selectsrc_addr(uint32_t, const struct in6_addr *, + uint32_t, struct ifnet *, struct in6_addr *, int *); int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, struct rtentry **); @@ -455,6 +427,7 @@ int in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *, struct rtentry **, u_int); u_int32_t ip6_randomid(void); u_int32_t ip6_randomflowlabel(void); +void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset); #endif /* _KERNEL */ #endif /* !_NETINET6_IP6_VAR_H_ */ diff --git a/freebsd/sys/netinet6/ip6protosw.h b/freebsd/sys/netinet6/ip6protosw.h index ec802a51..9e80a698 100644 --- a/freebsd/sys/netinet6/ip6protosw.h +++ b/freebsd/sys/netinet6/ip6protosw.h @@ -92,7 +92,7 @@ struct pr_usrreqs; * * ip6c_finaldst usually points to 
ip6c_ip6->ip6_dst. if the original * (internal) packet carries a routing header, it may point the final - * dstination address in the routing header. + * destination address in the routing header. * * ip6c_src: ip6c_ip6->ip6_src + scope info + flowlabel in ip6c_ip6 * (beware of flowlabel, if you try to compare it against others) @@ -110,39 +110,8 @@ struct ip6ctlparam { u_int8_t ip6c_nxt; /* final next header field */ }; -struct ip6protosw { - short pr_type; /* socket type used for */ - struct domain *pr_domain; /* domain protocol a member of */ - short pr_protocol; /* protocol number */ - short pr_flags; /* see below */ - -/* protocol-protocol hooks */ - int (*pr_input) /* input to protocol (from below) */ - (struct mbuf **, int *, int); - int (*pr_output) /* output to protocol (from above) */ - (struct mbuf *, ...); - void (*pr_ctlinput) /* control input (from below) */ - (int, struct sockaddr *, void *); - int (*pr_ctloutput) /* control output (from above) */ - (struct socket *, struct sockopt *); - -/* utility hooks */ - void (*pr_init) /* initialization hook */ - (void); - void (*pr_destroy) /* cleanup hook */ - (void); - - void (*pr_fasttimo) /* fast timeout (200ms) */ - (void); - void (*pr_slowtimo) /* slow timeout (500ms) */ - (void); - void (*pr_drain) /* flush any excess space possible */ - (void); - struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ -}; - #ifdef _KERNEL -extern struct ip6protosw inet6sw[]; +extern struct protosw inet6sw[]; #endif #endif /* !_NETINET6_IP6PROTOSW_H_ */ diff --git a/freebsd/sys/netinet6/ip_fw_nat64.h b/freebsd/sys/netinet6/ip_fw_nat64.h new file mode 100644 index 00000000..a5c38b2a --- /dev/null +++ b/freebsd/sys/netinet6/ip_fw_nat64.h @@ -0,0 +1,154 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NETINET6_IP_FW_NAT64_H_ +#define _NETINET6_IP_FW_NAT64_H_ + +struct ipfw_nat64stl_stats { + uint64_t opcnt64; /* number of 6to4 packets translated */ + uint64_t opcnt46; /* number of 4to6 packets translated */ + uint64_t ofrags; /* number of fragments generated */ + uint64_t ifrags; /* number of fragments received */ + uint64_t oerrors; /* number of output errors */ + uint64_t noroute4; + uint64_t noroute6; + uint64_t noproto; /* Protocol not supported */ + uint64_t nomem; /* mbuf allocation failed */ + uint64_t dropped; /* dropped due to some errors */ +}; + +struct ipfw_nat64lsn_stats { + uint64_t opcnt64; /* number of 6to4 packets translated */ + uint64_t opcnt46; /* number of 4to6 packets translated */ + uint64_t ofrags; /* number of fragments generated */ + uint64_t ifrags; /* number of fragments received */ + uint64_t oerrors; /* number of output errors */ + uint64_t noroute4; + uint64_t noroute6; + uint64_t noproto; /* Protocol not supported */ + uint64_t nomem; /* mbuf allocation failed */ + uint64_t dropped; /* dropped due to some errors */ + + uint64_t nomatch4; /* No addr/port match */ + uint64_t jcalls; /* Number of job handler calls */ + uint64_t jrequests; /* Number of job requests */ + uint64_t jhostsreq; /* Number of job host requests */ + uint64_t jportreq; /* Number of portgroup requests */ + uint64_t jhostfails; /* Number of failed host allocs */ + uint64_t jportfails; /* Number of failed portgroup allocs */ + uint64_t jreinjected; /* Number of packets reinjected to q */ + uint64_t jmaxlen; /* Max queue length reached */ + uint64_t jnomem; /* No memory to alloc queue item */ + + uint64_t screated; /* Number of states created */ + uint64_t sdeleted; /* Number of states deleted */ + uint64_t spgcreated; /* Number of portgroups created */ + uint64_t spgdeleted; /* Number of portgroups deleted */ + uint64_t hostcount; /* Number of hosts */ + uint64_t tcpchunks; /* Number of TCP chunks */ + uint64_t udpchunks; /* Number of UDP chunks */ + 
uint64_t icmpchunks; /* Number of ICMP chunks */ + + uint64_t _reserved[4]; +}; + +#define NAT64_LOG 0x0001 /* Enable logging via BPF */ + +typedef struct _ipfw_nat64stl_cfg { + char name[64]; /* NAT name */ + ipfw_obj_ntlv ntlv6; /* object name tlv */ + ipfw_obj_ntlv ntlv4; /* object name tlv */ + struct in6_addr prefix6; /* NAT64 prefix */ + uint8_t plen6; /* Prefix length */ + uint8_t set; /* Named instance set [0..31] */ + uint8_t spare[2]; + uint32_t flags; +} ipfw_nat64stl_cfg; + +/* + * NAT64LSN default configuration values + */ +#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */ +#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */ +#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */ +#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */ +#define NAT64LSN_TCP_FIN_AGE 180 /* State's TTL after FIN/RST received */ +#define NAT64LSN_UDP_AGE 120 /* TTL for UDP states */ +#define NAT64LSN_ICMP_AGE 60 /* TTL for ICMP states */ +#define NAT64LSN_HOST_AGE 3600 /* TTL for stale host entry */ +#define NAT64LSN_PG_AGE 900 /* TTL for stale ports groups */ + +typedef struct _ipfw_nat64lsn_cfg { + char name[64]; /* NAT name */ + uint32_t flags; + uint32_t max_ports; /* Max ports per client */ + uint32_t agg_prefix_len; /* Prefix length to count */ + uint32_t agg_prefix_max; /* Max hosts per agg prefix */ + struct in_addr prefix4; + uint16_t plen4; /* Prefix length */ + uint16_t plen6; /* Prefix length */ + struct in6_addr prefix6; /* NAT64 prefix */ + uint32_t jmaxlen; /* Max jobqueue length */ + uint16_t min_port; /* Min port group # to use */ + uint16_t max_port; /* Max port group # to use */ + uint16_t nh_delete_delay;/* Stale host delete delay */ + uint16_t pg_delete_delay;/* Stale portgroup delete delay */ + uint16_t st_syn_ttl; /* TCP syn expire */ + uint16_t st_close_ttl; /* TCP fin expire */ + uint16_t st_estab_ttl; /* TCP established expire */ + uint16_t st_udp_ttl; /* UDP expire */ + uint16_t 
st_icmp_ttl; /* ICMP expire */ + uint8_t set; /* Named instance set [0..31] */ + uint8_t spare; +} ipfw_nat64lsn_cfg; + +typedef struct _ipfw_nat64lsn_state { + struct in_addr daddr; /* Remote IPv4 address */ + uint16_t dport; /* Remote destination port */ + uint16_t aport; /* Local alias port */ + uint16_t sport; /* Source port */ + uint8_t flags; /* State flags */ + uint8_t spare[3]; + uint16_t idle; /* Last used time */ +} ipfw_nat64lsn_state; + +typedef struct _ipfw_nat64lsn_stg { + uint64_t next_idx; /* next state index */ + struct in_addr alias4; /* IPv4 alias address */ + uint8_t proto; /* protocol */ + uint8_t flags; + uint16_t spare; + struct in6_addr host6; /* Bound IPv6 host */ + uint32_t count; /* Number of states */ + uint32_t spare2; +} ipfw_nat64lsn_stg; + +#endif /* _NETINET6_IP_FW_NAT64_H_ */ + diff --git a/freebsd/sys/netinet6/ip_fw_nptv6.h b/freebsd/sys/netinet6/ip_fw_nptv6.h new file mode 100644 index 00000000..e2357eff --- /dev/null +++ b/freebsd/sys/netinet6/ip_fw_nptv6.h @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETINET6_IP_FW_NPTV6_H_ +#define _NETINET6_IP_FW_NPTV6_H_ + +struct ipfw_nptv6_stats { + uint64_t in2ex; /* Int->Ext packets translated */ + uint64_t ex2in; /* Ext->Int packets translated */ + uint64_t dropped; /* dropped due to some errors */ + uint64_t reserved[5]; +}; + +typedef struct _ipfw_nptv6_cfg { + char name[64]; /* NPTv6 instance name */ + struct in6_addr internal; /* NPTv6 internal prefix */ + struct in6_addr external; /* NPTv6 external prefix */ + uint8_t plen; /* Prefix length */ + uint8_t set; /* Named instance set [0..31] */ + uint8_t spare[2]; + uint32_t flags; +} ipfw_nptv6_cfg; + +#endif /* _NETINET6_IP_FW_NPTV6_H_ */ + diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c index 25f03411..26efa852 100644 --- a/freebsd/sys/netinet6/mld6.c +++ b/freebsd/sys/netinet6/mld6.c @@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include <sys/ktr.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> #include <net/vnet.h> @@ -103,49 +104,49 @@ __FBSDID("$FreeBSD$"); #define KTR_MLD KTR_INET6 #endif -static struct mld_ifinfo * +static struct mld_ifsoftc * mli_alloc_locked(struct ifnet *); static void mli_delete_locked(const struct ifnet *); static void mld_dispatch_packet(struct mbuf *); -static void mld_dispatch_queue(struct ifqueue *, int); -static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *); +static void mld_dispatch_queue(struct mbufq *, int); +static void 
mld_final_leave(struct in6_multi *, struct mld_ifsoftc *); static void mld_fasttimo_vnet(void); static int mld_handle_state_change(struct in6_multi *, - struct mld_ifinfo *); -static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *, + struct mld_ifsoftc *); +static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *, const int); #ifdef KTR static char * mld_rec_type_to_str(const int); #endif -static void mld_set_version(struct mld_ifinfo *, const int); +static void mld_set_version(struct mld_ifsoftc *, const int); static void mld_slowtimo_vnet(void); static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *, /*const*/ struct mld_hdr *); static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *, /*const*/ struct mld_hdr *); -static void mld_v1_process_group_timer(struct mld_ifinfo *, +static void mld_v1_process_group_timer(struct mld_ifsoftc *, struct in6_multi *); -static void mld_v1_process_querier_timers(struct mld_ifinfo *); +static void mld_v1_process_querier_timers(struct mld_ifsoftc *); static int mld_v1_transmit_report(struct in6_multi *, const int); static void mld_v1_update_group(struct in6_multi *, const int); -static void mld_v2_cancel_link_timers(struct mld_ifinfo *); -static void mld_v2_dispatch_general_query(struct mld_ifinfo *); +static void mld_v2_cancel_link_timers(struct mld_ifsoftc *); +static void mld_v2_dispatch_general_query(struct mld_ifsoftc *); static struct mbuf * mld_v2_encap_report(struct ifnet *, struct mbuf *); -static int mld_v2_enqueue_filter_change(struct ifqueue *, +static int mld_v2_enqueue_filter_change(struct mbufq *, struct in6_multi *); -static int mld_v2_enqueue_group_record(struct ifqueue *, +static int mld_v2_enqueue_group_record(struct mbufq *, struct in6_multi *, const int, const int, const int, const int); static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *, struct mbuf *, const int, const int); static int mld_v2_merge_state_changes(struct in6_multi *, - 
struct ifqueue *); -static void mld_v2_process_group_timers(struct mld_ifinfo *, - struct ifqueue *, struct ifqueue *, + struct mbufq *); +static void mld_v2_process_group_timers(struct mld_ifsoftc *, + struct mbufq *, struct mbufq *, struct in6_multi *, const int); static int mld_v2_process_group_query(struct in6_multi *, - struct mld_ifinfo *mli, int, struct mbuf *, const int); + struct mld_ifsoftc *mli, int, struct mbuf *, const int); static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS); static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS); @@ -207,7 +208,7 @@ static MALLOC_DEFINE(M_MLD, "mld", "mld state"); * VIMAGE-wide globals. */ static VNET_DEFINE(struct timeval, mld_gsrdelay) = {10, 0}; -static VNET_DEFINE(LIST_HEAD(, mld_ifinfo), mli_head); +static VNET_DEFINE(LIST_HEAD(, mld_ifsoftc), mli_head); static VNET_DEFINE(int, interface_timers_running6); static VNET_DEFINE(int, state_change_timers_running6); static VNET_DEFINE(int, current_state_timers_running6); @@ -226,8 +227,8 @@ SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW, 0, /* * Virtualized sysctls. 
*/ -SYSCTL_VNET_PROC(_net_inet6_mld, OID_AUTO, gsrdelay, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, +SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I", "Rate limit for MLDv2 Group-and-Source queries in seconds"); @@ -239,14 +240,12 @@ static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, "Per-interface MLDv2 state"); static int mld_v1enable = 1; -SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW, +SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN, &mld_v1enable, 0, "Enable fallback to MLDv1"); -TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable); static int mld_use_allow = 1; -SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW, +SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN, &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves"); -TUNABLE_INT("net.inet6.mld.use_allow", &mld_use_allow); /* * Packed Router Alert option structure declaration. @@ -277,7 +276,7 @@ mld_save_context(struct mbuf *m, struct ifnet *ifp) { #ifdef VIMAGE - m->m_pkthdr.header = ifp->if_vnet; + m->m_pkthdr.PH_loc.ptr = ifp->if_vnet; #endif /* VIMAGE */ m->m_pkthdr.flowid = ifp->if_index; } @@ -286,7 +285,7 @@ static __inline void mld_scrub_context(struct mbuf *m) { - m->m_pkthdr.header = NULL; + m->m_pkthdr.PH_loc.ptr = NULL; m->m_pkthdr.flowid = 0; } @@ -302,8 +301,9 @@ mld_restore_context(struct mbuf *m) { #if defined(VIMAGE) && defined(INVARIANTS) - KASSERT(curvnet == m->m_pkthdr.header, - ("%s: called when curvnet was not restored", __func__)); + KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr, + ("%s: called when curvnet was not restored: cuvnet %p m ptr %p", + __func__, curvnet, m->m_pkthdr.PH_loc.ptr)); #endif return (m->m_pkthdr.flowid); } @@ -347,7 +347,7 @@ out_locked: } /* - * Expose struct mld_ifinfo to userland, keyed by ifindex. + * Expose struct mld_ifsoftc to userland, keyed by ifindex. * For use by ifmcstat(8). 
* * SMPng: NOTE: Does an unlocked ifindex space read. @@ -361,7 +361,7 @@ sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS) int error; u_int namelen; struct ifnet *ifp; - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; name = (int *)arg1; namelen = arg2; @@ -392,8 +392,17 @@ sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS) LIST_FOREACH(mli, &V_mli_head, mli_link) { if (ifp == mli->mli_ifp) { - error = SYSCTL_OUT(req, mli, - sizeof(struct mld_ifinfo)); + struct mld_ifinfo info; + + info.mli_version = mli->mli_version; + info.mli_v1_timer = mli->mli_v1_timer; + info.mli_v2_timer = mli->mli_v2_timer; + info.mli_flags = mli->mli_flags; + info.mli_rv = mli->mli_rv; + info.mli_qi = mli->mli_qi; + info.mli_qri = mli->mli_qri; + info.mli_uri = mli->mli_uri; + error = SYSCTL_OUT(req, &info, sizeof(info)); break; } } @@ -409,15 +418,12 @@ out_locked: * VIMAGE: Assumes the vnet pointer has been set. */ static void -mld_dispatch_queue(struct ifqueue *ifq, int limit) +mld_dispatch_queue(struct mbufq *mq, int limit) { struct mbuf *m; - for (;;) { - _IF_DEQUEUE(ifq, m); - if (m == NULL) - break; - CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, ifq, m); + while ((m = mbufq_dequeue(mq)) != NULL) { + CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, mq, m); mld_dispatch_packet(m); if (--limit == 0) break; @@ -460,13 +466,13 @@ mld_is_addr_reported(const struct in6_addr *addr) * * SMPng: Normally called with IF_AFDATA_LOCK held. */ -struct mld_ifinfo * +struct mld_ifsoftc * mld_domifattach(struct ifnet *ifp) { - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; CTR3(KTR_MLD, "%s: called for ifp %p(%s)", - __func__, ifp, ifp->if_xname); + __func__, ifp, if_name(ifp)); MLD_LOCK(); @@ -484,14 +490,14 @@ mld_domifattach(struct ifnet *ifp) /* * VIMAGE: assume curvnet set by caller. 
*/ -static struct mld_ifinfo * +static struct mld_ifsoftc * mli_alloc_locked(/*const*/ struct ifnet *ifp) { - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; MLD_LOCK_ASSERT(); - mli = malloc(sizeof(struct mld_ifinfo), M_MLD, M_NOWAIT|M_ZERO); + mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_NOWAIT|M_ZERO); if (mli == NULL) goto out; @@ -502,18 +508,13 @@ mli_alloc_locked(/*const*/ struct ifnet *ifp) mli->mli_qi = MLD_QI_INIT; mli->mli_qri = MLD_QRI_INIT; mli->mli_uri = MLD_URI_INIT; - SLIST_INIT(&mli->mli_relinmhead); - - /* - * Responses to general queries are subject to bounds. - */ - IFQ_SET_MAXLEN(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS); + mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS); LIST_INSERT_HEAD(&V_mli_head, mli, mli_link); - CTR2(KTR_MLD, "allocate mld_ifinfo for ifp %p(%s)", - ifp, ifp->if_xname); + CTR2(KTR_MLD, "allocate mld_ifsoftc for ifp %p(%s)", + ifp, if_name(ifp)); out: return (mli); @@ -533,12 +534,12 @@ out: void mld_ifdetach(struct ifnet *ifp) { - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; struct ifmultiaddr *ifma; struct in6_multi *inm, *tinm; CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, - ifp->if_xname); + if_name(ifp)); IN6_MULTI_LOCK_ASSERT(); MLD_LOCK(); @@ -579,7 +580,7 @@ mld_domifdetach(struct ifnet *ifp) { CTR3(KTR_MLD, "%s: called for ifp %p(%s)", - __func__, ifp, ifp->if_xname); + __func__, ifp, if_name(ifp)); MLD_LOCK(); mli_delete_locked(ifp); @@ -589,10 +590,10 @@ mld_domifdetach(struct ifnet *ifp) static void mli_delete_locked(const struct ifnet *ifp) { - struct mld_ifinfo *mli, *tmli; + struct mld_ifsoftc *mli, *tmli; - CTR3(KTR_MLD, "%s: freeing mld_ifinfo for ifp %p(%s)", - __func__, ifp, ifp->if_xname); + CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)", + __func__, ifp, if_name(ifp)); MLD_LOCK_ASSERT(); @@ -601,7 +602,7 @@ mli_delete_locked(const struct ifnet *ifp) /* * Free deferred General Query responses. 
*/ - _IF_DRAIN(&mli->mli_gq); + mbufq_drain(&mli->mli_gq); LIST_REMOVE(mli, mli_link); @@ -613,9 +614,6 @@ mli_delete_locked(const struct ifnet *ifp) return; } } -#ifdef INVARIANTS - panic("%s: mld_ifinfo not found for ifp %p\n", __func__, ifp); -#endif } /* @@ -630,7 +628,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld) { struct ifmultiaddr *ifma; - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; struct in6_multi *inm; int is_general_query; uint16_t timer; @@ -643,7 +641,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, if (!mld_v1enable) { CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), - ifp, ifp->if_xname); + ifp, if_name(ifp)); return (0); } @@ -654,7 +652,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), - ifp, ifp->if_xname); + ifp, if_name(ifp)); return (0); } @@ -689,7 +687,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * Switch to MLDv1 host compatibility mode. */ mli = MLD_IFINFO(ifp); - KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); + KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); mld_set_version(mli, MLD_VERSION_1); timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE; @@ -703,7 +701,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * interface, kick the report timer. 
*/ CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)", - ifp, ifp->if_xname); + ifp, if_name(ifp)); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET6 || ifma->ifma_protospec == NULL) @@ -721,7 +719,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, if (inm != NULL) { CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), - ifp, ifp->if_xname); + ifp, if_name(ifp)); mld_v1_update_group(inm, timer); } /* XXX Clear embedded scope ID as userland won't expect it. */ @@ -759,7 +757,7 @@ mld_v1_update_group(struct in6_multi *inm, const int timer) CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname, timer); + if_name(inm->in6m_ifp), timer); IN6_MULTI_LOCK_ASSERT(); @@ -806,7 +804,7 @@ static int mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, struct mbuf *m, const int off, const int icmp6len) { - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; struct mldv2_query *mld; struct in6_multi *inm; uint32_t maxdelay, nsrc, qqi; @@ -826,11 +824,11 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), - ifp, ifp->if_xname); + ifp, if_name(ifp)); return (0); } - CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname); + CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp)); mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off); @@ -888,7 +886,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, MLD_LOCK(); mli = MLD_IFINFO(ifp); - KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); + KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); /* * Discard the v2 query if we're in Compatibility Mode. 
@@ -919,7 +917,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * Otherwise, reset the interface timer. */ CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)", - ifp, ifp->if_xname); + ifp, if_name(ifp)); if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) { mli->mli_v2_timer = MLD_RANDOM_DELAY(timer); V_interface_timers_running6 = 1; @@ -949,7 +947,7 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, } } CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)", - ifp, ifp->if_xname); + ifp, if_name(ifp)); /* * If there is a pending General Query response * scheduled sooner than the selected delay, no @@ -973,12 +971,12 @@ out_locked: } /* - * Process a recieved MLDv2 group-specific or group-and-source-specific + * Process a received MLDv2 group-specific or group-and-source-specific * query. - * Return <0 if any error occured. Currently this is ignored. + * Return <0 if any error occurred. Currently this is ignored. */ static int -mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifinfo *mli, +mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli, int timer, struct mbuf *m0, const int off) { struct mldv2_query *mld; @@ -1106,7 +1104,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, if (!mld_v1enable) { CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), - ifp, ifp->if_xname); + ifp, if_name(ifp)); return (0); } @@ -1122,7 +1120,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) { CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &ip6->ip6_src), - ifp, ifp->if_xname); + ifp, if_name(ifp)); return (EINVAL); } @@ -1136,7 +1134,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) { CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, 
&ip6->ip6_dst), - ifp, ifp->if_xname); + ifp, if_name(ifp)); return (EINVAL); } @@ -1161,7 +1159,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, ifa_free(&ia->ia_ifa); CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)", - ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, ifp->if_xname); + ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); /* * Embed scope ID of receiving interface in MLD query for lookup @@ -1182,7 +1180,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, */ inm = in6m_lookup_locked(ifp, &mld->mld_addr); if (inm != NULL) { - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; mli = inm->in6m_mli; KASSERT(mli != NULL, @@ -1208,7 +1206,7 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, CTR3(KTR_MLD, "report suppressed for %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), - ifp, ifp->if_xname); + ifp, if_name(ifp)); case MLD_LAZY_MEMBER: inm->in6m_state = MLD_LAZY_MEMBER; break; @@ -1329,10 +1327,10 @@ mld_fasttimo(void) static void mld_fasttimo_vnet(void) { - struct ifqueue scq; /* State-change packets */ - struct ifqueue qrq; /* Query response packets */ + struct mbufq scq; /* State-change packets */ + struct mbufq qrq; /* Query response packets */ struct ifnet *ifp; - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; struct ifmultiaddr *ifma; struct in6_multi *inm, *tinm; int uri_fasthz; @@ -1389,12 +1387,8 @@ mld_fasttimo_vnet(void) if (mli->mli_version == MLD_VERSION_2) { uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * PR_FASTHZ); - - memset(&qrq, 0, sizeof(struct ifqueue)); - IFQ_SET_MAXLEN(&qrq, MLD_MAX_G_GS_PACKETS); - - memset(&scq, 0, sizeof(struct ifqueue)); - IFQ_SET_MAXLEN(&scq, MLD_MAX_STATE_CHANGE_PACKETS); + mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS); + mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS); } IF_ADDR_RLOCK(ifp); @@ -1461,7 +1455,7 @@ out_locked: * Will update the global pending timer flags. 
*/ static void -mld_v1_process_group_timer(struct mld_ifinfo *mli, struct in6_multi *inm) +mld_v1_process_group_timer(struct mld_ifsoftc *mli, struct in6_multi *inm) { int report_timer_expired; @@ -1505,8 +1499,8 @@ mld_v1_process_group_timer(struct mld_ifinfo *mli, struct in6_multi *inm) * Note: Unlocked read from mli. */ static void -mld_v2_process_group_timers(struct mld_ifinfo *mli, - struct ifqueue *qrq, struct ifqueue *scq, +mld_v2_process_group_timers(struct mld_ifsoftc *mli, + struct mbufq *qrq, struct mbufq *scq, struct in6_multi *inm, const int uri_fasthz) { int query_response_timer_expired; @@ -1601,7 +1595,7 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname); + if_name(inm->in6m_ifp)); /* * If we are leaving the group for good, make sure @@ -1626,14 +1620,14 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, * as per Section 9.12. */ static void -mld_set_version(struct mld_ifinfo *mli, const int version) +mld_set_version(struct mld_ifsoftc *mli, const int version) { int old_version_timer; MLD_LOCK_ASSERT(); CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__, - version, mli->mli_ifp, mli->mli_ifp->if_xname); + version, mli->mli_ifp, if_name(mli->mli_ifp)); if (version == MLD_VERSION_1) { /* @@ -1656,14 +1650,14 @@ mld_set_version(struct mld_ifinfo *mli, const int version) * joined on it; state-change, general-query, and group-query timers. 
*/ static void -mld_v2_cancel_link_timers(struct mld_ifinfo *mli) +mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) { struct ifmultiaddr *ifma; struct ifnet *ifp; struct in6_multi *inm, *tinm; CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__, - mli->mli_ifp, mli->mli_ifp->if_xname); + mli->mli_ifp, if_name(mli->mli_ifp)); IN6_MULTI_LOCK_ASSERT(); MLD_LOCK_ASSERT(); @@ -1714,7 +1708,7 @@ mld_v2_cancel_link_timers(struct mld_ifinfo *mli) /* * Free any pending MLDv2 state-change records. */ - _IF_DRAIN(&inm->in6m_scq); + mbufq_drain(&inm->in6m_scq); break; } } @@ -1749,7 +1743,7 @@ mld_slowtimo(void) static void mld_slowtimo_vnet(void) { - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; MLD_LOCK(); @@ -1765,7 +1759,7 @@ mld_slowtimo_vnet(void) * See Section 9.12 of RFC 3810. */ static void -mld_v1_process_querier_timers(struct mld_ifinfo *mli) +mld_v1_process_querier_timers(struct mld_ifsoftc *mli) { MLD_LOCK_ASSERT(); @@ -1777,7 +1771,7 @@ mld_v1_process_querier_timers(struct mld_ifinfo *mli) CTR5(KTR_MLD, "%s: transition from v%d -> v%d on %p(%s)", __func__, mli->mli_version, MLD_VERSION_2, - mli->mli_ifp, mli->mli_ifp->if_xname); + mli->mli_ifp, if_name(mli->mli_ifp)); mli->mli_version = MLD_VERSION_2; } } @@ -1801,13 +1795,13 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type) ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); /* ia may be NULL if link-local address is tentative. */ - MGETHDR(mh, M_DONTWAIT, MT_HEADER); + mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { if (ia != NULL) ifa_free(&ia->ia_ifa); return (ENOMEM); } - MGET(md, M_DONTWAIT, MT_DATA); + md = m_get(M_NOWAIT, MT_DATA); if (md == NULL) { m_free(mh); if (ia != NULL) @@ -1821,7 +1815,7 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type) * that ether_output() does not need to allocate another mbuf * for the header in the most common case. 
*/ - MH_ALIGN(mh, sizeof(struct ip6_hdr)); + M_ALIGN(mh, sizeof(struct ip6_hdr)); mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr); mh->m_len = sizeof(struct ip6_hdr); @@ -1881,7 +1875,7 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type) int mld_change_state(struct in6_multi *inm, const int delay) { - struct mld_ifinfo *mli; + struct mld_ifsoftc *mli; struct ifnet *ifp; int error; @@ -1906,7 +1900,7 @@ mld_change_state(struct in6_multi *inm, const int delay) MLD_LOCK(); mli = MLD_IFINFO(ifp); - KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); + KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); /* * If we detect a state transition to or from MCAST_UNDEFINED @@ -1949,11 +1943,11 @@ out_locked: * initial state change for delay ticks (in units of PR_FASTHZ). */ static int -mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, +mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli, const int delay) { struct ifnet *ifp; - struct ifqueue *ifq; + struct mbufq *mq; int error, retval, syncstates; int odelay; #ifdef KTR @@ -1962,7 +1956,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp, inm->in6m_ifp->if_xname); + inm->in6m_ifp, if_name(inm->in6m_ifp)); error = 0; syncstates = 1; @@ -2040,9 +2034,9 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, * Don't kick the timers if there is nothing to do, * or if an error occurred. 
*/ - ifq = &inm->in6m_scq; - _IF_DRAIN(ifq); - retval = mld_v2_enqueue_group_record(ifq, inm, 1, + mq = &inm->in6m_scq; + mbufq_drain(mq); + retval = mld_v2_enqueue_group_record(mq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); @@ -2088,7 +2082,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname); + if_name(inm->in6m_ifp)); } return (error); @@ -2098,7 +2092,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, * Issue an intermediate state change during the life-cycle. */ static int -mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) +mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli) { struct ifnet *ifp; int retval; @@ -2108,7 +2102,7 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp, inm->in6m_ifp->if_xname); + inm->in6m_ifp, if_name(inm->in6m_ifp)); ifp = inm->in6m_ifp; @@ -2130,11 +2124,11 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname); + if_name(inm->in6m_ifp)); return (0); } - _IF_DRAIN(&inm->in6m_scq); + mbufq_drain(&inm->in6m_scq); retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); @@ -2162,7 +2156,7 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) * to INCLUDE {} for immediate transmission. 
*/ static void -mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) +mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli) { int syncstates; #ifdef KTR @@ -2173,7 +2167,7 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp, inm->in6m_ifp->if_xname); + inm->in6m_ifp, if_name(inm->in6m_ifp)); IN6_MULTI_LOCK_ASSERT(); MLD_LOCK_ASSERT(); @@ -2207,13 +2201,13 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) * TO_IN {} to be sent on the next fast timeout, * giving us an opportunity to merge reports. */ - _IF_DRAIN(&inm->in6m_scq); + mbufq_drain(&inm->in6m_scq); inm->in6m_timer = 0; inm->in6m_scrv = mli->mli_rv; CTR4(KTR_MLD, "%s: Leaving %s/%s with %d " "pending retransmissions.", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname, inm->in6m_scrv); + if_name(inm->in6m_ifp), inm->in6m_scrv); if (inm->in6m_scrv == 0) { inm->in6m_state = MLD_NOT_MEMBER; inm->in6m_sctimer = 0; @@ -2248,10 +2242,10 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) in6m_commit(inm); CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname); + if_name(inm->in6m_ifp)); inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s", - __func__, &inm->in6m_addr, inm->in6m_ifp->if_xname); + __func__, &inm->in6m_addr, if_name(inm->in6m_ifp)); } } @@ -2283,7 +2277,7 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) * no record(s) were appended. 
*/ static int -mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, +mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm, const int is_state_change, const int is_group_query, const int is_source_query, const int use_block_allow) { @@ -2398,12 +2392,12 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, * Generate the filter list changes using a separate function. */ if (is_filter_list_change) - return (mld_v2_enqueue_filter_change(ifq, inm)); + return (mld_v2_enqueue_filter_change(mq, inm)); if (type == MLD_DO_NOTHING) { CTR3(KTR_MLD, "%s: nothing to do for %s/%s", __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname); + if_name(inm->in6m_ifp)); return (0); } @@ -2419,7 +2413,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__, mld_rec_type_to_str(type), ip6_sprintf(ip6tbuf, &inm->in6m_addr), - inm->in6m_ifp->if_xname); + if_name(inm->in6m_ifp)); /* * Check if we have a packet in the tail of the queue for this @@ -2429,7 +2423,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, * Note: Group records for G/GSR query responses MUST be sent * in their own packet. 
*/ - m0 = ifq->ifq_tail; + m0 = mbufq_last(mq); if (!is_group_query && m0 != NULL && (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && @@ -2441,7 +2435,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, m = m0; CTR1(KTR_MLD, "%s: use existing packet", __func__); } else { - if (_IF_QFULL(ifq)) { + if (mbufq_full(mq)) { CTR1(KTR_MLD, "%s: outbound queue full", __func__); return (-ENOMEM); } @@ -2449,9 +2443,9 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, m0srcs = (ifp->if_mtu - MLD_MTUSPACE - sizeof(struct mldv2_record)) / sizeof(struct in6_addr); if (!is_state_change && !is_group_query) - m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) - m = m_gethdr(M_DONTWAIT, MT_DATA); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (-ENOMEM); @@ -2554,7 +2548,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, if (m != m0) { CTR1(KTR_MLD, "%s: enqueueing first packet", __func__); m->m_pkthdr.PH_vt.vt_nrecs = 1; - _IF_ENQUEUE(ifq, m); + mbufq_enqueue(mq, m); } else m->m_pkthdr.PH_vt.vt_nrecs++; @@ -2570,13 +2564,13 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, * Always try for a cluster first. */ while (nims != NULL) { - if (_IF_QFULL(ifq)) { + if (mbufq_full(mq)) { CTR1(KTR_MLD, "%s: outbound queue full", __func__); return (-ENOMEM); } - m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) - m = m_gethdr(M_DONTWAIT, MT_DATA); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (-ENOMEM); mld_save_context(m, ifp); @@ -2629,7 +2623,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, nbytes += (msrcs * sizeof(struct in6_addr)); CTR1(KTR_MLD, "%s: enqueueing next packet", __func__); - _IF_ENQUEUE(ifq, m); + mbufq_enqueue(mq, m); } return (nbytes); @@ -2669,7 +2663,7 @@ typedef enum { * no record(s) were appended. 
*/ static int -mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm) +mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm) { static const int MINRECLEN = sizeof(struct mldv2_record) + sizeof(struct in6_addr); @@ -2715,7 +2709,7 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm) */ while (drt != REC_FULL) { do { - m0 = ifq->ifq_tail; + m0 = mbufq_last(mq); if (m0 != NULL && (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && @@ -2728,9 +2722,9 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm) CTR1(KTR_MLD, "%s: use previous packet", __func__); } else { - m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) - m = m_gethdr(M_DONTWAIT, MT_DATA); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CTR1(KTR_MLD, "%s: m_get*() failed", __func__); @@ -2859,7 +2853,7 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm) */ m->m_pkthdr.PH_vt.vt_nrecs++; if (m != m0) - _IF_ENQUEUE(ifq, m); + mbufq_enqueue(mq, m); nbytes += npbytes; } while (nims != NULL); drt |= crt; @@ -2873,9 +2867,9 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm) } static int -mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) +mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq) { - struct ifqueue *gq; + struct mbufq *gq; struct mbuf *m; /* pending state-change */ struct mbuf *m0; /* copy of pending state-change */ struct mbuf *mt; /* last state-change in packet */ @@ -2898,13 +2892,13 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) gq = &inm->in6m_scq; #ifdef KTR - if (gq->ifq_head == NULL) { + if (mbufq_first(gq) == NULL) { CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty", __func__, inm); } #endif - m = gq->ifq_head; + m = mbufq_first(gq); while (m != NULL) { /* * Only merge the report into the current packet if @@ -2915,7 +2909,7 @@ 
mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) * allocated clusters. */ domerge = 0; - mt = ifscq->ifq_tail; + mt = mbufq_last(scq); if (mt != NULL) { recslen = m_length(m, NULL); @@ -2927,7 +2921,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) domerge = 1; } - if (!domerge && _IF_QFULL(gq)) { + if (!domerge && mbufq_full(gq)) { CTR2(KTR_MLD, "%s: outbound queue full, skipping whole packet %p", __func__, m); @@ -2940,7 +2934,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) if (!docopy) { CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m); - _IF_DEQUEUE(gq, m0); + m0 = mbufq_dequeue(gq); m = m0->m_nextpkt; } else { CTR2(KTR_MLD, "%s: copying %p", __func__, m); @@ -2952,9 +2946,9 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) } if (!domerge) { - CTR3(KTR_MLD, "%s: queueing %p to ifscq %p)", - __func__, m0, ifscq); - _IF_ENQUEUE(ifscq, m0); + CTR3(KTR_MLD, "%s: queueing %p to scq %p)", + __func__, m0, scq); + mbufq_enqueue(scq, m0); } else { struct mbuf *mtl; /* last mbuf of packet mt */ @@ -2978,7 +2972,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) * Respond to a pending MLDv2 General Query. */ static void -mld_v2_dispatch_general_query(struct mld_ifinfo *mli) +mld_v2_dispatch_general_query(struct mld_ifsoftc *mli) { struct ifmultiaddr *ifma; struct ifnet *ifp; @@ -2991,6 +2985,15 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli) KASSERT(mli->mli_version == MLD_VERSION_2, ("%s: called when version %d", __func__, mli->mli_version)); + /* + * Check that there are some packets queued. If so, send them first. + * For large number of groups the reply to general query can take + * many packets, we should finish sending them before starting of + * queuing the new reply. 
+ */ + if (mbufq_len(&mli->mli_gq) != 0) + goto send; + ifp = mli->mli_ifp; IF_ADDR_RLOCK(ifp); @@ -3026,12 +3029,13 @@ mld_v2_dispatch_general_query(struct mld_ifinfo *mli) } IF_ADDR_RUNLOCK(ifp); +send: mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST); /* * Slew transmission of bursts over 500ms intervals. */ - if (mli->mli_gq.ifq_head != NULL) { + if (mbufq_first(&mli->mli_gq) != NULL) { mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY( MLD_RESPONSE_BURST_INTERVAL); V_interface_timers_running6 = 1; @@ -3100,7 +3104,7 @@ mld_dispatch_packet(struct mbuf *m) } mld_scrub_context(m0); - m->m_flags &= ~(M_PROTOFLAGS); + m_clrprotoflags(m); m0->m_pkthdr.rcvif = V_loif; ip6 = mtod(m0, struct ip6_hdr *); @@ -3175,14 +3179,14 @@ mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m) if (ia == NULL) CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__); - MGETHDR(mh, M_DONTWAIT, MT_HEADER); + mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { if (ia != NULL) ifa_free(&ia->ia_ifa); m_freem(m); return (NULL); } - MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report)); + M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report)); mldreclen = m_length(m, NULL); CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen); @@ -3260,7 +3264,7 @@ mld_init(void *unused __unused) mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER; mld_po.ip6po_flags = IP6PO_DONTFRAG; } -SYSINIT(mld_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_init, NULL); +SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL); static void mld_uninit(void *unused __unused) @@ -3269,7 +3273,7 @@ mld_uninit(void *unused __unused) CTR1(KTR_MLD, "%s: tearing down", __func__); MLD_LOCK_DESTROY(); } -SYSUNINIT(mld_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_uninit, NULL); +SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL); static void vnet_mld_init(const void *unused __unused) @@ -3279,19 +3283,17 @@ vnet_mld_init(const void *unused __unused) LIST_INIT(&V_mli_head); } 
-VNET_SYSINIT(vnet_mld_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_init, +VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init, NULL); static void vnet_mld_uninit(const void *unused __unused) { + /* This can happen if we shutdown the network stack. */ CTR1(KTR_MLD, "%s: tearing down", __func__); - - KASSERT(LIST_EMPTY(&V_mli_head), - ("%s: mli list not empty; ifnets not detached?", __func__)); } -VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_uninit, +VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit, NULL); static int @@ -3313,4 +3315,4 @@ static moduledata_t mld_mod = { mld_modevent, 0 }; -DECLARE_MODULE(mld, mld_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY); diff --git a/freebsd/sys/netinet6/mld6_var.h b/freebsd/sys/netinet6/mld6_var.h index e62ec236..be7e9035 100644 --- a/freebsd/sys/netinet6/mld6_var.h +++ b/freebsd/sys/netinet6/mld6_var.h @@ -35,31 +35,6 @@ * implementation-specific definitions. */ -#ifdef _KERNEL - -/* - * Per-link MLD state. 
- */ -struct mld_ifinfo { - LIST_ENTRY(mld_ifinfo) mli_link; - struct ifnet *mli_ifp; /* interface this instance belongs to */ - uint32_t mli_version; /* MLDv1 Host Compatibility Mode */ - uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */ - uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/ - uint32_t mli_flags; /* MLD per-interface flags */ - uint32_t mli_rv; /* MLDv2 Robustness Variable */ - uint32_t mli_qi; /* MLDv2 Query Interval (s) */ - uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */ - uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */ - SLIST_HEAD(,in6_multi) mli_relinmhead; /* released groups */ - struct ifqueue mli_gq; /* queue of general query responses */ -}; -#define MLIF_SILENT 0x00000001 /* Do not use MLD on this ifp */ -#define MLIF_USEALLOW 0x00000002 /* Use ALLOW/BLOCK for joins/leaves */ - -#define MLD_RANDOM_DELAY(X) (arc4random() % (X) + 1) -#define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */ - /* * MLD per-group states. */ @@ -129,6 +104,44 @@ struct mld_ifinfo { sizeof(struct icmp6_hdr)) /* + * Structure returned by net.inet6.mld.ifinfo. + */ +struct mld_ifinfo { + uint32_t mli_version; /* MLDv1 Host Compatibility Mode */ + uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */ + uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/ + uint32_t mli_flags; /* MLD per-interface flags */ +#define MLIF_SILENT 0x00000001 /* Do not use MLD on this ifp */ +#define MLIF_USEALLOW 0x00000002 /* Use ALLOW/BLOCK for joins/leaves */ + uint32_t mli_rv; /* MLDv2 Robustness Variable */ + uint32_t mli_qi; /* MLDv2 Query Interval (s) */ + uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */ + uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */ +}; + +#ifdef _KERNEL +/* + * Per-link MLD state. 
+ */ +struct mld_ifsoftc { + LIST_ENTRY(mld_ifsoftc) mli_link; + struct ifnet *mli_ifp; /* interface this instance belongs to */ + uint32_t mli_version; /* MLDv1 Host Compatibility Mode */ + uint32_t mli_v1_timer; /* MLDv1 Querier Present timer (s) */ + uint32_t mli_v2_timer; /* MLDv2 General Query (interface) timer (s)*/ + uint32_t mli_flags; /* MLD per-interface flags */ + uint32_t mli_rv; /* MLDv2 Robustness Variable */ + uint32_t mli_qi; /* MLDv2 Query Interval (s) */ + uint32_t mli_qri; /* MLDv2 Query Response Interval (s) */ + uint32_t mli_uri; /* MLDv2 Unsolicited Report Interval (s) */ + SLIST_HEAD(,in6_multi) mli_relinmhead; /* released groups */ + struct mbufq mli_gq; /* queue of general query responses */ +}; + +#define MLD_RANDOM_DELAY(X) (arc4random() % (X) + 1) +#define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */ + +/* * Subsystem lock macros. * The MLD lock is only taken with MLD. Currently it is system-wide. * VIMAGE: The lock could be pushed to per-VIMAGE granularity in future. 
@@ -147,7 +160,7 @@ struct mld_ifinfo { (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->mld_ifinfo) int mld_change_state(struct in6_multi *, const int); -struct mld_ifinfo * +struct mld_ifsoftc * mld_domifattach(struct ifnet *); void mld_domifdetach(struct ifnet *); void mld_fasttimo(void); diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c index 4369ebac..d1c7036d 100644 --- a/freebsd/sys/netinet6/nd6.c +++ b/freebsd/sys/netinet6/nd6.c @@ -52,9 +52,11 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/lock.h> #include <sys/rwlock.h> #include <sys/queue.h> +#include <sys/sdt.h> #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_arc.h> #include <net/if_dl.h> #include <net/if_types.h> @@ -64,8 +66,8 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> #include <netinet/in.h> +#include <netinet/in_kdtrace.h> #include <net/if_llatbl.h> -#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include <netinet/if_ether.h> #include <netinet6/in6_var.h> #include <netinet/ip6.h> @@ -83,7 +85,9 @@ __FBSDID("$FreeBSD$"); #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ -#define SIN6(s) ((struct sockaddr_in6 *)s) +#define SIN6(s) ((const struct sockaddr_in6 *)(s)) + +MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery"); /* timer values */ VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ @@ -111,54 +115,124 @@ VNET_DEFINE(int, nd6_debug) = 1; VNET_DEFINE(int, nd6_debug) = 0; #endif -/* for debugging? 
*/ -#if 0 -static int nd6_inuse, nd6_allocated; -#endif +static eventhandler_tag lle_event_eh, iflladdr_event_eh; VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); +VNET_DEFINE(struct rwlock, nd6_lock); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) -static struct sockaddr_in6 all1_sa; - int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int); -static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, +static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); -static struct llentry *nd6_free(struct llentry *, int); +static void nd6_free(struct llentry **, int); +static void nd6_free_redirect(const struct llentry *); static void nd6_llinfo_timer(void *); +static void nd6_llinfo_settimer_locked(struct llentry *, long); static void clear_llinfo_pqueue(struct llentry *); +static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); +static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, + const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **); +static int nd6_need_cache(struct ifnet *); + static VNET_DEFINE(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) VNET_DEFINE(struct callout, nd6_timer_ch); +#define V_nd6_timer_ch VNET(nd6_timer_ch) + +static void +nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) +{ + struct rt_addrinfo rtinfo; + struct sockaddr_in6 dst; + struct sockaddr_dl gw; + struct ifnet *ifp; + int type; + + LLE_WLOCK_ASSERT(lle); + + if (lltable_get_af(lle->lle_tbl) != AF_INET6) + return; + + switch (evt) { + case LLENTRY_RESOLVED: + type = RTM_ADD; + KASSERT(lle->la_flags & LLE_VALID, + ("%s: %p resolved but not valid?", __func__, lle)); + break; + case 
LLENTRY_EXPIRED: + type = RTM_DELETE; + break; + default: + return; + } + + ifp = lltable_get_ifp(lle->lle_tbl); + + bzero(&dst, sizeof(dst)); + bzero(&gw, sizeof(gw)); + bzero(&rtinfo, sizeof(rtinfo)); + lltable_fill_sa_entry(lle, (struct sockaddr *)&dst); + dst.sin6_scope_id = in6_getscopezone(ifp, + in6_addrscope(&dst.sin6_addr)); + gw.sdl_len = sizeof(struct sockaddr_dl); + gw.sdl_family = AF_LINK; + gw.sdl_alen = ifp->if_addrlen; + gw.sdl_index = ifp->if_index; + gw.sdl_type = ifp->if_type; + if (evt == LLENTRY_RESOLVED) + bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen); + rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; + rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; + rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; + rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( + type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB); +} + +/* + * A handler for interface link layer address change event. + */ +static void +nd6_iflladdr(void *arg __unused, struct ifnet *ifp) +{ + + lltable_update_ifaddr(LLTABLE6(ifp)); +} void nd6_init(void) { - int i; - LIST_INIT(&V_nd_prefix); + rw_init(&V_nd6_lock, "nd6"); - all1_sa.sin6_family = AF_INET6; - all1_sa.sin6_len = sizeof(struct sockaddr_in6); - for (i = 0; i < sizeof(all1_sa.sin6_addr); i++) - all1_sa.sin6_addr.s6_addr[i] = 0xff; + LIST_INIT(&V_nd_prefix); /* initialization of the default router list */ TAILQ_INIT(&V_nd_defrouter); - /* start timer */ + /* Start timers. 
*/ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); + + callout_init(&V_nd6_timer_ch, 0); + callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); + + nd6_dad_init(); + if (IS_DEFAULT_VNET(curvnet)) { + lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, + NULL, EVENTHANDLER_PRI_ANY); + iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event, + nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY); + } } #ifdef VIMAGE @@ -168,6 +242,11 @@ nd6_destroy() callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); + if (IS_DEFAULT_VNET(curvnet)) { + EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); + EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); + } + rw_destroy(&V_nd6_lock); } #endif @@ -176,7 +255,7 @@ nd6_ifattach(struct ifnet *ifp) { struct nd_ifinfo *nd; - nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO); + nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO); nd->initialized = 1; nd->chlim = IPV6_DEFHLIM; @@ -215,8 +294,19 @@ nd6_ifattach(struct ifnet *ifp) } void -nd6_ifdetach(struct nd_ifinfo *nd) +nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd) { + struct ifaddr *ifa, *next; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + + /* stop DAD processing */ + nd6_dad_stop(ifa); + } + IF_ADDR_RUNLOCK(ifp); free(nd, M_IP6NDP); } @@ -228,6 +318,8 @@ nd6_ifdetach(struct nd_ifinfo *nd) void nd6_setmtu(struct ifnet *ifp) { + if (ifp->if_afdata[AF_INET6] == NULL) + return; nd6_setmtu0(ifp, ND_IFINFO(ifp)); } @@ -372,6 +464,7 @@ nd6_options(union nd_opts *ndopts) case ND_OPT_TARGET_LINKADDR: case ND_OPT_MTU: case ND_OPT_REDIRECTED_HEADER: + case ND_OPT_NONCE: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { nd6log((LOG_INFO, "duplicated ND6 option found (type=%d)\n", @@ -401,7 +494,7 @@ nd6_options(union nd_opts *ndopts) default: /* * Unknown options must 
be silently ignored, - * to accomodate future extension to the protocol. + * to accommodate future extension to the protocol. */ nd6log((LOG_DEBUG, "nd6_options: unsupported option %d - " @@ -426,7 +519,7 @@ skip1: /* * ND6 timer routine to handle ND6 entries */ -void +static void nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { int canceled; @@ -436,48 +529,257 @@ nd6_llinfo_settimer_locked(struct llentry *ln, long tick) if (tick < 0) { ln->la_expire = 0; ln->ln_ntick = 0; - canceled = callout_stop(&ln->ln_timer_ch); + canceled = callout_stop(&ln->lle_timer); } else { - ln->la_expire = time_second + tick / hz; + ln->la_expire = time_uptime + tick / hz; LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; - canceled = callout_reset(&ln->ln_timer_ch, INT_MAX, + canceled = callout_reset(&ln->lle_timer, INT_MAX, nd6_llinfo_timer, ln); } else { ln->ln_ntick = 0; - canceled = callout_reset(&ln->ln_timer_ch, tick, + canceled = callout_reset(&ln->lle_timer, tick, nd6_llinfo_timer, ln); } } - if (canceled) + if (canceled > 0) LLE_REMREF(ln); } -void -nd6_llinfo_settimer(struct llentry *ln, long tick) +/* + * Gets source address of the first packet in hold queue + * and stores it in @src. + * Returns pointer to @src (if hold queue is not empty) or NULL. + * + * Set noinline to be dtrace-friendly + */ +static __noinline struct in6_addr * +nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) { + struct ip6_hdr hdr; + struct mbuf *m; - LLE_WLOCK(ln); - nd6_llinfo_settimer_locked(ln, tick); - LLE_WUNLOCK(ln); + if (ln->la_hold == NULL) + return (NULL); + + /* + * assume every packet in la_hold has the same IP header + */ + m = ln->la_hold; + if (sizeof(hdr) > m->m_len) + return (NULL); + + m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr); + *src = hdr.ip6_src; + + return (src); } -static void +/* + * Checks if we need to switch from STALE state. 
+ * + * RFC 4861 requires switching from STALE to DELAY state + * on first packet matching entry, waiting V_nd6_delay and + * transition to PROBE state (if upper layer confirmation was + * not received). + * + * This code performs a bit differently: + * On packet hit we don't change state (but desired state + * can be guessed by control plane). However, after V_nd6_delay + * seconds code will transition to PROBE state (so DELAY state + * is kinda skipped in most situations). + * + * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so + * we perform the following upon entering STALE state: + * + * 1) Arm timer to run each V_nd6_delay seconds to make sure that + * if packet was transmitted at the start of given interval, we + * would be able to switch to PROBE state in V_nd6_delay seconds + * as user expects. + * + * 2) Reschedule timer until original V_nd6_gctimer expires keeping + * lle in STALE state (remaining timer value stored in lle_remtime). + * + * 3) Reschedule timer if packet was transmitted less that V_nd6_delay + * seconds ago. + * + * Returns non-zero value if the entry is still STALE (storing + * the next timer interval in @pdelay). + * + * Returns zero value if original timer expired or we need to switch to + * PROBE (store that in @do_switch variable). + */ +static int +nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch) +{ + int nd_delay, nd_gctimer, r_skip_req; + time_t lle_hittime; + long delay; + + *do_switch = 0; + nd_gctimer = V_nd6_gctimer; + nd_delay = V_nd6_delay; + + LLE_REQ_LOCK(lle); + r_skip_req = lle->r_skip_req; + lle_hittime = lle->lle_hittime; + LLE_REQ_UNLOCK(lle); + + if (r_skip_req > 0) { + + /* + * Nonzero r_skip_req value was set upon entering + * STALE state. Since value was not changed, no + * packets were passed using this lle. Ask for + * timer reschedule and keep STALE state. 
+ */ + delay = (long)(MIN(nd_gctimer, nd_delay)); + delay *= hz; + if (lle->lle_remtime > delay) + lle->lle_remtime -= delay; + else { + delay = lle->lle_remtime; + lle->lle_remtime = 0; + } + + if (delay == 0) { + + /* + * The original ng6_gctime timeout ended, + * no more rescheduling. + */ + return (0); + } + + *pdelay = delay; + return (1); + } + + /* + * Packet received. Verify timestamp + */ + delay = (long)(time_uptime - lle_hittime); + if (delay < nd_delay) { + + /* + * V_nd6_delay still not passed since the first + * hit in STALE state. + * Reshedule timer and return. + */ + *pdelay = (long)(nd_delay - delay) * hz; + return (1); + } + + /* Request switching to probe */ + *do_switch = 1; + return (0); +} + + +/* + * Switch @lle state to new state optionally arming timers. + * + * Set noinline to be dtrace-friendly + */ +__noinline void +nd6_llinfo_setstate(struct llentry *lle, int newstate) +{ + struct ifnet *ifp; + int nd_gctimer, nd_delay; + long delay, remtime; + + delay = 0; + remtime = 0; + + switch (newstate) { + case ND6_LLINFO_INCOMPLETE: + ifp = lle->lle_tbl->llt_ifp; + delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000; + break; + case ND6_LLINFO_REACHABLE: + if (!ND6_LLINFO_PERMANENT(lle)) { + ifp = lle->lle_tbl->llt_ifp; + delay = (long)ND_IFINFO(ifp)->reachable * hz; + } + break; + case ND6_LLINFO_STALE: + + /* + * Notify fast path that we want to know if any packet + * is transmitted by setting r_skip_req. + */ + LLE_REQ_LOCK(lle); + lle->r_skip_req = 1; + LLE_REQ_UNLOCK(lle); + nd_delay = V_nd6_delay; + nd_gctimer = V_nd6_gctimer; + + delay = (long)(MIN(nd_gctimer, nd_delay)) * hz; + remtime = (long)nd_gctimer * hz - delay; + break; + case ND6_LLINFO_DELAY: + lle->la_asked = 0; + delay = (long)V_nd6_delay * hz; + break; + } + + if (delay > 0) + nd6_llinfo_settimer_locked(lle, delay); + + lle->lle_remtime = remtime; + lle->ln_state = newstate; +} + +/* + * Timer-dependent part of nd state machine. 
+ * + * Set noinline to be dtrace-friendly + */ +static __noinline void nd6_llinfo_timer(void *arg) { struct llentry *ln; - struct in6_addr *dst; + struct in6_addr *dst, *pdst, *psrc, src; struct ifnet *ifp; - struct nd_ifinfo *ndi = NULL; + struct nd_ifinfo *ndi; + int do_switch, send_ns; + long delay; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); ln = (struct llentry *)arg; - LLE_WLOCK_ASSERT(ln); - ifp = ln->lle_tbl->llt_ifp; - + ifp = lltable_get_ifp(ln->lle_tbl); CURVNET_SET(ifp->if_vnet); + ND6_RLOCK(); + LLE_WLOCK(ln); + if (callout_pending(&ln->lle_timer)) { + /* + * Here we are a bit odd here in the treatment of + * active/pending. If the pending bit is set, it got + * rescheduled before I ran. The active + * bit we ignore, since if it was stopped + * in ll_tablefree() and was currently running + * it would have return 0 so the code would + * not have deleted it since the callout could + * not be stopped so we want to go through + * with the delete here now. If the callout + * was restarted, the pending bit will be back on and + * we just want to bail since the callout_reset would + * return 1 and our reference would have been removed + * by nd6_llinfo_settimer_locked above since canceled + * would have been 1. 
+ */ + LLE_WUNLOCK(ln); + ND6_RUNLOCK(); + CURVNET_RESTORE(); + return; + } + ndi = ND_IFINFO(ifp); + send_ns = 0; + dst = &ln->r_l3addr.addr6; + pdst = dst; + if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { ln->ln_ntick -= INT_MAX; @@ -489,15 +791,12 @@ nd6_llinfo_timer(void *arg) goto done; } - ndi = ND_IFINFO(ifp); - dst = &L3_ADDR_SIN6(ln)->sin6_addr; if (ln->la_flags & LLE_STATIC) { goto done; } if (ln->la_flags & LLE_DELETED) { - (void)nd6_free(ln, 0); - ln = NULL; + nd6_free(&ln, 0); goto done; } @@ -505,10 +804,9 @@ nd6_llinfo_timer(void *arg) case ND6_LLINFO_INCOMPLETE: if (ln->la_asked < V_nd6_mmaxtries) { ln->la_asked++; - nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); - LLE_WUNLOCK(ln); - nd6_ns_output(ifp, NULL, dst, ln, 0); - LLE_WLOCK(ln); + send_ns = 1; + /* Send NS to multicast address */ + pdst = NULL; } else { struct mbuf *m = ln->la_hold; if (m) { @@ -523,55 +821,59 @@ nd6_llinfo_timer(void *arg) ln->la_hold = m0; clear_llinfo_pqueue(ln); } - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); - (void)nd6_free(ln, 0); - ln = NULL; + nd6_free(&ln, 0); if (m != NULL) icmp6_error2(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, ifp); } break; case ND6_LLINFO_REACHABLE: - if (!ND6_LLINFO_PERMANENT(ln)) { - ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); - } + if (!ND6_LLINFO_PERMANENT(ln)) + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); break; case ND6_LLINFO_STALE: - /* Garbage Collection(RFC 2461 5.3) */ - if (!ND6_LLINFO_PERMANENT(ln)) { - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); - (void)nd6_free(ln, 1); - ln = NULL; + if (nd6_is_stale(ln, &delay, &do_switch) != 0) { + + /* + * No packet has used this entry and GC timeout + * has not been passed. Reshedule timer and + * return. + */ + nd6_llinfo_settimer_locked(ln, delay); + break; } - break; + + if (do_switch == 0) { + + /* + * GC timer has ended and entry hasn't been used. 
+ * Run Garbage collector (RFC 4861, 5.3) + */ + if (!ND6_LLINFO_PERMANENT(ln)) + nd6_free(&ln, 1); + break; + } + + /* Entry has been used AND delay timer has ended. */ + + /* FALLTHROUGH */ case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->la_asked = 1; - ln->ln_state = ND6_LLINFO_PROBE; - nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); - LLE_WUNLOCK(ln); - nd6_ns_output(ifp, dst, dst, ln, 0); - LLE_WLOCK(ln); - } else { - ln->ln_state = ND6_LLINFO_STALE; /* XXX */ - nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); - } + nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE); + send_ns = 1; + } else + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */ break; case ND6_LLINFO_PROBE: if (ln->la_asked < V_nd6_umaxtries) { ln->la_asked++; - nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); - LLE_WUNLOCK(ln); - nd6_ns_output(ifp, dst, dst, ln, 0); - LLE_WLOCK(ln); + send_ns = 1; } else { - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); - (void)nd6_free(ln, 0); - ln = NULL; + nd6_free(&ln, 0); } break; default: @@ -580,6 +882,16 @@ nd6_llinfo_timer(void *arg) } done: if (ln != NULL) + ND6_RUNLOCK(); + if (send_ns != 0) { + nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); + psrc = nd6_llinfo_get_holdsrc(ln, &src); + LLE_FREE_LOCKED(ln); + ln = NULL; + nd6_ns_output(ifp, psrc, pdst, dst, NULL); + } + + if (ln != NULL) LLE_FREE_LOCKED(ln); CURVNET_RESTORE(); } @@ -592,19 +904,23 @@ void nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); - int s; + struct nd_drhead drq; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; - callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, - nd6_timer, curvnet); + TAILQ_INIT(&drq); /* expire default router list */ - s = splnet(); - TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { - if (dr->expire && dr->expire < time_second) - defrtrlist_del(dr); + ND6_WLOCK(); + TAILQ_FOREACH_SAFE(dr, 
&V_nd_defrouter, dr_entry, ndr) + if (dr->expire && dr->expire < time_uptime) + defrouter_unlink(dr, &drq); + ND6_WUNLOCK(); + + while ((dr = TAILQ_FIRST(&drq)) != NULL) { + TAILQ_REMOVE(&drq, dr, dr_entry); + defrouter_del(dr); } /* @@ -670,8 +986,31 @@ nd6_timer(void *arg) goto addrloop; } } + } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) { + /* + * Schedule DAD for a tentative address. This happens + * if the interface was down or not running + * when the address was configured. + */ + int delay; + + delay = arc4random() % + (MAX_RTR_SOLICITATION_DELAY * hz); + nd6_dad_start((struct ifaddr *)ia6, delay); } else { /* + * Check status of the interface. If it is down, + * mark the address as tentative for future DAD. + */ + if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 || + (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING) + == 0 || + (ND_IFINFO(ia6->ia_ifp)->flags & + ND6_IFF_IFDISABLED) != 0) { + ia6->ia6_flags &= ~IN6_IFF_DUPLICATED; + ia6->ia6_flags |= IN6_IFF_TENTATIVE; + } + /* * A new RA might have made a deprecated address * preferred. */ @@ -687,7 +1026,7 @@ nd6_timer(void *arg) * prefix is not necessary. */ if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && - time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) { + time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) { /* * address expiration and prefix expiration are @@ -696,7 +1035,10 @@ nd6_timer(void *arg) prelist_remove(pr); } } - splx(s); + + callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, + nd6_timer, curvnet); + CURVNET_RESTORE(); } @@ -748,11 +1090,10 @@ regen_tmpaddr(struct in6_ifaddr *ia6) * address with the prefix. 
*/ if (!IFA6_IS_DEPRECATED(it6)) - public_ifa6 = it6; - - if (public_ifa6 != NULL) - ifa_ref(&public_ifa6->ia_ifa); + public_ifa6 = it6; } + if (public_ifa6 != NULL) + ifa_ref(&public_ifa6->ia_ifa); IF_ADDR_RUNLOCK(ifp); if (public_ifa6 != NULL) { @@ -772,35 +1113,43 @@ regen_tmpaddr(struct in6_ifaddr *ia6) } /* - * Nuke neighbor cache/prefix/default router management table, right before - * ifp goes away. + * Remove prefix and default router list entries corresponding to ifp. Neighbor + * cache entries are freed in in6_domifdetach(). */ void nd6_purge(struct ifnet *ifp) { + struct nd_drhead drq; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; + TAILQ_INIT(&drq); + /* * Nuke default router list entries toward ifp. * We defer removal of default router list entries that is installed * in the routing table, in order to keep additional side effects as * small as possible. */ + ND6_WLOCK(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->installed) continue; - if (dr->ifp == ifp) - defrtrlist_del(dr); + defrouter_unlink(dr, &drq); } TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (!dr->installed) continue; - if (dr->ifp == ifp) - defrtrlist_del(dr); + defrouter_unlink(dr, &drq); + } + ND6_WUNLOCK(); + + while ((dr = TAILQ_FIRST(&drq)) != NULL) { + TAILQ_REMOVE(&drq, dr, dr_entry); + defrouter_del(dr); } /* Nuke prefix list entries toward ifp */ @@ -814,14 +1163,6 @@ nd6_purge(struct ifnet *ifp) */ pr->ndpr_refcnt = 0; - /* - * Previously, pr->ndpr_addr is removed as well, - * but I strongly believe we don't have to do it. - * nd6_purge() is only called from in6_ifdetach(), - * which removes all the associated interface addresses - * by itself. - * (jinmei@kame.net 20010129) - */ prelist_remove(pr); } } @@ -834,14 +1175,6 @@ nd6_purge(struct ifnet *ifp) /* Refresh default router list. 
*/ defrouter_select(); } - - /* XXXXX - * We do not nuke the neighbor cache entries here any more - * because the neighbor cache is kept in if_afdata[AF_INET6]. - * nd6_purge() is invoked by in6_ifdetach() which is called - * from if_detach() where everything gets purged. So let - * in6_domifdetach() do the actual L2 table purging work. - */ } /* @@ -849,11 +1182,10 @@ nd6_purge(struct ifnet *ifp) * Returns the llentry locked */ struct llentry * -nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) +nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; - int llflags; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); @@ -862,16 +1194,26 @@ nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) IF_AFDATA_LOCK_ASSERT(ifp); - llflags = 0; - if (flags & ND6_CREATE) - llflags |= LLE_CREATE; - if (flags & ND6_EXCLUSIVE) - llflags |= LLE_EXCLUSIVE; - - ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6); - if ((ln != NULL) && (llflags & LLE_CREATE)) + ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); + + return (ln); +} + +struct llentry * +nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp) +{ + struct sockaddr_in6 sin6; + struct llentry *ln; + + bzero(&sin6, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_addr = *addr6; + + ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6); + if (ln != NULL) ln->ln_state = ND6_LLINFO_NOSTATE; - + return (ln); } @@ -881,10 +1223,14 @@ nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) * to not reenter the routing code from within itself. 
*/ static int -nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) +nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; struct ifaddr *dstaddr; + struct rt_addrinfo info; + struct sockaddr_in6 rt_key; + struct sockaddr *dst6; + int fibnum; /* * A link-local address is always a neighbor. @@ -909,6 +1255,13 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) return (0); } + bzero(&rt_key, sizeof(rt_key)); + bzero(&info, sizeof(info)); + info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key; + + /* Always use the default FIB here. XXME - why? */ + fibnum = RT_DEFAULT_FIB; + /* * If the address matches one of our addresses, * it should be a neighbor. @@ -920,12 +1273,13 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) continue; if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { - struct rtentry *rt; /* Always use the default FIB here. */ - rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, - 0, 0, RT_DEFAULT_FIB); - if (rt == NULL) + dst6 = (struct sockaddr *)&pr->ndpr_prefix; + + /* Restore length field before retrying lookup */ + rt_key.sin6_len = sizeof(rt_key); + if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0) continue; /* * This is the case where multiple interfaces @@ -938,11 +1292,8 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * differ. */ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, - &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) { - RTFREE_LOCKED(rt); + &rt_key.sin6_addr)) continue; - } - RTFREE_LOCKED(rt); } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, @@ -954,7 +1305,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. 
*/ - dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr); + dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS); if (dstaddr != NULL) { if (dstaddr->ifa_ifp == ifp) { ifa_free(dstaddr); @@ -982,7 +1333,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * XXX: should take care of the destination of a p2p link? */ int -nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) +nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct llentry *lle; int rc = 0; @@ -1009,15 +1360,31 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) * Since the function would cause significant changes in the kernel, DO NOT * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. + * + * Set noinline to be dtrace-friendly */ -static struct llentry * -nd6_free(struct llentry *ln, int gc) +static __noinline void +nd6_free(struct llentry **lnp, int gc) { - struct llentry *next; - struct nd_defrouter *dr; struct ifnet *ifp; + struct llentry *ln; + struct nd_defrouter *dr; + + ln = *lnp; + *lnp = NULL; LLE_WLOCK_ASSERT(ln); + ND6_RLOCK_ASSERT(); + + ifp = lltable_get_ifp(ln->lle_tbl); + if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0) + dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp); + else + dr = NULL; + ND6_RUNLOCK(); + + if ((ln->la_flags & LLE_DELETED) == 0) + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); /* * we used to have pfctlinput(PRC_HOSTDEAD) here. 
@@ -1027,11 +1394,7 @@ nd6_free(struct llentry *ln, int gc) /* cancel timer */ nd6_llinfo_settimer_locked(ln, -1); - ifp = ln->lle_tbl->llt_ifp; - if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { - dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); - if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { /* @@ -1046,17 +1409,17 @@ nd6_free(struct llentry *ln, int gc) * XXX: the check for ln_state would be redundant, * but we intentionally keep it just in case. */ - if (dr->expire > time_second) + if (dr->expire > time_uptime) nd6_llinfo_settimer_locked(ln, - (dr->expire - time_second) * hz); + (dr->expire - time_uptime) * hz); else nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); - next = LIST_NEXT(ln, lle_next); LLE_REMREF(ln); LLE_WUNLOCK(ln); - return (next); + defrouter_rele(dr); + return; } if (dr) { @@ -1091,7 +1454,7 @@ nd6_free(struct llentry *ln, int gc) * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ - rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); + rt6_flush(&ln->r_l3addr.addr6, ifp); } if (dr) { @@ -1109,83 +1472,66 @@ nd6_free(struct llentry *ln, int gc) defrouter_select(); } + /* + * If this entry was added by an on-link redirect, remove the + * corresponding host route. + */ + if (ln->la_flags & LLE_REDIRECT) + nd6_free_redirect(ln); + if (ln->ln_router || dr) LLE_WLOCK(ln); } /* - * Before deleting the entry, remember the next entry as the - * return value. We need this because pfxlist_onlink_check() above - * might have freed other entries (particularly the old next entry) as - * a side effect (XXX). - */ - next = LIST_NEXT(ln, lle_next); - - /* * Save to unlock. We still hold an extra reference and will not * free(9) in llentry_free() if someone else holds one as well. */ LLE_WUNLOCK(ln); IF_AFDATA_LOCK(ifp); LLE_WLOCK(ln); - /* Guard against race with other llentry_free(). 
*/ if (ln->la_flags & LLE_LINKED) { + /* Remove callout reference */ LLE_REMREF(ln); - llentry_free(ln); - } else - LLE_FREE_LOCKED(ln); - + lltable_unlink_entry(ln->lle_tbl, ln); + } IF_AFDATA_UNLOCK(ifp); - return (next); + llentry_free(ln); + if (dr != NULL) + defrouter_rele(dr); } -/* - * Upper-layer reachability hint for Neighbor Unreachability Detection. - * - * XXX cost-effective methods? - */ -void -nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force) +static int +nd6_isdynrte(const struct rtentry *rt, void *xap) { - struct llentry *ln; - struct ifnet *ifp; - if ((dst6 == NULL) || (rt == NULL)) - return; - - ifp = rt->rt_ifp; - IF_AFDATA_RLOCK(ifp); - ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL); - IF_AFDATA_RUNLOCK(ifp); - if (ln == NULL) - return; + if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC)) + return (1); - if (ln->ln_state < ND6_LLINFO_REACHABLE) - goto done; + return (0); +} +/* + * Remove the rtentry for the given llentry, + * both of which were installed by a redirect. + */ +static void +nd6_free_redirect(const struct llentry *ln) +{ + int fibnum; + struct sockaddr_in6 sin6; + struct rt_addrinfo info; - /* - * if we get upper-layer reachability confirmation many times, - * it is possible we have false information. - */ - if (!force) { - ln->ln_byhint++; - if (ln->ln_byhint > V_nd6_maxnudhint) { - goto done; - } - } + lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); + memset(&info, 0, sizeof(info)); + info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; + info.rti_filter = nd6_isdynrte; - ln->ln_state = ND6_LLINFO_REACHABLE; - if (!ND6_LLINFO_PERMANENT(ln)) { - nd6_llinfo_settimer_locked(ln, - (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); - } -done: - LLE_WUNLOCK(ln); + for (fibnum = 0; fibnum < rt_numfibs; fibnum++) + rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); } - /* * Rejuvenate this function for routing operations related * processing. 
@@ -1197,7 +1543,6 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) struct nd_defrouter *dr; struct ifnet *ifp; - RT_LOCK_ASSERT(rt); gateway = (struct sockaddr_in6 *)rt->rt_gateway; ifp = rt->rt_ifp; @@ -1216,12 +1561,13 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) /* * check for default route */ - if (IN6_ARE_ADDR_EQUAL(&in6addr_any, - &SIN6(rt_key(rt))->sin6_addr)) { - + if (IN6_ARE_ADDR_EQUAL(&in6addr_any, + &SIN6(rt_key(rt))->sin6_addr)) { dr = defrouter_lookup(&gateway->sin6_addr, ifp); - if (dr != NULL) + if (dr != NULL) { dr->installed = 0; + defrouter_rele(dr); + } } break; } @@ -1231,100 +1577,14 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { - struct in6_drlist *drl = (struct in6_drlist *)data; - struct in6_oprlist *oprl = (struct in6_oprlist *)data; struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; - struct nd_defrouter *dr; - struct nd_prefix *pr; - int i = 0, error = 0; - int s; + int error = 0; + if (ifp->if_afdata[AF_INET6] == NULL) + return (EPFNOSUPPORT); switch (cmd) { - case SIOCGDRLST_IN6: - /* - * obsolete API, use sysctl under net.inet6.icmp6 - */ - bzero(drl, sizeof(*drl)); - s = splnet(); - TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { - if (i >= DRLSTSIZ) - break; - drl->defrouter[i].rtaddr = dr->rtaddr; - in6_clearscope(&drl->defrouter[i].rtaddr); - - drl->defrouter[i].flags = dr->flags; - drl->defrouter[i].rtlifetime = dr->rtlifetime; - drl->defrouter[i].expire = dr->expire; - drl->defrouter[i].if_index = dr->ifp->if_index; - i++; - } - splx(s); - break; - case SIOCGPRLST_IN6: - /* - * obsolete API, use sysctl under net.inet6.icmp6 - * - * XXX the structure in6_prlist was changed in backward- - * incompatible manner. 
in6_oprlist is used for SIOCGPRLST_IN6, - * in6_prlist is used for nd6_sysctl() - fill_prlist(). - */ - /* - * XXX meaning of fields, especialy "raflags", is very - * differnet between RA prefix list and RR/static prefix list. - * how about separating ioctls into two? - */ - bzero(oprl, sizeof(*oprl)); - s = splnet(); - LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { - struct nd_pfxrouter *pfr; - int j; - - if (i >= PRLSTSIZ) - break; - oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr; - oprl->prefix[i].raflags = pr->ndpr_raf; - oprl->prefix[i].prefixlen = pr->ndpr_plen; - oprl->prefix[i].vltime = pr->ndpr_vltime; - oprl->prefix[i].pltime = pr->ndpr_pltime; - oprl->prefix[i].if_index = pr->ndpr_ifp->if_index; - if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) - oprl->prefix[i].expire = 0; - else { - time_t maxexpire; - - /* XXX: we assume time_t is signed. */ - maxexpire = (-1) & - ~((time_t)1 << - ((sizeof(maxexpire) * 8) - 1)); - if (pr->ndpr_vltime < - maxexpire - pr->ndpr_lastupdate) { - oprl->prefix[i].expire = - pr->ndpr_lastupdate + - pr->ndpr_vltime; - } else - oprl->prefix[i].expire = maxexpire; - } - - j = 0; - LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { - if (j < DRLSTSIZ) { -#define RTRADDR oprl->prefix[i].advrtr[j] - RTRADDR = pfr->router->rtaddr; - in6_clearscope(&RTRADDR); -#undef RTRADDR - } - j++; - } - oprl->prefix[i].advrtrs = j; - oprl->prefix[i].origin = PR_ORIG_RA; - - i++; - } - splx(s); - - break; case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. */ @@ -1344,7 +1604,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) case SIOCSIFINFO_IN6: /* * used to change host variables from userland. - * intented for a use on router to reflect RA configurations. + * intended for a use on router to reflect RA configurations. */ /* 0 means 'unspecified' */ if (ND.linkmtu != 0) { @@ -1384,22 +1644,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * do not clear ND6_IFF_IFDISABLED. 
* See RFC 4862, Section 5.4.5. */ - int duplicated_linklocal = 0; - IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && - IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { - duplicated_linklocal = 1; + IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; - } } IF_ADDR_RUNLOCK(ifp); - if (duplicated_linklocal) { + if (ifa != NULL) { + /* LLA is duplicated. */ ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" @@ -1415,14 +1672,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) /* Mark all IPv6 address as tentative. */ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != AF_INET6) - continue; - ia = (struct in6_ifaddr *)ifa; - ia->ia6_flags |= IN6_IFF_TENTATIVE; + if (V_ip6_dad_count > 0 && + (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, + ifa_link) { + if (ifa->ifa_addr->sa_family != + AF_INET6) + continue; + ia = (struct in6_ifaddr *)ifa; + ia->ia6_flags |= IN6_IFF_TENTATIVE; + } + IF_ADDR_RUNLOCK(ifp); } - IF_ADDR_RUNLOCK(ifp); } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { @@ -1440,20 +1702,19 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * address is assigned, and IFF_UP, try to * assign one. */ - int haslinklocal = 0; - IF_ADDR_RLOCK(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family != AF_INET6) + TAILQ_FOREACH(ifa, &ifp->if_addrhead, + ifa_link) { + if (ifa->ifa_addr->sa_family != + AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; - if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { - haslinklocal = 1; + if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; - } } IF_ADDR_RUNLOCK(ifp); - if (!haslinklocal) + if (ifa != NULL) + /* No LLA is configured. 
*/ in6_ifattach(ifp, NULL); } } @@ -1471,7 +1732,6 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) /* flush all the prefix advertised by routers */ struct nd_prefix *pr, *next; - s = splnet(); LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { struct in6_ifaddr *ia, *ia_next; @@ -1490,21 +1750,28 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) } prelist_remove(pr); } - splx(s); break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ - struct nd_defrouter *dr, *next; + struct nd_drhead drq; + struct nd_defrouter *dr; + + TAILQ_INIT(&drq); - s = splnet(); defrouter_reset(); - TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { - defrtrlist_del(dr); + + ND6_WLOCK(); + while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL) + defrouter_unlink(dr, &drq); + ND6_WUNLOCK(); + while ((dr = TAILQ_FIRST(&drq)) != NULL) { + TAILQ_REMOVE(&drq, dr, dr_entry); + defrouter_del(dr); } + defrouter_select(); - splx(s); break; } case SIOCGNBRINFO_IN6: @@ -1526,7 +1793,11 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) nbi->state = ln->ln_state; nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; - nbi->expire = ln->la_expire; + if (ln->la_expire == 0) + nbi->expire = 0; + else + nbi->expire = ln->la_expire + ln->lle_remtime / hz + + (time_second - time_uptime); LLE_RUNLOCK(ln); break; } @@ -1540,31 +1811,108 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) } /* + * Calculates new isRouter value based on provided parameters and + * returns it. + */ +static int +nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr, + int ln_router) +{ + + /* + * ICMP6 type dependent behavior. + * + * NS: clear IsRouter if new entry + * RS: clear IsRouter + * RA: set IsRouter if there's lladdr + * redir: clear IsRouter if new entry + * + * RA case, (1): + * The spec says that we must set IsRouter in the following cases: + * - If lladdr exist, set IsRouter. This means (1-5). 
+ * - If it is old entry (!newentry), set IsRouter. This means (7). + * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. + * A quetion arises for (1) case. (1) case has no lladdr in the + * neighbor cache, this is similar to (6). + * This case is rare but we figured that we MUST NOT set IsRouter. + * + * is_new old_addr new_addr NS RS RA redir + * D R + * 0 n n (1) c ? s + * 0 y n (2) c s s + * 0 n y (3) c s s + * 0 y y (4) c s s + * 0 y y (5) c s s + * 1 -- n (6) c c c s + * 1 -- y (7) c c s c s + * + * (c=clear s=set) + */ + switch (type & 0xff) { + case ND_NEIGHBOR_SOLICIT: + /* + * New entry must have is_router flag cleared. + */ + if (is_new) /* (6-7) */ + ln_router = 0; + break; + case ND_REDIRECT: + /* + * If the icmp is a redirect to a better router, always set the + * is_router flag. Otherwise, if the entry is newly created, + * clear the flag. [RFC 2461, sec 8.3] + */ + if (code == ND_REDIRECT_ROUTER) + ln_router = 1; + else { + if (is_new) /* (6-7) */ + ln_router = 0; + } + break; + case ND_ROUTER_SOLICIT: + /* + * is_router flag must always be cleared. + */ + ln_router = 0; + break; + case ND_ROUTER_ADVERT: + /* + * Mark an entry with lladdr as a router. + */ + if ((!is_new && (old_addr || new_addr)) || /* (2-5) */ + (is_new && new_addr)) { /* (7) */ + ln_router = 1; + } + break; + } + + return (ln_router); +} + +/* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) * * type - ICMP6 type * code - type dependent information * - * XXXXX - * The caller of this function already acquired the ndp - * cache table lock because the cache entry is returned. 
*/ -struct llentry * +void nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { - struct llentry *ln = NULL; + struct llentry *ln = NULL, *ln_tmp; int is_newentry; int do_update; int olladdr; int llchange; int flags; - int newstate = 0; uint16_t router = 0; struct sockaddr_in6 sin6; struct mbuf *chain = NULL; - int static_route = 0; + u_char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; IF_AFDATA_UNLOCK_ASSERT(ifp); @@ -1573,7 +1921,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) - return NULL; + return; /* * Validation about ifp->if_addrlen and lladdrlen must be done in @@ -1584,197 +1932,122 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ - flags = lladdr ? ND6_EXCLUSIVE : 0; + flags = lladdr ? LLE_EXCLUSIVE : 0; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(from, flags, ifp); IF_AFDATA_RUNLOCK(ifp); + is_newentry = 0; if (ln == NULL) { - flags |= ND6_EXCLUSIVE; - IF_AFDATA_LOCK(ifp); - ln = nd6_lookup(from, flags | ND6_CREATE, ifp); - IF_AFDATA_UNLOCK(ifp); - is_newentry = 1; - } else { - /* do nothing if static ndp is set */ - if (ln->la_flags & LLE_STATIC) { - static_route = 1; - goto done; + flags |= LLE_EXCLUSIVE; + ln = nd6_alloc(from, 0, ifp); + if (ln == NULL) + return; + + /* + * Since we already know all the data for the new entry, + * fill it before insertion. 
+ */ + if (lladdr != NULL) { + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return; + lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, + lladdr_off); } - is_newentry = 0; + + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(ln); + /* Prefer any existing lle over newly-created one */ + ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp); + if (ln_tmp == NULL) + lltable_link_entry(LLTABLE6(ifp), ln); + IF_AFDATA_WUNLOCK(ifp); + if (ln_tmp == NULL) { + /* No existing lle, mark as new entry (6,7) */ + is_newentry = 1; + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); + if (lladdr != NULL) /* (7) */ + EVENTHANDLER_INVOKE(lle_event, ln, + LLENTRY_RESOLVED); + } else { + lltable_free_entry(LLTABLE6(ifp), ln); + ln = ln_tmp; + ln_tmp = NULL; + } + } + /* do nothing if static ndp is set */ + if ((ln->la_flags & LLE_STATIC)) { + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + return; } - if (ln == NULL) - return (NULL); olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; if (olladdr && lladdr) { - llchange = bcmp(lladdr, &ln->ll_addr, + llchange = bcmp(lladdr, ln->ll_addr, ifp->if_addrlen); - } else + } else if (!olladdr && lladdr) + llchange = 1; + else llchange = 0; /* * newentry olladdr lladdr llchange (*=record) * 0 n n -- (1) * 0 y n -- (2) - * 0 n y -- (3) * STALE + * 0 n y y (3) * STALE * 0 y y n (4) * * 0 y y y (5) * STALE * 1 -- n -- (6) NOSTATE(= PASSIVE) * 1 -- y -- (7) * STALE */ - if (lladdr) { /* (3-5) and (7) */ + do_update = 0; + if (is_newentry == 0 && llchange != 0) { + do_update = 1; /* (3,5) */ + /* * Record source link-layer address * XXX is it dependent to ifp->if_type? 
*/ - bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); - ln->la_flags |= LLE_VALID; - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); - } - - if (!is_newentry) { - if ((!olladdr && lladdr != NULL) || /* (3) */ - (olladdr && lladdr != NULL && llchange)) { /* (5) */ - do_update = 1; - newstate = ND6_LLINFO_STALE; - } else /* (1-2,4) */ - do_update = 0; - } else { - do_update = 1; - if (lladdr == NULL) /* (6) */ - newstate = ND6_LLINFO_NOSTATE; - else /* (7) */ - newstate = ND6_LLINFO_STALE; - } - - if (do_update) { - /* - * Update the state of the neighbor cache. - */ - ln->ln_state = newstate; + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return; - if (ln->ln_state == ND6_LLINFO_STALE) { - /* - * XXX: since nd6_output() below will cause - * state tansition to DELAY and reset the timer, - * we must set the timer now, although it is actually - * meaningless. - */ - nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, + lladdr_off) == 0) { + /* Entry was deleted */ + return; + } - if (ln->la_hold) { - struct mbuf *m_hold, *m_hold_next; + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); - /* - * reset the la_hold in advance, to explicitly - * prevent a la_hold lookup in nd6_output() - * (wouldn't happen, though...) - */ - for (m_hold = ln->la_hold, ln->la_hold = NULL; - m_hold; m_hold = m_hold_next) { - m_hold_next = m_hold->m_nextpkt; - m_hold->m_nextpkt = NULL; + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); - /* - * we assume ifp is not a p2p here, so - * just set the 2nd argument as the - * 1st one. - */ - nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); - } - /* - * If we have mbufs in the chain we need to do - * deferred transmit. Copy the address from the - * llentry before dropping the lock down below. 
- */ - if (chain != NULL) - memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); - } - } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { - /* probe right away */ - nd6_llinfo_settimer_locked((void *)ln, 0); - } + if (ln->la_hold != NULL) + nd6_grab_holdchain(ln, &chain, &sin6); } - /* - * ICMP6 type dependent behavior. - * - * NS: clear IsRouter if new entry - * RS: clear IsRouter - * RA: set IsRouter if there's lladdr - * redir: clear IsRouter if new entry - * - * RA case, (1): - * The spec says that we must set IsRouter in the following cases: - * - If lladdr exist, set IsRouter. This means (1-5). - * - If it is old entry (!newentry), set IsRouter. This means (7). - * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. - * A quetion arises for (1) case. (1) case has no lladdr in the - * neighbor cache, this is similar to (6). - * This case is rare but we figured that we MUST NOT set IsRouter. - * - * newentry olladdr lladdr llchange NS RS RA redir - * D R - * 0 n n -- (1) c ? s - * 0 y n -- (2) c s s - * 0 n y -- (3) c s s - * 0 y y n (4) c s s - * 0 y y y (5) c s s - * 1 -- n -- (6) c c c s - * 1 -- y -- (7) c c s c s - * - * (c=clear s=set) - */ - switch (type & 0xff) { - case ND_NEIGHBOR_SOLICIT: - /* - * New entry must have is_router flag cleared. - */ - if (is_newentry) /* (6-7) */ - ln->ln_router = 0; - break; - case ND_REDIRECT: - /* - * If the icmp is a redirect to a better router, always set the - * is_router flag. Otherwise, if the entry is newly created, - * clear the flag. [RFC 2461, sec 8.3] - */ - if (code == ND_REDIRECT_ROUTER) - ln->ln_router = 1; - else if (is_newentry) /* (6-7) */ - ln->ln_router = 0; - break; - case ND_ROUTER_SOLICIT: - /* - * is_router flag must always be cleared. - */ - ln->ln_router = 0; - break; - case ND_ROUTER_ADVERT: - /* - * Mark an entry with lladdr as a router. 
- */ - if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */ - (is_newentry && lladdr)) { /* (7) */ - ln->ln_router = 1; - } - break; - } + /* Calculates new router status */ + router = nd6_is_router(type, code, is_newentry, olladdr, + lladdr != NULL ? 1 : 0, ln->ln_router); - if (ln != NULL) { - static_route = (ln->la_flags & LLE_STATIC); - router = ln->ln_router; + ln->ln_router = router; + /* Mark non-router redirects with special flag */ + if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER) + ln->la_flags |= LLE_REDIRECT; - if (flags & ND6_EXCLUSIVE) - LLE_WUNLOCK(ln); - else - LLE_RUNLOCK(ln); - if (static_route) - ln = NULL; - } - if (chain) - nd6_output_flush(ifp, ifp, chain, &sin6, NULL); + if (flags & LLE_EXCLUSIVE) + LLE_WUNLOCK(ln); + else + LLE_RUNLOCK(ln); + + if (chain != NULL) + nd6_flush_holdchain(ifp, ifp, chain, &sin6); /* * When the link-layer address of a router changes, select the @@ -1791,25 +2064,13 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. 
*/ - if (do_update && router && + if ((do_update || is_newentry) && router && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * guaranteed recursion */ defrouter_select(); } - - return (ln); -done: - if (ln != NULL) { - if (flags & ND6_EXCLUSIVE) - LLE_WUNLOCK(ln); - else - LLE_RUNLOCK(ln); - if (static_route) - ln = NULL; - } - return (ln); } static void @@ -1822,7 +2083,9 @@ nd6_slowtimo(void *arg) callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); IFNET_RLOCK_NOSLEEP(); - TAILQ_FOREACH(ifp, &V_ifnet, if_list) { + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { + if (ifp->if_afdata[AF_INET6] == NULL) + continue; nd6if = ND_IFINFO(ifp); if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { @@ -1840,55 +2103,176 @@ nd6_slowtimo(void *arg) CURVNET_RESTORE(); } -int -nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, - struct sockaddr_in6 *dst, struct rtentry *rt0) +void +nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain, + struct sockaddr_in6 *sin6) { - return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL)); + LLE_WLOCK_ASSERT(ln); + + *chain = ln->la_hold; + ln->la_hold = NULL; + lltable_fill_sa_entry(ln, (struct sockaddr *)sin6); + + if (ln->ln_state == ND6_LLINFO_STALE) { + + /* + * The first time we send a packet to a + * neighbor whose entry is STALE, we have + * to change the state to DELAY and a sets + * a timer to expire in DELAY_FIRST_PROBE_TIME + * seconds to ensure do neighbor unreachability + * detection on expiration. 
+ * (RFC 2461 7.3.3) + */ + nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY); + } } +int +nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, + struct sockaddr_in6 *dst, struct route *ro) +{ + int error; + int ip6len; + struct ip6_hdr *ip6; + struct m_tag *mtag; + +#ifdef MAC + mac_netinet6_nd6_send(ifp, m); +#endif + + /* + * If called from nd6_ns_output() (NS), nd6_na_output() (NA), + * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA + * as handled by rtsol and rtadvd), mbufs will be tagged for SeND + * to be diverted to user space. When re-injected into the kernel, + * send_output() will directly dispatch them to the outgoing interface. + */ + if (send_sendso_input_hook != NULL) { + mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); + if (mtag != NULL) { + ip6 = mtod(m, struct ip6_hdr *); + ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); + /* Use the SEND socket */ + error = send_sendso_input_hook(m, ifp, SND_OUT, + ip6len); + /* -1 == no app on SEND socket */ + if (error == 0 || error != -1) + return (error); + } + } + + m_clrprotoflags(m); /* Avoid confusing lower layers. */ + IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL, + mtod(m, struct ip6_hdr *)); + + if ((ifp->if_flags & IFF_LOOPBACK) == 0) + origifp = ifp; + + error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro); + return (error); +} /* - * Note that I'm not enforcing any global serialization - * lle state or asked changes here as the logic is too - * complicated to avoid having to always acquire an exclusive - * lock - * KMM + * Lookup link headerfor @sa_dst address. Stores found + * data in @desten buffer. Copy of lle ln_flags can be also + * saved in @pflags if @pflags is non-NULL. + * + * If destination LLE does not exists or lle state modification + * is required, call "slow" version. * + * Return values: + * - 0 on success (address copied to buffer). 
+ * - EWOULDBLOCK (no local error, but address is still unresolved) + * - other errors (alloc failure, etc) */ -#define senderr(e) { error = (e); goto bad;} - int -nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, - struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle, - struct mbuf **chain) +nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, + const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags, + struct llentry **plle) { - struct mbuf *m = m0; - struct m_tag *mtag; - struct llentry *ln = lle; - struct ip6_hdr *ip6; - int error = 0; - int flags = 0; - int ip6len; + struct llentry *ln = NULL; + const struct sockaddr_in6 *dst6; -#ifdef INVARIANTS - if (lle != NULL) { - - LLE_WLOCK_ASSERT(lle); + if (pflags != NULL) + *pflags = 0; + + dst6 = (const struct sockaddr_in6 *)sa_dst; - KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed")); + /* discard the packet if IPv6 operation is disabled on the interface */ + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { + m_freem(m); + return (ENETDOWN); /* better error? */ } -#endif - if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) - goto sendpkt; - if (nd6_need_cache(ifp) == 0) - goto sendpkt; + if (m != NULL && m->m_flags & M_MCAST) { + switch (ifp->if_type) { + case IFT_ETHER: + case IFT_FDDI: + case IFT_L2VLAN: + case IFT_IEEE80211: + case IFT_BRIDGE: + case IFT_ISO88025: + ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, + desten); + return (0); + default: + m_freem(m); + return (EAFNOSUPPORT); + } + } - /* - * next hop determination. This routine is derived from ether_output. - */ + IF_AFDATA_RLOCK(ifp); + ln = nd6_lookup(&dst6->sin6_addr, plle ? 
LLE_EXCLUSIVE : LLE_UNLOCKED, + ifp); + if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { + /* Entry found, let's copy lle info */ + bcopy(ln->r_linkdata, desten, ln->r_hdrlen); + if (pflags != NULL) + *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); + /* Check if we have feedback request from nd6 timer */ + if (ln->r_skip_req != 0) { + LLE_REQ_LOCK(ln); + ln->r_skip_req = 0; /* Notify that entry was used */ + ln->lle_hittime = time_uptime; + LLE_REQ_UNLOCK(ln); + } + if (plle) { + LLE_ADDREF(ln); + *plle = ln; + LLE_WUNLOCK(ln); + } + IF_AFDATA_RUNLOCK(ifp); + return (0); + } else if (plle && ln) + LLE_WUNLOCK(ln); + IF_AFDATA_RUNLOCK(ifp); + + return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle)); +} + + +/* + * Do L2 address resolution for @sa_dst address. Stores found + * address in @desten buffer. Copy of lle ln_flags can be also + * saved in @pflags if @pflags is non-NULL. + * + * Heavy version. + * Function assume that destination LLE does not exist, + * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired. + * + * Set noinline to be dtrace-friendly + */ +static __noinline int +nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, + const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags, + struct llentry **plle) +{ + struct llentry *lle = NULL, *lle_tmp; + struct in6_addr *psrc, src; + int send_ns, ll_len; + char *lladdr; /* * Address resolution or Neighbor Unreachability Detection @@ -1896,50 +2280,54 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, * At this point, the destination of the packet must be a unicast * or an anycast address(i.e. not a multicast). */ - - flags = (lle != NULL) ? 
LLE_EXCLUSIVE : 0; - if (ln == NULL) { - retry: + if (lle == NULL) { IF_AFDATA_RLOCK(ifp); - ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst); + lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); - if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) { + if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case. */ - flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0); - IF_AFDATA_LOCK(ifp); - ln = nd6_lookup(&dst->sin6_addr, flags, ifp); - IF_AFDATA_UNLOCK(ifp); + lle = nd6_alloc(&dst->sin6_addr, 0, ifp); + if (lle == NULL) { + char ip6buf[INET6_ADDRSTRLEN]; + log(LOG_DEBUG, + "nd6_output: can't allocate llinfo for %s " + "(ln=%p)\n", + ip6_sprintf(ip6buf, &dst->sin6_addr), lle); + m_freem(m); + return (ENOBUFS); + } + + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(lle); + /* Prefer any existing entry over newly-created one */ + lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); + if (lle_tmp == NULL) + lltable_link_entry(LLTABLE6(ifp), lle); + IF_AFDATA_WUNLOCK(ifp); + if (lle_tmp != NULL) { + lltable_free_entry(LLTABLE6(ifp), lle); + lle = lle_tmp; + lle_tmp = NULL; + } } } - if (ln == NULL) { - if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && - !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { - char ip6buf[INET6_ADDRSTRLEN]; - log(LOG_DEBUG, - "nd6_output: can't allocate llinfo for %s " - "(ln=%p)\n", - ip6_sprintf(ip6buf, &dst->sin6_addr), ln); - senderr(EIO); /* XXX: good error? */ + if (lle == NULL) { + if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { + m_freem(m); + return (ENOBUFS); } - goto sendpkt; /* send anyway */ - } - /* We don't have to do link-layer address resolution on a p2p link. 
*/ - if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && - ln->ln_state < ND6_LLINFO_REACHABLE) { - if ((flags & LLE_EXCLUSIVE) == 0) { - flags |= LLE_EXCLUSIVE; - LLE_RUNLOCK(ln); - goto retry; - } - ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + if (m != NULL) + m_freem(m); + return (ENOBUFS); } + LLE_WLOCK_ASSERT(lle); + /* * The first time we send a packet to a neighbor whose entry is * STALE, we have to change the state to DELAY and a sets a timer to @@ -1947,49 +2335,46 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, * neighbor unreachability detection on expiration. * (RFC 2461 7.3.3) */ - if (ln->ln_state == ND6_LLINFO_STALE) { - if ((flags & LLE_EXCLUSIVE) == 0) { - flags |= LLE_EXCLUSIVE; - LLE_RUNLOCK(ln); - goto retry; - } - ln->la_asked = 0; - ln->ln_state = ND6_LLINFO_DELAY; - nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz); - } + if (lle->ln_state == ND6_LLINFO_STALE) + nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); /* * If the neighbor cache entry has a state other than INCOMPLETE * (i.e. its link-layer address is already resolved), just * send the packet. */ - if (ln->ln_state > ND6_LLINFO_INCOMPLETE) - goto sendpkt; + if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { + if (flags & LLE_ADDRONLY) { + lladdr = lle->ll_addr; + ll_len = ifp->if_addrlen; + } else { + lladdr = lle->r_linkdata; + ll_len = lle->r_hdrlen; + } + bcopy(lladdr, desten, ll_len); + if (pflags != NULL) + *pflags = lle->la_flags; + if (plle) { + LLE_ADDREF(lle); + *plle = lle; + } + LLE_WUNLOCK(lle); + return (0); + } /* * There is a neighbor cache entry, but no ethernet address * response yet. Append this latest packet to the end of the - * packet queue in the mbuf, unless the number of the packet - * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen, + * packet queue in the mbuf. When it exceeds nd6_maxqueuelen, * the oldest packet in the queue will be removed. 
*/ - if (ln->ln_state == ND6_LLINFO_NOSTATE) - ln->ln_state = ND6_LLINFO_INCOMPLETE; - - if ((flags & LLE_EXCLUSIVE) == 0) { - flags |= LLE_EXCLUSIVE; - LLE_RUNLOCK(ln); - goto retry; - } - LLE_WLOCK_ASSERT(ln); - - if (ln->la_hold) { + if (lle->la_hold != NULL) { struct mbuf *m_hold; int i; i = 0; - for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) { + for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; @@ -1997,134 +2382,63 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, } } while (i >= V_nd6_maxqueuelen) { - m_hold = ln->la_hold; - ln->la_hold = ln->la_hold->m_nextpkt; + m_hold = lle->la_hold; + lle->la_hold = lle->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { - ln->la_hold = m; + lle->la_hold = m; } /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. + * Note that for newly-created lle la_asked will be 0, + * so we will transition from ND6_LLINFO_NOSTATE to + * ND6_LLINFO_INCOMPLETE state here. */ - if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) { - ln->la_asked++; - - nd6_llinfo_settimer_locked(ln, - (long)ND_IFINFO(ifp)->retrans * hz / 1000); - LLE_WUNLOCK(ln); - nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); - if (lle != NULL && ln == lle) - LLE_WLOCK(lle); - - } else if (lle == NULL || ln != lle) { - /* - * We did the lookup (no lle arg) so we - * need to do the unlock here. - */ - LLE_WUNLOCK(ln); - } - - return (0); - - sendpkt: - /* discard the packet if IPv6 operation is disabled on the interface */ - if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { - error = ENETDOWN; /* better error? 
*/ - goto bad; - } - /* - * ln is valid and the caller did not pass in - * an llentry - */ - if ((ln != NULL) && (lle == NULL)) { - if (flags & LLE_EXCLUSIVE) - LLE_WUNLOCK(ln); - else - LLE_RUNLOCK(ln); - } - -#ifdef MAC - mac_netinet6_nd6_send(ifp, m); -#endif + psrc = NULL; + send_ns = 0; + if (lle->la_asked == 0) { + lle->la_asked++; + send_ns = 1; + psrc = nd6_llinfo_get_holdsrc(lle, &src); - /* - * If called from nd6_ns_output() (NS), nd6_na_output() (NA), - * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA - * as handled by rtsol and rtadvd), mbufs will be tagged for SeND - * to be diverted to user space. When re-injected into the kernel, - * send_output() will directly dispatch them to the outgoing interface. - */ - if (send_sendso_input_hook != NULL) { - mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); - if (mtag != NULL) { - ip6 = mtod(m, struct ip6_hdr *); - ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); - /* Use the SEND socket */ - error = send_sendso_input_hook(m, ifp, SND_OUT, - ip6len); - /* -1 == no app on SEND socket */ - if (error == 0 || error != -1) - return (error); - } + nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); } + LLE_WUNLOCK(lle); + if (send_ns != 0) + nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL); - /* - * We were passed in a pointer to an lle with the lock held - * this means that we can't call if_output as we will - * recurse on the lle lock - so what we do is we create - * a list of mbufs to send and transmit them in the caller - * after the lock is dropped - */ - if (lle != NULL) { - if (*chain == NULL) - *chain = m; - else { - struct mbuf *mb; + return (EWOULDBLOCK); +} - /* - * append mbuf to end of deferred chain - */ - mb = *chain; - while (mb->m_nextpkt != NULL) - mb = mb->m_nextpkt; - mb->m_nextpkt = m; - } - return (error); - } - /* Reset layer specific mbuf flags to avoid confusing lower layers. 
*/ - m->m_flags &= ~(M_PROTOFLAGS); - if ((ifp->if_flags & IFF_LOOPBACK) != 0) { - return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, - NULL)); - } - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL); - return (error); +/* + * Do L2 address resolution for @sa_dst address. Stores found + * address in @desten buffer. Copy of lle ln_flags can be also + * saved in @pflags if @pflags is non-NULL. + * + * Return values: + * - 0 on success (address copied to buffer). + * - EWOULDBLOCK (no local error, but address is still unresolved) + * - other errors (alloc failure, etc) + */ +int +nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags) +{ + int error; - bad: - /* - * ln is valid and the caller did not pass in - * an llentry - */ - if ((ln != NULL) && (lle == NULL)) { - if (flags & LLE_EXCLUSIVE) - LLE_WUNLOCK(ln); - else - LLE_RUNLOCK(ln); - } - if (m) - m_freem(m); + flags |= LLE_ADDRONLY; + error = nd6_resolve_slow(ifp, flags, NULL, + (const struct sockaddr_in6 *)dst, desten, pflags, NULL); return (error); } -#undef senderr - int -nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, - struct sockaddr_in6 *dst, struct route *ro) +nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, + struct sockaddr_in6 *dst) { struct mbuf *m, *m_head; struct ifnet *outifp; @@ -2139,20 +2453,17 @@ nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, while (m_head) { m = m_head; m_head = m_head->m_nextpkt; - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro); + error = nd6_output_ifp(ifp, origifp, m, dst, NULL); } /* * XXX - * note that intermediate errors are blindly ignored - but this is - * the same convention as used with nd6_output when called by - * nd6_cache_lladdr + * note that intermediate errors are blindly ignored */ return (error); } - -int +static int nd6_need_cache(struct ifnet *ifp) { /* @@ -2167,19 
+2478,9 @@ nd6_need_cache(struct ifnet *ifp) case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: -#ifdef IFT_L2VLAN case IFT_L2VLAN: -#endif -#ifdef IFT_IEEE80211 case IFT_IEEE80211: -#endif -#ifdef IFT_CARP - case IFT_CARP: -#endif case IFT_INFINIBAND: - case IFT_GIF: /* XXX need more cases? */ - case IFT_PPP: - case IFT_TUNNEL: case IFT_BRIDGE: case IFT_PROPVIRTUAL: return (1); @@ -2189,75 +2490,76 @@ nd6_need_cache(struct ifnet *ifp) } /* - * the callers of this function need to be re-worked to drop - * the lle lock, drop here for now + * Add pernament ND6 link-layer record for given + * interface address. + * + * Very similar to IPv4 arp_ifinit(), but: + * 1) IPv6 DAD is performed in different place + * 2) It is called by IPv6 protocol stack in contrast to + * arp_ifinit() which is typically called in SIOCSIFADDR + * driver ioctl handler. + * */ int -nd6_storelladdr(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, u_char *desten, struct llentry **lle) +nd6_add_ifa_lle(struct in6_ifaddr *ia) { - struct llentry *ln; + struct ifnet *ifp; + struct llentry *ln, *ln_tmp; + struct sockaddr *dst; - *lle = NULL; - IF_AFDATA_UNLOCK_ASSERT(ifp); - if (m != NULL && m->m_flags & M_MCAST) { - int i; + ifp = ia->ia_ifa.ifa_ifp; + if (nd6_need_cache(ifp) == 0) + return (0); - switch (ifp->if_type) { - case IFT_ETHER: - case IFT_FDDI: -#ifdef IFT_L2VLAN - case IFT_L2VLAN: -#endif -#ifdef IFT_IEEE80211 - case IFT_IEEE80211: -#endif - case IFT_BRIDGE: - case IFT_ISO88025: - ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, - desten); - return (0); - case IFT_IEEE1394: - /* - * netbsd can use if_broadcastaddr, but we don't do so - * to reduce # of ifdef. 
- */ - for (i = 0; i < ifp->if_addrlen; i++) - desten[i] = ~0; - return (0); - case IFT_ARCNET: - *desten = 0; - return (0); - default: - m_freem(m); - return (EAFNOSUPPORT); - } - } + ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; + dst = (struct sockaddr *)&ia->ia_addr; + ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst); + if (ln == NULL) + return (ENOBUFS); + IF_AFDATA_WLOCK(ifp); + LLE_WLOCK(ln); + /* Unlink any entry if exists */ + ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst); + if (ln_tmp != NULL) + lltable_unlink_entry(LLTABLE6(ifp), ln_tmp); + lltable_link_entry(LLTABLE6(ifp), ln); + IF_AFDATA_WUNLOCK(ifp); - /* - * the entry should have been created in nd6_store_lladdr - */ - IF_AFDATA_RLOCK(ifp); - ln = lla_lookup(LLTABLE6(ifp), 0, dst); - IF_AFDATA_RUNLOCK(ifp); - if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) { - if (ln != NULL) - LLE_RUNLOCK(ln); - /* this could happen, if we could not allocate memory */ - m_freem(m); - return (1); - } + if (ln_tmp != NULL) + EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED); + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); + + LLE_WUNLOCK(ln); + if (ln_tmp != NULL) + llentry_free(ln_tmp); - bcopy(&ln->ll_addr, desten, ifp->if_addrlen); - *lle = ln; - LLE_RUNLOCK(ln); - /* - * A *small* use after free race exists here - */ return (0); } +/* + * Removes either all lle entries for given @ia, or lle + * corresponding to @ia address. 
+ */ +void +nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all) +{ + struct sockaddr_in6 mask, addr; + struct sockaddr *saddr, *smask; + struct ifnet *ifp; + + ifp = ia->ia_ifa.ifa_ifp; + memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); + memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); + saddr = (struct sockaddr *)&addr; + smask = (struct sockaddr *)&mask; + + if (all != 0) + lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC); + else + lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr); +} + static void clear_llinfo_pqueue(struct llentry *ln) { @@ -2269,22 +2571,24 @@ clear_llinfo_pqueue(struct llentry *ln) } ln->la_hold = NULL; - return; } static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); -#ifdef SYSCTL_DECL + SYSCTL_DECL(_net_inet6_icmp6); -#endif -SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, - CTLFLAG_RD, nd6_sysctl_drlist, ""); -SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, - CTLFLAG_RD, nd6_sysctl_prlist, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, - CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); -SYSCTL_VNET_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer, - CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), ""); +SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter", + "NDP default router list"); +SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, nd6_sysctl_prlist, "S,in6_prefix", + "NDP prefix list"); +SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); +SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), ""); static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) @@ -2293,30 +2597,33 @@ 
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) struct nd_defrouter *dr; int error; - if (req->newptr) + if (req->newptr != NULL) return (EPERM); + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + bzero(&d, sizeof(d)); d.rtaddr.sin6_family = AF_INET6; d.rtaddr.sin6_len = sizeof(d.rtaddr); - /* - * XXX locking - */ + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { d.rtaddr.sin6_addr = dr->rtaddr; error = sa6_recoverscope(&d.rtaddr); if (error != 0) - return (error); - d.flags = dr->flags; + break; + d.flags = dr->raflags; d.rtlifetime = dr->rtlifetime; - d.expire = dr->expire; + d.expire = dr->expire + (time_second - time_uptime); d.if_index = dr->ifp->if_index; error = SYSCTL_OUT(req, &d, sizeof(d)); if (error != 0) - return (error); + break; } - return (0); + ND6_RUNLOCK(); + return (error); } static int @@ -2333,15 +2640,17 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) if (req->newptr) return (EPERM); + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + bzero(&p, sizeof(p)); p.origin = PR_ORIG_RA; bzero(&s6, sizeof(s6)); s6.sin6_family = AF_INET6; s6.sin6_len = sizeof(s6); - /* - * XXX locking - */ + ND6_RLOCK(); LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { p.prefix = pr->ndpr_prefix; if (sa6_recoverscope(&p.prefix)) { @@ -2362,7 +2671,8 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) p.expire = pr->ndpr_lastupdate + - pr->ndpr_vltime; + pr->ndpr_vltime + + (time_second - time_uptime); else p.expire = maxexpire; } @@ -2373,7 +2683,7 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) p.advrtrs++; error = SYSCTL_OUT(req, &p, sizeof(p)); if (error != 0) - return (error); + break; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { s6.sin6_addr = pfr->router->rtaddr; if (sa6_recoverscope(&s6)) @@ -2382,8 +2692,9 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) ip6_sprintf(ip6buf, &pfr->router->rtaddr)); error = SYSCTL_OUT(req, &s6, 
sizeof(s6)); if (error != 0) - return (error); + break; } } - return (0); + ND6_RUNLOCK(); + return (error); } diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h index 94202e10..33ac4386 100644 --- a/freebsd/sys/netinet6/nd6.h +++ b/freebsd/sys/netinet6/nd6.h @@ -87,9 +87,7 @@ struct nd_ifinfo { #define ND6_IFF_AUTO_LINKLOCAL 0x20 #define ND6_IFF_NO_RADR 0x40 #define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */ - -#define ND6_CREATE LLE_CREATE -#define ND6_EXCLUSIVE LLE_EXCLUSIVE +#define ND6_IFF_NO_DAD 0x100 #ifdef _KERNEL #define ND_IFINFO(ifp) \ @@ -234,14 +232,15 @@ struct in6_ndifreq { ((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000) TAILQ_HEAD(nd_drhead, nd_defrouter); -struct nd_defrouter { +struct nd_defrouter { TAILQ_ENTRY(nd_defrouter) dr_entry; - struct in6_addr rtaddr; - u_char flags; /* flags on RA message */ + struct in6_addr rtaddr; + u_char raflags; /* flags on RA message */ u_short rtlifetime; u_long expire; - struct ifnet *ifp; + struct ifnet *ifp; int installed; /* is installed into kernel routing table */ + u_int refcnt; }; struct nd_prefixctl { @@ -317,6 +316,10 @@ struct nd_pfxrouter { LIST_HEAD(nd_prhead, nd_prefix); +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_IP6NDP); +#endif + /* nd6.c */ VNET_DECLARE(int, nd6_prune); VNET_DECLARE(int, nd6_delay); @@ -341,10 +344,20 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861); #define V_nd6_debug VNET(nd6_debug) #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861) -#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) +/* Lock for the prefix and default router lists. 
*/ +VNET_DECLARE(struct rwlock, nd6_lock); +#define V_nd6_lock VNET(nd6_lock) + +#define ND6_RLOCK() rw_rlock(&V_nd6_lock) +#define ND6_RUNLOCK() rw_runlock(&V_nd6_lock) +#define ND6_WLOCK() rw_wlock(&V_nd6_lock) +#define ND6_WUNLOCK() rw_wunlock(&V_nd6_lock) +#define ND6_WLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_WLOCKED) +#define ND6_RLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_RLOCKED) +#define ND6_LOCK_ASSERT() rw_assert(&V_nd6_lock, RA_LOCKED) +#define ND6_UNLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_UNLOCKED) -VNET_DECLARE(struct callout, nd6_timer_ch); -#define V_nd6_timer_ch VNET(nd6_timer_ch) +#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) /* nd6_rtr.c */ VNET_DECLARE(int, nd6_defifindex); @@ -359,7 +372,7 @@ VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */ #define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance) union nd_opts { - struct nd_opt_hdr *nd_opt_array[8]; /* max = target address list */ + struct nd_opt_hdr *nd_opt_array[16]; /* max = ND_OPT_NONCE */ struct { struct nd_opt_hdr *zero; struct nd_opt_hdr *src_lladdr; @@ -367,6 +380,16 @@ union nd_opts { struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */ struct nd_opt_rd_hdr *rh; struct nd_opt_mtu *mtu; + struct nd_opt_hdr *__res6; + struct nd_opt_hdr *__res7; + struct nd_opt_hdr *__res8; + struct nd_opt_hdr *__res9; + struct nd_opt_hdr *__res10; + struct nd_opt_hdr *__res11; + struct nd_opt_hdr *__res12; + struct nd_opt_hdr *__res13; + struct nd_opt_nonce *nonce; + struct nd_opt_hdr *__res15; struct nd_opt_hdr *search; /* multiple opts */ struct nd_opt_hdr *last; /* multiple opts */ int done; @@ -379,6 +402,7 @@ union nd_opts { #define nd_opts_pi_end nd_opt_each.pi_end #define nd_opts_rh nd_opt_each.rh #define nd_opts_mtu nd_opt_each.mtu +#define nd_opts_nonce nd_opt_each.nonce #define nd_opts_search nd_opt_each.search #define nd_opts_last nd_opt_each.last #define nd_opts_done nd_opt_each.done @@ -390,34 +414,32 @@ void nd6_init(void); void nd6_destroy(void); 
#endif struct nd_ifinfo *nd6_ifattach(struct ifnet *); -void nd6_ifdetach(struct nd_ifinfo *); -int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *); +void nd6_ifdetach(struct ifnet *, struct nd_ifinfo *); +int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); void nd6_option_init(void *, int, union nd_opts *); struct nd_opt_hdr *nd6_option(union nd_opts *); int nd6_options(union nd_opts *); -struct llentry *nd6_lookup(struct in6_addr *, int, struct ifnet *); +struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *); +struct llentry *nd6_alloc(const struct in6_addr *, int, struct ifnet *); void nd6_setmtu(struct ifnet *); -void nd6_llinfo_settimer(struct llentry *, long); -void nd6_llinfo_settimer_locked(struct llentry *, long); +void nd6_llinfo_setstate(struct llentry *lle, int newstate); void nd6_timer(void *); void nd6_purge(struct ifnet *); -void nd6_nud_hint(struct rtentry *, struct in6_addr *, int); -int nd6_resolve(struct ifnet *, struct rtentry *, struct mbuf *, - struct sockaddr *, u_char *); -void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); +int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, + char *desten, uint32_t *pflags); +int nd6_resolve(struct ifnet *, int, struct mbuf *, + const struct sockaddr *, u_char *, uint32_t *, struct llentry **); int nd6_ioctl(u_long, caddr_t, struct ifnet *); -struct llentry *nd6_cache_lladdr(struct ifnet *, struct in6_addr *, +void nd6_cache_lladdr(struct ifnet *, struct in6_addr *, char *, int, int, int); -int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *, struct rtentry *); -int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *, struct rtentry *, struct llentry *, - struct mbuf **); -int nd6_output_flush(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *, struct route *); -int nd6_need_cache(struct ifnet *); -int nd6_storelladdr(struct ifnet 
*, struct mbuf *, - struct sockaddr *, u_char *, struct llentry **); +void nd6_grab_holdchain(struct llentry *, struct mbuf **, + struct sockaddr_in6 *); +int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *); +int nd6_add_ifa_lle(struct in6_ifaddr *); +void nd6_rem_ifa_lle(struct in6_ifaddr *, int); +int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct route *); /* nd6_nbr.c */ void nd6_na_input(struct mbuf *, int, int); @@ -425,24 +447,28 @@ void nd6_na_output(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *); void nd6_ns_input(struct mbuf *, int, int); void nd6_ns_output(struct ifnet *, const struct in6_addr *, - const struct in6_addr *, struct llentry *, int); + const struct in6_addr *, const struct in6_addr *, uint8_t *); caddr_t nd6_ifptomac(struct ifnet *); +void nd6_dad_init(void); void nd6_dad_start(struct ifaddr *, int); void nd6_dad_stop(struct ifaddr *); -void nd6_dad_duplicated(struct ifaddr *); /* nd6_rtr.c */ void nd6_rs_input(struct mbuf *, int, int); void nd6_ra_input(struct mbuf *, int, int); -void prelist_del(struct nd_prefix *); void defrouter_reset(void); void defrouter_select(void); -void defrtrlist_del(struct nd_defrouter *); +void defrouter_ref(struct nd_defrouter *); +void defrouter_rele(struct nd_defrouter *); +bool defrouter_remove(struct in6_addr *, struct ifnet *); +void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *); +void defrouter_del(struct nd_defrouter *); void prelist_remove(struct nd_prefix *); int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *, struct nd_prefix **); void pfxlist_onlink_check(void); struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *); +struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *); struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *); void rt6_flush(struct in6_addr *, struct ifnet *); int 
nd6_setdefaultiface(int); diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c index cb765549..df50fa93 100644 --- a/freebsd/sys/netinet6/nd6_nbr.c +++ b/freebsd/sys/netinet6/nd6_nbr.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/malloc.h> +#include <sys/libkern.h> #include <rtems/bsd/sys/lock.h> #include <sys/rwlock.h> #include <sys/mbuf.h> @@ -50,9 +51,11 @@ __FBSDID("$FreeBSD$"); #include <sys/time.h> #include <sys/kernel.h> #include <rtems/bsd/sys/errno.h> +#include <sys/sysctl.h> #include <sys/syslog.h> #include <sys/queue.h> #include <sys/callout.h> +#include <sys/refcount.h> #include <net/if.h> #include <net/if_types.h> @@ -62,11 +65,11 @@ __FBSDID("$FreeBSD$"); #ifdef RADIX_MPATH #include <net/radix_mpath.h> #endif +#include <net/vnet.h> #include <netinet/in.h> #include <netinet/in_var.h> #include <net/if_llatbl.h> -#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include <netinet6/in6_var.h> #include <netinet6/in6_ifattach.h> #include <netinet/ip6.h> @@ -80,19 +83,32 @@ __FBSDID("$FreeBSD$"); #define SDL(s) ((struct sockaddr_dl *)s) struct dadq; -static struct dadq *nd6_dad_find(struct ifaddr *); -static void nd6_dad_starttimer(struct dadq *, int); +static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *); +static void nd6_dad_add(struct dadq *dp); +static void nd6_dad_del(struct dadq *dp); +static void nd6_dad_rele(struct dadq *); +static void nd6_dad_starttimer(struct dadq *, int, int); static void nd6_dad_stoptimer(struct dadq *); static void nd6_dad_timer(struct dadq *); -static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); -static void nd6_dad_ns_input(struct ifaddr *); +static void nd6_dad_duplicated(struct ifaddr *, struct dadq *); +static void nd6_dad_ns_output(struct dadq *); +static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_na_input(struct ifaddr *); static void 
nd6_na_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *, u_int); +static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *, + const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int); -VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/ -VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ -#define V_dad_ignore_ns VNET(dad_ignore_ns) +static VNET_DEFINE(int, dad_enhanced) = 1; +#define V_dad_enhanced VNET(dad_enhanced) + +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(dad_enhanced), 0, + "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD."); + +static VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to + transmit DAD packet */ #define V_dad_maxtry VNET(dad_maxtry) /* @@ -229,42 +245,40 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) /* (1) and (3) check. */ if (ifp->if_carp) ifa = (*carp_iamatch6_p)(ifp, &taddr6); - if (ifa == NULL) + else ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* (2) check. */ if (ifa == NULL) { - struct route_in6 ro; - int need_proxy; + struct sockaddr_dl rt_gateway; + struct rt_addrinfo info; + struct sockaddr_in6 dst6; - bzero(&ro, sizeof(ro)); - ro.ro_dst.sin6_len = sizeof(struct sockaddr_in6); - ro.ro_dst.sin6_family = AF_INET6; - ro.ro_dst.sin6_addr = taddr6; + bzero(&dst6, sizeof(dst6)); + dst6.sin6_len = sizeof(struct sockaddr_in6); + dst6.sin6_family = AF_INET6; + dst6.sin6_addr = taddr6; + + bzero(&rt_gateway, sizeof(rt_gateway)); + rt_gateway.sdl_len = sizeof(rt_gateway); + bzero(&info, sizeof(info)); + info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway; /* Always use the default FIB. 
*/ -#ifdef RADIX_MPATH - rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE, - RT_DEFAULT_FIB); -#else - in6_rtalloc(&ro, RT_DEFAULT_FIB); -#endif - need_proxy = (ro.ro_rt && - (ro.ro_rt->rt_flags & RTF_ANNOUNCE) != 0 && - ro.ro_rt->rt_gateway->sa_family == AF_LINK); - if (ro.ro_rt != NULL) { - if (need_proxy) - proxydl = *SDL(ro.ro_rt->rt_gateway); - RTFREE(ro.ro_rt); - } - if (need_proxy) { - /* - * proxy NDP for single entry - */ - ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, - IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); - if (ifa) - proxy = 1; + if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6, + 0, 0, &info) == 0) { + if ((info.rti_flags & RTF_ANNOUNCE) != 0 && + rt_gateway.sdl_family == AF_LINK) { + + /* + * proxy NDP for single entry + */ + proxydl = *SDL(&rt_gateway); + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal( + ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); + if (ifa) + proxy = 1; + } } } if (ifa == NULL) { @@ -316,7 +330,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) * silently ignore it. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) - nd6_dad_ns_input(ifa); + nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce); goto freeit; } @@ -377,12 +391,14 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len) * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * - * ln - for source address determination - * dad - duplicate address detection + * ln - for source address determination + * nonce - If non-NULL, NS is used for duplicate address detection and + * the value (length is ND_OPT_NONCE_LEN) is used as a random nonce. 
*/ -void -nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, - const struct in6_addr *taddr6, struct llentry *ln, int dad) +static void +nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6, + const struct in6_addr *daddr6, const struct in6_addr *taddr6, + uint8_t *nonce, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; @@ -392,7 +408,6 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, int icmp6len; int maxlen; caddr_t mac; - struct route_in6 ro; if (IN6_IS_ADDR_MULTICAST(taddr6)) return; @@ -400,27 +415,17 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_ns); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; - if (max_linkhdr + maxlen >= MCLBYTES) { -#ifdef DIAGNOSTIC - printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES " - "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES); -#endif - return; - } + KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( + "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", + __func__, max_linkhdr, maxlen, MCLBYTES)); - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m && max_linkhdr + maxlen >= MHLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - m = NULL; - } - } + if (max_linkhdr + maxlen > MHLEN) + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; - m->m_pkthdr.rcvif = NULL; - - bzero(&ro, sizeof(ro)); + M_SETFIB(m, fibnum); if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) { m->m_flags |= M_MCAST; @@ -431,7 +436,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, icmp6len = sizeof(*nd_ns); m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len; - m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */ + m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? 
*/ /* fill neighbor solicitation packet */ ip6 = mtod(m, struct ip6_hdr *); @@ -453,8 +458,8 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) goto bad; } - if (!dad) { - struct ifaddr *ifa; + if (nonce == NULL) { + struct ifaddr *ifa = NULL; /* * RFC2461 7.2.2: @@ -466,60 +471,33 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, * interface should be used." * * We use the source address for the prompting packet - * (saddr6), if: - * - saddr6 is given from the caller (by giving "ln"), and - * - saddr6 belongs to the outgoing interface. + * (saddr6), if saddr6 belongs to the outgoing interface. * Otherwise, we perform the source address selection as usual. */ - struct in6_addr *hsrc; - hsrc = NULL; - if (ln != NULL) { - LLE_RLOCK(ln); - if (ln->la_hold != NULL) { - struct ip6_hdr *hip6; /* hold ip6 */ - - /* - * assuming every packet in la_hold has the same IP - * header - */ - hip6 = mtod(ln->la_hold, struct ip6_hdr *); - /* XXX pullup? */ - if (sizeof(*hip6) < ln->la_hold->m_len) { - ip6->ip6_src = hip6->ip6_src; - hsrc = &hip6->ip6_src; - } - } - LLE_RUNLOCK(ln); - } - if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, - hsrc)) != NULL) { + if (saddr6 != NULL) + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6); + if (ifa != NULL) { /* ip6_src set already. 
*/ + ip6->ip6_src = *saddr6; ifa_free(ifa); } else { int error; - struct sockaddr_in6 dst_sa; - struct in6_addr src_in; - struct ifnet *oifp; - - bzero(&dst_sa, sizeof(dst_sa)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(dst_sa); - dst_sa.sin6_addr = ip6->ip6_dst; - - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, - NULL, &ro, NULL, &oifp, &src_in); + struct in6_addr dst6, src6; + uint32_t scopeid; + + in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; - nd6log((LOG_DEBUG, - "nd6_ns_output: source can't be " - "determined: dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), + nd6log((LOG_DEBUG, "%s: source can't be " + "determined: dst=%s, error=%d\n", __func__, + ip6_sprintf(ip6buf, &dst6), error)); goto bad; } - ip6->ip6_src = src_in; + ip6->ip6_src = src6; } } else { /* @@ -550,7 +528,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, * Multicast NS MUST add one add the option * Unicast NS SHOULD add one add the option */ - if (!dad && (mac = nd6_ifptomac(ifp))) { + if (nonce == NULL && (mac = nd6_ifptomac(ifp))) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8 byte alignments... */ @@ -564,7 +542,26 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } + /* + * Add a Nonce option (RFC 3971) to detect looped back NS messages. + * This behavior is documented as Enhanced Duplicate Address + * Detection in RFC 7527. + * net.inet6.ip6.dad_enhanced=0 disables this. + */ + if (V_dad_enhanced != 0 && nonce != NULL) { + int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN; + struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); + /* 8-byte alignment is required. 
*/ + optlen = (optlen + 7) & ~7; + m->m_pkthdr.len += optlen; + m->m_len += optlen; + icmp6len += optlen; + bzero((caddr_t)nd_opt, optlen); + nd_opt->nd_opt_type = ND_OPT_NONCE; + nd_opt->nd_opt_len = optlen >> 3; + bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN); + } ip6->ip6_plen = htons((u_short)icmp6len); nd_ns->nd_ns_cksum = 0; nd_ns->nd_ns_cksum = @@ -579,24 +576,27 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, m_tag_prepend(m, mtag); } - ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL); + ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0, + &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); - /* We don't cache this route. */ - RO_RTFREE(&ro); - return; bad: - if (ro.ro_rt) { - RTFREE(ro.ro_rt); - } m_freem(m); - return; } +#ifndef BURN_BRIDGES +void +nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6, + const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce) +{ + + nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB); +} +#endif /* * Neighbor advertisement input handling. 
* @@ -626,8 +626,10 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) struct llentry *ln = NULL; union nd_opts ndopts; struct mbuf *chain = NULL; - struct m_tag *mtag; struct sockaddr_in6 sin6; + u_char linkhdr[LLE_MAX_LINKHDR]; + size_t linkhdrsize; + int lladdr_off; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { @@ -653,6 +655,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) is_router = ((flags & ND_NA_FLAG_ROUTER) != 0); is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0); is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0); + memset(&sin6, 0, sizeof(sin6)); taddr6 = nd_na->nd_na_target; if (in6_setscope(&taddr6, ifp, NULL)) @@ -685,7 +688,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } - ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); + /* + * This effectively disables the DAD check on a non-master CARP + * address. + */ + if (ifp->if_carp) + ifa = (*carp_iamatch6_p)(ifp, &taddr6); + else + ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* * Target address matches one of my interface address. @@ -742,20 +752,21 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) /* * Record link-layer address, and update the state. 
*/ - bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); - ln->la_flags |= LLE_VALID; - EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); - if (is_solicited) { - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - if (!ND6_LLINFO_PERMANENT(ln)) { - nd6_llinfo_settimer_locked(ln, - (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz); - } - } else { - ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + return; + + if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, + lladdr_off) == 0) { + ln = NULL; + goto freeit; } + EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); + if (is_solicited) + nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); + else + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); if ((ln->ln_router = is_router) != 0) { /* * This means a router's state has changed from @@ -774,7 +785,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) llchange = 0; else { if (ln->la_flags & LLE_VALID) { - if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen)) + if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; @@ -806,10 +817,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * If state is REACHABLE, make it STALE. * no other updates should be done. */ - if (ln->ln_state == ND6_LLINFO_REACHABLE) { - ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); - } + if (ln->ln_state == ND6_LLINFO_REACHABLE) + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); goto freeit; } else if (is_override /* (2a) */ || (!is_override && (lladdr != NULL && !llchange)) /* (2b) */ @@ -818,8 +827,15 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * Update link-local address, if any. 
*/ if (lladdr != NULL) { - bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); - ln->la_flags |= LLE_VALID; + linkhdrsize = sizeof(linkhdr); + if (lltable_calc_llheader(ifp, AF_INET6, lladdr, + linkhdr, &linkhdrsize, &lladdr_off) != 0) + goto freeit; + if (lltable_try_set_entry_addr(ifp, ln, linkhdr, + linkhdrsize, lladdr_off) == 0) { + ln = NULL; + goto freeit; + } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } @@ -829,19 +845,11 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * If not solicited and the link-layer address was * changed, make it STALE. */ - if (is_solicited) { - ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - if (!ND6_LLINFO_PERMANENT(ln)) { - nd6_llinfo_settimer_locked(ln, - (long)ND_IFINFO(ifp)->reachable * hz); - } - } else { - if (lladdr != NULL && llchange) { - ln->ln_state = ND6_LLINFO_STALE; - nd6_llinfo_settimer_locked(ln, - (long)V_nd6_gctimer * hz); - } + if (is_solicited) + nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); + else { + if (lladdr != NULL && llchange) + nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); } } @@ -851,31 +859,19 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * Remove the sender from the Default Router List and * update the Destination Cache entries. */ - struct nd_defrouter *dr; - struct in6_addr *in6; - - in6 = &L3_ADDR_SIN6(ln)->sin6_addr; + struct ifnet *nd6_ifp; - /* - * Lock to protect the default router list. - * XXX: this might be unnecessary, since this function - * is only called under the network software interrupt - * context. However, we keep it just for safety. 
- */ - dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); - if (dr) - defrtrlist_del(dr); - else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags & - ND6_IFF_ACCEPT_RTADV) { + nd6_ifp = lltable_get_ifp(ln->lle_tbl); + if (!defrouter_remove(&ln->r_l3addr.addr6, nd6_ifp) && + (ND_IFINFO(nd6_ifp)->flags & + ND6_IFF_ACCEPT_RTADV) != 0) /* * Even if the neighbor is not in the default - * router list, the neighbor may be used - * as a next hop for some destinations - * (e.g. redirect case). So we must - * call rt6_flush explicitly. + * router list, the neighbor may be used as a + * next hop for some destinations (e.g. redirect + * case). So we must call rt6_flush explicitly. */ rt6_flush(&ip6->ip6_src, ifp); - } } ln->ln_router = is_router; } @@ -884,43 +880,15 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len) * rt->rt_flags &= ~RTF_REJECT; */ ln->la_asked = 0; - if (ln->la_hold) { - struct mbuf *m_hold, *m_hold_next; - - /* - * reset the la_hold in advance, to explicitly - * prevent a la_hold lookup in nd6_output() - * (wouldn't happen, though...) - */ - for (m_hold = ln->la_hold, ln->la_hold = NULL; - m_hold; m_hold = m_hold_next) { - m_hold_next = m_hold->m_nextpkt; - m_hold->m_nextpkt = NULL; - /* - * we assume ifp is not a loopback here, so just set - * the 2nd argument as the 1st one. 
- */ - - if (send_sendso_input_hook != NULL) { - mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, - sizeof(unsigned short), M_NOWAIT); - if (mtag == NULL) - goto bad; - m_tag_prepend(m, mtag); - } - - nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); - } - } + if (ln->la_hold != NULL) + nd6_grab_holdchain(ln, &chain, &sin6); freeit: - if (ln != NULL) { - if (chain) - memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); + if (ln != NULL) LLE_WUNLOCK(ln); - if (chain) - nd6_output_flush(ifp, ifp, chain, &sin6, NULL); - } + if (chain != NULL) + nd6_flush_holdchain(ifp, ifp, chain, &sin6); + if (checklink) pfxlist_onlink_check(); @@ -954,42 +922,30 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, { struct mbuf *m; struct m_tag *mtag; - struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; - struct in6_addr src, daddr6; - struct sockaddr_in6 dst_sa; + struct in6_addr daddr6, dst6, src6; + uint32_t scopeid; + int icmp6len, maxlen, error; caddr_t mac = NULL; - struct route_in6 ro; - - bzero(&ro, sizeof(ro)); daddr6 = *daddr6_0; /* make a local copy for modification */ /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_na); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; - if (max_linkhdr + maxlen >= MCLBYTES) { -#ifdef DIAGNOSTIC - printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES " - "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES); -#endif - return; - } + KASSERT(max_linkhdr + maxlen <= MCLBYTES, ( + "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)", + __func__, max_linkhdr, maxlen, MCLBYTES)); - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m && max_linkhdr + maxlen >= MHLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - m = NULL; - } - } + if (max_linkhdr + maxlen > MHLEN) + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; - m->m_pkthdr.rcvif = NULL; M_SETFIB(m, 
fibnum); if (IN6_IS_ADDR_MULTICAST(&daddr6)) { @@ -1001,7 +957,7 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, icmp6len = sizeof(*nd_na); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len; - m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */ + m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */ /* fill neighbor advertisement packet */ ip6 = mtod(m, struct ip6_hdr *); @@ -1023,25 +979,21 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, flags &= ~ND_NA_FLAG_SOLICITED; } ip6->ip6_dst = daddr6; - bzero(&dst_sa, sizeof(struct sockaddr_in6)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(struct sockaddr_in6); - dst_sa.sin6_addr = daddr6; /* * Select a source whose scope is the same as that of the dest. */ - bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa)); - oifp = ifp; - error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src); + in6_splitscope(&daddr6, &dst6, &scopeid); + error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, + scopeid, ifp, &src6, NULL); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " "determined: dst=%s, error=%d\n", - ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); + ip6_sprintf(ip6buf, &daddr6), error)); goto bad; } - ip6->ip6_src = src; + ip6->ip6_src = src6; nd_na = (struct nd_neighbor_advert *)(ip6 + 1); nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; nd_na->nd_na_code = 0; @@ -1104,22 +1056,15 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, m_tag_prepend(m, mtag); } - ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL); + ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); - /* We don't cache this route. 
*/ - RO_RTFREE(&ro); - return; bad: - if (ro.ro_rt) { - RTFREE(ro.ro_rt); - } m_freem(m); - return; } #ifndef BURN_BRIDGES @@ -1142,15 +1087,8 @@ nd6_ifptomac(struct ifnet *ifp) case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: -#ifdef IFT_L2VLAN case IFT_L2VLAN: -#endif -#ifdef IFT_IEEE80211 case IFT_IEEE80211: -#endif -#ifdef IFT_CARP - case IFT_CARP: -#endif case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_ISO88025: @@ -1168,31 +1106,80 @@ struct dadq { int dad_ns_ocount; /* NS sent so far */ int dad_ns_icount; int dad_na_icount; + int dad_ns_lcount; /* looped back NS */ + int dad_loopbackprobe; /* probing state for loopback detection */ struct callout dad_timer_ch; struct vnet *dad_vnet; + u_int dad_refcnt; +#define ND_OPT_NONCE_LEN32 \ + ((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t)) + uint32_t dad_nonce[ND_OPT_NONCE_LEN32]; }; static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); -VNET_DEFINE(int, dad_init) = 0; -#define V_dadq VNET(dadq) -#define V_dad_init VNET(dad_init) +static VNET_DEFINE(struct rwlock, dad_rwlock); +#define V_dadq VNET(dadq) +#define V_dad_rwlock VNET(dad_rwlock) + +#define DADQ_RLOCK() rw_rlock(&V_dad_rwlock) +#define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock) +#define DADQ_WLOCK() rw_wlock(&V_dad_rwlock) +#define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock) + +static void +nd6_dad_add(struct dadq *dp) +{ + + DADQ_WLOCK(); + TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list); + DADQ_WUNLOCK(); +} + +static void +nd6_dad_del(struct dadq *dp) +{ + + DADQ_WLOCK(); + TAILQ_REMOVE(&V_dadq, dp, dad_list); + DADQ_WUNLOCK(); + nd6_dad_rele(dp); +} static struct dadq * -nd6_dad_find(struct ifaddr *ifa) +nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n) { struct dadq *dp; - TAILQ_FOREACH(dp, &V_dadq, dad_list) - if (dp->dad_ifa == ifa) - return (dp); + DADQ_RLOCK(); + TAILQ_FOREACH(dp, &V_dadq, dad_list) { + if (dp->dad_ifa != ifa) + continue; + /* + * Skip if the nonce matches the received one. 
+ * +2 in the length is required because of type and + * length fields are included in a header. + */ + if (n != NULL && + n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 && + memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0], + ND_OPT_NONCE_LEN) == 0) { + dp->dad_ns_lcount++; + continue; + } + refcount_acquire(&dp->dad_refcnt); + break; + } + DADQ_RUNLOCK(); - return (NULL); + return (dp); } static void -nd6_dad_starttimer(struct dadq *dp, int ticks) +nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns) { + if (send_ns != 0) + nd6_dad_ns_output(dp); callout_reset(&dp->dad_timer_ch, ticks, (void (*)(void *))nd6_dad_timer, (void *)dp); } @@ -1201,7 +1188,25 @@ static void nd6_dad_stoptimer(struct dadq *dp) { - callout_stop(&dp->dad_timer_ch); + callout_drain(&dp->dad_timer_ch); +} + +static void +nd6_dad_rele(struct dadq *dp) +{ + + if (refcount_release(&dp->dad_refcnt)) { + ifa_free(dp->dad_ifa); + free(dp, M_IP6NDP); + } +} + +void +nd6_dad_init(void) +{ + + rw_init(&V_dad_rwlock, "nd6 DAD queue"); + TAILQ_INIT(&V_dadq); } /* @@ -1214,11 +1219,6 @@ nd6_dad_start(struct ifaddr *ifa, int delay) struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; - if (!V_dad_init) { - TAILQ_INIT(&V_dadq); - V_dad_init++; - } - /* * If we don't need DAD, don't do it. 
* There are several cases: @@ -1243,17 +1243,26 @@ nd6_dad_start(struct ifaddr *ifa, int delay) } if (ifa->ifa_ifp == NULL) panic("nd6_dad_start: ifa->ifa_ifp == NULL"); - if (!(ifa->ifa_ifp->if_flags & IFF_UP)) { + if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) { + ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } - if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED) + if (!(ifa->ifa_ifp->if_flags & IFF_UP) || + !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) || + (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) { + ia->ia6_flags |= IN6_IFF_TENTATIVE; return; - if (nd6_dad_find(ifa) != NULL) { - /* DAD already in progress */ + } + if ((dp = nd6_dad_find(ifa, NULL)) != NULL) { + /* + * DAD is already in progress. Let the existing entry + * finish it. + */ + nd6_dad_rele(dp); return; } - dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT); + dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO); if (dp == NULL) { log(LOG_ERR, "nd6_dad_start: memory allocation failed for " "%s(%s)\n", @@ -1261,13 +1270,10 @@ nd6_dad_start(struct ifaddr *ifa, int delay) ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } - bzero(dp, sizeof(*dp)); callout_init(&dp->dad_timer_ch, 0); #ifdef VIMAGE dp->dad_vnet = curvnet; #endif - TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list); - nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); @@ -1278,17 +1284,14 @@ nd6_dad_start(struct ifaddr *ifa, int delay) * (re)initialization. 
*/ dp->dad_ifa = ifa; - ifa_ref(ifa); /* just for safety */ + ifa_ref(dp->dad_ifa); dp->dad_count = V_ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; - if (delay == 0) { - nd6_dad_ns_output(dp, ifa); - nd6_dad_starttimer(dp, - (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); - } else { - nd6_dad_starttimer(dp, delay); - } + dp->dad_ns_lcount = dp->dad_loopbackprobe = 0; + refcount_init(&dp->dad_refcnt, 1); + nd6_dad_add(dp); + nd6_dad_starttimer(dp, delay, 0); } /* @@ -1299,9 +1302,7 @@ nd6_dad_stop(struct ifaddr *ifa) { struct dadq *dp; - if (!V_dad_init) - return; - dp = nd6_dad_find(ifa); + dp = nd6_dad_find(ifa, NULL); if (!dp) { /* DAD wasn't started yet */ return; @@ -1309,53 +1310,61 @@ nd6_dad_stop(struct ifaddr *ifa) nd6_dad_stoptimer(dp); - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - free(dp, M_IP6NDP); - dp = NULL; - ifa_free(ifa); + /* + * The DAD queue entry may have been removed by nd6_dad_timer() while + * we were waiting for it to stop, so re-do the lookup. + */ + nd6_dad_rele(dp); + if (nd6_dad_find(ifa, NULL) == NULL) + return; + + nd6_dad_del(dp); + nd6_dad_rele(dp); } static void nd6_dad_timer(struct dadq *dp) { CURVNET_SET(dp->dad_vnet); - int s; struct ifaddr *ifa = dp->dad_ifa; + struct ifnet *ifp = dp->dad_ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; char ip6buf[INET6_ADDRSTRLEN]; - s = splnet(); /* XXX */ - /* Sanity check */ if (ia == NULL) { log(LOG_ERR, "nd6_dad_timer: called with null parameter\n"); - goto done; + goto err; + } + if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { + /* Do not need DAD for ifdisabled interface. */ + log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of " + "ND6_IFF_IFDISABLED.\n", ifp->if_xname); + goto err; } if (ia->ia6_flags & IN6_IFF_DUPLICATED) { log(LOG_ERR, "nd6_dad_timer: called with duplicated address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? 
if_name(ifa->ifa_ifp) : "???"); - goto done; + goto err; } if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) { log(LOG_ERR, "nd6_dad_timer: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); - goto done; + goto err; } - /* timeouted with IFF_{RUNNING,UP} check */ - if (dp->dad_ns_tcount > V_dad_maxtry) { - nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n", + /* Stop DAD if the interface is down even after dad_maxtry attempts. */ + if ((dp->dad_ns_tcount > V_dad_maxtry) && + (((ifp->if_flags & IFF_UP) == 0) || + ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) { + nd6log((LOG_INFO, "%s: could not run DAD " + "because the interface was down or not running.\n", if_name(ifa->ifa_ifp))); - - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - free(dp, M_IP6NDP); - dp = NULL; - ifa_free(ifa); - goto done; + goto err; } /* Need more checks? */ @@ -1363,84 +1372,85 @@ nd6_dad_timer(struct dadq *dp) /* * We have more NS to go. Send NS packet for DAD. */ - nd6_dad_ns_output(dp, ifa); nd6_dad_starttimer(dp, - (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); + (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); + goto done; } else { /* * We have transmitted sufficient number of DAD packets. * See what we've got. */ - int duplicate; - - duplicate = 0; - - if (dp->dad_na_icount) { + if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) + /* We've seen NS or NA, means DAD has failed. */ + nd6_dad_duplicated(ifa, dp); + else if (V_dad_enhanced != 0 && + dp->dad_ns_lcount > 0 && + dp->dad_ns_lcount > dp->dad_loopbackprobe) { /* - * the check is in nd6_dad_na_input(), - * but just in case + * Sec. 4.1 in RFC 7527 requires transmission of + * additional probes until the loopback condition + * becomes clear when a looped back probe is detected. */ - duplicate++; - } - - if (dp->dad_ns_icount) { - /* We've seen NS, means DAD has failed. 
*/ - duplicate++; - } - - if (duplicate) { - /* (*dp) will be freed in nd6_dad_duplicated() */ - dp = NULL; - nd6_dad_duplicated(ifa); + log(LOG_ERR, "%s: a looped back NS message is " + "detected during DAD for %s. " + "Another DAD probes are being sent.\n", + if_name(ifa->ifa_ifp), + ip6_sprintf(ip6buf, IFA_IN6(ifa))); + dp->dad_loopbackprobe = dp->dad_ns_lcount; + /* + * Send an NS immediately and increase dad_count by + * V_nd6_mmaxtries - 1. + */ + dp->dad_count = + dp->dad_ns_ocount + V_nd6_mmaxtries - 1; + nd6_dad_starttimer(dp, + (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, + 1); + goto done; } else { /* * We are done with DAD. No NA came, no NS came. - * No duplicate address found. + * No duplicate address found. Check IFDISABLED flag + * again in case that it is changed between the + * beginning of this function and here. */ - ia->ia6_flags &= ~IN6_IFF_TENTATIVE; + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0) + ia->ia6_flags &= ~IN6_IFF_TENTATIVE; nd6log((LOG_DEBUG, "%s: DAD complete for %s - no duplicates found\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); - - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - free(dp, M_IP6NDP); - dp = NULL; - ifa_free(ifa); + if (dp->dad_ns_lcount > 0) + log(LOG_ERR, "%s: DAD completed while " + "a looped back NS message is detected " + "during DAD for %s.\n", + if_name(ifa->ifa_ifp), + ip6_sprintf(ip6buf, IFA_IN6(ifa))); } } - +err: + nd6_dad_del(dp); done: - splx(s); CURVNET_RESTORE(); } -void -nd6_dad_duplicated(struct ifaddr *ifa) +static void +nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp; - struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; - dp = nd6_dad_find(ifa); - if (dp == NULL) { - log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n"); - return; - } - log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " - "NS in/out=%d/%d, NA in=%d\n", + "NS in/out/loopback=%d/%d/%d, NA 
in=%d\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), - dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount); + dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount, + dp->dad_na_icount); ia->ia6_flags &= ~IN6_IFF_TENTATIVE; ia->ia6_flags |= IN6_IFF_DUPLICATED; - /* We are done with DAD, with duplicate address found. (failure) */ - nd6_dad_stoptimer(dp); - ifp = ifa->ifa_ifp; log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)); @@ -1466,9 +1476,7 @@ nd6_dad_duplicated(struct ifaddr *ifa) case IFT_FDDI: case IFT_ATM: case IFT_IEEE1394: -#ifdef IFT_IEEE80211 case IFT_IEEE80211: -#endif case IFT_INFINIBAND: in6 = ia->ia_addr.sin6_addr; if (in6_get_hw_ifid(ifp, &in6) == 0 && @@ -1481,18 +1489,14 @@ nd6_dad_duplicated(struct ifaddr *ifa) break; } } - - TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); - free(dp, M_IP6NDP); - dp = NULL; - ifa_free(ifa); } static void -nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa) +nd6_dad_ns_output(struct dadq *dp) { - struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; - struct ifnet *ifp = ifa->ifa_ifp; + struct in6_ifaddr *ia = (struct in6_ifaddr *)dp->dad_ifa; + struct ifnet *ifp = dp->dad_ifa->ifa_ifp; + int i; dp->dad_ns_tcount++; if ((ifp->if_flags & IFF_UP) == 0) { @@ -1503,17 +1507,29 @@ nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa) } dp->dad_ns_ocount++; - nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1); + if (V_dad_enhanced != 0) { + for (i = 0; i < ND_OPT_NONCE_LEN32; i++) + dp->dad_nonce[i] = arc4random(); + /* + * XXXHRS: Note that in the case that + * DupAddrDetectTransmits > 1, multiple NS messages with + * different nonces can be looped back in an unexpected + * order. The current implementation recognizes only + * the latest nonce on the sender side. Practically it + * should work well in almost all cases. 
+ */ + } + nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr, + (uint8_t *)&dp->dad_nonce[0]); } static void -nd6_dad_ns_input(struct ifaddr *ifa) +nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce) { struct in6_ifaddr *ia; struct ifnet *ifp; const struct in6_addr *taddr6; struct dadq *dp; - int duplicate; if (ifa == NULL) panic("ifa == NULL in nd6_dad_ns_input"); @@ -1521,39 +1537,15 @@ nd6_dad_ns_input(struct ifaddr *ifa) ia = (struct in6_ifaddr *)ifa; ifp = ifa->ifa_ifp; taddr6 = &ia->ia_addr.sin6_addr; - duplicate = 0; - dp = nd6_dad_find(ifa); - - /* Quickhack - completely ignore DAD NS packets */ - if (V_dad_ignore_ns) { - char ip6buf[INET6_ADDRSTRLEN]; - nd6log((LOG_INFO, - "nd6_dad_ns_input: ignoring DAD NS packet for " - "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6), - if_name(ifa->ifa_ifp))); + /* Ignore Nonce option when Enhanced DAD is disabled. */ + if (V_dad_enhanced == 0) + ndopt_nonce = NULL; + dp = nd6_dad_find(ifa, ndopt_nonce); + if (dp == NULL) return; - } - - /* - * if I'm yet to start DAD, someone else started using this address - * first. I have a duplicate and you win. - */ - if (dp == NULL || dp->dad_ns_ocount == 0) - duplicate++; - - /* XXX more checks for loopback situation - see nd6_dad_timer too */ - if (duplicate) { - dp = NULL; /* will be freed in nd6_dad_duplicated() */ - nd6_dad_duplicated(ifa); - } else { - /* - * not sure if I got a duplicate. - * increment ns count and see what happens. - */ - if (dp) - dp->dad_ns_icount++; - } + dp->dad_ns_icount++; + nd6_dad_rele(dp); } static void @@ -1564,10 +1556,9 @@ nd6_dad_na_input(struct ifaddr *ifa) if (ifa == NULL) panic("ifa == NULL in nd6_dad_na_input"); - dp = nd6_dad_find(ifa); - if (dp) + dp = nd6_dad_find(ifa, NULL); + if (dp != NULL) { dp->dad_na_icount++; - - /* remove the address. 
*/ - nd6_dad_duplicated(ifa); + nd6_dad_rele(dp); + } } diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c index 8d150ae4..c8d7c0ef 100644 --- a/freebsd/sys/netinet6/nd6_rtr.c +++ b/freebsd/sys/netinet6/nd6_rtr.c @@ -41,20 +41,24 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/malloc.h> #include <sys/mbuf.h> +#include <sys/refcount.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/time.h> #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <rtems/bsd/sys/errno.h> +#include <sys/rmlock.h> #include <sys/rwlock.h> #include <sys/syslog.h> #include <sys/queue.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_types.h> #include <net/if_dl.h> #include <net/route.h> +#include <net/route_var.h> #include <net/radix.h> #include <net/vnet.h> @@ -89,7 +93,7 @@ static void in6_init_address_ltimes(struct nd_prefix *, static int nd6_prefix_onlink(struct nd_prefix *); static int nd6_prefix_offlink(struct nd_prefix *); -static int rt6_deleteroute(struct radix_node *, void *); +static int rt6_deleteroute(const struct rtentry *, void *); VNET_DECLARE(int, nd6_recalc_reachtm_interval); #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) @@ -220,6 +224,8 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) struct nd_defrouter *dr; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + dr = NULL; + /* * We only accept RAs only when the per-interface flag * ND6_IFF_ACCEPT_RTADV is on the receiving interface. 
@@ -272,7 +278,7 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) bzero(&dr0, sizeof(dr0)); dr0.rtaddr = saddr6; - dr0.flags = nd_ra->nd_ra_flags_reserved; + dr0.raflags = nd_ra->nd_ra_flags_reserved; /* * Effectively-disable routes from RA messages when * ND6_IFF_NO_RADR enabled on the receiving interface or @@ -284,7 +290,7 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) dr0.rtlifetime = 0; else dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); - dr0.expire = time_second + dr0.rtlifetime; + dr0.expire = time_uptime + dr0.rtlifetime; dr0.ifp = ifp; /* unspecified or not? (RFC 2461 6.3.4) */ if (advreachable) { @@ -369,6 +375,10 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) (void)prelist_update(&pr, dr, m, mcast); } } + if (dr != NULL) { + defrouter_rele(dr); + dr = NULL; + } /* * MTU @@ -446,10 +456,6 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) m_freem(m); } -/* - * default router list proccessing sub routines - */ - /* tell the change to user processes watching the routing socket. 
*/ static void nd6_rtmsg(int cmd, struct rtentry *rt) @@ -478,12 +484,15 @@ nd6_rtmsg(int cmd, struct rtentry *rt) ifa_free(ifa); } +/* + * default router list processing sub routines + */ + static void defrouter_addreq(struct nd_defrouter *new) { struct sockaddr_in6 def, mask, gate; struct rtentry *newrt = NULL; - int s; int error; bzero(&def, sizeof(def)); @@ -495,7 +504,6 @@ defrouter_addreq(struct nd_defrouter *new) def.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; - s = splnet(); error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt, RT_DEFAULT_FIB); @@ -505,21 +513,46 @@ defrouter_addreq(struct nd_defrouter *new) } if (error == 0) new->installed = 1; - splx(s); - return; } struct nd_defrouter * -defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) +defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp) { struct nd_defrouter *dr; - TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { - if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) + ND6_LOCK_ASSERT(); + TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) + if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) { + defrouter_ref(dr); return (dr); - } + } + return (NULL); +} + +struct nd_defrouter * +defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp) +{ + struct nd_defrouter *dr; + + ND6_RLOCK(); + dr = defrouter_lookup_locked(addr, ifp); + ND6_RUNLOCK(); + return (dr); +} + +void +defrouter_ref(struct nd_defrouter *dr) +{ + + refcount_acquire(&dr->refcnt); +} + +void +defrouter_rele(struct nd_defrouter *dr) +{ - return (NULL); /* search failed */ + if (refcount_release(&dr->refcnt)) + free(dr, M_IP6NDP); } /* @@ -554,15 +587,41 @@ defrouter_delreq(struct nd_defrouter *dr) } /* - * remove all default routes from default router list + * Remove all default routes from default router list. 
*/ void defrouter_reset(void) { - struct nd_defrouter *dr; + struct nd_defrouter *dr, **dra; + int count, i; + + count = i = 0; + /* + * We can't delete routes with the ND lock held, so make a copy of the + * current default router list and use that when deleting routes. + */ + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) - defrouter_delreq(dr); + count++; + ND6_RUNLOCK(); + + dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO); + + ND6_RLOCK(); + TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { + if (i == count) + break; + defrouter_ref(dr); + dra[i++] = dr; + } + ND6_RUNLOCK(); + + for (i = 0; i < count && dra[i] != NULL; i++) { + defrouter_delreq(dra[i]); + defrouter_rele(dra[i]); + } + free(dra, M_TEMP); /* * XXX should we also nuke any default routers in the kernel, by @@ -570,12 +629,53 @@ defrouter_reset(void) */ } +/* + * Look up a matching default router list entry and remove it. Returns true if a + * matching entry was found, false otherwise. + */ +bool +defrouter_remove(struct in6_addr *addr, struct ifnet *ifp) +{ + struct nd_defrouter *dr; + + ND6_WLOCK(); + dr = defrouter_lookup_locked(addr, ifp); + if (dr == NULL) { + ND6_WUNLOCK(); + return (false); + } + + defrouter_unlink(dr, NULL); + ND6_WUNLOCK(); + defrouter_del(dr); + defrouter_rele(dr); + return (true); +} + +/* + * Remove a router from the global list and optionally stash it in a + * caller-supplied queue. + * + * The ND lock must be held. + */ +void +defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq) +{ + + ND6_WLOCK_ASSERT(); + TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); + if (drq != NULL) + TAILQ_INSERT_TAIL(drq, dr, dr_entry); +} + void -defrtrlist_del(struct nd_defrouter *dr) +defrouter_del(struct nd_defrouter *dr) { struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; + ND6_UNLOCK_ASSERT(); + /* * Flush all the routing table entries that use the router * as a next hop. 
@@ -587,7 +687,6 @@ defrtrlist_del(struct nd_defrouter *dr) deldr = dr; defrouter_delreq(dr); } - TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); /* * Also delete all the pointers to the router in each prefix lists. @@ -607,7 +706,10 @@ defrtrlist_del(struct nd_defrouter *dr) if (deldr) defrouter_select(); - free(dr, M_IP6NDP); + /* + * Release the list reference. + */ + defrouter_rele(dr); } /* @@ -634,16 +736,16 @@ defrtrlist_del(struct nd_defrouter *dr) void defrouter_select(void) { - int s = splnet(); - struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL; + struct nd_defrouter *dr, *selected_dr, *installed_dr; struct llentry *ln = NULL; + ND6_RLOCK(); /* * Let's handle easy case (3) first: * If default router list is empty, there's nothing to be done. */ if (TAILQ_EMPTY(&V_nd_defrouter)) { - splx(s); + ND6_RUNLOCK(); return; } @@ -652,12 +754,14 @@ defrouter_select(void) * We just pick up the first reachable one (if any), assuming that * the ordering rule of the list described in defrtrlist_update(). */ + selected_dr = installed_dr = NULL; TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { IF_AFDATA_RLOCK(dr->ifp); if (selected_dr == NULL && (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln)) { selected_dr = dr; + defrouter_ref(selected_dr); } IF_AFDATA_RUNLOCK(dr->ifp); if (ln != NULL) { @@ -665,12 +769,15 @@ defrouter_select(void) ln = NULL; } - if (dr->installed && installed_dr == NULL) - installed_dr = dr; - else if (dr->installed && installed_dr) { - /* this should not happen. warn for diagnosis. */ - log(LOG_ERR, "defrouter_select: more than one router" - " is installed\n"); + if (dr->installed) { + if (installed_dr == NULL) { + installed_dr = dr; + defrouter_ref(installed_dr); + } else { + /* this should not happen. warn for diagnosis. 
*/ + log(LOG_ERR, + "defrouter_select: more than one router is installed\n"); + } } } /* @@ -682,21 +789,25 @@ defrouter_select(void) * or when the new one has a really higher preference value. */ if (selected_dr == NULL) { - if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry)) + if (installed_dr == NULL || + TAILQ_NEXT(installed_dr, dr_entry) == NULL) selected_dr = TAILQ_FIRST(&V_nd_defrouter); else selected_dr = TAILQ_NEXT(installed_dr, dr_entry); - } else if (installed_dr) { + defrouter_ref(selected_dr); + } else if (installed_dr != NULL) { IF_AFDATA_RLOCK(installed_dr->ifp); if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) && ND6_IS_LLINFO_PROBREACH(ln) && rtpref(selected_dr) <= rtpref(installed_dr)) { + defrouter_rele(selected_dr); selected_dr = installed_dr; } IF_AFDATA_RUNLOCK(installed_dr->ifp); if (ln != NULL) LLE_RUNLOCK(ln); } + ND6_RUNLOCK(); /* * If the selected router is different than the installed one, @@ -704,13 +815,13 @@ defrouter_select(void) * Note that the selected router is never NULL here. */ if (installed_dr != selected_dr) { - if (installed_dr) + if (installed_dr != NULL) { defrouter_delreq(installed_dr); + defrouter_rele(installed_dr); + } defrouter_addreq(selected_dr); } - - splx(s); - return; + defrouter_rele(selected_dr); } /* @@ -720,7 +831,7 @@ defrouter_select(void) static int rtpref(struct nd_defrouter *dr) { - switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) { + switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) { case ND_RA_FLAG_RTPREF_HIGH: return (RTPREF_HIGH); case ND_RA_FLAG_RTPREF_MEDIUM: @@ -734,7 +845,7 @@ rtpref(struct nd_defrouter *dr) * serious bug of kernel internal. We thus always bark here. * Or, can we even panic? 
*/ - log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags); + log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags); return (RTPREF_INVALID); } /* NOTREACHED */ @@ -744,63 +855,50 @@ static struct nd_defrouter * defrtrlist_update(struct nd_defrouter *new) { struct nd_defrouter *dr, *n; - int s = splnet(); + int oldpref; - if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { - /* entry exists */ - if (new->rtlifetime == 0) { - defrtrlist_del(dr); - dr = NULL; - } else { - int oldpref = rtpref(dr); + if (new->rtlifetime == 0) { + defrouter_remove(&new->rtaddr, new->ifp); + return (NULL); + } - /* override */ - dr->flags = new->flags; /* xxx flag check */ - dr->rtlifetime = new->rtlifetime; - dr->expire = new->expire; + ND6_WLOCK(); + dr = defrouter_lookup_locked(&new->rtaddr, new->ifp); + if (dr != NULL) { + oldpref = rtpref(dr); - /* - * If the preference does not change, there's no need - * to sort the entries. Also make sure the selected - * router is still installed in the kernel. - */ - if (dr->installed && rtpref(new) == oldpref) { - splx(s); - return (dr); - } + /* override */ + dr->raflags = new->raflags; /* XXX flag check */ + dr->rtlifetime = new->rtlifetime; + dr->expire = new->expire; - /* - * preferred router may be changed, so relocate - * this router. - * XXX: calling TAILQ_REMOVE directly is a bad manner. - * However, since defrtrlist_del() has many side - * effects, we intentionally do so here. - * defrouter_select() below will handle routing - * changes later. - */ - TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); - n = dr; - goto insert; + /* + * If the preference does not change, there's no need + * to sort the entries. Also make sure the selected + * router is still installed in the kernel. 
+ */ + if (dr->installed && rtpref(new) == oldpref) { + ND6_WUNLOCK(); + return (dr); } - splx(s); - return (dr); - } - - /* entry does not exist */ - if (new->rtlifetime == 0) { - splx(s); - return (NULL); - } - n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT); - if (n == NULL) { - splx(s); - return (NULL); + /* + * The preferred router may have changed, so relocate this + * router. + */ + TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); + n = dr; + } else { + n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO); + if (n == NULL) { + ND6_WUNLOCK(); + return (NULL); + } + memcpy(n, new, sizeof(*n)); + /* Initialize with an extra reference for the caller. */ + refcount_init(&n->refcnt, 2); } - bzero(n, sizeof(*n)); - *n = *new; -insert: /* * Insert the new router in the Default Router List; * The Default Router List should be in the descending order @@ -813,15 +911,14 @@ insert: if (rtpref(n) > rtpref(dr)) break; } - if (dr) + if (dr != NULL) TAILQ_INSERT_BEFORE(dr, n, dr_entry); else TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); + ND6_WUNLOCK(); defrouter_select(); - splx(s); - return (n); } @@ -843,11 +940,11 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *new; - new = (struct nd_pfxrouter *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); + new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO); if (new == NULL) return; - bzero(new, sizeof(*new)); new->router = dr; + defrouter_ref(dr); LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); @@ -857,7 +954,9 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) static void pfxrtr_del(struct nd_pfxrouter *pfr) { + LIST_REMOVE(pfr, pfr_entry); + defrouter_rele(pfr->router); free(pfr, M_IP6NDP); } @@ -884,13 +983,11 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, { struct nd_prefix *new = NULL; int error = 0; - int i, s; char ip6buf[INET6_ADDRSTRLEN]; - new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT); + new = malloc(sizeof(*new), 
M_IP6NDP, M_NOWAIT | M_ZERO); if (new == NULL) - return(ENOMEM); - bzero(new, sizeof(*new)); + return (ENOMEM); new->ndpr_ifp = pr->ndpr_ifp; new->ndpr_prefix = pr->ndpr_prefix; new->ndpr_plen = pr->ndpr_plen; @@ -899,24 +996,18 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, new->ndpr_flags = pr->ndpr_flags; if ((error = in6_init_prefix_ltimes(new)) != 0) { free(new, M_IP6NDP); - return(error); + return (error); } - new->ndpr_lastupdate = time_second; - if (newp != NULL) - *newp = new; + new->ndpr_lastupdate = time_uptime; /* initialization */ LIST_INIT(&new->ndpr_advrtrs); in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen); /* make prefix in the canonical form */ - for (i = 0; i < 4; i++) - new->ndpr_prefix.sin6_addr.s6_addr32[i] &= - new->ndpr_mask.s6_addr32[i]; + IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask); - s = splnet(); /* link ndpr_entry to nd_prefix list */ LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry); - splx(s); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { @@ -931,17 +1022,18 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, } } - if (dr) + if (dr != NULL) pfxrtr_add(new, dr); - - return 0; + if (newp != NULL) + *newp = new; + return (0); } void prelist_remove(struct nd_prefix *pr) { struct nd_pfxrouter *pfr, *next; - int e, s; + int e; char ip6buf[INET6_ADDRSTRLEN]; /* make sure to invalidate the prefix until it is really freed. */ @@ -966,17 +1058,13 @@ prelist_remove(struct nd_prefix *pr) if (pr->ndpr_refcnt > 0) return; /* notice here? 
*/ - s = splnet(); - /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); - /* free list of routers that adversed the prefix */ + /* free list of routers that advertised the prefix */ LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) { - free(pfr, M_IP6NDP); + pfxrtr_del(pfr); } - splx(s); - free(pr, M_IP6NDP); pfxlist_onlink_check(); @@ -994,9 +1082,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, struct ifaddr *ifa; struct ifnet *ifp = new->ndpr_ifp; struct nd_prefix *pr; - int s = splnet(); int error = 0; - int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; char ip6buf[INET6_ADDRSTRLEN]; @@ -1032,7 +1118,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, pr->ndpr_vltime = new->ndpr_vltime; pr->ndpr_pltime = new->ndpr_pltime; (void)in6_init_prefix_ltimes(pr); /* XXX error case? */ - pr->ndpr_lastupdate = time_second; + pr->ndpr_lastupdate = time_uptime; } if (new->ndpr_raf_onlink && @@ -1054,23 +1140,17 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, if (dr && pfxrtr_lookup(pr, dr) == NULL) pfxrtr_add(pr, dr); } else { - struct nd_prefix *newpr = NULL; - - newprefix = 1; - if (new->ndpr_vltime == 0) goto end; if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) goto end; - error = nd6_prelist_add(new, dr, &newpr); - if (error != 0 || newpr == NULL) { + error = nd6_prelist_add(new, dr, &pr); + if (error != 0) { nd6log((LOG_NOTICE, "prelist_update: " - "nd6_prelist_add failed for %s/%d on %s " - "errno=%d, returnpr=%p\n", + "nd6_prelist_add failed for %s/%d on %s errno=%d\n", ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr), - new->ndpr_plen, if_name(new->ndpr_ifp), - error, newpr)); + new->ndpr_plen, if_name(new->ndpr_ifp), error)); goto end; /* we should just give up in this case. */ } @@ -1081,13 +1161,11 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, * addresses. Thus, we explicitly make sure that the prefix * itself expires now. 
*/ - if (newpr->ndpr_raf_onlink == 0) { - newpr->ndpr_vltime = 0; - newpr->ndpr_pltime = 0; - in6_init_prefix_ltimes(newpr); + if (pr->ndpr_raf_onlink == 0) { + pr->ndpr_vltime = 0; + pr->ndpr_pltime = 0; + in6_init_prefix_ltimes(pr); } - - pr = newpr; } /* @@ -1170,7 +1248,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME) remaininglifetime = ND6_INFINITE_LIFETIME; - else if (time_second - ifa6->ia6_updatetime > + else if (time_uptime - ifa6->ia6_updatetime > lt6_tmp.ia6t_vltime) { /* * The case of "invalid" address. We should usually @@ -1179,7 +1257,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, remaininglifetime = 0; } else remaininglifetime = lt6_tmp.ia6t_vltime - - (time_second - ifa6->ia6_updatetime); + (time_uptime - ifa6->ia6_updatetime); /* when not updating, keep the current stored lifetime. */ lt6_tmp.ia6t_vltime = remaininglifetime; @@ -1215,18 +1293,18 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, u_int32_t maxvltime, maxpltime; if (V_ip6_temp_valid_lifetime > - (u_int32_t)((time_second - ifa6->ia6_createtime) + + (u_int32_t)((time_uptime - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxvltime = V_ip6_temp_valid_lifetime - - (time_second - ifa6->ia6_createtime) - + (time_uptime - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxvltime = 0; if (V_ip6_temp_preferred_lifetime > - (u_int32_t)((time_second - ifa6->ia6_createtime) + + (u_int32_t)((time_uptime - ifa6->ia6_createtime) + V_ip6_desync_factor)) { maxpltime = V_ip6_temp_preferred_lifetime - - (time_second - ifa6->ia6_createtime) - + (time_uptime - ifa6->ia6_createtime) - V_ip6_desync_factor; } else maxpltime = 0; @@ -1241,7 +1319,7 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, } } ifa6->ia6_lifetime = lt6_tmp; - ifa6->ia6_updatetime = time_second; + ifa6->ia6_updatetime = time_uptime; } IF_ADDR_RUNLOCK(ifp); if (ia6_match == NULL && 
new->ndpr_vltime) { @@ -1319,7 +1397,6 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, } end: - splx(s); return error; } @@ -1363,12 +1440,13 @@ find_pfxlist_reachable_router(struct nd_prefix *pr) * is no router around us. */ void -pfxlist_onlink_check() +pfxlist_onlink_check(void) { struct nd_prefix *pr; struct in6_ifaddr *ifa; struct nd_defrouter *dr; struct nd_pfxrouter *pfxrtr = NULL; + struct rm_priotracker in6_ifa_tracker; /* * Check if there is a prefix that has a reachable advertising @@ -1384,6 +1462,7 @@ pfxlist_onlink_check() * that does not advertise any prefixes. */ if (pr == NULL) { + ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { struct nd_prefix *pr0; @@ -1394,6 +1473,7 @@ pfxlist_onlink_check() if (pfxrtr != NULL) break; } + ND6_RUNLOCK(); } if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) { /* @@ -1424,7 +1504,7 @@ pfxlist_onlink_check() find_pfxlist_reachable_router(pr) == NULL) pr->ndpr_stateflags |= NDPRF_DETACHED; if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && - find_pfxlist_reachable_router(pr) != 0) + find_pfxlist_reachable_router(pr) != NULL) pr->ndpr_stateflags &= ~NDPRF_DETACHED; } } else { @@ -1497,9 +1577,8 @@ pfxlist_onlink_check() * detached. Note, however, that a manually configured address should * always be attached. * The precise detection logic is same as the one for prefixes. - * - * XXXRW: in6_ifaddrhead locking. 
*/ + IN6_IFADDR_RLOCK(&in6_ifa_tracker); TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) { if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF)) continue; @@ -1534,8 +1613,7 @@ pfxlist_onlink_check() ifa->ia6_flags |= IN6_IFF_DETACHED; } } - } - else { + } else { TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) { if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; @@ -1548,13 +1626,14 @@ pfxlist_onlink_check() } } } + IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); } static int nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) { static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; - struct radix_node_head *rnh; + struct rib_head *rnh; struct rtentry *rt; struct sockaddr_in6 mask6; u_long rtflags; @@ -1583,7 +1662,7 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6); /* XXX what if rhn == NULL? */ - RADIX_NODE_HEAD_LOCK(rnh); + RIB_WLOCK(rnh); RT_LOCK(rt); if (rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl) == 0) { @@ -1593,7 +1672,7 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) dl->sdl_type = rt->rt_ifp->if_type; dl->sdl_index = rt->rt_ifp->if_index; } - RADIX_NODE_HEAD_UNLOCK(rnh); + RIB_WUNLOCK(rnh); nd6_rtmsg(RTM_ADD, rt); RT_UNLOCK(rt); pr->ndpr_stateflags |= NDPRF_ONLINK; @@ -1755,6 +1834,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) } } error = a_failure; + a_failure = 1; if (error == 0) { pr->ndpr_stateflags &= ~NDPRF_ONLINK; @@ -1793,7 +1873,8 @@ nd6_prefix_offlink(struct nd_prefix *pr) &opr->ndpr_prefix.sin6_addr), opr->ndpr_plen, if_name(ifp), if_name(opr->ndpr_ifp), e)); - } + } else + a_failure = 0; } } } else { @@ -1805,6 +1886,10 @@ nd6_prefix_offlink(struct nd_prefix *pr) if_name(ifp), error)); } + if (a_failure) + lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6, + (struct sockaddr *)&mask6, LLE_STATIC); + return (error); } @@ -1860,22 +1945,9 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast) } /* make ifaddr */ + 
in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask); - bzero(&ifra, sizeof(ifra)); - /* - * in6_update_ifa() does not use ifra_name, but we accurately set it - * for safety. - */ - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); - ifra.ifra_addr.sin6_family = AF_INET6; - ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); - /* prefix */ - ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr; - ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0]; - ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1]; - ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2]; - ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3]; - + IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask); /* interface ID */ ifra.ifra_addr.sin6_addr.s6_addr32[0] |= (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]); @@ -1887,12 +1959,6 @@ in6_ifadd(struct nd_prefixctl *pr, int mcast) (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]); ifa_free(ifa); - /* new prefix mask. */ - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); - ifra.ifra_prefixmask.sin6_family = AF_INET6; - bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr, - sizeof(ifra.ifra_prefixmask.sin6_addr)); - /* lifetimes. 
*/ ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime; ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime; @@ -1949,24 +2015,21 @@ int in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay) { struct ifnet *ifp = ia0->ia_ifa.ifa_ifp; - struct in6_ifaddr *newia, *ia; + struct in6_ifaddr *newia; struct in6_aliasreq ifra; - int i, error; + int error; int trylimit = 3; /* XXX: adhoc value */ int updateflags; u_int32_t randid[2]; time_t vltime0, pltime0; - bzero(&ifra, sizeof(ifra)); - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); - ifra.ifra_addr = ia0->ia_addr; - /* copy prefix mask */ - ifra.ifra_prefixmask = ia0->ia_prefixmask; + in6_prepare_ifra(&ifra, &ia0->ia_addr.sin6_addr, + &ia0->ia_prefixmask.sin6_addr); + + ifra.ifra_addr = ia0->ia_addr; /* XXX: do we need this ? */ /* clear the old IFID */ - for (i = 0; i < 4; i++) { - ifra.ifra_addr.sin6_addr.s6_addr32[i] &= - ifra.ifra_prefixmask.sin6_addr.s6_addr32[i]; - } + IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, + &ifra.ifra_prefixmask.sin6_addr); again: if (in6_get_tmpifid(ifp, (u_int8_t *)randid, @@ -1986,26 +2049,18 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay) * there may be a time lag between generation of the ID and generation * of the address. So, we'll do one more sanity check. */ - IN6_IFADDR_RLOCK(); - TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, - &ifra.ifra_addr.sin6_addr)) { - if (trylimit-- == 0) { - IN6_IFADDR_RUNLOCK(); - /* - * Give up. Something strange should have - * happened. - */ - nd6log((LOG_NOTICE, "in6_tmpifadd: failed to " - "find a unique random IFID\n")); - return (EEXIST); - } - IN6_IFADDR_RUNLOCK(); + + if (in6_localip(&ifra.ifra_addr.sin6_addr) != 0) { + if (trylimit-- > 0) { forcegen = 1; goto again; } + + /* Give up. Something strange should have happened. 
*/ + nd6log((LOG_NOTICE, "in6_tmpifadd: failed to " + "find a unique random IFID\n")); + return (EEXIST); } - IN6_IFADDR_RUNLOCK(); /* * The Valid Lifetime is the lower of the Valid Lifetime of the @@ -2017,7 +2072,7 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay) if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { vltime0 = IFA6_IS_INVALID(ia0) ? 0 : (ia0->ia6_lifetime.ia6t_vltime - - (time_second - ia0->ia6_updatetime)); + (time_uptime - ia0->ia6_updatetime)); if (vltime0 > V_ip6_temp_valid_lifetime) vltime0 = V_ip6_temp_valid_lifetime; } else @@ -2025,7 +2080,7 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay) if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 : (ia0->ia6_lifetime.ia6t_pltime - - (time_second - ia0->ia6_updatetime)); + (time_uptime - ia0->ia6_updatetime)); if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){ pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor; @@ -2083,11 +2138,11 @@ in6_init_prefix_ltimes(struct nd_prefix *ndpr) if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_preferred = 0; else - ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime; + ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime; if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_expire = 0; else - ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime; + ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime; return 0; } @@ -2099,7 +2154,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME) lt6->ia6t_expire = 0; else { - lt6->ia6t_expire = time_second; + lt6->ia6t_expire = time_uptime; lt6->ia6t_expire += lt6->ia6t_vltime; } @@ -2107,7 +2162,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME) lt6->ia6t_preferred = 0; else { - lt6->ia6t_preferred = time_second; + 
lt6->ia6t_preferred = time_uptime; lt6->ia6t_preferred += lt6->ia6t_pltime; } } @@ -2120,34 +2175,19 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) void rt6_flush(struct in6_addr *gateway, struct ifnet *ifp) { - struct radix_node_head *rnh; - u_int fibnum; - int s = splnet(); /* We'll care only link-local addresses */ - if (!IN6_IS_ADDR_LINKLOCAL(gateway)) { - splx(s); + if (!IN6_IS_ADDR_LINKLOCAL(gateway)) return; - } /* XXX Do we really need to walk any but the default FIB? */ - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rnh = rt_tables_get_rnh(fibnum, AF_INET6); - if (rnh == NULL) - continue; - - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); - RADIX_NODE_HEAD_UNLOCK(rnh); - } - splx(s); + rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway); } static int -rt6_deleteroute(struct radix_node *rn, void *arg) +rt6_deleteroute(const struct rtentry *rt, void *arg) { #define SIN6(s) ((struct sockaddr_in6 *)s) - struct rtentry *rt = (struct rtentry *)rn; struct in6_addr *gate = (struct in6_addr *)arg; if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) @@ -2172,8 +2212,7 @@ rt6_deleteroute(struct radix_node *rn, void *arg) if ((rt->rt_flags & RTF_HOST) == 0) return (0); - return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, - rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum)); + return (1); #undef SIN6 } diff --git a/freebsd/sys/netinet6/pim6_var.h b/freebsd/sys/netinet6/pim6_var.h index 060836ba..7f9262bb 100644 --- a/freebsd/sys/netinet6/pim6_var.h +++ b/freebsd/sys/netinet6/pim6_var.h @@ -42,13 +42,13 @@ #define _NETINET6_PIM6_VAR_H_ struct pim6stat { - u_quad_t pim6s_rcv_total; /* total PIM messages received */ - u_quad_t pim6s_rcv_tooshort; /* received with too few bytes */ - u_quad_t pim6s_rcv_badsum; /* received with bad checksum */ - u_quad_t pim6s_rcv_badversion; /* received bad PIM version */ - u_quad_t pim6s_rcv_registers; /* received 
registers */ - u_quad_t pim6s_rcv_badregisters; /* received invalid registers */ - u_quad_t pim6s_snd_registers; /* sent registers */ + uint64_t pim6s_rcv_total; /* total PIM messages received */ + uint64_t pim6s_rcv_tooshort; /* received with too few bytes */ + uint64_t pim6s_rcv_badsum; /* received with bad checksum */ + uint64_t pim6s_rcv_badversion; /* received bad PIM version */ + uint64_t pim6s_rcv_registers; /* received registers */ + uint64_t pim6s_rcv_badregisters; /* received invalid registers */ + uint64_t pim6s_snd_registers; /* sent registers */ }; #if (defined(KERNEL)) || (defined(_KERNEL)) @@ -56,13 +56,8 @@ int pim6_input(struct mbuf **, int*, int); #endif /* KERNEL */ /* - * Names for PIM sysctl objects + * Identifiers for PIM sysctl nodes */ #define PIM6CTL_STATS 1 /* statistics (read-only) */ -#define PIM6CTL_MAXID 2 -#define PIM6CTL_NAMES { \ - { 0, 0 }, \ - { 0, 0 }, \ -} #endif /* _NETINET6_PIM6_VAR_H_ */ diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c index e2d6693a..dfd7c45b 100644 --- a/freebsd/sys/netinet6/raw_ip6.c +++ b/freebsd/sys/netinet6/raw_ip6.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <rtems/bsd/sys/errno.h> #include <sys/jail.h> +#include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <sys/malloc.h> #include <sys/mbuf.h> @@ -83,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_types.h> #include <net/route.h> #include <net/vnet.h> @@ -126,7 +128,12 @@ VNET_DECLARE(struct inpcbinfo, ripcbinfo); extern u_long rip_sendspace; extern u_long rip_recvspace; -VNET_DEFINE(struct rip6stat, rip6stat); +VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat); +VNET_PCPUSTAT_SYSINIT(rip6stat); + +#ifdef VIMAGE +VNET_PCPUSTAT_SYSUNINIT(rip6stat); +#endif /* VIMAGE */ /* * Hooks for multicast routing. 
They all default to NULL, so leave them not @@ -158,18 +165,12 @@ rip6_input(struct mbuf **mp, int *offp, int proto) struct mbuf *m = *mp; register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); register struct inpcb *in6p; - struct inpcb *last = 0; + struct inpcb *last = NULL; struct mbuf *opts = NULL; struct sockaddr_in6 fromsa; RIP6STAT_INC(rip6s_ipackets); - if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { - /* XXX Send icmp6 host/port unreach? */ - m_freem(m); - return (IPPROTO_DONE); - } - init_sin6(&fromsa, m); /* general init */ ifp = m->m_pkthdr.rcvif; @@ -265,7 +266,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto) */ if (n && ipsec6_in_reject(n, last)) { m_freem(n); - IPSEC6STAT_INC(in_polvio); /* Do not inject data into pcb. */ } else #endif /* IPSEC */ @@ -297,7 +297,6 @@ rip6_input(struct mbuf **mp, int *offp, int proto) */ if ((last != NULL) && ipsec6_in_reject(m, last)) { m_freem(m); - IPSEC6STAT_INC(in_polvio); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ INP_RUNLOCK(last); @@ -385,17 +384,10 @@ rip6_ctlinput(int cmd, struct sockaddr *sa, void *d) * may have setup with control call. */ int -#if __STDC__ -rip6_output(struct mbuf *m, ...) -#else -rip6_output(m, va_alist) - struct mbuf *m; - va_dcl -#endif +rip6_output(struct mbuf *m, struct socket *so, ...) 
{ struct mbuf *control; struct m_tag *mtag; - struct socket *so; struct sockaddr_in6 *dstsock; struct in6_addr *dst; struct ip6_hdr *ip6; @@ -407,11 +399,11 @@ rip6_output(m, va_alist) int type = 0, code = 0; /* for ICMPv6 output statistics only */ int scope_ambiguous = 0; int use_defzone = 0; + int hlim = 0; struct in6_addr in6a; va_list ap; - va_start(ap, m); - so = va_arg(ap, struct socket *); + va_start(ap, so); dstsock = va_arg(ap, struct sockaddr_in6 *); control = va_arg(ap, struct mbuf *); va_end(ap); @@ -461,7 +453,7 @@ rip6_output(m, va_alist) code = icmp6->icmp6_code; } - M_PREPEND(m, sizeof(*ip6), M_DONTWAIT); + M_PREPEND(m, sizeof(*ip6), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto bad; @@ -471,8 +463,9 @@ rip6_output(m, va_alist) /* * Source address selection. */ - error = in6_selectsrc(dstsock, optp, in6p, NULL, so->so_cred, - &oifp, &in6a); + error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred, + scope_ambiguous, &in6a, &hlim); + if (error) goto bad; error = prison_check_ip6(in6p->inp_cred, &in6a); @@ -480,19 +473,6 @@ rip6_output(m, va_alist) goto bad; ip6->ip6_src = in6a; - if (oifp && scope_ambiguous) { - /* - * Application should provide a proper zone ID or the use of - * default zone IDs should be enabled. Unfortunately, some - * applications do not behave as it should, so we need a - * workaround. Even if an appropriate ID is not determined - * (when it's required), if we can determine the outgoing - * interface. determine the zone ID based on the interface. - */ - error = in6_setscope(&dstsock->sin6_addr, oifp, NULL); - if (error != 0) - goto bad; - } ip6->ip6_dst = dstsock->sin6_addr; /* @@ -507,7 +487,7 @@ rip6_output(m, va_alist) * ip6_plen will be filled in ip6_output, so not fill it here. 
*/ ip6->ip6_nxt = in6p->inp_ip_p; - ip6->ip6_hlim = in6_selecthlim(in6p, oifp); + ip6->ip6_hlim = hlim; if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { @@ -795,7 +775,6 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) struct inpcb *inp; struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr in6a; - struct ifnet *ifp = NULL; int error = 0, scope_ambiguous = 0; inp = sotoinpcb(so); @@ -824,21 +803,14 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); /* Source address selection. XXX: need pcblookup? */ - error = in6_selectsrc(addr, inp->in6p_outputopts, - inp, NULL, so->so_cred, &ifp, &in6a); + error = in6_selectsrc_socket(addr, inp->in6p_outputopts, + inp, so->so_cred, scope_ambiguous, &in6a, NULL); if (error) { INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } - /* XXX: see above */ - if (ifp && scope_ambiguous && - (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) { - INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_ripcbinfo); - return (error); - } inp->in6p_faddr = addr->sin6_addr; inp->in6p_laddr = in6a; soisconnected(so); diff --git a/freebsd/sys/netinet6/raw_ip6.h b/freebsd/sys/netinet6/raw_ip6.h index cc4bcdd0..5eec5fff 100644 --- a/freebsd/sys/netinet6/raw_ip6.h +++ b/freebsd/sys/netinet6/raw_ip6.h @@ -37,21 +37,23 @@ * ICMPv6 stat is counted separately. 
see netinet/icmp6.h */ struct rip6stat { - u_quad_t rip6s_ipackets; /* total input packets */ - u_quad_t rip6s_isum; /* input checksum computations */ - u_quad_t rip6s_badsum; /* of above, checksum error */ - u_quad_t rip6s_nosock; /* no matching socket */ - u_quad_t rip6s_nosockmcast; /* of above, arrived as multicast */ - u_quad_t rip6s_fullsock; /* not delivered, input socket full */ + uint64_t rip6s_ipackets; /* total input packets */ + uint64_t rip6s_isum; /* input checksum computations */ + uint64_t rip6s_badsum; /* of above, checksum error */ + uint64_t rip6s_nosock; /* no matching socket */ + uint64_t rip6s_nosockmcast; /* of above, arrived as multicast */ + uint64_t rip6s_fullsock; /* not delivered, input socket full */ - u_quad_t rip6s_opackets; /* total output packets */ + uint64_t rip6s_opackets; /* total output packets */ }; #ifdef _KERNEL -#define RIP6STAT_ADD(name, val) V_rip6stat.name += (val) +#include <sys/counter.h> + +VNET_PCPUSTAT_DECLARE(struct rip6stat, rip6stat); +#define RIP6STAT_ADD(name, val) \ + VNET_PCPUSTAT_ADD(struct rip6stat, rip6stat, name, (val)) #define RIP6STAT_INC(name) RIP6STAT_ADD(name, 1) -VNET_DECLARE(struct rip6stat, rip6stat); -#define V_rip6stat VNET(rip6stat) -#endif +#endif /* _KERNEL */ #endif diff --git a/freebsd/sys/netinet6/route6.c b/freebsd/sys/netinet6/route6.c index 90738461..d698d328 100644 --- a/freebsd/sys/netinet6/route6.c +++ b/freebsd/sys/netinet6/route6.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <sys/queue.h> #include <net/if.h> +#include <net/if_var.h> #include <netinet/in.h> #include <netinet6/in6_var.h> diff --git a/freebsd/sys/netinet6/scope6.c b/freebsd/sys/netinet6/scope6.c index 2ccd2f7a..0f8ead2d 100644 --- a/freebsd/sys/netinet6/scope6.c +++ b/freebsd/sys/netinet6/scope6.c @@ -41,9 +41,11 @@ __FBSDID("$FreeBSD$"); #include <sys/sockio.h> #include <sys/systm.h> #include <sys/queue.h> +#include <sys/sysctl.h> #include <sys/syslog.h> #include <net/if.h> +#include <net/if_var.h> #include 
<net/vnet.h> #include <netinet/in.h> @@ -58,6 +60,11 @@ VNET_DEFINE(int, ip6_use_defzone) = 1; #else VNET_DEFINE(int, ip6_use_defzone) = 0; #endif +VNET_DEFINE(int, deembed_scopeid) = 1; +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, deembed_scopeid, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(deembed_scopeid), 0, + "Extract embedded zone ID and set it to sin6_scope_id in sockaddr_in6."); /* * The scope6_lock protects the global sid default stored in @@ -95,22 +102,14 @@ scope6_ifattach(struct ifnet *ifp) { struct scope6_id *sid; - sid = (struct scope6_id *)malloc(sizeof(*sid), M_IFADDR, M_WAITOK); - bzero(sid, sizeof(*sid)); - + sid = malloc(sizeof(*sid), M_IFADDR, M_WAITOK | M_ZERO); /* * XXX: IPV6_ADDR_SCOPE_xxx macros are not standard. * Should we rather hardcode here? */ sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index; sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index; -#ifdef MULTI_SCOPE - /* by default, we don't care about scope boundary for these scopes. */ - sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL] = 1; - sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1; -#endif - - return sid; + return (sid); } void @@ -230,62 +229,24 @@ scope6_get(struct ifnet *ifp, struct scope6_id *idlist) * Get a scope of the address. Node-local, link-local, site-local or global. */ int -in6_addrscope(struct in6_addr *addr) +in6_addrscope(const struct in6_addr *addr) { - int scope; - - if (addr->s6_addr[0] == 0xfe) { - scope = addr->s6_addr[1] & 0xc0; - - switch (scope) { - case 0x80: - return IPV6_ADDR_SCOPE_LINKLOCAL; - break; - case 0xc0: - return IPV6_ADDR_SCOPE_SITELOCAL; - break; - default: - return IPV6_ADDR_SCOPE_GLOBAL; /* just in case */ - break; - } - } - - - if (addr->s6_addr[0] == 0xff) { - scope = addr->s6_addr[1] & 0x0f; + if (IN6_IS_ADDR_MULTICAST(addr)) { /* - * due to other scope such as reserved, - * return scope doesn't work. + * Addresses with reserved value F must be treated as + * global multicast addresses. 
*/ - switch (scope) { - case IPV6_ADDR_SCOPE_INTFACELOCAL: - return IPV6_ADDR_SCOPE_INTFACELOCAL; - break; - case IPV6_ADDR_SCOPE_LINKLOCAL: - return IPV6_ADDR_SCOPE_LINKLOCAL; - break; - case IPV6_ADDR_SCOPE_SITELOCAL: - return IPV6_ADDR_SCOPE_SITELOCAL; - break; - default: - return IPV6_ADDR_SCOPE_GLOBAL; - break; - } + if (IPV6_ADDR_MC_SCOPE(addr) == 0x0f) + return (IPV6_ADDR_SCOPE_GLOBAL); + return (IPV6_ADDR_MC_SCOPE(addr)); } - - /* - * Regard loopback and unspecified addresses as global, since - * they have no ambiguity. - */ - if (bcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) { - if (addr->s6_addr[15] == 1) /* loopback */ - return IPV6_ADDR_SCOPE_LINKLOCAL; - if (addr->s6_addr[15] == 0) /* unspecified */ - return IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */ - } - - return IPV6_ADDR_SCOPE_GLOBAL; + if (IN6_IS_ADDR_LINKLOCAL(addr) || + IN6_IS_ADDR_LOOPBACK(addr)) + return (IPV6_ADDR_SCOPE_LINKLOCAL); + if (IN6_IS_ADDR_SITELOCAL(addr)) + return (IPV6_ADDR_SCOPE_SITELOCAL); + return (IPV6_ADDR_SCOPE_GLOBAL); } /* @@ -359,7 +320,6 @@ scope6_addr2default(struct in6_addr *addr) int sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok) { - struct ifnet *ifp; u_int32_t zoneid; if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok) @@ -374,15 +334,11 @@ sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok) * zone IDs assuming a one-to-one mapping between interfaces * and links. 
*/ - if (V_if_index < zoneid) - return (ENXIO); - ifp = ifnet_byindex(zoneid); - if (ifp == NULL) /* XXX: this can happen for some OS */ + if (V_if_index < zoneid || ifnet_byindex(zoneid) == NULL) return (ENXIO); /* XXX assignment to 16bit from 32bit variable */ sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff); - sin6->sin6_scope_id = 0; } @@ -398,12 +354,6 @@ sa6_recoverscope(struct sockaddr_in6 *sin6) char ip6buf[INET6_ADDRSTRLEN]; u_int32_t zoneid; - if (sin6->sin6_scope_id != 0) { - log(LOG_NOTICE, - "sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n", - ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id); - /* XXX: proceed anyway... */ - } if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) || IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) { /* @@ -414,8 +364,19 @@ sa6_recoverscope(struct sockaddr_in6 *sin6) /* sanity check */ if (V_if_index < zoneid) return (ENXIO); +#if 0 + /* XXX: Disabled due to possible deadlock. */ if (!ifnet_byindex(zoneid)) return (ENXIO); +#endif + if (sin6->sin6_scope_id != 0 && + zoneid != sin6->sin6_scope_id) { + log(LOG_NOTICE, + "%s: embedded scope mismatch: %s%%%d. " + "sin6_scope_id was overridden\n", __func__, + ip6_sprintf(ip6buf, &sin6->sin6_addr), + sin6->sin6_scope_id); + } sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = zoneid; } @@ -438,63 +399,35 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id) u_int32_t zoneid = 0; struct scope6_id *sid; - IF_AFDATA_RLOCK(ifp); - - sid = SID(ifp); - -#ifdef DIAGNOSTIC - if (sid == NULL) { /* should not happen */ - panic("in6_setscope: scope array is NULL"); - /* NOTREACHED */ - } -#endif - /* * special case: the loopback address can only belong to a loopback * interface. 
*/ if (IN6_IS_ADDR_LOOPBACK(in6)) { - if (!(ifp->if_flags & IFF_LOOPBACK)) { - IF_AFDATA_RUNLOCK(ifp); + if (!(ifp->if_flags & IFF_LOOPBACK)) return (EINVAL); - } else { - if (ret_id != NULL) - *ret_id = 0; /* there's no ambiguity */ + } else { + scope = in6_addrscope(in6); + if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL || + scope == IPV6_ADDR_SCOPE_LINKLOCAL) { + /* + * Currently we use interface indeces as the + * zone IDs for interface-local and link-local + * scopes. + */ + zoneid = ifp->if_index; + in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */ + } else if (scope != IPV6_ADDR_SCOPE_GLOBAL) { + IF_AFDATA_RLOCK(ifp); + sid = SID(ifp); + zoneid = sid->s6id_list[scope]; IF_AFDATA_RUNLOCK(ifp); - return (0); } } - scope = in6_addrscope(in6); - switch (scope) { - case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */ - zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL]; - break; - - case IPV6_ADDR_SCOPE_LINKLOCAL: - zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; - break; - - case IPV6_ADDR_SCOPE_SITELOCAL: - zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; - break; - - case IPV6_ADDR_SCOPE_ORGLOCAL: - zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; - break; - - default: - zoneid = 0; /* XXX: treat as global. */ - break; - } - IF_AFDATA_RUNLOCK(ifp); - if (ret_id != NULL) *ret_id = zoneid; - if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) - in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */ - return (0); } @@ -528,3 +461,114 @@ in6_getscope(struct in6_addr *in6) return (0); } + +/* + * Return pointer to ifnet structure, corresponding to the zone id of + * link-local scope. + */ +struct ifnet* +in6_getlinkifnet(uint32_t zoneid) +{ + + return (ifnet_byindex((u_short)zoneid)); +} + +/* + * Return zone id for the specified scope. 
+ */ +uint32_t +in6_getscopezone(const struct ifnet *ifp, int scope) +{ + + if (scope == IPV6_ADDR_SCOPE_INTFACELOCAL || + scope == IPV6_ADDR_SCOPE_LINKLOCAL) + return (ifp->if_index); + if (scope >= 0 && scope < IPV6_ADDR_SCOPES_COUNT) + return (SID(ifp)->s6id_list[scope]); + return (0); +} + +/* + * Extracts scope from adddress @dst, stores cleared address + * inside @dst and zone inside @scopeid + */ +void +in6_splitscope(const struct in6_addr *src, struct in6_addr *dst, + uint32_t *scopeid) +{ + uint32_t zoneid; + + *dst = *src; + zoneid = ntohs(in6_getscope(dst)); + in6_clearscope(dst); + *scopeid = zoneid; +} + +/* + * This function is for checking sockaddr_in6 structure passed + * from the application level (usually). + * + * sin6_scope_id should be set for link-local unicast, link-local and + * interface-local multicast addresses. + * + * If it is zero, then look into default zone ids. If default zone id is + * not set or disabled, then return error. + */ +int +sa6_checkzone(struct sockaddr_in6 *sa6) +{ + int scope; + + scope = in6_addrscope(&sa6->sin6_addr); + if (scope == IPV6_ADDR_SCOPE_GLOBAL) + return (sa6->sin6_scope_id ? EINVAL: 0); + if (IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr) && + scope != IPV6_ADDR_SCOPE_LINKLOCAL && + scope != IPV6_ADDR_SCOPE_INTFACELOCAL) { + if (sa6->sin6_scope_id == 0 && V_ip6_use_defzone != 0) + sa6->sin6_scope_id = V_sid_default.s6id_list[scope]; + return (0); + } + /* + * Since ::1 address always configured on the lo0, we can + * automatically set its zone id, when it is not specified. + * Return error, when specified zone id doesn't match with + * actual value. 
+ */ + if (IN6_IS_ADDR_LOOPBACK(&sa6->sin6_addr)) { + if (sa6->sin6_scope_id == 0) + sa6->sin6_scope_id = in6_getscopezone(V_loif, scope); + else if (sa6->sin6_scope_id != in6_getscopezone(V_loif, scope)) + return (EADDRNOTAVAIL); + } + /* XXX: we can validate sin6_scope_id here */ + if (sa6->sin6_scope_id != 0) + return (0); + if (V_ip6_use_defzone != 0) + sa6->sin6_scope_id = V_sid_default.s6id_list[scope]; + /* Return error if we can't determine zone id */ + return (sa6->sin6_scope_id ? 0: EADDRNOTAVAIL); +} + +/* + * This function is similar to sa6_checkzone, but it uses given ifp + * to initialize sin6_scope_id. + */ +int +sa6_checkzone_ifp(struct ifnet *ifp, struct sockaddr_in6 *sa6) +{ + int scope; + + scope = in6_addrscope(&sa6->sin6_addr); + if (scope == IPV6_ADDR_SCOPE_LINKLOCAL || + scope == IPV6_ADDR_SCOPE_INTFACELOCAL) { + if (sa6->sin6_scope_id == 0) { + sa6->sin6_scope_id = in6_getscopezone(ifp, scope); + return (0); + } else if (sa6->sin6_scope_id != in6_getscopezone(ifp, scope)) + return (EADDRNOTAVAIL); + } + return (sa6_checkzone(sa6)); +} + + diff --git a/freebsd/sys/netinet6/scope6_var.h b/freebsd/sys/netinet6/scope6_var.h index 990325e9..e38d77a9 100644 --- a/freebsd/sys/netinet6/scope6_var.h +++ b/freebsd/sys/netinet6/scope6_var.h @@ -34,14 +34,20 @@ #define _NETINET6_SCOPE6_VAR_H_ #ifdef _KERNEL +#include <net/vnet.h> + +#define IPV6_ADDR_SCOPES_COUNT 16 struct scope6_id { /* * 16 is correspondent to 4bit multicast scope field. * i.e. from node-local to global with some reserved/unassigned types. 
*/ - u_int32_t s6id_list[16]; + uint32_t s6id_list[IPV6_ADDR_SCOPES_COUNT]; }; +VNET_DECLARE(int, deembed_scopeid); +#define V_deembed_scopeid VNET(deembed_scopeid) + void scope6_init(void); struct scope6_id *scope6_ifattach(struct ifnet *); void scope6_ifdetach(struct scope6_id *); @@ -51,9 +57,14 @@ int scope6_get_default(struct scope6_id *); u_int32_t scope6_addr2default(struct in6_addr *); int sa6_embedscope(struct sockaddr_in6 *, int); int sa6_recoverscope(struct sockaddr_in6 *); +int sa6_checkzone(struct sockaddr_in6 *); +int sa6_checkzone_ifp(struct ifnet *, struct sockaddr_in6 *); int in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *); int in6_clearscope(struct in6_addr *); uint16_t in6_getscope(struct in6_addr *); +uint32_t in6_getscopezone(const struct ifnet *, int); +void in6_splitscope(const struct in6_addr *, struct in6_addr *, uint32_t *); +struct ifnet* in6_getlinkifnet(uint32_t); #endif /* _KERNEL */ #endif /* _NETINET6_SCOPE6_VAR_H_ */ diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c index c8bc6620..962a622e 100644 --- a/freebsd/sys/netinet6/sctp6_usrreq.c +++ b/freebsd/sys/netinet6/sctp6_usrreq.c @@ -41,9 +41,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_pcb.h> #include <netinet/sctp_header.h> #include <netinet/sctp_var.h> -#ifdef INET6 #include <netinet6/sctp6_var.h> -#endif #include <netinet/sctp_sysctl.h> #include <netinet/sctp_output.h> #include <netinet/sctp_uio.h> @@ -56,13 +54,12 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp_output.h> #include <netinet/sctp_bsd_addr.h> #include <netinet/sctp_crc32.h> +#include <netinet/icmp6.h> #include <netinet/udp.h> #ifdef IPSEC #include <netipsec/ipsec.h> -#ifdef INET6 #include <netipsec/ipsec6.h> -#endif /* INET6 */ #endif /* IPSEC */ extern struct protosw inetsw[]; @@ -85,7 +82,8 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) #endif uint32_t mflowid; - uint8_t use_mflowid; + uint8_t mflowtype; + uint16_t fibnum; iphlen = 
*offp; if (SCTP_GET_PKT_VRFID(*i_pak, vrf_id)) { @@ -96,13 +94,7 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) #ifdef SCTP_MBUF_LOGGING /* Log in any input mbufs */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { - struct mbuf *mat; - - for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) { - if (SCTP_BUF_IS_EXTENDED(mat)) { - sctp_log_mb(mat, SCTP_MBUF_INPUT); - } - } + sctp_log_mbc(m, SCTP_MBUF_INPUT); } #endif #ifdef SCTP_PACKET_LOGGING @@ -111,17 +103,13 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) } #endif SCTPDBG(SCTP_DEBUG_CRCOFFLOAD, - "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%x.\n", + "sctp6_input(): Packet of length %d received on %s with csum_flags 0x%b.\n", m->m_pkthdr.len, if_name(m->m_pkthdr.rcvif), - m->m_pkthdr.csum_flags); - if (m->m_flags & M_FLOWID) { - mflowid = m->m_pkthdr.flowid; - use_mflowid = 1; - } else { - mflowid = 0; - use_mflowid = 0; - } + (int)m->m_pkthdr.csum_flags, CSUM_BITS); + mflowid = m->m_pkthdr.flowid; + mflowtype = M_HASHTYPE_GET(m); + fibnum = M_GETFIB(m); SCTP_STAT_INCR(sctps_recvpackets); SCTP_STAT_INCR_COUNTER64(sctps_inpackets); /* Get IP, SCTP, and first chunk header together in the first mbuf. */ @@ -151,10 +139,6 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) if (in6_setscope(&dst.sin6_addr, m->m_pkthdr.rcvif, NULL) != 0) { goto out; } - if (faithprefix_p != NULL && (*faithprefix_p) (&dst.sin6_addr)) { - /* XXX send icmp6 host/port unreach? */ - goto out; - } length = ntohs(ip6->ip6_plen) + iphlen; /* Validate mbuf chain length with IP payload length. 
*/ if (SCTP_HEADER_LEN(m) != length) { @@ -186,7 +170,7 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) compute_crc, #endif ecn_bits, - use_mflowid, mflowid, + mflowtype, mflowid, fibnum, vrf_id, port); out: if (m) { @@ -202,250 +186,224 @@ sctp6_input(struct mbuf **i_pak, int *offp, int proto SCTP_UNUSED) return (sctp6_input_with_port(i_pak, offp, 0)); } -static void -sctp6_notify_mbuf(struct sctp_inpcb *inp, struct icmp6_hdr *icmp6, - struct sctphdr *sh, struct sctp_tcb *stcb, struct sctp_nets *net) -{ - uint32_t nxtsz; - - if ((inp == NULL) || (stcb == NULL) || (net == NULL) || - (icmp6 == NULL) || (sh == NULL)) { - goto out; - } - /* First do we even look at it? */ - if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) - goto out; - - if (icmp6->icmp6_type != ICMP6_PACKET_TOO_BIG) { - /* not PACKET TO BIG */ - goto out; - } - /* - * ok we need to look closely. We could even get smarter and look at - * anyone that we sent to in case we get a different ICMP that tells - * us there is no way to reach a host, but for this impl, all we - * care about is MTU discovery. - */ - nxtsz = ntohl(icmp6->icmp6_mtu); - /* Stop any PMTU timer */ - sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL, SCTP_FROM_SCTP6_USRREQ + SCTP_LOC_1); - - /* Adjust destination size limit */ - if (net->mtu > nxtsz) { - net->mtu = nxtsz; - if (net->port) { - net->mtu -= sizeof(struct udphdr); - } - } - /* now what about the ep? 
*/ - if (stcb->asoc.smallest_mtu > nxtsz) { - struct sctp_tmit_chunk *chk; - - /* Adjust that too */ - stcb->asoc.smallest_mtu = nxtsz; - /* now off to subtract IP_DF flag if needed */ - - TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) { - if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) { - chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; - } - } - TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) { - if ((uint32_t) (chk->send_size + IP_HDR_SIZE) > nxtsz) { - /* - * For this guy we also mark for immediate - * resend since we sent to big of chunk - */ - chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; - if (chk->sent != SCTP_DATAGRAM_RESEND) - stcb->asoc.sent_queue_retran_cnt++; - chk->sent = SCTP_DATAGRAM_RESEND; - chk->rec.data.doing_fast_retransmit = 0; - - chk->sent = SCTP_DATAGRAM_RESEND; - /* Clear any time so NO RTT is being done */ - chk->sent_rcv_time.tv_sec = 0; - chk->sent_rcv_time.tv_usec = 0; - stcb->asoc.total_flight -= chk->send_size; - net->flight_size -= chk->send_size; - } - } - } - sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, NULL); -out: - if (stcb) { - SCTP_TCB_UNLOCK(stcb); - } -} - - void sctp6_notify(struct sctp_inpcb *inp, - struct icmp6_hdr *icmph, - struct sctphdr *sh, - struct sockaddr *to, struct sctp_tcb *stcb, - struct sctp_nets *net) + struct sctp_nets *net, + uint8_t icmp6_type, + uint8_t icmp6_code, + uint16_t next_mtu) { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so; #endif - - /* protection */ - if ((inp == NULL) || (stcb == NULL) || (net == NULL) || - (sh == NULL) || (to == NULL)) { - if (stcb) - SCTP_TCB_UNLOCK(stcb); - return; - } - /* First job is to verify the vtag matches what I would send */ - if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) { - SCTP_TCB_UNLOCK(stcb); - return; - } - if (icmph->icmp6_type != ICMP_UNREACH) { - /* We only care about unreachable */ - SCTP_TCB_UNLOCK(stcb); - return; - } - if ((icmph->icmp6_code == ICMP_UNREACH_NET) || - (icmph->icmp6_code == 
ICMP_UNREACH_HOST) || - (icmph->icmp6_code == ICMP_UNREACH_NET_UNKNOWN) || - (icmph->icmp6_code == ICMP_UNREACH_HOST_UNKNOWN) || - (icmph->icmp6_code == ICMP_UNREACH_ISOLATED) || - (icmph->icmp6_code == ICMP_UNREACH_NET_PROHIB) || - (icmph->icmp6_code == ICMP_UNREACH_HOST_PROHIB) || - (icmph->icmp6_code == ICMP_UNREACH_FILTER_PROHIB)) { - - /* - * Hmm reachablity problems we must examine closely. If its - * not reachable, we may have lost a network. Or if there is - * NO protocol at the other end named SCTP. well we consider - * it a OOTB abort. - */ - if (net->dest_state & SCTP_ADDR_REACHABLE) { - /* Ok that destination is NOT reachable */ - net->dest_state &= ~SCTP_ADDR_REACHABLE; - net->dest_state &= ~SCTP_ADDR_PF; - sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, - stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); + int timer_stopped; + + switch (icmp6_type) { + case ICMP6_DST_UNREACH: + if ((icmp6_code == ICMP6_DST_UNREACH_NOROUTE) || + (icmp6_code == ICMP6_DST_UNREACH_ADMIN) || + (icmp6_code == ICMP6_DST_UNREACH_BEYONDSCOPE) || + (icmp6_code == ICMP6_DST_UNREACH_ADDR)) { + /* Mark the net unreachable. */ + if (net->dest_state & SCTP_ADDR_REACHABLE) { + /* Ok that destination is not reachable */ + net->dest_state &= ~SCTP_ADDR_REACHABLE; + net->dest_state &= ~SCTP_ADDR_PF; + sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, + stcb, 0, (void *)net, SCTP_SO_NOT_LOCKED); + } } SCTP_TCB_UNLOCK(stcb); - } else if ((icmph->icmp6_code == ICMP_UNREACH_PROTOCOL) || - (icmph->icmp6_code == ICMP_UNREACH_PORT)) { - /* - * Here the peer is either playing tricks on us, including - * an address that belongs to someone who does not support - * SCTP OR was a userland implementation that shutdown and - * now is dead. In either case treat it like a OOTB abort - * with no TCB - */ - sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED); + break; + case ICMP6_PARAM_PROB: + /* Treat it like an ABORT. 
*/ + if (icmp6_code == ICMP6_PARAMPROB_NEXTHEADER) { + sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - so = SCTP_INP_SO(inp); - atomic_add_int(&stcb->asoc.refcnt, 1); - SCTP_TCB_UNLOCK(stcb); - SCTP_SOCKET_LOCK(so, 1); - SCTP_TCB_LOCK(stcb); - atomic_subtract_int(&stcb->asoc.refcnt, 1); + so = SCTP_INP_SO(inp); + atomic_add_int(&stcb->asoc.refcnt, 1); + SCTP_TCB_UNLOCK(stcb); + SCTP_SOCKET_LOCK(so, 1); + SCTP_TCB_LOCK(stcb); + atomic_subtract_int(&stcb->asoc.refcnt, 1); #endif - (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2); + (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, + SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) - SCTP_SOCKET_UNLOCK(so, 1); - /* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */ + SCTP_SOCKET_UNLOCK(so, 1); #endif - /* no need to unlock here, since the TCB is gone */ - } else { + } else { + SCTP_TCB_UNLOCK(stcb); + } + break; + case ICMP6_PACKET_TOO_BIG: + if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) { + timer_stopped = 1; + sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net, + SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1); + } else { + timer_stopped = 0; + } + /* Update the path MTU. */ + if (net->mtu > next_mtu) { + net->mtu = next_mtu; + if (net->port) { + net->mtu -= sizeof(struct udphdr); + } + } + /* Update the association MTU */ + if (stcb->asoc.smallest_mtu > next_mtu) { + sctp_pathmtu_adjustment(stcb, next_mtu); + } + /* Finally, start the PMTU timer if it was running before. 
*/ + if (timer_stopped) { + sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net); + } SCTP_TCB_UNLOCK(stcb); + break; + default: + SCTP_TCB_UNLOCK(stcb); + break; } } - - void sctp6_ctlinput(int cmd, struct sockaddr *pktdst, void *d) { + struct ip6ctlparam *ip6cp; + struct sctp_inpcb *inp; + struct sctp_tcb *stcb; + struct sctp_nets *net; struct sctphdr sh; - struct ip6ctlparam *ip6cp = NULL; - uint32_t vrf_id; - - vrf_id = SCTP_DEFAULT_VRFID; + struct sockaddr_in6 src, dst; if (pktdst->sa_family != AF_INET6 || - pktdst->sa_len != sizeof(struct sockaddr_in6)) + pktdst->sa_len != sizeof(struct sockaddr_in6)) { return; - - if ((unsigned)cmd >= PRC_NCMDS) + } + if ((unsigned)cmd >= PRC_NCMDS) { return; + } if (PRC_IS_REDIRECT(cmd)) { d = NULL; } else if (inet6ctlerrmap[cmd] == 0) { return; } - /* if the parameter is from icmp6, decode it. */ + /* If the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; } else { ip6cp = (struct ip6ctlparam *)NULL; } - if (ip6cp) { + if (ip6cp != NULL) { /* * XXX: We assume that when IPV6 is non NULL, M and OFF are * valid. */ - /* check if we can safely examine src and dst ports */ - struct sctp_inpcb *inp = NULL; - struct sctp_tcb *stcb = NULL; - struct sctp_nets *net = NULL; - struct sockaddr_in6 final; - - if (ip6cp->ip6c_m == NULL) + if (ip6cp->ip6c_m == NULL) { return; - + } + /* + * Check if we can safely examine the ports and the + * verification tag of the SCTP common header. + */ + if (ip6cp->ip6c_m->m_pkthdr.len < + (int32_t) (ip6cp->ip6c_off + offsetof(struct sctphdr, checksum))) { + return; + } + /* Copy out the port numbers and the verification tag. 
*/ bzero(&sh, sizeof(sh)); - bzero(&final, sizeof(final)); + m_copydata(ip6cp->ip6c_m, + ip6cp->ip6c_off, + sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint32_t), + (caddr_t)&sh); + memset(&src, 0, sizeof(struct sockaddr_in6)); + src.sin6_family = AF_INET6; + src.sin6_len = sizeof(struct sockaddr_in6); + src.sin6_port = sh.src_port; + src.sin6_addr = ip6cp->ip6c_ip6->ip6_src; + if (in6_setscope(&src.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) { + return; + } + memset(&dst, 0, sizeof(struct sockaddr_in6)); + dst.sin6_family = AF_INET6; + dst.sin6_len = sizeof(struct sockaddr_in6); + dst.sin6_port = sh.dest_port; + dst.sin6_addr = ip6cp->ip6c_ip6->ip6_dst; + if (in6_setscope(&dst.sin6_addr, ip6cp->ip6c_m->m_pkthdr.rcvif, NULL) != 0) { + return; + } inp = NULL; net = NULL; - m_copydata(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(sh), - (caddr_t)&sh); - ip6cp->ip6c_src->sin6_port = sh.src_port; - final.sin6_len = sizeof(final); - final.sin6_family = AF_INET6; - final.sin6_addr = ((struct sockaddr_in6 *)pktdst)->sin6_addr; - final.sin6_port = sh.dest_port; - stcb = sctp_findassociation_addr_sa((struct sockaddr *)&final, - (struct sockaddr *)ip6cp->ip6c_src, - &inp, &net, 1, vrf_id); - /* inp's ref-count increased && stcb locked */ - if (stcb != NULL && inp && (inp->sctp_socket != NULL)) { - if (cmd == PRC_MSGSIZE) { - sctp6_notify_mbuf(inp, - ip6cp->ip6c_icmp6, - &sh, - stcb, - net); - /* inp's ref-count reduced && stcb unlocked */ + stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst, + (struct sockaddr *)&src, + &inp, &net, 1, SCTP_DEFAULT_VRFID); + if ((stcb != NULL) && + (net != NULL) && + (inp != NULL)) { + /* Check the verification tag */ + if (ntohl(sh.v_tag) != 0) { + /* + * This must be the verification tag used + * for sending out packets. We don't + * consider packets reflecting the + * verification tag. 
+ */ + if (ntohl(sh.v_tag) != stcb->asoc.peer_vtag) { + SCTP_TCB_UNLOCK(stcb); + return; + } } else { - sctp6_notify(inp, ip6cp->ip6c_icmp6, &sh, - (struct sockaddr *)&final, - stcb, net); - /* inp's ref-count reduced && stcb unlocked */ + if (ip6cp->ip6c_m->m_pkthdr.len >= + ip6cp->ip6c_off + sizeof(struct sctphdr) + + sizeof(struct sctp_chunkhdr) + + offsetof(struct sctp_init, a_rwnd)) { + /* + * In this case we can check if we + * got an INIT chunk and if the + * initiate tag matches. + */ + uint32_t initiate_tag; + uint8_t chunk_type; + + m_copydata(ip6cp->ip6c_m, + ip6cp->ip6c_off + + sizeof(struct sctphdr), + sizeof(uint8_t), + (caddr_t)&chunk_type); + m_copydata(ip6cp->ip6c_m, + ip6cp->ip6c_off + + sizeof(struct sctphdr) + + sizeof(struct sctp_chunkhdr), + sizeof(uint32_t), + (caddr_t)&initiate_tag); + if ((chunk_type != SCTP_INITIATION) || + (ntohl(initiate_tag) != stcb->asoc.my_vtag)) { + SCTP_TCB_UNLOCK(stcb); + return; + } + } else { + SCTP_TCB_UNLOCK(stcb); + return; + } } + sctp6_notify(inp, stcb, net, + ip6cp->ip6c_icmp6->icmp6_type, + ip6cp->ip6c_icmp6->icmp6_code, + (uint16_t) ntohl(ip6cp->ip6c_icmp6->icmp6_mtu)); } else { - if (PRC_IS_REDIRECT(cmd) && inp) { - in6_rtchange((struct in6pcb *)inp, - inet6ctlerrmap[cmd]); - } - if (inp) { + if ((stcb == NULL) && (inp != NULL)) { /* reduce inp's ref-count */ SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); } - if (stcb) + if (stcb) { SCTP_TCB_UNLOCK(stcb); + } } } } @@ -848,7 +806,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) #ifdef INET struct in6pcb *inp6; struct sockaddr_in6 *sin6; - struct sockaddr_storage ss; + union sctp_sockstore store; #endif @@ -932,8 +890,8 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) } if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { /* convert v4-mapped into v4 addr */ - in6_sin6_2_sin((struct sockaddr_in *)&ss, sin6); - addr = (struct sockaddr *)&ss; + in6_sin6_2_sin(&store.sin, sin6); + addr = 
&store.sa; } #endif /* INET */ /* Now do we connect? */ @@ -964,7 +922,9 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p) return (EALREADY); } /* We are GOOD to go */ - stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, p); + stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, + inp->sctp_ep.pre_open_stream_count, + inp->sctp_ep.port, p); SCTP_ASOC_CREATE_UNLOCK(inp); if (stcb == NULL) { /* Gak! no memory */ @@ -1023,7 +983,10 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr) stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb == NULL) { - goto notConn6; + SCTP_INP_RUNLOCK(inp); + SCTP_FREE_SONAME(sin6); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); + return (ENOENT); } fnd = 0; sin_a6 = NULL; @@ -1040,7 +1003,10 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr) } if ((!fnd) || (sin_a6 == NULL)) { /* punt */ - goto notConn6; + SCTP_INP_RUNLOCK(inp); + SCTP_FREE_SONAME(sin6); + SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); + return (ENOENT); } vrf_id = inp->def_vrf_id; sctp_ifa = sctp_source_address_selection(inp, stcb, (sctp_route_t *) & net->ro, net, 0, vrf_id); @@ -1049,7 +1015,6 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr) } } else { /* For the bound all case you get back 0 */ - notConn6: memset(&sin6->sin6_addr, 0, sizeof(sin6->sin6_addr)); } } else { @@ -1061,7 +1026,7 @@ sctp6_getaddr(struct socket *so, struct sockaddr **addr) if (laddr->ifa->address.sa.sa_family == AF_INET6) { struct sockaddr_in6 *sin_a; - sin_a = (struct sockaddr_in6 *)&laddr->ifa->address.sin6; + sin_a = &laddr->ifa->address.sin6; sin6->sin6_addr = sin_a->sin6_addr; fnd = 1; break; @@ -1138,8 +1103,11 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr) SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ENOENT); return (ENOENT); } - if ((error = sa6_recoverscope(sin6)) != 0) + if ((error = sa6_recoverscope(sin6)) != 0) { + SCTP_FREE_SONAME(sin6); + 
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, error); return (error); + } *addr = (struct sockaddr *)sin6; return (0); } @@ -1147,10 +1115,6 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr) static int sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) { -#ifdef INET - struct sockaddr *addr; - -#endif struct in6pcb *inp6 = sotoin6pcb(so); int error; @@ -1162,19 +1126,21 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) error = sctp6_getaddr(so, nam); #ifdef INET if (error) { + struct sockaddr_in6 *sin6; + /* try v4 next if v6 failed */ error = sctp_ingetaddr(so, nam); if (error) { return (error); } - addr = *nam; - /* if I'm V6ONLY, convert it to v4-mapped */ - if (SCTP_IPV6_V6ONLY(inp6)) { - struct sockaddr_in6 sin6; - - in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6); - memcpy(addr, &sin6, sizeof(struct sockaddr_in6)); + SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); + if (sin6 == NULL) { + SCTP_FREE_SONAME(*nam); + return (ENOMEM); } + in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6); + SCTP_FREE_SONAME(*nam); + *nam = (struct sockaddr *)sin6; } #endif return (error); @@ -1184,10 +1150,6 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam) static int sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam) { -#ifdef INET - struct sockaddr *addr; - -#endif struct in6pcb *inp6 = sotoin6pcb(so); int error; @@ -1199,19 +1161,21 @@ sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam) error = sctp6_peeraddr(so, nam); #ifdef INET if (error) { + struct sockaddr_in6 *sin6; + /* try v4 next if v6 failed */ error = sctp_peeraddr(so, nam); if (error) { return (error); } - addr = *nam; - /* if I'm V6ONLY, convert it to v4-mapped */ - if (SCTP_IPV6_V6ONLY(inp6)) { - struct sockaddr_in6 sin6; - - in6_sin_2_v4mapsin6((struct sockaddr_in *)addr, &sin6); - memcpy(addr, &sin6, sizeof(struct sockaddr_in6)); + SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6); + if (sin6 == NULL) 
{ + SCTP_FREE_SONAME(*nam); + return (ENOMEM); } + in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, sin6); + SCTP_FREE_SONAME(*nam); + *nam = (struct sockaddr *)sin6; } #endif return (error); diff --git a/freebsd/sys/netinet6/sctp6_var.h b/freebsd/sys/netinet6/sctp6_var.h index 79d4c52b..782567c5 100644 --- a/freebsd/sys/netinet6/sctp6_var.h +++ b/freebsd/sys/netinet6/sctp6_var.h @@ -47,10 +47,9 @@ int sctp6_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); void sctp6_ctlinput(int, struct sockaddr *, void *); -extern void -sctp6_notify(struct sctp_inpcb *, struct icmp6_hdr *, - struct sctphdr *, struct sockaddr *, - struct sctp_tcb *, struct sctp_nets *); +void +sctp6_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, + uint8_t, uint8_t, uint16_t); #endif #endif diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c index 8342cf7c..790bed2b 100644 --- a/freebsd/sys/netinet6/udp6_usrreq.c +++ b/freebsd/sys/netinet6/udp6_usrreq.c @@ -3,6 +3,7 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. + * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. 
Watson under @@ -73,8 +74,8 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipfw.h> #include <rtems/bsd/local/opt_ipsec.h> +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/jail.h> @@ -84,6 +85,7 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/proc.h> #include <sys/protosw.h> +#include <sys/sdt.h> #include <sys/signalvar.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -93,10 +95,13 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <net/if.h> +#include <net/if_var.h> #include <net/if_types.h> #include <net/route.h> +#include <net/rss_config.h> #include <netinet/in.h> +#include <netinet/in_kdtrace.h> #include <netinet/in_pcb.h> #include <netinet/in_systm.h> #include <netinet/in_var.h> @@ -108,10 +113,12 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip_var.h> #include <netinet/udp.h> #include <netinet/udp_var.h> +#include <netinet/udplite.h> #include <netinet6/ip6protosw.h> #include <netinet6/ip6_var.h> #include <netinet6/in6_pcb.h> +#include <netinet6/in6_rss.h> #include <netinet6/udp6_var.h> #include <netinet6/scope6_var.h> @@ -130,27 +137,39 @@ __FBSDID("$FreeBSD$"); extern struct protosw inetsw[]; static void udp6_detach(struct socket *so); -static void +static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts; + struct udpcb *up; INP_LOCK_ASSERT(inp); + /* + * Engage the tunneling protocol. + */ + up = intoudpcb(inp); + if (up->u_tun_func != NULL) { + in_pcbref(inp); + INP_RUNLOCK(inp); + (*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa, + up->u_tun_ctx); + INP_RLOCK(inp); + return (in_pcbrele_rlocked(inp)); + } #ifdef IPSEC /* Check AH/ESP integrity. 
*/ if (ipsec6_in_reject(n, inp)) { m_freem(n); - IPSEC6STAT_INC(in_polvio); - return; + return (0); } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); - return; + return (0); } #endif opts = NULL; @@ -170,6 +189,7 @@ udp6_append(struct inpcb *inp, struct mbuf *n, int off, UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); + return (0); } int @@ -180,22 +200,19 @@ udp6_input(struct mbuf **mp, int *offp, int proto) struct ip6_hdr *ip6; struct udphdr *uh; struct inpcb *inp; + struct inpcbinfo *pcbinfo; struct udpcb *up; int off = *offp; + int cscov_partial; int plen, ulen; struct sockaddr_in6 fromsa; struct m_tag *fwd_tag; uint16_t uh_sum; + uint8_t nxt; ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); - if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { - /* XXX send icmp6 host/port unreach? */ - m_freem(m); - return (IPPROTO_DONE); - } - #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); @@ -217,28 +234,43 @@ udp6_input(struct mbuf **mp, int *offp, int proto) plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); - if (plen != ulen) { - UDPSTAT_INC(udps_badlen); - goto badunlocked; - } - - /* - * Checksum extended UDP header and data. - */ - if (uh->uh_sum == 0) { - UDPSTAT_INC(udps_nosum); - goto badunlocked; + nxt = proto; + cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0; + if (nxt == IPPROTO_UDPLITE) { + /* Zero means checksum over the complete packet. */ + if (ulen == 0) + ulen = plen; + if (ulen == plen) + cscov_partial = 0; + if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) { + /* XXX: What is the right UDPLite MIB counter? */ + goto badunlocked; + } + if (uh->uh_sum == 0) { + /* XXX: What is the right UDPLite MIB counter? 
*/ + goto badunlocked; + } + } else { + if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) { + UDPSTAT_INC(udps_badlen); + goto badunlocked; + } + if (uh->uh_sum == 0) { + UDPSTAT_INC(udps_nosum); + goto badunlocked; + } } - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { + if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) && + !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else - uh_sum = in6_cksum_pseudo(ip6, ulen, - IPPROTO_UDP, m->m_pkthdr.csum_data); + uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, + m->m_pkthdr.csum_data); uh_sum ^= 0xffff; } else - uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen); + uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); @@ -251,11 +283,13 @@ udp6_input(struct mbuf **mp, int *offp, int proto) init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; + pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; + struct inpcbhead *pcblist; struct ip6_moptions *imo; - INP_INFO_RLOCK(&V_udbinfo); + INP_INFO_RLOCK(pcbinfo); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address @@ -271,8 +305,9 @@ udp6_input(struct mbuf **mp, int *offp, int proto) * here. We need udphdr for IPsec processing so we do that * later. */ + pcblist = udp_get_pcblist(nxt); last = NULL; - LIST_FOREACH(inp, &V_udb, inp_list) { + LIST_FOREACH(inp, pcblist, inp_list) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (inp->inp_lport != uh->uh_dport) @@ -335,20 +370,10 @@ udp6_input(struct mbuf **mp, int *offp, int proto) if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { INP_RLOCK(last); - up = intoudpcb(last); - if (up->u_tun_func == NULL) { - udp6_append(last, n, off, &fromsa); - } else { - /* - * Engage the tunneling - * protocol we will have to - * leave the info_lock up, - * since we are hunting - * through multiple UDP's. 
- * - */ - (*up->u_tun_func)(n, off, last); - } + UDP_PROBE(receive, NULL, last, ip6, + last, uh); + if (udp6_append(last, n, off, &fromsa)) + goto inp_lost; INP_RUNLOCK(last); } } @@ -377,17 +402,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto) goto badheadlocked; } INP_RLOCK(last); - INP_INFO_RUNLOCK(&V_udbinfo); - up = intoudpcb(last); - if (up->u_tun_func == NULL) { - udp6_append(last, m, off, &fromsa); - } else { - /* - * Engage the tunneling protocol. - */ - (*up->u_tun_func)(m, off, last); - } - INP_RUNLOCK(last); + INP_INFO_RUNLOCK(pcbinfo); + UDP_PROBE(receive, NULL, last, ip6, last, uh); + if (udp6_append(last, m, off, &fromsa) == 0) + INP_RUNLOCK(last); + inp_lost: return (IPPROTO_DONE); } /* @@ -407,8 +426,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto) * Transparently forwarded. Pretend to be the destination. * Already got one like this? */ - inp = in6_pcblookup_mbuf(&V_udbinfo, - &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, + inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, + uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (!inp) { /* @@ -416,7 +435,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) * Because we've rewritten the destination address, * any hardware-generated hash is ignored. */ - inp = in6_pcblookup(&V_udbinfo, &ip6->ip6_src, + inp = in6_pcblookup(pcbinfo, &ip6->ip6_src, uh->uh_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? 
htons(next_hop6->sin6_port) : uh->uh_dport, INPLOOKUP_WILDCARD | @@ -426,7 +445,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto) m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP6_NEXTHOP; } else - inp = in6_pcblookup_mbuf(&V_udbinfo, &ip6->ip6_src, + inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); @@ -457,28 +476,29 @@ udp6_input(struct mbuf **mp, int *offp, int proto) } INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); - if (up->u_tun_func == NULL) { - udp6_append(inp, m, off, &fromsa); - } else { - /* - * Engage the tunneling protocol. - */ - - (*up->u_tun_func)(m, off, inp); + if (cscov_partial) { + if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) { + INP_RUNLOCK(inp); + m_freem(m); + return (IPPROTO_DONE); + } } - INP_RUNLOCK(inp); + UDP_PROBE(receive, NULL, inp, ip6, inp, uh); + if (udp6_append(inp, m, off, &fromsa) == 0) + INP_RUNLOCK(inp); return (IPPROTO_DONE); badheadlocked: - INP_INFO_RUNLOCK(&V_udbinfo); + INP_INFO_RUNLOCK(pcbinfo); badunlocked: if (m) m_freem(m); return (IPPROTO_DONE); } -void -udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) +static void +udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d, + struct inpcbinfo *pcbinfo) { struct udphdr uh; struct ip6_hdr *ip6; @@ -534,14 +554,51 @@ udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); - (void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport, + if (!PRC_IS_REDIRECT(cmd)) { + /* Check to see if its tunneled */ + struct inpcb *inp; + inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst, + uh.uh_dport, &ip6->ip6_src, uh.uh_sport, + INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, + m->m_pkthdr.rcvif, m); + if (inp != NULL) { + struct udpcb *up; + + up = intoudpcb(inp); + if (up->u_icmp_func) { + /* Yes it is. 
*/ + INP_RUNLOCK(inp); + (*up->u_icmp_func)(cmd, (struct sockaddr *)ip6cp->ip6c_src, + d, up->u_tun_ctx); + return; + } else { + /* Can't find it. */ + INP_RUNLOCK(inp); + } + } + } + (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else - (void) in6_pcbnotify(&V_udbinfo, sa, 0, + (void)in6_pcbnotify(pcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } +void +udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) +{ + + return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo)); +} + +void +udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d) +{ + + return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo)); +} + static int udp6_getcred(SYSCTL_HANDLER_ARGS) { @@ -598,10 +655,12 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; - struct ifnet *oifp = NULL; + int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; int error = 0; + uint8_t nxt; + uint16_t cscov = 0; struct ip6_pktopts *optp, opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; @@ -632,9 +691,11 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, return (error); } + nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? + IPPROTO_UDP : IPPROTO_UDPLITE; if (control) { if ((error = ip6_setpktopts(control, &opt, - inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0) + inp->in6p_outputopts, td->td_ucred, nxt)) != 0) goto release; optp = &opt; } else @@ -644,8 +705,6 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, faddr = &sin6->sin6_addr; /* - * IPv4 version of udp_output calls in_pcbconnect in this case, - * which needs splnet and affects performance. 
* Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address @@ -695,15 +754,10 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { - error = in6_selectsrc(sin6, optp, inp, NULL, - td->td_ucred, &oifp, &in6a); + error = in6_selectsrc_socket(sin6, optp, inp, + td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; - if (oifp && scope_ambiguous && - (error = in6_setscope(&sin6->sin6_addr, - oifp, NULL))) { - goto release; - } laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ @@ -751,8 +805,8 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, * Calculate data length and get a mbuf * for UDP and IP6 headers. */ - M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT); - if (m == 0) { + M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); + if (m == NULL) { error = ENOBUFS; goto release; } @@ -763,7 +817,20 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; - if (plen <= 0xffff) + if (nxt == IPPROTO_UDPLITE) { + struct udpcb *up; + + up = intoudpcb(inp); + cscov = up->u_txcslen; + if (cscov >= plen) + cscov = 0; + udp6->uh_ulen = htons(cscov); + /* + * For UDP-Lite, checksum coverage length of zero means + * the entire UDPLite packet is covered by the checksum. + */ + cscov_partial = (cscov == 0) ? 0 : 1; + } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; @@ -775,23 +842,66 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; -#if 0 /* ip6_plen will be filled in ip6_output. 
*/ ip6->ip6_plen = htons((u_short)plen); -#endif - ip6->ip6_nxt = IPPROTO_UDP; + ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; - udp6->uh_sum = in6_cksum_pseudo(ip6, plen, IPPROTO_UDP, 0); - m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; - m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + if (cscov_partial) { + if ((udp6->uh_sum = in6_cksum_partial(m, nxt, + sizeof(struct ip6_hdr), plen, cscov)) == 0) + udp6->uh_sum = 0xffff; + } else { + udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + } + +#ifdef RSS + { + uint32_t hash_val, hash_type; + uint8_t pr; + pr = inp->inp_socket->so_proto->pr_protocol; + /* + * Calculate an appropriate RSS hash for UDP and + * UDP Lite. + * + * The called function will take care of figuring out + * whether a 2-tuple or 4-tuple hash is required based + * on the currently configured scheme. + * + * Later later on connected socket values should be + * cached in the inpcb and reused, rather than constantly + * re-calculating it. + * + * UDP Lite is a different protocol number and will + * likely end up being hashed as a 2-tuple until + * RSS / NICs grow UDP Lite protocol awareness. + */ + if (rss_proto_software_hash_v6(faddr, laddr, fport, + inp->inp_lport, pr, &hash_val, &hash_type) == 0) { + m->m_pkthdr.flowid = hash_val; + M_HASHTYPE_SET(m, hash_type); + } + } +#endif flags = 0; +#ifdef RSS + /* + * Don't override with the inp cached flowid. + * + * Until the whole UDP path is vetted, it may actually + * be incorrect. 
+ */ + flags |= IP_NODEFAULTFLOWID; +#endif + UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); - error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions, - NULL, inp); + error = ip6_output(m, optp, NULL, flags, + inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT; @@ -814,26 +924,32 @@ static void udp6_abort(struct socket *so) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); + INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; + uint8_t nxt; - pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? + IPPROTO_UDP : IPPROTO_UDPLITE; + INP_WUNLOCK(inp); + pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_abort)(so); return; } #endif - INP_WLOCK(inp); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { - INP_HASH_WLOCK(&V_udbinfo); + INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; - INP_HASH_WUNLOCK(&V_udbinfo); + INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); @@ -843,8 +959,10 @@ static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; int error; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); @@ -853,10 +971,10 @@ udp6_attach(struct socket *so, int proto, struct thread *td) if (error) return (error); } - INP_INFO_WLOCK(&V_udbinfo); - error = in_pcballoc(so, &V_udbinfo); + INP_INFO_WLOCK(pcbinfo); + error = in_pcballoc(so, pcbinfo); if (error) { - INP_INFO_WUNLOCK(&V_udbinfo); + INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; @@ -877,11 +995,11 @@ udp6_attach(struct socket *so, int proto, struct thread *td) if (error) { in_pcbdetach(inp); in_pcbfree(inp); - 
INP_INFO_WUNLOCK(&V_udbinfo); + INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); - INP_INFO_WUNLOCK(&V_udbinfo); + INP_INFO_WUNLOCK(pcbinfo); return (0); } @@ -889,13 +1007,15 @@ static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; int error; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); INP_WLOCK(inp); - INP_HASH_WLOCK(&V_udbinfo); + INP_HASH_WLOCK(pcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { @@ -923,7 +1043,7 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) #ifdef INET out: #endif - INP_HASH_WUNLOCK(&V_udbinfo); + INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } @@ -932,25 +1052,31 @@ static void udp6_close(struct socket *so) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_close: inp == NULL")); + INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; + uint8_t nxt; - pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? 
+ IPPROTO_UDP : IPPROTO_UDPLITE; + INP_WUNLOCK(inp); + pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_disconnect)(so); return; } #endif - INP_WLOCK(inp); if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { - INP_HASH_WLOCK(&V_udbinfo); + INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; - INP_HASH_WUNLOCK(&V_udbinfo); + INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); @@ -960,9 +1086,11 @@ static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; struct sockaddr_in6 *sin6; int error; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); sin6 = (struct sockaddr_in6 *)nam; KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); @@ -989,10 +1117,10 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; - INP_HASH_WLOCK(&V_udbinfo); + INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred); - INP_HASH_WUNLOCK(&V_udbinfo); + INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); goto out; @@ -1007,9 +1135,9 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; - INP_HASH_WLOCK(&V_udbinfo); + INP_HASH_WLOCK(pcbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); - INP_HASH_WUNLOCK(&V_udbinfo); + INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); out: @@ -1021,18 +1149,20 @@ static void udp6_detach(struct socket *so) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; struct udpcb *up; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); - INP_INFO_WLOCK(&V_udbinfo); + INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); 
in_pcbdetach(inp); in_pcbfree(inp); - INP_INFO_WUNLOCK(&V_udbinfo); + INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } @@ -1040,32 +1170,37 @@ static int udp6_disconnect(struct socket *so) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; int error; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); + INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; + uint8_t nxt; - pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? + IPPROTO_UDP : IPPROTO_UDPLITE; + INP_WUNLOCK(inp); + pru = inetsw[ip_protox[nxt]].pr_usrreqs; (void)(*pru->pru_disconnect)(so); return (0); } #endif - INP_WLOCK(inp); - if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto out; } - INP_HASH_WLOCK(&V_udbinfo); + INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; - INP_HASH_WUNLOCK(&V_udbinfo); + INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); @@ -1079,8 +1214,10 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; + struct inpcbinfo *pcbinfo; int error = 0; + pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_send: inp == NULL")); @@ -1099,9 +1236,9 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; - struct sockaddr_in6 *sin6 = 0; + struct sockaddr_in6 *sin6 = NULL; - if (addr == 0) + if (addr == NULL) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; @@ -1110,7 +1247,10 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, } if (hasv4addr) { struct pr_usrreqs *pru; + uint8_t nxt; + nxt = (inp->inp_socket->so_proto->pr_protocol == + IPPROTO_UDP) ? 
IPPROTO_UDP : IPPROTO_UDPLITE; /* * XXXRW: We release UDP-layer locks before calling * udp_send() in order to avoid recursion. However, @@ -1122,7 +1262,7 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, INP_WUNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock(addr); - pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs; + pru = inetsw[ip_protox[nxt]].pr_usrreqs; /* addr will just be freed in sendit(). */ return ((*pru->pru_send)(so, flags, m, addr, control, td)); @@ -1132,11 +1272,9 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif - INP_HASH_WLOCK(&V_udbinfo); + INP_HASH_WLOCK(pcbinfo); error = udp6_output(inp, m, addr, control, td); - INP_HASH_WUNLOCK(&V_udbinfo); -#ifdef INET -#endif + INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); diff --git a/freebsd/sys/netinet6/udp6_var.h b/freebsd/sys/netinet6/udp6_var.h index ae53c5a8..cdab98b0 100644 --- a/freebsd/sys/netinet6/udp6_var.h +++ b/freebsd/sys/netinet6/udp6_var.h @@ -69,6 +69,7 @@ SYSCTL_DECL(_net_inet6_udp6); extern struct pr_usrreqs udp6_usrreqs; void udp6_ctlinput(int, struct sockaddr *, void *); +void udplite6_ctlinput(int, struct sockaddr *, void *); int udp6_input(struct mbuf **, int *, int); #endif |